Source code for tcutility.formula

import re
from typing import Union

from scm import plams


def parse_molecule(molecule: plams.Molecule) -> str:
    """
    Build the molstring of a molecule, one formula per separate fragment.

    Bonds are guessed first (this calls ``guess_bonds()`` on the molecule that is passed in),
    after which the molecule is separated into its parts. The formula of every part is written
    as element symbols followed by their counts, where a count of one is left out.

    Args:
        molecule: ``plams.Molecule`` object to be parsed.

    Returns:
        The formulas of all parts joined by `` + ``, ready to be handed to :func:`molecule`
        for further formatting.
    """
    # fragments can only be told apart once the bonds are known
    molecule.guess_bonds()

    fragment_formulas = []
    for fragment in molecule.separate():
        # mapping of element symbol -> number of atoms of that element
        element_counts = fragment.get_formula(True)

        pieces = []
        for symbol, count in element_counts.items():
            if count > 1:
                pieces.append(symbol + str(count))
            else:
                # a count of one stays implicit, so we get CH3Cl and not C1H3Cl1
                pieces.append(symbol)
        fragment_formulas.append("".join(pieces))

    return " + ".join(fragment_formulas)
def _tex_script(marker: str, run: str) -> str:
    """Return ``run`` as a TeX sub-/superscript introduced by ``marker``, braced when longer than one character."""
    return f"{marker}{run}" if len(run) == 1 else f"{marker}{{{run}}}"


def _format_part(part: str, mode: str) -> str:
    """Apply the sub-/superscript markup of ``mode`` to a single whitespace-free formula token."""
    if mode in ("latex", "pyplot"):
        # consecutive digits must share one subscript group, ``_1_8`` is a "double subscript" error in TeX
        part = re.sub(r"[0-9]+", lambda match: _tex_script("_", match.group()), part)
        # letters are set upright; the markup added above contains no letters, so only the symbols are wrapped
        part = "".join(f"\\mathrm{{{char}}}" if char.isalpha() else char for char in part)
        # negative charges should be denoted by em dash and not a normal dash
        return re.sub(r"[+\-•]+", lambda match: _tex_script("^", match.group().replace("-", "—")), part)

    if mode == "html":
        # translate maps every character exactly once, so inserted tags are never re-processed
        table = {ord(digit): f"<sub>{digit}</sub>" for digit in "0123456789"}
        table.update({ord(sign): f"<sup>{sign.replace('-', '—')}</sup>" for sign in "+-•"})
        return part.translate(table)

    if mode == "unicode":
        # there is no dedicated superscript for the radical dot, it is kept as it is
        return part.translate(str.maketrans("0123456789+-", "₀₁₂₃₄₅₆₇₈₉⁺⁻"))

    # unknown modes leave the part untouched
    return part


def molecule(molecule: Union[str, plams.Molecule], mode: str = "unicode") -> str:
    """
    Parse and return a string containing a molecular formula that will show up properly in LaTeX, HTML or unicode.

    Every whitespace-separated part of the input is formatted on its own and put back in its
    original position, so a formula that also occurs inside another formula
    (e.g. ``C2H2`` in ``C2H2CH3``) cannot disturb the formatting of the other one.
    The whitespace of the input is kept as it is.

    Args:
        molecule: ``plams.Molecule`` object or a string that contains the molecular formula to be parsed.
            It can be either single molecule or a reaction. Molecules should be separated by ``+`` or ``->``,
            with whitespace around the separator. Anything that is not a string is first converted
            with :func:`parse_molecule`.
        mode: the formatter to convert the string to. Should be ``unicode``, ``html``, ``latex``, ``pyplot``.
            For any other value only ``*`` is replaced by ``•``.

    Returns:
        A string that is formatted to be rendered nicely in the chosen mode. In the returned strings any numbers
        will be subscripted and ``+``, ``-``, ``*`` and ``•`` will be superscripted (in ``unicode`` mode the
        radical dot ``•`` is kept as a normal character). ``*`` is always written as ``•`` and, except in
        ``unicode`` mode, ``-`` is written as an em dash. For ``latex`` and ``pyplot`` modes we apply
        ``\\mathrm`` to letters and group numbers of more than one digit in braces (``_{18}``).
        In ``pyplot`` mode the result is enclosed in ``$`` signs.

    Examples:
        >>> molecule('C9H18NO*')
        'C₉H₁₈NO•'

        >>> molecule('C2H2 + CH3* -> C2H2CH3', mode='html')
        'C<sub>2</sub>H<sub>2</sub> + CH<sub>3</sub><sup>•</sup> -> C<sub>2</sub>H<sub>2</sub>CH<sub>3</sub>'

    .. seealso::
        The :func:`parse_molecule` function is used to convert ``plams.Molecule`` objects to a molecular formula.
    """
    if isinstance(molecule, str):
        molstring = molecule
    else:
        molstring = parse_molecule(molecule)

    molstring = molstring.replace("*", "•")

    # the capturing group keeps the whitespace runs in the result: even indices hold
    # the parts of the molstring, odd indices hold the whitespace between them
    tokens = re.split(r"(\s+)", molstring)
    for index in range(0, len(tokens), 2):
        # a bare plus-sign or arrow denotes a reaction and must not be turned into a superscript
        if tokens[index] in ("+", "->"):
            continue
        tokens[index] = _format_part(tokens[index], mode)
    molstring = "".join(tokens)

    if mode == "pyplot":
        return f"${molstring}$"
    return molstring
if __name__ == "__main__":
    # quick manual check when the module is run as a script:
    # format a radical in LaTeX notation and show the result
    demo_formula = molecule('NMe2*', mode='latex')
    print(demo_formula)