Source code for tcutility.formula

import re
from typing import Union

from scm import plams



[docs]
def parse_molecule(molecule: plams.Molecule) -> str:
    """
    Analyse a molecule and return the molstring describing its parts. Each part will then be separated by a ``+`` sign in the new string.

    The input molecule is not modified: bonds are guessed on a copy.

    Args:
        molecule: ``plams.Molecule`` object to be parsed.

    Returns:
        A string that contains each part of the molecule separated by a ``+`` sign, for use in :func:`molecule` function for further formatting.
        Element counts of 1 are omitted, so a part reads e.g. ``CH3Cl`` rather than ``C1H3Cl1``.
    """
    # Splitting into parts needs bonds. PLAMS documents guess_bonds() as replacing
    # any existing bonds, so run it on a copy to keep the caller's molecule intact.
    working_copy = molecule.copy()
    working_copy.guess_bonds()

    parts = []
    for fragment in working_copy.separate():
        # get_formula(True) returns a mapping of element symbol -> count
        counts = fragment.get_formula(True)
        # only write the count when it is larger than 1
        parts.append("".join(f"{symbol}{count if count > 1 else ''}" for symbol, count in counts.items()))

    return " + ".join(parts)




[docs]
def molecule(molecule: "Union[str, plams.Molecule]", mode: str = "unicode") -> str:
    """
    Parse and return a string containing a molecular formula that will show up properly in LaTeX, HTML or unicode.

    Args:
        molecule: ``plams.Molecule`` object or a string that contains the molecular formula to be parsed.
            It can be either single molecule or a reaction. Molecules should be separated by ``+`` or ``->``,
            surrounded by whitespace. Anything that is not a string is converted with :func:`parse_molecule`.
        mode: the formatter to convert the string to. Should be ``unicode``, ``html``, ``latex``, ``pyplot``.
            Any other value leaves the formula unformatted (only ``*`` is replaced by ``•``).

    Returns:
        A string that is formatted to be rendered nicely in either HTML or LaTeX.
        In the returned strings any numbers will be subscripted and ``+``, ``-``, ``*`` and ``•`` will be superscripted.
        For ``latex`` and ``pyplot`` modes we apply ``\\mathrm`` to letters and multi-digit numbers are
        grouped in a single subscript (``_{18}``). For ``pyplot`` the result is wrapped in ``$...$``.

    Examples:
        >>> molecule('C9H18NO*')
        'C₉H₁₈NO•'

        >>> molecule('C2H2 + CH3* -> C2H2CH3', mode='html')
        'C<sub>2</sub>H<sub>2</sub> + CH<sub>3</sub><sup>•</sup> -> C<sub>2</sub>H<sub>2</sub>CH<sub>3</sub>'

    .. seealso::
        The :func:`parse_molecule` function is used to convert ``plams.Molecule`` objects to a molecular formula.

    """
    if isinstance(molecule, str):
        molstring = molecule
    else:
        molstring = parse_molecule(molecule)

    molstring = molstring.replace("*", "•")

    # Split on whitespace but keep the separators (capturing group), so every token is
    # formatted exactly once and the original spacing is reproduced. Formatting the
    # tokens in place, instead of str.replace on the whole string, keeps a part that
    # is a substring of another part (C2H2 vs. C2H2CH3) from corrupting it.
    tokens = re.split(r"(\s+)", molstring)
    formatted = []
    for token in tokens:
        # whitespace and stand-alone reaction operators are copied through untouched
        if not token or token.isspace() or token in _REACTION_OPERATORS:
            formatted.append(token)
        else:
            formatted.append(_format_part(token, mode))
    molstring = "".join(formatted)

    if mode == "pyplot":
        return f"${molstring}$"

    return molstring


# Tokens that denote a reaction rather than a charge when they stand on their own.
_REACTION_OPERATORS = ("+", "->")
# One-pass unicode conversion: digits to subscripts, charges to superscripts.
_UNICODE_TABLE = str.maketrans("0123456789+-", "₀₁₂₃₄₅₆₇₈₉⁺⁻")
_DIGIT = re.compile(r"[0-9]")
_DIGIT_RUN = re.compile(r"[0-9]+")
_SIGN = re.compile(r"[+\-•]")
# Negative charges are shown with an em dash instead of a plain hyphen.
_SIGN_DISPLAY = {"+": "+", "-": "—", "•": "•"}


def _latex_subscript(match) -> str:
    """Return the LaTeX subscript for a run of digits, bracing runs longer than one digit."""
    digits = match.group()
    # `_1_8` is a double subscript (an error in LaTeX/mathtext); `_{18}` is not.
    return f"_{digits}" if len(digits) == 1 else f"_{{{digits}}}"


def _format_part(part: str, mode: str) -> str:
    """Format a single whitespace-free formula token for the requested ``mode``."""
    if mode == "unicode":
        return part.translate(_UNICODE_TABLE)

    if mode == "html":
        part = _DIGIT.sub(lambda m: f"<sub>{m.group()}</sub>", part)
        return _SIGN.sub(lambda m: f"<sup>{_SIGN_DISPLAY[m.group()]}</sup>", part)

    if mode in ("latex", "pyplot"):
        # numbers first, so that the braces/underscores are in place before letters are wrapped
        part = _DIGIT_RUN.sub(_latex_subscript, part)
        part = "".join(rf"\mathrm{{{char}}}" if char.isalpha() else char for char in part)
        return _SIGN.sub(lambda m: f"^{_SIGN_DISPLAY[m.group()]}", part)

    # unknown mode: leave the token as it is
    return part



if __name__ == "__main__":
    # Quick manual check of the LaTeX formatter when the module is run as a script.
    print(molecule('NMe2*', mode='latex'))