# Source code for tcutility.formula
import re
from typing import Union

from scm import plams
[docs]
def parse_molecule(molecule: plams.Molecule) -> str:
    """
    Analyse a molecule and return the molstring describing its parts. Each part will then be separated by a ``+`` sign in the new string.

    The molecule that is passed in is left untouched: bonds are guessed on a copy, so any bonds
    already defined on the caller's object are not overwritten.

    Args:
        molecule: ``plams.Molecule`` object to be parsed.

    Returns:
        A string that contains each part of the molecule separated by a ``+`` sign, for use in :func:`molecule` function for further formatting.
    """
    # to separate a molecule we need to have bonds. Guessing them rewrites the bond list of the
    # object it is called on, so we do it on a copy instead of on the caller's molecule
    working_copy = molecule.copy()
    working_copy.guess_bonds()

    parts = []
    # go through each part (connected fragment) of the molecule
    for part in working_copy.separate():
        # get a dictionary of counts for each element
        form = part.get_formula(True)
        # add all elements with their number, but only if the number is larger than 1.
        # this prevents the creation of strings containing e.g. C1H3Cl2
        parts.append("".join(sym + (str(num) if num > 1 else "") for sym, num in form.items()))

    return " + ".join(parts)
[docs]
def molecule(molecule: Union[str, plams.Molecule], mode: str = "unicode") -> str:
    """
    Parse and return a string containing a molecular formula that will show up properly in LaTeX, HTML or unicode.

    Args:
        molecule: ``plams.Molecule`` object or a string that contains the molecular formula to be parsed.
            It can be either single molecule or a reaction. Molecules should be separated by ``+`` or ``->``.
            Anything that is not a string is handed to :func:`parse_molecule` first.
        mode: the formatter to convert the string to. Should be ``unicode``, ``html``, ``latex``, ``pyplot``.
            For any other value only the ``*`` to ``•`` substitution is applied.

    Returns:
        A string that is formatted to be rendered nicely in either HTML or LaTeX.
        In the returned strings any numbers will be subscripted and ``+``, ``-``, ``*`` and ``•`` will be superscripted.
        For ``latex`` and ``pyplot`` modes we apply ``\\mathrm`` to letters, and numbers with more than one digit
        are grouped in braces (``_{18}``) so they form a single subscript.
        For ``pyplot`` mode the result is additionally wrapped in ``$`` signs.
        Whitespace of the input is preserved.

    Examples:
        >>> molecule('C9H18NO*')
        'C₉H₁₈NO•'
        >>> molecule('C2H2 + CH3* -> C2H2CH3', mode='html')
        'C<sub>2</sub>H<sub>2</sub> + CH<sub>3</sub><sup>•</sup> -> C<sub>2</sub>H<sub>2</sub>CH<sub>3</sub>'

    .. seealso::
        The :func:`parse_molecule` function is used to convert ``plams.Molecule`` objects to a molecular formula.
    """
    if isinstance(molecule, str):
        molstring = molecule
    else:
        molstring = parse_molecule(molecule)

    # an asterisk is shorthand for the radical dot
    molstring = molstring.replace("*", "•")

    # Format every whitespace-delimited term on its own. Substituting each match in place
    # (instead of calling str.replace on the whole string) guarantees that formatting one term
    # can never touch a different term that merely contains it as a substring, e.g. the
    # ``C2H2`` inside ``C2H2CH3``.
    molstring = re.sub(r"\S+", lambda match: _format_part(match.group(), mode), molstring)

    if mode == "pyplot":
        return f"${molstring}$"
    return molstring


# characters that are rendered as superscripts, mapped to the glyph that is shown.
# negative charges should be denoted by em dash and not a normal dash
_SUPERSCRIPT_SIGNS = {"+": "+", "-": "—", "•": "•"}

# unicode has dedicated glyphs for subscript digits and superscript plus/minus
_UNICODE_TABLE = str.maketrans("0123456789+-", "₀₁₂₃₄₅₆₇₈₉⁺⁻")


def _latex_subscript(match) -> str:
    """Turn a run of digits into one LaTeX subscript, using braces only when it has several digits."""
    digits = match.group()
    return f"_{digits}" if len(digits) == 1 else f"_{{{digits}}}"


def _format_part(part: str, mode: str) -> str:
    """Format one whitespace-delimited term of a formula for the requested ``mode``."""
    # a lone plus-sign or arrow denotes a reaction and is not part of a formula
    if part in ("+", "->"):
        return part

    if mode in ("latex", "pyplot"):
        # numbers should be subscript; a multi-digit number must be a single braced group,
        # writing one subscript per digit (``_1_8``) is a double subscript in LaTeX
        part = re.sub(r"[0-9]+", _latex_subscript, part)
        pieces = []
        for char in part:
            if char.isalpha():
                pieces.append(rf"\mathrm{{{char}}}")
            elif char in _SUPERSCRIPT_SIGNS:
                pieces.append(f"^{_SUPERSCRIPT_SIGNS[char]}")
            else:
                pieces.append(char)
        return "".join(pieces)

    if mode == "html":
        pieces = []
        for char in part:
            if char in "0123456789":
                pieces.append(f"<sub>{char}</sub>")
            elif char in _SUPERSCRIPT_SIGNS:
                pieces.append(f"<sup>{_SUPERSCRIPT_SIGNS[char]}</sup>")
            else:
                pieces.append(char)
        return "".join(pieces)

    if mode == "unicode":
        return part.translate(_UNICODE_TABLE)

    # unknown mode: leave the term as it is
    return part
if __name__ == "__main__":
    # quick manual check: render a radical (the asterisk) with the LaTeX formatter
    rendered = molecule("NMe2*", mode="latex")
    print(rendered)