import pathlib as pl
from typing import Dict, List, Union
from scm import plams
from tcutility import ensure_list
from tcutility.results import result
from tcutility.data import atom
[docs]
def number_of_electrons(mol: plams.Molecule) -> int:
nel = 0
for at in mol:
nel += atom.atom_number(at.symbol)
return nel
[docs]
def parse_str(s: str):
# checks if string should be an int, float, bool or string
# sanitization first
# empty s returns None
# if s is not a str return it
if s == "":
return None
if not isinstance(s, str):
return s
if "," in s:
return [parse_str(part.strip()) for part in s.split(",")]
# to parse the string we use try/except method
try:
return int(s)
except ValueError:
pass
try:
return float(s)
except ValueError:
pass
# bool type casting always works, so we have to specifically check for correct strings
if s in ["True", "False"]:
return bool(s)
return s
[docs]
def load(path) -> plams.Molecule:
"""
Load a molecule from a given xyz file path.
The xyz file is structured as follows:
.. code-block::
[int]
Comment line
[str] [float] [float] [float] atom_tag1 atom_tag2 atom_key1=...
[str] [float] [float] [float] atom_tag1 atom_tag2 atom_key1=...
[str] [float] [float] [float]
mol_tag1
mol_tag2
mol_key1=...
mol_key2 = ...
The xyz file is parsed and returned as a :class:`plams.Molecule <plams.mol.molecule.Molecule>` object.
Flags and tags are given as ``mol.flags`` and ``mol.flags.tags`` respectively.
Similarly for the atoms, the flags and tags are given as ``mol.atoms[i].flags`` and ``mol.atoms[i].flags.tags``
"""
def parse_flags(args):
ret = result.Result()
ret.tags = set()
for arg in args:
# flags are given as key=value pairs
# tags are given as loose keys
if "=" in arg:
key, value = arg.split("=")
ret[key.strip()] = parse_str(value.strip())
else:
ret.tags.add(parse_str(arg.strip()))
return ret
with open(path) as f:
lines = [line.strip() for line in f.readlines()]
mol = plams.Molecule()
natoms = int(lines[0])
mol.comment = lines[1]
# not all lines in the file will define atoms. Lines after line natoms+2 will be used for flags
atom_lines = lines[2 : natoms + 2]
for line in atom_lines:
# parse every atom first
symbol, x, y, z, *args = line.split()
at = plams.Atom(symbol=symbol, coords=(float(x), float(y), float(z)))
at.flags = parse_flags(args)
mol.add_atom(at)
# after the atoms we parse the flags for the molecule
flag_lines = lines[natoms + 2 :]
flag_lines = [line.strip() for line in flag_lines if line.strip()]
mol.flags = parse_flags(flag_lines)
return mol
[docs]
def guess_fragments(mol: plams.Molecule) -> Dict[str, plams.Molecule]:
"""
Guess fragments based on data from the xyz file. Two methods are currently supported, see the tabs below.
We also support reading of charges and spin-polarizations for the fragments.
They should be given as ``charge_{fragment_name}`` and ``spinpol_{fragment_name}`` respectively.
.. tabs::
.. group-tab:: First method
.. code-block::
8
N 0.00000000 0.00000000 -0.81474153
B -0.00000000 -0.00000000 0.83567034
H 0.47608351 -0.82460084 -1.14410295
H 0.47608351 0.82460084 -1.14410295
H -0.95216703 0.00000000 -1.14410295
H -0.58149793 1.00718395 1.13712667
H -0.58149793 -1.00718395 1.13712667
H 1.16299585 -0.00000000 1.13712667
frag_Donor = 1, 3-5
frag_Acceptor = 2, 6-8
charge_Donor = -1
spinpol_Acceptor = 2
In this case, fragment atom indices must be provided below the coordinates.
The fragment name must be prefixed with ``frag_``. Indices can be given as integers or as ranges using ``-``.
.. group-tab:: Second method
.. code-block::
8
N 0.00000000 0.00000000 -0.81474153 frag=Donor
B -0.00000000 -0.00000000 0.83567034 frag=Acceptor
H 0.47608351 -0.82460084 -1.14410295 frag=Donor
H 0.47608351 0.82460084 -1.14410295 frag=Donor
H -0.95216703 0.00000000 -1.14410295 frag=Donor
H -0.58149793 1.00718395 1.13712667 frag=Acceptor
H -0.58149793 -1.00718395 1.13712667 frag=Acceptor
H 1.16299585 -0.00000000 1.13712667 frag=Acceptor
charge_Donor = -1
spinpol_Acceptor = 2
In this case, fragment atoms are marked with the `frag` flag which gives the name of the fragment the atom belongs to.
Args:
mol: the molecule that is to be split into fragments. It should have defined either method shown above.
If it does not define these methods this function returns ``None``.
Returns:
A dictionary containing fragment names as keys and :class:`plams.Molecule <scm.plams.mol.molecule.Molecule>` objects as values.
Atoms that were not included by either method will be placed in the molecule object with key ``None``.
"""
# first method, check if the fragments are defined as molecule flags
fragment_flags = [flag for flag in mol.flags if flag.startswith("frag_")]
if len(fragment_flags) > 0:
# we split here to get of the frag_ prefix
fragment_mols = {frag.split("_", 1)[1]: plams.Molecule() for frag in fragment_flags}
for frag in fragment_flags:
frag_name = frag.split("_", 1)[1]
indices = []
index_line = ensure_list(mol.flags[frag])
for indx in index_line:
if isinstance(indx, int):
indices.append(indx)
elif isinstance(indx, str) and "-" in indx:
indices.extend(range(int(indx.split("-")[0]), int(indx.split("-")[1]) + 1))
else:
raise ValueError(f"Fragment index {indx} could not be parsed.")
[fragment_mols[frag_name].add_atom(mol[i]) for i in indices]
fragment_mols[frag_name].flags = {"tags": set()}
if f"charge_{frag_name}" in mol.flags:
fragment_mols[frag_name].flags["charge"] = mol.flags[f"charge_{frag_name}"]
if f"spinpol_{frag_name}" in mol.flags:
fragment_mols[frag_name].flags["spinpol"] = mol.flags[f"spinpol_{frag_name}"]
return result.Result(fragment_mols)
# second method, check if the atoms have a frag= flag defined
fragment_names = set(at.flags.get("frag") for at in mol)
if len(fragment_names) > 0:
fragment_mols = {name: plams.Molecule() for name in fragment_names}
for at in mol:
# get the fragment the atom belongs to and add it to the list
fragment_mols[at.flags.get("frag")].add_atom(at)
for frag in fragment_names:
fragment_mols[frag].flags = {"tags": set()}
if f"charge_{frag}" in mol.flags:
fragment_mols[frag].flags["charge"] = mol.flags[f"charge_{frag}"]
if f"spinpol_{frag}" in mol.flags:
fragment_mols[frag].flags["spinpol"] = mol.flags[f"spinpol_{frag}"]
return result.Result(fragment_mols)
# =============================================================================
# Writing Functions for Molecule objects ======================================
# =============================================================================
def _xyz_format(mol: plams.Molecule, include_n_atoms: bool = True) -> str:
"""Returns a string representation of a molecule in the xyz format, e.g.:
C 0.00000000 0.00000000 0.00000000
H 1.00000000 0.00000000 0.00000000
H 0.00000000 1.00000000 0.00000000
H 0.00000000 0.00000000 1.00000000
...
"""
n_atoms = len(mol.atoms)
if include_n_atoms:
return f"{n_atoms}\n" + "\n".join([f"{at.symbol:6s}{at.x:16.8f}{at.y:16.8f}{at.z:16.8f}" for at in mol.atoms])
return "\n".join([f"{at.symbol:6s}{at.x:16.8f}{at.y:16.8f}{at.z:16.8f}" for at in mol.atoms])
def _amv_format(mol: plams.Molecule, step: int, energy: Union[float, None] = None) -> str:
"""Returns a string representation of a molecule in the amv format, e.g.:
Geometry 1, Energy: -0.5 Ha
C 0.00000000 0.00000000 0.00000000
H 1.00000000 0.00000000 0.00000000
H 0.00000000 1.00000000 0.00000000
H 0.00000000 0.00000000 1.00000000
...
If no energy is provided, the energy is not included in the string representation"""
if energy is None:
return f"Geometry {step}\n" + "\n".join([f"{at.symbol:6s}{at.x:16.8f}{at.y:16.8f}{at.z:16.8f}" for at in mol.atoms])
return f"Geometry {step}, Energy: {energy} Ha\n" + "\n".join([f"{at.symbol:6s}{at.x:16.8f}{at.y:16.8f}{at.z:16.8f}" for at in mol.atoms])
[docs]
def write_mol_to_xyz_file(mols: Union[List[plams.Molecule], plams.Molecule], filename: Union[str, pl.Path], include_n_atoms: bool = False) -> None:
"""Writes a list of molecules to a file in xyz format."""
mols = mols if isinstance(mols, list) else [mols]
out_file = pl.Path(f"{filename}.xyz")
[mol.delete_all_bonds() for mol in mols]
write_string = "\n\n".join([_xyz_format(mol, include_n_atoms) for mol in mols])
out_file.write_text(write_string)
return None
[docs]
def write_mol_to_amv_file(mols: Union[List[plams.Molecule], plams.Molecule], energies: Union[List[float], None], filename: Union[str, pl.Path]) -> None:
"""Writes a list of molecules to a file in amv format."""
mols = mols if isinstance(mols, list) else [mols]
out_file = pl.Path(f"{filename}.amv")
energies = energies if energies is not None else [0.0 for _ in mols]
[mol.delete_all_bonds() for mol in mols]
write_string = "\n\n".join([_amv_format(mol, step, energy) for step, (mol, energy) in enumerate(zip(mols, energies), 1)])
out_file.write_text(write_string)
return None
[docs]
def save(mol: plams.Molecule, path: str, comment: str = None):
"""Save a molecule in a custom xyz file format.
Molecule and atom flags can be provided as the "flags" parameter of the object (mol.flags and atom.flags).
"""
comment = comment or mol.comment if hasattr(mol, "comment") else ""
with open(path, "w+") as f:
f.write(f"{len(mol.atoms)}\n{comment}\n")
for at in mol.atoms:
flags_str = ""
flags = at.flags if hasattr(at, "flags") else {}
for key, value in flags.items():
if key == "tags":
if len(value) > 0:
flags_str += " ".join([str(x) for x in value]) + " "
else:
continue
else:
flags_str += f"{key}={value} "
f.write(f"{at.symbol}\t{at.coords[0]: .10f}\t{at.coords[1]: .10f}\t{at.coords[2]: .10f}\t{flags_str}\n")
f.write("\n")
flags = mol.flags if hasattr(mol, "flags") else {}
for key, value in flags.items():
if key == "tags":
f.write("\n".join([str(x) for x in value]) + "\n")
else:
f.write(f"{key} = {value}\n")
if __name__ == "__main__":
# mol = load(r"D:\Users\Yuman\Desktop\PhD\ychem\calculations2\5ef3a511141a993d83552515df6bf882a7560dd806f88df4e2c2887b4d2a9595\transitionstate\input_mol.xyz")
mol = load("../../examples/job/NH3BH3.xyz")
frags = guess_fragments(mol)
print(frags)