Source code for tcutility.report.report

import os
import pathlib as pl
import uuid
from importlib.util import find_spec
from typing import TYPE_CHECKING, List, Tuple, Union

from tcutility import errors

if find_spec("docx") is None:
    raise errors.MissingOptionalPackageError("docx")
if find_spec("htmldocx") is None:
    raise errors.MissingOptionalPackageError("htmldocx")
if find_spec("cv2") is None:
    raise errors.MissingOptionalPackageError("opencv-python")


import cv2  # noqa: E402 # This is the opencv-python package
import docx
import htmldocx
import numpy as np
import PIL
import PIL.Image
import PIL.ImageDraw
import PIL.ImageFont
from docx.enum.table import WD_ALIGN_VERTICAL, WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement, parse_xml
from docx.oxml.ns import nsdecls, qn
from docx.shared import Cm, Inches, Pt
from htmldocx import HtmlToDocx

from tcutility.report.formatters.generic import WordFormatter
from tcutility.report.formatters.xyz import StandardXYZFormatter
from tcutility.results.read import read
from tcutility.results.result import Result

if TYPE_CHECKING:
    import docx.document
    import docx.oxml.ns
    import docx.oxml.shared


def _add_page_numbers(document: "docx.document.Document") -> None:
    document.sections[0].footer.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = document.sections[0].footer.paragraphs[0].add_run()
    fldChar1 = OxmlElement("w:fldChar")
    fldChar1.set(qn("w:fldCharType"), "begin")

    instrText = OxmlElement("w:instrText")
    instrText.set(qn("xml:space"), "preserve")
    instrText.text = "PAGE"  # type: ignore  # text not found in docx by mypy

    fldChar2 = OxmlElement("w:fldChar")
    fldChar2.set(qn("w:fldCharType"), "end")

    run._r.append(fldChar1)
    run._r.append(instrText)
    run._r.append(fldChar2)


def _set_cell_border(cell, **kwargs):
    """
    Set a cell`s border
    Usage:

    _set_cell_border(
        cell,
        top={"sz": 12, "val": "single", "color": "#FF0000", "space": "0"},
        bottom={"sz": 12, "color": "#00FF00", "val": "single"},
        start={"sz": 24, "val": "dashed", "shadow": "true"},
        end={"sz": 12, "val": "dashed"},
    )
    """
    tc = cell._tc
    tcPr = tc.get_or_add_tcPr()

    # check for tag existnace, if none found, then create one
    tcBorders = tcPr.first_child_found_in("w:tcBorders")
    if tcBorders is None:
        tcBorders = OxmlElement("w:tcBorders")
        tcPr.append(tcBorders)

    # list over all available tags
    for edge in ("start", "top", "end", "bottom", "insideH", "insideV"):
        edge_data = kwargs.get(edge)
        if edge_data:
            tag = "w:{}".format(edge)

            # check for tag existnace, if none found, then create one
            element = tcBorders.find(qn(tag))  # type: ignore
            if element is None:
                element = OxmlElement(tag)
                tcBorders.append(element)

            # looks like order of attributes is important
            for key in ["sz", "val", "color", "space", "shadow"]:
                if key in edge_data:
                    element.set(qn("w:{}".format(key)), str(edge_data[key]))


def _set_repeat_table_header(row):
    """set repeat table row on every new page"""
    tr = row._tr
    trPr = tr.get_or_add_trPr()
    tblHeader = OxmlElement("w:tblHeader")
    tblHeader.set(qn("w:val"), "true")
    trPr.append(tblHeader)
    return row


def _set_cell_color(cell, color):
    shading_elm_1 = parse_xml(rf'<w:shd {nsdecls("w")} w:fill="{color}"/>')
    cell._tc.get_or_add_tcPr().append(shading_elm_1)


table_formatting = Result()
table_formatting.table.alignment = WD_TABLE_ALIGNMENT.CENTER  # type: ignore  # WD_TABLE_ALIGNMENT not found in docx by mypy
table_formatting.font.size = Pt(10.5)  # type: ignore  # Pt not found in docx by mypy


[docs] class DocxFigure: def __init__(self, doc: docx.document.Document, width=None, height=None): self.caption = "" self.doc = doc self.width = width or Cm(17.8) self.height = height self.figures = [] self.html_parser = htmldocx.HtmlToDocx() def __enter__(self): return self def __exit__(self, *args): self.write()
[docs] def add_image(self, path: str, row: Union[int, Tuple[int]], col: Union[int, Tuple[int], None] = None, label: str = ""): img = cv2.imread(path) self.figures.append( { "path": path, "img": img, # 'col': (col, col) if isinstance(col, int) else col, # 'row': (row, row) if isinstance(row, int) else row, "col": col, "row": row, "label": label, } )
[docs] def write(self): new_img = self._stitch_images() new_img_path = str(uuid.uuid4()) + ".png" cv2.imwrite(new_img_path, new_img) p = self.doc.add_paragraph() r = p.add_run() r.add_picture(new_img_path, width=self.width, height=self.height) self._write_caption() os.remove(new_img_path)
def _stitch_images(self): grid_width, grid_height = max(fig["col"] for fig in self.figures) + 1, max(fig["row"] for fig in self.figures) + 1 col_widths = [max(fig["img"].shape[1] for fig in self.figures if fig["col"] == index) for index in range(grid_width)] dpi = sum(col_widths) / (self.width / 360000 * 0.393700787) letter_extra_rows = int(dpi * 12 / 72) + 1 row_heights = [max(fig["img"].shape[0] + letter_extra_rows for fig in self.figures if fig["row"] == index) for index in range(grid_height)] for fig in self.figures: height = row_heights[fig["row"]] width = col_widths[fig["col"]] fig["img"] = cv2.copyMakeBorder(fig["img"], height - fig["img"].shape[0], 0, 0, width - fig["img"].shape[1], cv2.BORDER_CONSTANT, value=[255, 255, 255]) font_path = "Times New Roman" font_size = letter_extra_rows font = PIL.ImageFont.truetype(font_path, font_size) image_pil = PIL.Image.fromarray(fig["img"]) # Convert OpenCV image to PIL image draw = PIL.ImageDraw.Draw(image_pil) if fig["label"]: draw.text((0, 0), fig["label"], font=font, fill=(0, 0, 0)) fig["img"] = np.array(image_pil) # Convert PIL image back to OpenCV image new_img = np.zeros((sum(row_heights), sum(col_widths), 3)) print(self.width / 360000 * 0.393700787, Inches(self.width), sum(col_widths), dpi) for fig in self.figures: start_pos = sum(row_heights[: fig["row"]]), sum(col_widths[: fig["col"]]) end_pos = sum(row_heights[: fig["row"] + 1]), sum(col_widths[: fig["col"] + 1]) new_img[start_pos[0] : end_pos[0], start_pos[1] : end_pos[1], :] = fig["img"] return new_img def _write_caption(self): paragraph = self.doc.add_paragraph("Figure S") paragraph.runs[-1].bold = True # numbering field run = paragraph.add_run("") run.bold = True fldChar = OxmlElement("w:fldChar") fldChar.set(qn("w:fldCharType"), "begin") run._r.append(fldChar) instrText = OxmlElement("w:instrText") instrText.text = " SEQ Figure \\* ARABIC" # type: ignore # text not found in docx by mypy run._r.append(instrText) fldChar = OxmlElement("w:fldChar") fldChar.set(qn("w:fldCharType"), "end") run._r.append(fldChar) html_doc = self.html_parser.parse_html_string(f"<b>.</b> {self.caption}") for add_run in html_doc.paragraphs[-1].runs: run = paragraph.add_run(add_run.text) run.bold = add_run.bold run.italic = add_run.italic run.underline = add_run.underline # type: ignore # underline not found in docx by mypy run.font.superscript = add_run.font.superscript run.font.subscript = add_run.font.subscript run.font.strike = add_run.font.strike run.font.size = add_run.font.size
[docs] class DocxTable: def __init__(self, file: Union[str, pl.Path, docx.document.Document] = "test.docx"): self.file = file self.dont_save = False if isinstance(file, docx.document.Document): self.dont_save = True self.doc = file else: if not os.path.exists(file): self.doc = docx.Document() else: self.doc = docx.Document(str(file)) self.caption = "" self.columns = [] self.column_options = [] self.footnote = None self.rows = [] self.mergers = [] self.html_parser = htmldocx.HtmlToDocx() def __enter__(self): return self def __exit__(self, *args): self.write()
[docs] def add_column(self, name, **kwargs): self.columns.append(["single", name, kwargs]) self.column_options.append(kwargs)
[docs] def add_column_group(self, group_name, column_names, **kwargs): self.columns.append(["grouped", group_name, column_names]) [self.column_options.append(kwargs) for _ in column_names]
[docs] def add_row(self, data): self.rows.append(["data", data])
[docs] def add_header_row(self, name): self.rows.append(["header", name])
[docs] def add_empty_row(self): self.rows.append(["empty"])
[docs] def merge_cells(self, x, y): if isinstance(x, int): x = (x, x) if isinstance(y, int): y = (y, y) self.mergers.append([x, y])
def _correct_table_dims(self): num_cols = 2 * len([col for col in self.columns if col[0] == "single"]) + sum([len(col[2]) + 1 for col in self.columns if col[0] == "grouped"]) - 1 num_rows = 2 + len(self.rows) for _ in range(num_cols - len(self.tab.columns)): self.tab.add_column(Cm(0.1)) for _ in range(num_rows - len(self.tab.rows)): self.tab.add_row() @property def shape(self): Ncols = 0 for col in self.columns: if col[0] == "single": Ncols += 2 if col[0] == "grouped": Ncols += len(col[2]) + 1 return 2 + len(self.rows), Ncols - 1
[docs] def write(self): self._write_caption() # write the table caption self.doc.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.JUSTIFY self.tab = self.doc.add_table(1, 1) self._correct_table_dims() for x, y in self.mergers: self.tab.cell(x[0], y[0]).merge(self.tab.cell(x[1], y[1])) num_cols = 2 * len([col for col in self.columns if col[0] == "single"]) + sum([len(col[2]) + 1 for col in self.columns if col[0] == "grouped"]) - 1 # create a table self.tab.alignment = table_formatting.table.alignment # type: ignore # Results object does not have typing # write the column headers spacing_columns = [] col_idx = 0 for col in self.columns: if col[0] == "single": self.write_cell(1, col_idx, col[1], bold=True, font_size=table_formatting.font.size) # type: ignore # results object does not have typing spacing_columns.append(col_idx + 1) col_idx += 2 if col[0] == "grouped": _ = self.write_cell(0, (col_idx, col_idx + len(col[2]) - 1), col[1], bold=True, bottom={"sz": 12, "val": "single", "color": "#000000"}, font_size=table_formatting.font.size) # type: ignore # results object does not have typing for i, val in enumerate(col[2]): self.write_cell(1, col_idx + i, val, bold=True, font_size=table_formatting.font.size) # type: ignore # results object does not have typing spacing_columns.append(col_idx + len(col[2])) col_idx += len(col[2]) + 1 # set the lines for the top and bottom header rows for i in range(num_cols): _set_cell_border(self.tab.cell(0, i), top={"sz": 12, "val": "single", "color": "#000000"}) _set_cell_border(self.tab.cell(1, i), bottom={"sz": 12, "val": "single", "color": "#000000"}) for j, row in enumerate(self.rows): if row[0] == "data": for i in range(num_cols): if i in spacing_columns: continue num_spacing_past = len([k for k in spacing_columns if (k - 1) < i]) self.write_cell(j + 2, i, row[1][i - num_spacing_past], font_size=table_formatting.font.size, **self.column_options[i - num_spacing_past]) # type: ignore # results object does not have typing if row[0] == "header": _ = self.write_cell( j + 2, (0, num_cols - 1), row[1], bold=True, top={"sz": 12, "val": "single", "color": "#000000"}, bottom={"sz": 12, "val": "single", "color": "#000000"}, bkgr_color="F2F2F2", font_size=table_formatting.font.size, # type: ignore # font not found in docx by mypy ) if row[0] == "empty": for i in range(num_cols): self.write_cell(j + 2, i, "") _set_repeat_table_header(self.tab.rows[0]) _set_repeat_table_header(self.tab.rows[1]) self._write_footnote() if not self.dont_save: self.doc.save(str(self.file))
[docs] def write_cell(self, row, col, text, alignment="center", vert_alignment="center", bold=None, italic=None, bkgr_color=None, font_size=None, **kwargs): if isinstance(row, int) and isinstance(col, int): cell = self.tab.cell(row, col) else: if isinstance(row, int): row = (row, row) if isinstance(col, int): col = (col, col) cell = self.tab.cell(row[0], col[0]).merge(self.tab.cell(row[1], col[1])) if len(cell.paragraphs[0].runs) > 0: return # cell.text = str(text).strip() alignment = { "center": WD_ALIGN_PARAGRAPH.CENTER, "left": WD_ALIGN_PARAGRAPH.LEFT, "right": WD_ALIGN_PARAGRAPH.RIGHT, }[alignment] vert_alignment = { "center": WD_ALIGN_VERTICAL.CENTER, "top": WD_ALIGN_VERTICAL.TOP, "bottom": WD_ALIGN_VERTICAL.BOTTOM, }[vert_alignment] # for txt, settings in parse_text(text): self.html_parser.add_html_to_cell(text.replace("-", "–"), cell) cell.paragraphs[0].alignment = alignment cell.vertical_alignment = vert_alignment for run in cell.paragraphs[0].runs: if bold: run.bold = bold if italic: run.italic = italic if font_size: run.font.size = font_size _set_cell_border(cell, **kwargs) if bkgr_color is not None: _set_cell_color(cell, bkgr_color) return cell
[docs] def add_footnote(self, text, bold=None, italic=None, font_size=None): self.footnote = (text, bold, italic, font_size)
def _write_footnote(self): if self.footnote is None: return # set the lower line for col in range(self.shape[1]): cell = self.tab.cell(self.shape[0] - 1, col) _set_cell_border(cell, bottom={"sz": 12, "val": "single", "color": "#000000"}) self.html_parser.add_html_to_document(self.footnote[0], self.doc) for run in self.doc.paragraphs[0].runs: if self.footnote[1]: run.bold = self.footnote[1] if self.footnote[2]: run.italic = self.footnote[2] if self.footnote[3]: run.font.size = self.footnote[3] def _write_caption(self): paragraph = self.doc.add_paragraph("Table S") paragraph.runs[-1].bold = True # numbering field run = paragraph.add_run("") run.bold = True fldChar = OxmlElement("w:fldChar") fldChar.set(qn("w:fldCharType"), "begin") run._r.append(fldChar) instrText = OxmlElement("w:instrText") instrText.text = " SEQ Table \\* ARABIC" # type: ignore # text not found in docx by mypy run._r.append(instrText) fldChar = OxmlElement("w:fldChar") fldChar.set(qn("w:fldCharType"), "end") run._r.append(fldChar) html_doc = self.html_parser.parse_html_string(f"<b>.</b> {self.caption}") for add_run in html_doc.paragraphs[-1].runs: run = paragraph.add_run(add_run.text) run.bold = add_run.bold run.italic = add_run.italic run.underline = add_run.underline # type: ignore # underline not found in docx by mypy run.font.superscript = add_run.font.superscript run.font.subscript = add_run.font.subscript run.font.strike = add_run.font.strike run.font.size = add_run.font.size
[docs] class SI: def __init__(self, path: Union[str, pl.Path], overwrite: bool = False, font: str = "Arial") -> None: """Initializes the SI class for creating supporting information (SI) files in Microsoft Word format. This class is responsible for creating and managing a Microsoft Word document that serves as supporting information (SI) for reports or publications. It allows for the addition of various elements such as text, headings, and formatted content from HTML. Args: path (str | pl.Path): The location of the Word file. Does not have to have a file-extension. overwrite (bool, optional): Whether to append to or overwrite the file. Defaults to False. font (str, optional): The font to be used in the document. Defaults to "Arial". Attributes: path (pl.Path): The path to the Word document. doc (docx.Document): The Word document object. """ self.path = pl.Path(path).with_suffix(".docx") self.doc = docx.Document() if not os.path.exists(self.path) or overwrite: self.doc = docx.Document() else: self.doc = docx.Document(str(self.path)) self.doc.styles["Normal"].font.name = "Times New Roman" # type: ignore # font not found in docx by mypy self.doc.styles["Normal"].font.size = Pt(12) # type: ignore # font not found in docx by mypy self.doc.styles["Normal"].paragraph_format.space_after = 1 # type: ignore # paragraph_format not found in docx by mypy self.html_parser = htmldocx.HtmlToDocx() # type: ignore # HtmlToDocx not found in docx by mypy # Set the font to the specified font self.doc.styles["Normal"].font.name = font # type: ignore # font not found in docx by mypy def __enter__(self): """Enables the use of the class as a context manager.""" return self def __exit__(self, *args): for section in self.doc.sections: # 1.9 cm section.left_margin = Cm(1.9) section.right_margin = Cm(1.9) _add_page_numbers(self.doc) self.doc.save(str(self.path))
[docs] def add_figure(self, *args, **kwargs) -> DocxFigure: return DocxFigure(self.doc, *args, **kwargs)
[docs] def add_table(self) -> DocxTable: return DocxTable(self.doc)
[docs] def add_xyz(self, obj: Union[str, Result, pl.Path], title: Union[str, None] = None, formatter: WordFormatter = StandardXYZFormatter()) -> None: """Adds XYZ formatted content to the document. This method is responsible for adding the coordinates and information about a calculation to the supporting information document. It includes details such as the electronic bond energy, Gibb's free energy, enthalpy, imaginary mode, and the coordinates of the molecule. Args: obj: A string specifying a calculation directory or a `TCutility.results.Result` object from a calculation. title: The title to be written before the coordinates and information. If None, no title is added. formatter: The formatter to be used for formatting the content. Defaults to `StandardXYZFormatter`. Returns: None """ ret_str = "" # Add the formatted content to the document if isinstance(obj, str) or isinstance(obj, pl.Path): calc_path = pl.Path(obj) if not calc_path.is_dir(): return # raise ValueError(f"Invalid calculation directory: {calc_path}") obj = read(obj) ret_str += formatter.format(obj, title=title) # print(ret_str) parser = HtmlToDocx() parser.add_html_to_document(ret_str, self.doc) return
[docs] def add_heading(self, text: str, level: int = 1) -> None: """Adds a heading to the document. This method allows for the addition of a heading to the Word document, with customizable text and level. Args: text (str): The text of the heading. level (int, optional): The level of the heading (determines the size and style). Defaults to 1. """ self.doc.add_heading(text, level)
[docs] def add_page_break(self) -> None: self.doc.add_page_break()
[docs] def add_toc(self) -> None: title = self.doc.add_paragraph() title_run = title.add_run("Contents") title_run.bold = True title_run.font.size = Pt(14) title.paragraph_format.space_after = Pt(10) # figures paragraph = self.doc.add_paragraph() paragraph.paragraph_format.space_after = Pt(10) run = paragraph.add_run() fldChar = OxmlElement("w:fldChar") fldChar.set(qn("w:fldCharType"), "begin") fldChar.set(qn("w:dirty"), "true") instrText = OxmlElement("w:instrText") instrText.set(qn("xml:space"), "preserve") instrText.text = 'TOC \\h \\z \\c "Figure"' # type: ignore # "Table" of list of table and "Figure" for list of figure fldChar2 = OxmlElement("w:fldChar") fldChar2.set(qn("w:fldCharType"), "separate") fldChar3 = OxmlElement("w:t") fldChar3.text = "Right-click to update field." # type: ignore # text not found in docx by mypy fldChar2.append(fldChar3) fldChar4 = OxmlElement("w:fldChar") fldChar4.set(qn("w:fldCharType"), "end") run._r.append(fldChar) run._r.append(instrText) run._r.append(fldChar2) run._r.append(fldChar4) # tables paragraph = self.doc.add_paragraph() paragraph.paragraph_format.space_after = Pt(10) run = paragraph.add_run() fldChar = OxmlElement("w:fldChar") fldChar.set(qn("w:fldCharType"), "begin") fldChar.set(qn("w:dirty"), "true") instrText = OxmlElement("w:instrText") instrText.set(qn("xml:space"), "preserve") instrText.text = 'TOC \\h \\z \\c "Table"' # type: ignore # "Table" of list of table and "Figure" for list of figure fldChar2 = OxmlElement("w:fldChar") fldChar2.set(qn("w:fldCharType"), "separate") fldChar3 = OxmlElement("w:t") fldChar3.text = "Right-click to update field." # type: ignore # text not found in docx by mypy fldChar2.append(fldChar3) fldChar4 = OxmlElement("w:fldChar") fldChar4.set(qn("w:fldCharType"), "end") run._r.append(fldChar) run._r.append(instrText) run._r.append(fldChar2) run._r.append(fldChar4)
[docs] def get_subdirs(root_folder: pl.Path) -> List[pl.Path]: """Iteratively searches through a folder and returns all the most nested subdirs.""" most_nested_subdirs = [] for root, dirs, files in os.walk(root_folder): # If 'dirs' is empty, it means 'root' contains no subdirectories, thus it is most nested. if not dirs: most_nested_subdirs.append(pl.Path(root)) return most_nested_subdirs
[docs] def replace_files_rkf_to_ams_rkf(root_folder: pl.Path) -> None: """Iteratively searches through a folder and replaces all files with the extension '.rkf' to '.ams.rkf', except if the file has 'adf.rkf' in the name.""" for root, dirs, files in os.walk(root_folder): for file in files: if file.endswith(".rkf") and "adf.rkf" not in file: new_name = file.replace(".rkf", ".ams.rkf") os.rename(pl.Path(root) / file, pl.Path(root) / new_name)
[docs] def main(): calc_dir = pl.Path("__file__").resolve().parents[0] / "test" / "fixtures" main_path = pl.Path("__file__").resolve().parents[0] / "examples" # all_subdirs = [folder for folder in calc_dir.iterdir() if folder.is_dir()] all_subdirs = get_subdirs(calc_dir) res_objects = [] for dir_ in all_subdirs: try: # Try to read the results of the calculation res_objects.append(read(dir_)) except Exception: pass with SI(main_path / "test", overwrite=False) as si: si.add_heading("SI project") for obj in res_objects: si.add_xyz(obj=obj)
if __name__ == "__main__": main()