Source code for tcmu.cite

import json
import os

from tcmu import environment, spell_check
from tcmu.cache import cache_file
import numpy as np

# Names exported by ``from tcmu.cite import *``: the public ``cite`` function
# plus the underscore-prefixed lookup helpers, which are listed explicitly here.
__all__ = ["cite", "_get_doi_data", "_get_doi_data_from_title", "_get_doi_data_from_query", "_get_publisher_city", "_get_journal_abbreviation"]


@environment.requires_optional_package("requests")
@cache_file('tcmu_doi_data')
def _get_doi_data(doi: str) -> dict:
    """
    Look up the crossref.org record that belongs to a DOI.

    Args:
        doi: the DOI to get information about.

    Returns:
        The JSON document returned by the crossref.org API, decoded into a dict.

    Raises:
        ValueError: if the API answers with the text ``Resource not found.``.
    """
    import requests

    response_text = requests.get(f"http://api.crossref.org/works/{doi}").text

    # anything other than the "not found" marker is expected to be JSON
    if response_text != "Resource not found.":
        return json.loads(response_text)

    raise ValueError(f"Could not find DOI {doi}.")


@environment.requires_optional_package("requests")
@cache_file('tcmu_doi_data')
def _get_doi_data_from_title(title: str):
    """
    Find the journal article whose title is closest to ``title`` using the crossref.org API.

    Args:
        title: the (approximate) title of the article to look for.

    Returns:
        The crossref.org record (a dict) of the best matching journal article,
        or ``None`` if the search returned no journal articles.
    """
    import requests

    # Pass the title as a query parameter so requests URL-encodes it; characters
    # such as '&', '#' or '+' would otherwise corrupt the query string.
    response = requests.get("http://api.crossref.org/works", params={"query.title": title}).text
    items = json.loads(response)["message"]["items"]
    articles = [row for row in items if row['type'] == 'journal-article']
    if not articles:
        return

    def nearness(row):
        # Compare the requested title with the candidate's title. The previous code
        # rebound ``title`` inside the loop and compared each candidate with itself.
        candidate = ''.join(row.get('title', []))
        if not candidate:
            # records without a title can never be the best match
            return float('inf')
        # presumably wagner_fischer returns an edit distance; it is normalised by
        # the candidate length, as before
        return spell_check.wagner_fischer(title, candidate) / len(candidate)

    # min() returns the first of several equally good candidates, like np.argmin did
    return min(articles, key=nearness)


@environment.requires_optional_package("requests")
@cache_file('tcmu_doi_data')
def _get_doi_data_from_query(**queries):
    """
    Find a journal article on crossref.org using one or more field queries.

    Args:
        **queries: field queries to send to the API. Keys are the names listed in
            ``valid_queries`` below, with underscores allowed in place of dashes
            (e.g. ``publisher_name``). A value is either a string or an iterable of
            strings; ``None`` values are skipped.

    Returns:
        The crossref.org record (a dict) of the selected journal article, or ``None``
        if the search returned no journal articles. When a ``title`` query is given,
        the article whose title is closest to it is selected; otherwise the first
        journal article in the response is returned.

    Raises:
        ValueError: if a key is not one of the valid field queries.
    """
    import requests

    valid_queries = [
        "affiliation",
        "degree",
        "event-acronym",
        "bibliographic",
        "container-title",
        "publisher-name",
        "author",
        "event-theme",
        "standards-body-acronym",
        "chair",
        "event-location",
        "translator",
        "funder-name",
        "event-name",
        "publisher-location",
        "title",
        "standards-body-name",
        "contributor",
        "description",
        "editor",
        "event-sponsor"
        ]

    # Collect (key, value) pairs and let requests URL-encode them, instead of
    # gluing the query string together by hand and stripping '&' from values.
    params = []
    for query, value in queries.items():
        if value is None:
            continue

        query = query.replace('_', '-')

        if query not in valid_queries:
            raise ValueError(f'Query "{query}" is not a valid query!')

        values = [value] if isinstance(value, str) else value
        for v in values:
            params.append((f'query.{query}', v))

    response = requests.get("http://api.crossref.org/works", params=params).text
    items = json.loads(response)["message"]["items"]
    articles = [row for row in items if row['type'] == 'journal-article']
    if not articles:
        return

    wanted_title = queries.get('title')
    if wanted_title is None:
        # Nothing to rank against: keep the first hit, which is what the previous
        # (self-comparing) ranking always ended up returning.
        return articles[0]
    if not isinstance(wanted_title, str):
        wanted_title = ' '.join(wanted_title)

    def nearness(row):
        # Compare the requested title with the candidate's title. The previous code
        # compared each candidate title with itself.
        candidate = ''.join(row.get('title', []))
        if not candidate:
            # records without a title can never be the best match
            return float('inf')
        # presumably wagner_fischer returns an edit distance; it is normalised by
        # the candidate length, as before
        return spell_check.wagner_fischer(wanted_title, candidate) / len(candidate)

    # min() returns the first of several equally good candidates, like np.argmin did
    return min(articles, key=nearness)

# data = _get_doi_data_from_query(title="AGMFNet: Attention-guided multi-scale feature fusion network for infrared small target detection",
#     author=("He", "Liu", "Yang", "Yuan"), publisher_name="Elsevier")
# print(data)

# exit()

@environment.requires_optional_package("requests")
@cache_file('tcmu_journal_abbrvs')
def _get_journal_abbreviation(journal: str) -> str:
    """
    Get the journal name abbreviation using the abbreviso API.

    Args:
        journal: the name of the journal to get the abbreviation of.

    Returns:
        The text returned by the API, with escaped ampersands turned back into ``&``.
    """
    import requests

    abbreviation = requests.get(f"https://abbreviso.toolforge.org/a/{journal}").text

    # The response presumably HTML-escapes ampersands. Replace the complete entity
    # first: replacing only 'amp;' would turn '&amp;' into '&&'. The bare 'amp;'
    # replacement is kept so any input the old code handled still works.
    return abbreviation.replace('&amp;', '&').replace('amp;', '&')


@cache_file('tcmu_publisher_city')
def _get_publisher_city(publisher: str) -> str:
    """
    Look up the city of a publisher in the bundled ``_publisher_cities.json`` file.

    Args:
        publisher: the publisher name, used as the key into the JSON mapping.

    Returns:
        The value stored for ``publisher``, or ``None`` if it is not in the file.
    """
    # the data file lives in data/cite/ next to this module
    module_dir = os.path.split(__file__)[0]
    cities_path = os.path.join(module_dir, "data", "cite", "_publisher_cities.json")

    with open(cities_path) as handle:
        city_by_publisher = json.loads(handle.read())

    return city_by_publisher.get(publisher)


@cache_file('tcmu_citation')
def cite(doi: str, style: str = "wiley", mode="html") -> str:
    """
    Format an article in a certain style.

    Args:
        doi: the article DOI to generate a citation for.
        style: the style formatting to use. Can be ``['wiley', 'acs', 'rsc', 'jcc']``.
        mode: the formatting mode. Can be ``['html', 'latex', 'plain']``.

    Returns:
        The formatted citation. An empty string is returned when the record type
        is not a journal article, a book chapter or a book.
    """
    # check if the style was correctly given
    spell_check.check(style, ["wiley", "acs", "rsc", "jcc"])
    spell_check.check(mode, ["html", "latex", "plain"])

    # get the information about the DOI
    data = _get_doi_data(doi)
    entry_type = data["message"]["type"]

    # the record types are mutually exclusive, so at most one formatter runs
    citation = ""
    if entry_type == "journal-article":
        citation = _format_article(data, style)
    elif entry_type == "book-chapter":
        citation = _format_book_chapter(data, style)
    elif entry_type == "monograph" or (entry_type == "other" and "ISBN" in data["message"]):
        citation = _format_book(data, style)

    if mode == "html":
        return citation

    # The jcc article style wraps the DOI link in an HTML anchor. Outside of html
    # mode only the bare URL is kept. The anchor is matched literally because a
    # DOI may itself contain '<' or '>' characters.
    doi_url = f"https://doi.org/{data['message'].get('DOI', doi)}"
    citation = citation.replace(f"<a href={doi_url}>{doi_url}</a>", doi_url)

    if mode == "plain":
        for tag in ("<i>", "</i>", "<b>", "</b>"):
            citation = citation.replace(tag, "")

    if mode == "latex":
        citation = citation.replace("<i>", r"\textit{")
        citation = citation.replace("</i>", "}")
        citation = citation.replace("<b>", r"\textbf{")
        citation = citation.replace("</b>", "}")

    return citation
def get_pages(data):
    """
    Get the page information of a crossref.org record.

    Args:
        data: the decoded API response; only ``data["message"]`` is read.

    Returns:
        ``"e"`` followed by the part of the URL after ``"ceur."`` if the record
        URL contains ``"ceur."``; otherwise the ``page`` field with hyphens
        replaced by en dashes; otherwise the ``article-number`` field; otherwise
        ``None``.
    """
    message = data["message"]
    url = message["URL"]

    if "ceur." in url:
        return "e" + url.split("ceur.")[-1]
    if "page" in message:
        return message["page"].replace("-", "–")
    if "article-number" in message:
        return message["article-number"]
    return None


def is_accepted(data):
    """
    Tell whether a crossref.org record carries an assertion named ``accepted``.

    Args:
        data: the decoded API response; only ``data["message"]`` is read.
    """
    assertions = data["message"].get("assertion", [])
    return any(assertion["name"] == "accepted" for assertion in assertions)


def _initials_and_last_names(people):
    """Split crossref.org author/editor entries into a list of initials and a list of family names."""
    initials = []
    last_names = []
    for person in people:
        # we get the capital letters from the title-cased first names
        # these will become the initials for this person
        firsts = [char + "." for char in person["given"].title() if char.isupper()]
        initials.append(" ".join(firsts))
        last_names.append(person["family"].title())
    return initials, last_names


def _join_names_jcc(names):
    """Join formatted names the JCC way: 'A', 'A and B', 'A, B, and C', or the first three plus 'et al.' from seven names on."""
    if not names:
        return ""
    if len(names) == 1:
        return names[0]
    if len(names) == 2:
        return f'{names[0]} and {names[1]}'
    if len(names) < 7:
        return ', '.join(names[:-1]) + f', and {names[-1]}'
    return ', '.join(names[:3]) + ', et al.'


def _join_names_wiley_chapter(names):
    """Join formatted names for a Wiley book chapter: up to three names joined with 'and', otherwise the first three plus 'et al.'."""
    if not names:
        return ""
    if len(names) == 1:
        return names[0]
    if len(names) < 4:
        return ", ".join(names[:-1]) + " and " + names[-1]
    return ", ".join(names[:3]) + " et al."


def _publication_year(message):
    """Get the year of a record, preferring the print publication date over the generic one."""
    if "published-print" in message:
        return message["published-print"]["date-parts"][0][0]
    return message["published"]["date-parts"][0][0]


def _get_book_editors(data):
    """Best-effort lookup of the editors of the book identified by the record's electronic ISBN."""
    message = data["message"]
    original_book_data = None
    for isbn in message.get("isbn-type", []):
        if isbn["type"] == "electronic":
            try:
                original_book_data = _get_doi_data(f"{message['prefix']}/{isbn['value']}")
            except Exception:
                # The lookup is optional: without it the citation simply has no editors.
                # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit through.
                original_book_data = None
            break

    if not original_book_data:
        return []
    return original_book_data["message"].get("editor", [])


def _format_article(data: dict, style: str) -> str:
    """
    Format a journal article.

    Args:
        data: the decoded crossref.org response for the article.
        style: one of ``'wiley'``, ``'acs'``, ``'rsc'`` or ``'jcc'``.

    Returns:
        The citation with HTML ``<i>``/``<b>`` markup (and an ``<a>`` link for
        ``'jcc'``). An empty string is returned for any other style.
    """
    message = data["message"]

    # grab useful data
    journal = message["container-title"][0]
    if journal == 'physica status solidi (a)':
        journal = 'Physica Status Solidi A'
    journal_abbreviation = _get_journal_abbreviation(journal)
    year = message["issued"]["date-parts"][0][0]
    volume = message.get("volume", "")
    pages = get_pages(data)
    title = message["title"][0]
    doi = message["DOI"]

    initials, last_names = _initials_and_last_names(message["author"])

    citation = ""

    # format the citation correctly
    if style == "wiley":
        names = [f"{first} {last}" for first, last in zip(initials, last_names)]
        citation = f"{', '.join(names)}, <i>{journal_abbreviation}</i> <b>{year}</b>, <i>{volume}</i>"
        if pages:
            citation += f", {pages}"
        citation += "."

    elif style == "acs":
        names = [f"{last}, {first}" for first, last in zip(initials, last_names)]
        citation = f"{'; '.join(names)} {title} <i>{journal_abbreviation}</i> <b>{year}</b>, <i>{volume}</i>"
        if pages:
            citation += f", {pages}"
        citation += f". DOI: {doi}"

    elif style == "rsc":
        names = [f"{first} {last}" for first, last in zip(initials, last_names)]
        citation = f"{', '.join(names)}, <i>{journal_abbreviation}</i> {year}, <b>{volume}</b>"
        if pages:
            citation += f", {pages}"
        citation += "."

    elif style == "jcc":
        names = _join_names_jcc([f"{first} {last}" for first, last in zip(initials, last_names)])
        citation = f"{names}, \"{title},\" <i>{journal}</i> {volume} ({year})"
        if pages:
            citation += f": {pages}"
        citation += f", <a href=https://doi.org/{doi}>https://doi.org/{doi}</a>."

    return citation


def _format_book_chapter(data: dict, style: str) -> str:
    """
    Format a book chapter.

    Args:
        data: the decoded crossref.org response for the chapter.
        style: the citation style. Only ``'wiley'`` is implemented.

    Returns:
        The citation with HTML markup. An empty string is returned for styles
        other than ``'wiley'``, ``'acs'`` and ``'rsc'``.

    Raises:
        NotImplementedError: for the ``'acs'`` and ``'rsc'`` styles.
    """
    message = data["message"]

    # grab useful data
    publisher = message["publisher"]
    year = _publication_year(message)
    pages = message.get("page")
    book_title = message["container-title"][0]
    chapter_title = message["title"][0]

    initials, last_names = _initials_and_last_names(message["author"])
    editor_initials, editor_last_names = _initials_and_last_names(_get_book_editors(data))

    citation = ""

    # format the citation correctly
    if style == "wiley":
        names = _join_names_wiley_chapter(
            [f"{last}, {first}" for first, last in zip(initials, last_names)])
        editors = _join_names_wiley_chapter(
            [f"{first} {last}" for first, last in zip(editor_initials, editor_last_names)])

        citation = f"{names} ({year}). {chapter_title}. In: <i>{book_title}</i>"
        # leave out the editor clause and the page range when they are unknown,
        # instead of printing "(ed. [])" or "None"
        if editors:
            citation += f" (ed. {editors})"
        citation += ", "
        if pages:
            citation += f"{pages}: "
        citation += publisher

    elif style == "acs":
        raise NotImplementedError("No support for ACS style yet")

    elif style == "rsc":
        raise NotImplementedError("No support for RSC style yet")

    return citation


def _format_book(data: dict, style: str) -> str:
    """
    Format a book based on the provided data and style.

    key:
        AL: Author last name
        AI: Author initials
        EL: Editor last name
        EI: Editor initials
        Ed: Eds. or Ed. depending on number of editors
        T: Title
        P: Publisher
        C: City
        Y: Year
        E: Edition
        Ch: Chapter
        pp: Page range

    Wiley:
        [AL1], [AI1]; [AL2], [AI2] In [T]; [EL1], [EI1]; [EL2], [EI2], [Ed]; [P]: [C], <b>[Y]</b>; [E], [Ch], pp [pp].

    JCC:
        [AI1] [AL1], [AI2] [AL2], <i>[T]</i>, [E] ([P], [Y]).

    Only the parts of these templates that the code below assembles are emitted
    (no city, edition, chapter or page range).

    Args:
        data: the decoded crossref.org response for the book.
        style: the citation style. ``'wiley'`` and ``'jcc'`` are implemented.

    Returns:
        The citation with HTML markup, or an empty string for an unknown style.

    Raises:
        NotImplementedError: for the ``'acs'`` and ``'rsc'`` styles.
    """
    message = data["message"]

    # grab useful data
    publisher = message["publisher"]
    year = _publication_year(message)
    book_title = message["title"][0]

    initials, last_names = _initials_and_last_names(message["author"])
    editor_initials, editor_last_names = _initials_and_last_names(_get_book_editors(data))

    citation = ""

    # format the citation correctly
    if style == "wiley":
        names = '; '.join(f"{last}, {first}" for first, last in zip(initials, last_names))
        citation = f"{names} In {book_title};"

        if editor_initials:
            editors = '; '.join(
                f"{first} {last}" for first, last in zip(editor_initials, editor_last_names))
            # count the editors, not the characters of the joined string
            ed_signifier = 'Ed.' if len(editor_initials) == 1 else 'Eds.'
            citation += f' {editors}, {ed_signifier};'

        # the leading space separates the publisher from the preceding ';'
        citation += f' {publisher}, <b>{year}</b>.'

    elif style == "jcc":
        names = _join_names_jcc([f"{first} {last}" for first, last in zip(initials, last_names)])
        citation = f"{names}, <i>{book_title}</i> ({publisher}, {year})."

    elif style == "acs":
        raise NotImplementedError("No support for ACS style yet")

    elif style == "rsc":
        raise NotImplementedError("No support for RSC style yet")

    return citation