import json
import os
from tcmu import environment, spell_check
from tcmu.cache import cache_file
import numpy as np
# Names exported by ``from <module> import *``: the ``cite`` function plus the
# underscore-prefixed lookup helpers listed here.
__all__ = ["cite", "_get_doi_data", "_get_doi_data_from_title", "_get_doi_data_from_query", "_get_publisher_city", "_get_journal_abbreviation"]
@environment.requires_optional_package("requests")
@cache_file('tcmu_doi_data')
def _get_doi_data(doi: str) -> dict:
    """
    Fetch the crossref.org record that belongs to a DOI.

    Args:
        doi: the DOI to look up.

    Returns:
        The JSON document returned by the crossref ``works`` endpoint, decoded into a dict.

    Raises:
        ValueError: if crossref answers with its "Resource not found." message.
    """
    import requests

    response = requests.get(f"http://api.crossref.org/works/{doi}")
    body = response.text
    # crossref reports an unknown DOI with this literal plain-text body instead of JSON
    if body == "Resource not found.":
        raise ValueError(f"Could not find DOI {doi}.")
    return json.loads(body)
@environment.requires_optional_package("requests")
@cache_file('tcmu_doi_data')
def _get_doi_data_from_title(title: str):
    """
    Find the journal article whose title is closest to ``title`` using the crossref.org API.

    Args:
        title: the (approximate) title of the article to search for.

    Returns:
        The crossref item (a dict) of the best-matching ``journal-article`` result,
        or ``None`` if the search returned no journal articles.
    """
    import requests

    # Let requests build the query string so characters such as "&" or "#"
    # in the title are percent-encoded instead of corrupting the URL.
    response = requests.get("http://api.crossref.org/works", params={"query.title": title}).text
    articles = [row for row in json.loads(response)["message"]["items"] if row['type'] == 'journal-article']
    if not articles:
        return None

    nearnesses = []
    for row in articles:
        # NOTE: this must not be called ``title``; shadowing the argument made
        # every candidate get compared with itself (distance 0 for all of them).
        candidate = ''.join(row.get('title') or [])
        if not candidate:
            # items without a title can never be the best match
            nearnesses.append(float("inf"))
            continue
        # wagner_fischer is presumably an edit distance (smaller is closer) - confirm in spell_check.
        # As before, the distance is normalised by the length of the candidate title.
        nearnesses.append(spell_check.wagner_fischer(title, candidate) / len(candidate))

    return articles[int(np.argmin(nearnesses))]
@environment.requires_optional_package("requests")
@cache_file('tcmu_doi_data')
def _get_doi_data_from_query(**queries):
    """
    Search crossref.org with field queries and return the best-matching journal article.

    Args:
        **queries: crossref field queries given as keyword arguments. Underscores in the
            keyword are turned into dashes (``publisher_name`` -> ``publisher-name``).
            A value can be a single string or an iterable of strings (the field query is
            then repeated for every element); ``None`` values are skipped.

    Returns:
        The crossref item (a dict) of the selected ``journal-article`` result, or ``None``
        if the search returned no journal articles. If a ``title`` query was given the item
        with the closest title is selected, otherwise the first item returned by crossref.

    Raises:
        ValueError: if a keyword is not one of the supported crossref field queries.
    """
    import requests

    valid_queries = [
        "affiliation",
        "degree",
        "event-acronym",
        "bibliographic",
        "container-title",
        "publisher-name",
        "author",
        "event-theme",
        "standards-body-acronym",
        "chair",
        "event-location",
        "translator",
        "funder-name",
        "event-name",
        "publisher-location",
        "title",
        "standards-body-name",
        "contributor",
        "description",
        "editor",
        "event-sponsor"
    ]

    # Collect (key, value) pairs; requests percent-encodes them, so special characters
    # such as "&" no longer have to be stripped from the values by hand.
    params = []
    for query, value in queries.items():
        if value is None:
            continue
        query = query.replace('_', '-')
        if query not in valid_queries:
            raise ValueError(f'Query "{query}" is not a valid query!')
        values = [value] if isinstance(value, str) else list(value)
        params.extend([(f'query.{query}', v) for v in values])

    response = requests.get("http://api.crossref.org/works", params=params).text
    articles = [row for row in json.loads(response)["message"]["items"] if row['type'] == 'journal-article']
    if not articles:
        return None

    # the title(s) the caller asked for; these are what the candidates are ranked against
    wanted = queries.get("title")
    if wanted is None:
        wanted_titles = []
    elif isinstance(wanted, str):
        wanted_titles = [wanted]
    else:
        wanted_titles = list(wanted)

    nearnesses = []
    for row in articles:
        candidate = ''.join(row.get('title') or [])
        if not candidate or not wanted_titles:
            # nothing to compare: all such rows tie, so argmin falls back to crossref's own order
            nearnesses.append(float("inf"))
            continue
        # wagner_fischer is presumably an edit distance (smaller is closer) - confirm in spell_check.
        # Previously the candidate was compared with itself, which made every distance 0.
        distance = min(spell_check.wagner_fischer(wanted_title, candidate) for wanted_title in wanted_titles)
        nearnesses.append(distance / len(candidate))

    return articles[int(np.argmin(nearnesses))]
# data = _get_doi_data_from_query(title="AGMFNet: Attention-guided multi-scale feature fusion network for infrared small target detection",
# author=("He", "Liu", "Yang", "Yuan"), publisher_name="Elsevier")
# print(data)
# exit()
@environment.requires_optional_package("requests")
@cache_file('tcmu_journal_abbrvs')
def _get_journal_abbreviation(journal: str) -> str:
    """
    Get the journal name abbreviation using the abbreviso API.

    Args:
        journal: the name of the journal to get the abbreviation of.

    Returns:
        The text returned by the abbreviso service with HTML character references
        (for example ``&amp;``) converted back to plain characters.
    """
    import html

    import requests

    abbreviation = requests.get(f"https://abbreviso.toolforge.org/a/{journal}").text
    # The old ``replace('amp;', '&')`` left the leading "&" of "&amp;" in place and so
    # produced "&&"; decode the character references properly instead.
    return html.unescape(abbreviation)
@cache_file('tcmu_publisher_city')
def _get_publisher_city(publisher: str) -> str:
    """
    Get the city of a publisher.

    The lookup table is read from ``data/cite/_publisher_cities.json`` next to this file.

    Args:
        publisher: the publisher name, used as key in the lookup table.

    Returns:
        The value stored for ``publisher``, or ``None`` if the publisher is not in the table.
    """
    table_path = os.path.join(os.path.dirname(__file__), "data", "cite", "_publisher_cities.json")
    # JSON is UTF-8; do not depend on the platform default encoding (e.g. cp1252 on Windows)
    with open(table_path, encoding="utf-8") as table_file:
        cities = json.load(table_file)
    return cities.get(publisher)
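# [docs]  (presumably a documentation-viewer link marker picked up with the source; commented out so it is not evaluated as code)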
@cache_file('tcmu_citation')
def cite(doi: str, style: str = "wiley", mode: str = "html") -> str:
    """
    Format an article, book chapter or book in a certain citation style.

    Args:
        doi: the DOI to generate a citation for.
        style: the style formatting to use. Can be ``['wiley', 'acs', 'rsc', 'jcc']``.
        mode: the formatting mode. Can be ``['html', 'latex', 'plain']``.

    Returns:
        The formatted citation. In ``html`` mode it contains ``<i>``/``<b>`` (and for some
        styles ``<a>``) tags; in ``latex`` mode italics and bold become ``\\textit{}`` and
        ``\\textbf{}``; in ``plain`` mode all markup is removed. An empty string is returned
        when the crossref record type is not one of the supported types.
    """
    import re

    # check if the style and mode were correctly given
    # (spell_check.check presumably rejects values that are not in the list - confirm in spell_check)
    spell_check.check(style, ["wiley", "acs", "rsc", "jcc"])
    spell_check.check(mode, ["html", "latex", "plain"])

    # get the information about the DOI
    data = _get_doi_data(doi)
    record_type = data["message"]["type"]

    citation = ""
    if record_type == "journal-article":
        citation = _format_article(data, style)
    elif record_type == "book-chapter":
        citation = _format_book_chapter(data, style)
    elif record_type == "monograph" or (record_type == "other" and "ISBN" in data["message"]):
        citation = _format_book(data, style)

    if mode in ("plain", "latex"):
        # The jcc article style wraps the DOI link in ``<a href=URL>URL</a>``. That is HTML-only
        # markup, so keep just the URL text. The back-reference makes the match robust against
        # DOIs that themselves contain ">" characters.
        citation = re.sub(r"<a href=(.+?)>\1</a>", r"\1", citation)

    if mode == "plain":
        for tag in ("<i>", "</i>", "<b>", "</b>"):
            citation = citation.replace(tag, "")
    elif mode == "latex":
        citation = citation.replace("<i>", r"\textit{")
        citation = citation.replace("</i>", "}")
        citation = citation.replace("<b>", r"\textbf{")
        citation = citation.replace("</b>", "}")

    return citation
def get_pages(data):
    """
    Determine the page reference to print for a crossref record.

    Args:
        data: decoded crossref response; ``data["message"]["URL"]`` must be present.

    Returns:
        In order of precedence: ``"e"`` followed by whatever comes after the last ``"ceur."``
        in the record URL, the ``page`` field with hyphens replaced by en dashes, the
        ``article-number`` field, or ``None`` if none of these is available.
    """
    record = data["message"]
    link = record["URL"]

    # URLs that contain "ceur." win over the page / article-number fields
    if "ceur." in link:
        return "e" + link.rpartition("ceur.")[2]
    if "page" in record:
        return record["page"].replace("-", "–")
    if "article-number" in record:
        return record["article-number"]
    return None
def is_accepted(data):
    """
    Tell whether a crossref record carries an assertion named ``"accepted"``.

    Args:
        data: decoded crossref response; the optional ``data["message"]["assertion"]`` list is inspected.

    Returns:
        ``True`` if any assertion has the name ``"accepted"``, otherwise ``False``.
    """
    assertions = data["message"].get("assertion", [])
    return any(entry["name"] == "accepted" for entry in assertions)
def _format_article(data: dict, style: str) -> str:
    """
    Format a journal article in the requested citation style.

    Args:
        data: decoded crossref response for the article (the ``message`` entry is used).
        style: one of ``'wiley'``, ``'acs'``, ``'rsc'`` or ``'jcc'``.

    Returns:
        The citation with HTML markup (``<i>``, ``<b>`` and, for jcc, an ``<a>`` link),
        or an empty string if ``style`` is not one of the styles above.
    """
    # grab useful data
    message = data["message"]
    journal = message["container-title"][0]
    if journal == 'physica status solidi (a)':
        journal = 'Physica Status Solidi A'
    journal_abbreviation = _get_journal_abbreviation(journal)
    year = message["issued"]["date-parts"][0][0]
    volume = message.get("volume", "")
    pages = get_pages(data)
    title = message["title"][0]
    doi = message["DOI"]
    # accepted = is_accepted(data)  # not using this one yet

    citation = ""

    # Get the initials from the author given names, also store the family names.
    # Crossref contributors do not always have both parts: "given" can be missing and
    # organisations only carry a "name", so none of the keys may be assumed present.
    initials = []
    last_names = []
    for author in message["author"]:
        # the capital letters of the title-cased given names become the initials
        given = author.get("given", "")
        initials.append(" ".join(char + "." for char in given.title() if char.isupper()))
        if "family" in author:
            last_names.append(author["family"].title())
        else:
            # keep organisation names as they are, title-casing would mangle acronyms
            last_names.append(author.get("name", ""))

    # "F. Last" and "Last, F." spellings; empty parts are left out so that no stray
    # space or comma appears for authors without initials
    first_last = [" ".join(part for part in (first, last) if part) for first, last in zip(initials, last_names)]
    last_first = [", ".join(part for part in (last, first) if part) for first, last in zip(initials, last_names)]

    # format the citation correctly
    if style == "wiley":
        citation = f"{', '.join(first_last)}, <i>{journal_abbreviation}</i> <b>{year}</b>, <i>{volume}</i>"
        if pages:
            citation += f", {pages}"
        citation += "."
    elif style == "acs":
        citation = f"{'; '.join(last_first)} {title} <i>{journal_abbreviation}</i> <b>{year}</b>, <i>{volume}</i>"
        if pages:
            citation += f", {pages}"
        citation += f". DOI: {doi}"
    elif style == "rsc":
        citation = f"{', '.join(first_last)}, <i>{journal_abbreviation}</i> {year}, <b>{volume}</b>"
        if pages:
            citation += f", {pages}"
        citation += "."
    elif style == "jcc":
        names = first_last
        if len(names) == 1:
            names = names[0]
        elif len(names) == 2:
            names = f'{names[0]} and {names[1]}'
        elif len(names) < 7:
            names = ', '.join(names[:-1]) + f', and {names[-1]}'
        else:
            # seven or more authors: only the first three are listed
            names = ', '.join(names[:3]) + ', et al.'
        # jcc uses the full journal name, not the abbreviation
        citation = f"{names}, \"{title},\" <i>{journal}</i> {volume} ({year})"
        if pages:
            citation += f": {pages}"
        citation += f", <a href=https://doi.org/{doi}>https://doi.org/{doi}</a>."

    return citation
def _format_book_chapter(data: dict, style: str) -> str:
# grab usefull data
publisher = data["message"]["publisher"]
if "published-print" in data["message"]:
year = data["message"]["published-print"]["date-parts"][0][0]
else:
year = data["message"]["published"]["date-parts"][0][0]
pages = data["message"].get("page")
book_title = data["message"]["container-title"][0]
chapter_title = data["message"]["title"][0]
# city = _get_publisher_city(publisher)
citation = ""
original_book_data = None
for isbn in data["message"]["isbn-type"]:
if isbn["type"] == "electronic":
try:
original_book_data = _get_doi_data(f"{data['message']['prefix']}/{isbn['value']}")
except:
pass
break
# Get the initials from the author given names
# also store the family names
n_authors = len(data["message"]["author"])
initials = []
last_names = []
for author in data["message"]["author"]:
# we get the capital letters from the first names
# these will become the initials for this author
firsts = [char + "." for char in author["given"].title() if char.isupper()]
firsts = " ".join(firsts)
initials.append(firsts)
last_names.append(author["family"].title())
if original_book_data and "editor" in original_book_data["message"]:
n_editors = len(original_book_data["message"]["editor"])
editor_initials = []
editor_last_names = []
for author in original_book_data["message"]["editor"]:
# we get the capital letters from the first names
# these will become the initials for this author
firsts = [char + "." for char in author["given"].title() if char.isupper()]
firsts = " ".join(firsts)
editor_initials.append(firsts)
editor_last_names.append(author["family"].title())
else:
n_editors = 0
editor_initials = []
editor_last_names = []
# format the citation correctly
if style == "wiley":
names = [f"{last}, {first}" for first, last in zip(initials, last_names)]
editors = [f"{first} {last}" for first, last in zip(editor_initials, editor_last_names)]
if n_authors == 1:
names = names[0]
if 1 < n_authors < 4:
names = ", ".join(names[:-1]) + " and " + names[-1]
if n_authors >= 4:
names = ", ".join(names[:3]) + " et al."
if n_editors == 1:
editors = editors[0]
if 1 < n_editors < 4:
editors = ", ".join(editors[:-1]) + " and " + editors[-1]
if n_editors >= 4:
editors = ", ".join(editors[:3]) + " et al."
citation = f"{names} ({year}). {chapter_title}. In: <i>{book_title}</i> (ed. {editors}), {pages}: {publisher}"
elif style == "acs":
raise NotImplementedError("No support for ACS style yet")
elif style == "rsc":
raise NotImplementedError("No support for RSC style yet")
return citation
def _format_book(data: dict, style: str) -> str:
'''
Format a book based on the provided data and style
key:
AL: Author last name
AI: Author initials
EL: Editor last name
EI: Editor initials
Ed: Eds. or Ed. depending on number of editors
T: Title
P: Publisher
C: City
Y: Year
E: Edition
Ch: Chapter
pp: Page range
Wiley:
[AL1], [AI1]; [AL2], [AI2] In [T]; [EL1], [EI1]; [EL2], [EI2], [Ed]; [P]: [C], <b>[Y]</b>; [E], [Ch], pp [pp].
JCC:
[AI1] [AL1], [AI2] [AL2], <i>[T]</i>, [E] ([P], [Y]).
'''
# grab usefull data
publisher = data["message"]["publisher"]
if "published-print" in data["message"]:
year = data["message"]["published-print"]["date-parts"][0][0]
else:
year = data["message"]["published"]["date-parts"][0][0]
pages = data["message"].get("page")
book_title = data["message"]["title"][0]
chapter_title = data["message"]["title"][0]
# city = _get_publisher_city(publisher)
citation = ""
original_book_data = None
for isbn in data["message"]["isbn-type"]:
if isbn["type"] == "electronic":
try:
original_book_data = _get_doi_data(f"{data['message']['prefix']}/{isbn['value']}")
except:
pass
break
# Get the initials from the author given names
# also store the family names
n_authors = len(data["message"]["author"])
initials = []
last_names = []
for author in data["message"]["author"]:
# we get the capital letters from the first names
# these will become the initials for this author
firsts = [char + "." for char in author["given"].title() if char.isupper()]
firsts = " ".join(firsts)
initials.append(firsts)
last_names.append(author["family"].title())
if original_book_data is not None and "editor" in original_book_data["message"]:
n_editors = len(original_book_data["message"]["editor"])
editor_initials = []
editor_last_names = []
for author in original_book_data["message"]["editor"]:
# we get the capital letters from the first names
# these will become the initials for this author
firsts = [char + "." for char in author["given"].title() if char.isupper()]
firsts = " ".join(firsts)
editor_initials.append(firsts)
editor_last_names.append(author["family"].title())
else:
n_editors = 0
editor_initials = []
editor_last_names = []
# format the citation correctly
if style == "wiley":
names = [f"{last}, {first}" for first, last in zip(initials, last_names)]
names = '; '.join(names)
editors = [f"{first} {last}" for first, last in zip(editor_initials, editor_last_names)]
editors = '; '.join(editors)
ed_signifier = 'Ed.' if len(editors) == 1 else 'Eds.'
editors = f' {editors}, {ed_signifier};'
# citation = f"{names} ({year}). {chapter_title}. In: <i>{book_title}</i> (ed. {editors}), {pages}: {publisher}"
citation = f"{names} In {book_title};"
if len(editor_initials) > 0:
citation += editors
citation += f'{publisher}, <b>{year}</b>.'
elif style == "jcc":
names = [f"{first} {last}" for first, last in zip(initials, last_names)]
if len(names) == 1:
names = names[0]
elif len(names) == 2:
names = f'{names[0]} and {names[1]}'
elif len(names) < 7:
names = ', '.join(names[:-1]) + f', and {names[-1]}'
else:
names = ', '.join(names[:3]) + ', et al.'
citation = f"{names}, <i>{book_title}</i> ({publisher}, {year})."
elif style == "acs":
raise NotImplementedError("No support for ACS style yet")
elif style == "rsc":
raise NotImplementedError("No support for RSC style yet")
return citation