# Source code for mcot.bibtex.arxiv
"""
Query the arXiv API and NASA ADS database to get access to the arXiv papers
"""
import ads
import arxiv
from . import file, pubmed
from logging import getLogger
# Logger for this module, named via ``__name__``.
logger = getLogger(__name__)
def to_bibtex(article: ads.search.Article):
    """
    Build a bibtex entry from the metadata of an ADS article

    The first entry parsed from ``article.bibtex`` is used. Newlines inside
    every tag value are replaced by single spaces, after which the
    ``abstract`` tag is set to ``article.abstract``.

    :param article: input article
    :return: bibtex entry
    """
    bibtex_set = file.BibTexSet.from_string(article.bibtex)
    # Only the first parsed entry is kept.
    entry = list(bibtex_set.entries.values())[0]
    tags = entry.tags
    # Iterate over a snapshot of the items, as the tags are reassigned in the loop.
    for name, text in list(tags.items()):
        tags[name] = ' '.join(text.split('\n'))
    tags['abstract'] = article.abstract
    return entry
def query_mult(arxiv_ids):
    """
    Convert a sequence of arxiv IDs into a sequence of bibTex entries

    Every ID is normalised before querying:

    - if it starts with ``http``, only the part after the last ``/`` is kept;
    - if it (then) starts with ``arxiv:`` (case-insensitive), that prefix is removed.

    The cleaned IDs are looked up with ``arxiv.query``. For a result with a DOI
    the bibtex entry is obtained through ``pubmed.doi_to_bibtex``; otherwise
    NASA ADS is searched for ``arXiv:<id>`` and the single match is converted
    with :func:`to_bibtex`.

    :param arxiv_ids: sequence of arXiv IDs
    :return: list with one bibtex entry per arXiv ID
    :raises AssertionError: if the number of arXiv results differs from the
        number of requested IDs, or if the ADS search does not return exactly
        one article
    """
    cleaned = []
    for arxiv_id in arxiv_ids:
        if arxiv_id.startswith('http'):
            # NOTE(review): keeps only the last path component, so a URL of an
            # old-style ID containing a slash would lose its archive part - confirm
            arxiv_id = arxiv_id.split('/')[-1]
        if arxiv_id.lower().startswith('arxiv:'):
            # ``str.split`` has no ``nsplit`` keyword; ``maxsplit`` is the
            # correct name (the old spelling raised TypeError for prefixed IDs).
            arxiv_id = arxiv_id.split(':', maxsplit=1)[1]
        cleaned.append(arxiv_id)

    logger.info(f'Extracting arXiv IDs: {", ".join(cleaned)}')
    res = arxiv.query(id_list=cleaned)
    assert len(res) == len(cleaned), "missing output files"

    entries = []
    # Results are paired with the cleaned IDs by position.
    for arxiv_id, q in zip(cleaned, res):
        if q['doi'] is not None:
            # Published paper: resolve the bibtex entry through its DOI.
            entries.append(list(pubmed.doi_to_bibtex(q['doi']).entries.values())[0])
        else:
            # No DOI available: fall back to the NASA ADS record of the preprint.
            articles = list(ads.SearchQuery(q='arXiv:' + arxiv_id))
            assert len(articles) == 1, (
                f"expected exactly one ADS article for arXiv:{arxiv_id}, "
                f"found {len(articles)}"
            )
            entries.append(to_bibtex(articles[0]))
    return entries