# Source code for mcot.bibtex.arxiv

"""
Query the arXiv API and NASA ADS database to get access to the arXiv papers
"""
import ads
import arxiv
from . import file, pubmed
from logging import getLogger
logger = getLogger(__name__)


def to_bibtex(article: ads.search.Article):
    """
    Build a bibtex entry from the metadata of an ADS article.

    The bibtex string attached to the article is parsed, every tag value is
    flattened onto a single line (newlines become spaces), and the article's
    abstract is stored under the ``abstract`` tag.

    :param article: ADS article whose ``bibtex`` and ``abstract`` are read
    :return: first entry parsed from the article's bibtex string, with the
        tag updates described above applied
    """
    parsed_set = file.BibTexSet.from_string(article.bibtex)
    entry = list(parsed_set.entries.values())[0]

    # Work on a snapshot of the items because the tags are reassigned in the loop.
    for tag_name, tag_value in tuple(entry.tags.items()):
        entry.tags[tag_name] = tag_value.replace('\n', ' ')

    entry.tags['abstract'] = article.abstract
    return entry
def _clean_arxiv_id(arxiv_id):
    """
    Strip a URL and/or ``arXiv:`` prefix from a single arXiv identifier.

    :param arxiv_id: arXiv ID, optionally given as a URL or with an
        ``arXiv:`` prefix (matched case-insensitively)
    :return: the identifier with those prefixes removed
    """
    if arxiv_id.startswith('http'):
        # Keep only the last path component of the URL.
        # NOTE(review): a legacy ID that itself contains a slash
        # (e.g. ``hep-th/9901001``) loses its archive prefix here.
        arxiv_id = arxiv_id.split('/')[-1]
    if arxiv_id.lower().startswith('arxiv:'):
        # ``maxsplit`` is the keyword accepted by ``str.split``; only the first
        # colon separates the prefix from the identifier.
        arxiv_id = arxiv_id.split(':', maxsplit=1)[1]
    return arxiv_id


def query_mult(arxiv_ids):
    """
    Convert a sequence of arxiv IDs into a sequence of bibTex entries

    Each ID is first normalised (URL and ``arXiv:`` prefixes removed) and the
    whole batch is sent to the arXiv API in one query.  For a result with a
    DOI the entry is obtained through ``pubmed.doi_to_bibtex``; otherwise the
    paper is looked up in ADS and converted with ``to_bibtex``.

    :param arxiv_ids: sequence of arXiv IDs
    :return: list of bibtex entries, one per input ID; an empty input yields
        an empty list without any remote query
    :raises AssertionError: if arXiv returns a different number of results
        than requested, or if ADS does not return exactly one match
    """
    cleaned = [_clean_arxiv_id(arxiv_id) for arxiv_id in arxiv_ids]
    if not cleaned:
        # Nothing to look up: skip the log line and the remote request.
        return []

    logger.info(f'Extracting arXiv IDs: {", ".join(cleaned)}')
    res = arxiv.query(id_list=cleaned)
    assert len(res) == len(cleaned), "missing output files"

    entries = []
    # NOTE(review): pairing by position assumes arXiv returns the results in
    # the same order as ``id_list`` -- confirm against the arxiv package.
    for arxiv_id, q in zip(cleaned, res):
        if q['doi'] is not None:
            entries.append(list(pubmed.doi_to_bibtex(q['doi']).entries.values())[0])
        else:
            articles = list(ads.SearchQuery(q='arXiv:' + arxiv_id))
            assert len(articles) == 1, "expected exactly one ADS match"
            entries.append(to_bibtex(articles[0]))
    return entries