forked from langchain-ai/langchain
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbibtex.py
87 lines (73 loc) · 2.42 KB
/
bibtex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""Util that calls bibtexparser."""
import logging
from typing import Any, Dict, List, Mapping
from pydantic import BaseModel, Extra, root_validator
logger = logging.getLogger(__name__)
OPTIONAL_FIELDS = [
"annotate",
"booktitle",
"editor",
"howpublished",
"journal",
"keywords",
"note",
"organization",
"publisher",
"school",
"series",
"type",
"doi",
"issn",
"isbn",
]
class BibtexparserWrapper(BaseModel):
"""Wrapper around bibtexparser.
To use, you should have the ``bibtexparser`` python package installed.
https://bibtexparser.readthedocs.io/en/master/
This wrapper will use bibtexparser to load a collection of references from
a bibtex file and fetch document summaries.
"""
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that the python package exists in environment."""
try:
import bibtexparser # noqa
except ImportError:
raise ImportError(
"Could not import bibtexparser python package. "
"Please install it with `pip install bibtexparser`."
)
return values
def load_bibtex_entries(self, path: str) -> List[Dict[str, Any]]:
"""Load bibtex entries from the bibtex file at the given path."""
import bibtexparser
with open(path) as file:
entries = bibtexparser.load(file).entries
return entries
def get_metadata(
self, entry: Mapping[str, Any], load_extra: bool = False
) -> Dict[str, Any]:
"""Get metadata for the given entry."""
publication = entry.get("journal") or entry.get("booktitle")
if "url" in entry:
url = entry["url"]
elif "doi" in entry:
url = f'https://doi.org/{entry["doi"]}'
else:
url = None
meta = {
"id": entry.get("ID"),
"published_year": entry.get("year"),
"title": entry.get("title"),
"publication": publication,
"authors": entry.get("author"),
"abstract": entry.get("abstract"),
"url": url,
}
if load_extra:
for field in OPTIONAL_FIELDS:
meta[field] = entry.get(field)
return {k: v for k, v in meta.items() if v is not None}