Skip to content

Instantly share code, notes, and snippets.

@TerenceLiu98
Last active February 1, 2024 03:23
Show Gist options
  • Save TerenceLiu98/ccddd486bdd9f018e82b161d09fa6680 to your computer and use it in GitHub Desktop.
Save TerenceLiu98/ccddd486bdd9f018e82b161d09fa6680 to your computer and use it in GitHub Desktop.
'''
A scholar metadata provider for calibre-web, based on the Cross Ref API(https://github.com/fabiobatalha/crossrefapi)
Author: Terence Lau
LICENSE: MIT
'''
import itertools
from typing import Dict, List, Optional
from urllib.parse import quote, unquote
from crossref.restful import Works
from cps import logger
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
# Module-level logger obtained from the ``cps.logger`` helper; used by the
# provider below to report failed lookups.
log = logger.create()
class crossref(Metadata):
    """Metadata provider that looks works up through the Crossref API.

    Searches are issued with ``crossref.restful.Works`` and every returned
    work (a plain ``dict``) is converted into a ``MetaRecord``.
    """

    __name__ = "Cross Ref"
    __id__ = "crossref"
    META_URL = "https://crossref.org"
    # Upper bound on the number of works converted per search.
    MAX_RESULTS = 10

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search Crossref for ``query`` and return the matching records.

        Args:
            query: Free-text search string (typically a title).
            generic_cover: Cover value assigned to every record, since the
                parsed fields contain no cover image.
            locale: Passed through to the result parser (currently unused).

        Returns:
            A list with at most ``MAX_RESULTS`` records. The list is empty
            when the provider is inactive or the lookup fails.
        """
        val = []
        if not self.active:
            return val

        title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
        if title_tokens:
            # NOTE(review): tokens are percent-encoded before being handed to
            # the client library; if the library encodes request parameters
            # itself, non-ASCII queries end up double-encoded - confirm.
            tokens = [quote(t.encode("utf-8")) for t in title_tokens]
            query = " ".join(tokens)

        try:
            # The result object is consumed here, inside the ``try``, so that
            # an error raised while iterating it is handled like an error
            # raised while building the query.
            results = list(
                itertools.islice(Works().query(query), self.MAX_RESULTS)
            )
        except Exception as e:
            log.warning(e)
            return []

        for result in results:
            try:
                match = self._parse_search_result(
                    result=result, generic_cover=generic_cover, locale=locale
                )
            except (AttributeError, IndexError, KeyError, TypeError) as e:
                # Skip malformed works instead of returning a placeholder
                # that callers would mistake for a record.
                log.warning("Cross Ref: skipping unparsable result: %s", e)
                continue
            val.append(match)
        return val

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Convert one Crossref work ``dict`` into a ``MetaRecord``.

        Args:
            result: A single work as yielded by the Crossref client.
            generic_cover: Value stored as the record's cover.
            locale: Unused; kept for a signature parallel to ``search``.

        Returns:
            The populated ``MetaRecord``.
        """
        # Prefer the primary resource URL; fall back to the top-level "URL"
        # entry when the nested structure is absent.
        url = (
            result.get("resource", {}).get("primary", {}).get("URL")
            or result.get("URL", "")
        )

        # Crossref delivers "title" as a list of strings; use the first one.
        title = result.get("title", "")
        if isinstance(title, (list, tuple)):
            title = title[0] if title else ""

        authors = []
        for author in result.get("author", []):
            # Entries without given/family parts may carry a single "name"
            # field instead (e.g. organisations).
            name_parts = [
                part
                for part in (author.get("given"), author.get("family"))
                if part
            ]
            full_name = " ".join(name_parts) or author.get("name", "")
            if full_name:
                authors.append(full_name)

        match = MetaRecord(
            id=url,
            title=title,
            authors=authors,
            url=url,
            source=MetaSourceInfo(
                id=self.__id__, description=self.__name__, link=self.META_URL
            ),
        )
        match.cover = generic_cover
        match.description = ""
        match.publisher = result.get("publisher", "")
        # NOTE(review): "indexed" looks like the time Crossref indexed the
        # record rather than the publication date - confirm whether a field
        # such as "issued" should be used instead.
        indexed_at = result.get("indexed", {}).get("date-time", "")
        match.publishedDate = indexed_at.split("T")[0]
        match.identifiers = {"cross-ref": url}
        return match
'''
A scholar metadata provider for calibre-web, based on the semantic scholar API(https://github.com/danielnsilva/semanticscholar)
Author: Terence Lau
LICENSE: MIT
'''
import itertools
from typing import Dict, List, Optional
from urllib.parse import quote, unquote
from semanticscholar import SemanticScholar
from cps import logger
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
# Module-level logger obtained from the ``cps.logger`` helper; used by the
# provider below to report failed lookups.
log = logger.create()
class semantic(Metadata):
    """Metadata provider that looks papers up through Semantic Scholar.

    Searches are issued with ``semanticscholar.SemanticScholar`` and each
    returned paper object is converted into a ``MetaRecord``.
    """

    __name__ = "Semantic Scholar"
    __id__ = "semanticscholar"
    META_URL = "https://www.semanticscholar.org"
    # Upper bound on the number of papers converted per search.
    MAX_RESULTS = 10

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search Semantic Scholar for ``query`` and return matching records.

        Args:
            query: Free-text search string (typically a title).
            generic_cover: Cover value assigned to every record, since the
                parsed fields contain no cover image.
            locale: Unused; accepted for interface compatibility.

        Returns:
            A list with at most ``MAX_RESULTS`` records. The list is empty
            when the provider is inactive, nothing matched, or the lookup
            fails.
        """
        val = []
        if not self.active:
            return val

        title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
        if title_tokens:
            tokens = [quote(t.encode("utf-8")) for t in title_tokens]
            query = " ".join(tokens)

        try:
            results = SemanticScholar().search_paper(query)
            # Taking a bounded slice also copes with an empty result set,
            # which indexing with [0] would not.
            papers = list(itertools.islice(results, self.MAX_RESULTS))
        except Exception as e:
            log.warning(e)
            return []

        for paper in papers:
            val.append(self._parse_paper(paper, generic_cover))
        return val

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Convert the first paper of ``result`` into a ``MetaRecord``.

        Args:
            result: Indexable collection of paper objects; only ``result[0]``
                is read.
            generic_cover: Value stored as the record's cover.
            locale: Unused.

        Returns:
            The populated ``MetaRecord``.

        Raises:
            IndexError: If ``result`` is empty.
        """
        return self._parse_paper(result[0], generic_cover)

    def _parse_paper(self, paper, generic_cover: str) -> MetaRecord:
        """Build a ``MetaRecord`` from a single paper object."""
        url = getattr(paper, "url", None) or ""
        title = getattr(paper, "title", None) or ""
        authors = [
            author.name for author in (getattr(paper, "authors", None) or [])
        ]

        match = MetaRecord(
            id=url,
            title=title,
            authors=authors,
            url=url,
            source=MetaSourceInfo(
                id=self.__id__, description=self.__name__, link=self.META_URL
            ),
        )
        match.cover = generic_cover

        # Fall back to the title when the paper has no abstract.
        abstract = getattr(paper, "abstract", None)
        match.description = abstract if abstract is not None else title

        match.publisher = getattr(paper, "venue", None) or ""

        published = getattr(paper, "publicationDate", None)
        try:
            match.publishedDate = published.strftime("%Y-%m-%d")
        except AttributeError:
            # Date missing (None) or not a datetime-like object.
            match.publishedDate = ""

        match.identifiers = {"semantic-scholar": url}
        return match
@TerenceLiu98
Copy link
Author

  1. To use these metadata providers, first copy the two files into `calibre-web/cps/metadata_provider/`.
  2. Install the dependencies:
    python3 -m pip install semanticscholar crossrefapi
  3. Restart calibre-web.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment