Last active
December 1, 2024 11:16
-
-
Save whitead/5aa3d2867af434e0222e4373e1c2534d to your computer and use it in GitHub Desktop.
Compute number of self citations with Semantic Scholar
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# License: CC0
import unicodedata

import httpx
def _name_tokens(name):
    """Return the set of comparable words in a personal name.

    The name is accent-folded (NFKD, combining marks dropped), case-folded,
    stripped of periods and split on whitespace. Words of a single character
    are treated as initials and discarded.

    Args:
        name: The author name, or ``None``/empty when unknown.

    Returns:
        A set of normalized words; empty when *name* is falsy or consists
        only of initials.
    """
    if not name:
        return set()
    decomposed = unicodedata.normalize("NFKD", name)
    plain = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    words = plain.casefold().replace(".", "").split()
    return {word for word in words if len(word) > 1}


def _names_overlap(tokens_a, tokens_b):
    """Return True when one non-empty token set is contained in the other.

    The check is symmetric so that an abbreviated name matches its fuller
    spelling regardless of which side is abbreviated. An empty set never
    matches: a name made only of initials carries no evidence of identity
    (and an empty set would otherwise be a subset of every name).
    """
    if not tokens_a or not tokens_b:
        return False
    return tokens_a <= tokens_b or tokens_b <= tokens_a


def _count_self_citations(paper):
    """Count, per paper author, the references that share an author name.

    Args:
        paper: Mapping with an ``authors`` list and a ``references`` list;
            each reference carries its own ``authors`` list, and every author
            is a mapping with a ``name`` entry. Missing or ``None`` entries
            are tolerated and treated as empty.

    Returns:
        Dict mapping each named paper author to the number of references
        with at least one author whose name overlaps theirs. Paper authors
        without a name are omitted.
    """
    # Tokenize every paper author once instead of once per reference.
    tokens_by_author = {}
    for entry in paper.get("authors") or []:
        name = entry.get("name")
        if name:
            tokens_by_author[name] = _name_tokens(name)

    counts = dict.fromkeys(tokens_by_author, 0)
    for ref in paper.get("references") or []:
        ref_authors = (ref or {}).get("authors") or []
        ref_token_sets = [_name_tokens(ra.get("name")) for ra in ref_authors]
        for name, tokens in tokens_by_author.items():
            if any(_names_overlap(tokens, other) for other in ref_token_sets):
                counts[name] += 1
    return counts


async def analyze_self_citations(doi):
    """Print each author's self-citation rate for the paper with this DOI.

    Requests the paper's title, authors and reference authors from the
    Semantic Scholar Graph API, then prints, for every author of the paper,
    the share of references that include an author with an overlapping name.

    Args:
        doi: DOI of the paper, interpolated into the request URL as
            ``DOI:{doi}``.

    Raises:
        ValueError: If the response contains no references.
        Exception: Whatever ``response.raise_for_status()`` raises for a
            non-success HTTP status.
    """
    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"https://api.semanticscholar.org/graph/v1/paper/DOI:{doi}",
            params={"fields": "title,authors,references.authors"},
        )
        response.raise_for_status()
        paper = response.json()

    references = paper.get("references")
    if not references:
        # Dump the raw payload so the caller can see what the API returned.
        print(paper)
        raise ValueError("Could not find references from Semantic Scholar")

    counts = _count_self_citations(paper)
    total = len(references)  # non-zero: guarded by the check above
    print(f"Self citations in \"{paper['title']}\"")
    print(f" N = {total}")
    for author, self_cites in counts.items():
        print(f" {author}: {self_cites/total:.2%} self citation")
await analyze_self_citations("10.1038/s42256-024-00832-8") | |
# Self citations in "Augmenting large language models with chemistry tools" | |
# N = 114 | |
# Andrés M Bran: 0.00% self citation | |
# Sam Cox: 0.88% self citation | |
# Oliver Schilter: 0.00% self citation | |
# Carlo Baldassari: 0.00% self citation | |
# Andrew D. White: 5.26% self citation | |
# P. Schwaller: 6.14% self citation | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment