Skip to content

Instantly share code, notes, and snippets.

@davidfoerster
Created April 17, 2019 10:07
Show Gist options
  • Save davidfoerster/8d65f4803bfde6eaf1df0f0f65ae2438 to your computer and use it in GitHub Desktop.
Save davidfoerster/8d65f4803bfde6eaf1df0f0f65ae2438 to your computer and use it in GitHub Desktop.
Fetch author citation benchmarks from Google Scholar
#!/usr/bin/python3 -OO
import sys
import operator
import scholarly
import urllib.parse
# Author attributes reported when none are named on the command line.
# Each metric is followed by its "5y" counterpart.
DEFAULT_ATTRIBUTES = (
    'citedby',
    'citedby5y',
    'hindex',
    'hindex5y',
    'i10index',
    'i10index5y',
)
def _scholar_id_from_field(field):
    """Return the Google Scholar user ID encoded in *field*.

    A field containing ``=`` is treated as a profile URL whose path must be
    ``/citations`` and whose query string carries the ID in its ``user``
    parameter. Any other field is taken to be the ID itself.

    Raises:
        ValueError: if the URL path is not ``/citations``, the query string
            is malformed, or it has no ``user`` parameter.
    """
    if '=' not in field:
        return field

    gs_url = urllib.parse.urlparse(field)
    # Raise explicitly instead of using ``assert``: the script's shebang runs
    # Python with -OO, which strips assert statements.
    if gs_url.path != '/citations':
        raise ValueError('Unexpected URL: ' + str(gs_url))

    query = urllib.parse.parse_qs(
        gs_url.query, strict_parsing=True, errors='strict')
    try:
        return query['user'][0]
    except KeyError:
        raise ValueError("Missing 'user' parameter in URL: " + field) from None


def main(attributes=None):
    """Annotate tab-separated author records from stdin with Scholar metrics.

    A header line (``Name`` followed by the attribute names) is printed
    first. Each input line is then split on tabs; the first column is kept
    and the second column is read as a Google Scholar ID or profile URL.
    When the second column is missing, empty or ``-``, only the first column
    is printed. Otherwise every column after the first is replaced by the
    requested attribute values of the fetched author.

    Args:
        attributes: sequence of author attribute names to report. ``None``
            means "use the command-line arguments"; an empty sequence falls
            back to ``DEFAULT_ATTRIBUTES``.

    Raises:
        ValueError: if a profile URL in the input cannot be interpreted.
    """
    if attributes is None:
        attributes = sys.argv[1:]
    if not attributes:
        attributes = DEFAULT_ATTRIBUTES
    # Materialize once: the names are used for the header, the getter and
    # the single-attribute check below.
    attributes = tuple(attributes)

    print('Name', *attributes, sep='\t')
    attrgetter = operator.attrgetter(*attributes)
    # attrgetter returns a bare value (not a 1-tuple) for a single name.
    single_attribute = len(attributes) == 1

    for line in sys.stdin:
        record = line.rstrip('\n').split('\t')
        has_profile = len(record) >= 2 and record[1] and record[1] != '-'
        if has_profile:
            gs_id = _scholar_id_from_field(record[1])
            # NOTE(review): relies on fill() returning the populated author
            # object -- confirm against the installed scholarly version.
            author = scholarly.Author(gs_id).fill()
            values = attrgetter(author)
            record[1:] = (values,) if single_attribute else values
        else:
            del record[1:]
        print(*record, sep='\t')
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment