Created
May 5, 2020 20:58
-
-
Save bnewbold/9918634282f6013e13174badbce64a93 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Dump journal and article metadata from the ArticleMeta API to stdout.

Depends on:
- articlemetaapi

Refs:
- https://github.com/scieloorg/articlemetaapi/blob/master/articlemeta/client.py
- https://github.com/scieloorg/xylose/blob/master/xylose/scielodocument.py
""" | |
import os, sys, json, argparse | |
from articlemeta.client import RestfulClient | |
from xylose.scielodocument import Article, Journal, Issue, Citation | |
def reduce_dict(d):
    """
    Strip every entry whose value is None out of a dict, in place.

    The same dict object is returned for convenience. Only ``None`` is
    dropped; other falsy values (0, '', [], ...) are kept. The point is to
    make the eventual JSON output less verbose.
    """
    # Collect the keys first so the dict is not resized while iterating it.
    empty_keys = [name for name, value in d.items() if value is None]
    for name in empty_keys:
        del d[name]
    return d
def obj_to_dict(obj):
    """
    Flatten an Article, Journal, Issue or Citation into a plain dict.

    Journal ref: https://github.com/scieloorg/xylose/blob/master/xylose/scielodocument.py#L688

    Every public attribute listed by dir() is copied into the result.
    @property fields are already evaluated by getattr; attributes that come
    back as bound methods are called with no arguments. A few names are
    skipped outright (see ``skipped`` below).

    Anything that is not exactly one of the four known types is returned
    unchanged.
    """
    # sometimes... Citations? are not objects
    if type(obj) not in (Article, Journal, Issue, Citation):
        return obj

    skipped = ('data', 'bibliographic_legends', 'any_issn')
    fields = {}
    for name in dir(obj):
        if name.startswith('_') or name in skipped:
            continue
        value = getattr(obj, name)
        # A bound method is a regular (non-@property) method: call it to get
        # the actual value. Everything else is stored as-is.
        fields[name] = value() if type(value).__name__ == 'method' else value

    # article specific: nested objects get flattened recursively
    if fields.get('citations'):
        fields['citations'] = [
            reduce_dict(obj_to_dict(cite)) for cite in fields['citations']
        ]
    for nested in ('journal', 'issue'):
        if fields.get(nested):
            fields[nested] = obj_to_dict(fields[nested])
    return fields
def run_articles():
    """
    Print every document from RestfulClient.documents_bulk() to stdout,
    one key-sorted JSON object per line.
    """
    client = RestfulClient()
    for doc in client.documents_bulk():
        print(json.dumps(obj_to_dict(doc), sort_keys=True))
def run_article_ids():
    """
    Print each identifier yielded by
    RestfulClient.documents_by_identifiers(only_identifiers=True),
    one per line.
    """
    client = RestfulClient()
    identifiers = client.documents_by_identifiers(only_identifiers=True)
    for identifier in identifiers:
        print(identifier)
def run_journals():
    """
    Print every journal from RestfulClient.journals() to stdout,
    one key-sorted JSON object per line.
    """
    client = RestfulClient()
    for entry in client.journals():
        print(json.dumps(obj_to_dict(entry), sort_keys=True))
def main():
    """
    Command-line entry point: parse the sub-command and run its handler.

    Sub-commands:
      journals     -> run_journals
      articles     -> run_articles
      article-ids  -> run_article_ids

    When no sub-command is given, a hint is printed to stderr and the
    process exits with status -1.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    subparsers = parser.add_subparsers()

    sub = subparsers.add_parser('journals',
        help="print all journals to stdout as JSON-per-line")
    sub.set_defaults(func=run_journals)

    sub = subparsers.add_parser('articles',
        help="print all articles to stdout as JSON-per-line")
    sub.set_defaults(func=run_articles)

    # This command prints bare identifiers, not JSON documents.
    sub = subparsers.add_parser('article-ids',
        help="print all article identifiers to stdout, one per line")
    sub.set_defaults(func=run_article_ids)

    args = parser.parse_args()
    # 'func' is only set when a sub-command was selected.
    func = getattr(args, "func", None)
    if not func:
        # stdout is the data stream; keep usage hints out of it.
        print("tell me what to do! (try --help)", file=sys.stderr)
        sys.exit(-1)
    func()
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment