Skip to content

Instantly share code, notes, and snippets.

@Phoenix-Effect
Created July 27, 2018 01:28
Show Gist options
  • Save Phoenix-Effect/b1e0acc4e494e813e4c56a502d7e3894 to your computer and use it in GitHub Desktop.
Save Phoenix-Effect/b1e0acc4e494e813e4c56a502d7e3894 to your computer and use it in GitHub Desktop.
Downloads a batch of DOIs from Airtable, fetches the metadata related to each DOI, and uploads it back to Airtable.
from habanero import Crossref
from airtable import Airtable
from airtable.auth import AirtableAuth
import time
import pprint
import requests
import datetime
import html.parser
# Airtable connection settings. The values below look like placeholders and
# presumably must be replaced with real credentials/names before running.
# NOTE(review): avoid committing a real API key in this file.
apikey = "API KEY"  # passed as the third argument to Airtable(...)
baseurl = "BASE ID"  # passed as the first argument to Airtable(...)
tableid = "ENTER TABLE NAME"  # passed as the second argument to Airtable(...)
view = "ENTER VIEW NAME"  # passed as view= to Airtable.get_all(...)
# string to length
def string_to_length(string):
    """Turn a page string into a numeric string.

    Without a ``-`` the digits found in ``string`` are returned as-is (so
    ``"e123"`` gives ``"123"``, and a string with no digits gives ``""``).
    With a ``-`` the text before the first dash and the text between the
    first and second dash are each reduced to their digits, and the inclusive
    span ``right - left + 1`` is returned as a string.

    Args:
        string: Page text such as ``"10-20"`` or ``"e123"``.

    Returns:
        The resulting number as a ``str``; ``"0"`` when a dashed range has a
        side that cannot be read as an integer (e.g. ``"iv-x"`` or ``"12-"``).
    """
    def _digits(text):
        # Keep only the numeric characters of *text*.
        return ''.join(ch for ch in text if ch.isnumeric())

    if '-' not in string:
        return _digits(string)

    parts = string.split('-')
    left, right = _digits(parts[0]), _digits(parts[1])
    try:
        span = int(right) - int(left) + 1
    except ValueError:
        # One side had no usable digits (roman numerals, open-ended range,
        # non-decimal numeric characters). "0" matches the value
        # doi_to_line uses when no page information is available.
        return "0"
    return str(span)
def doi_to_line(y):
    """Flatten a Crossref ``works`` response into a flat dict of fields.

    Args:
        y: Response dict with a ``'status'`` key and, when the status is
            ``"ok"``, a ``'message'`` dict holding the work's metadata.

    Returns:
        A dict with the keys ``ref-count``, ``publisher``, ``DOI``, ``type``,
        ``created``, ``length``, ``source``, ``ref-by``, ``title``, ``url``,
        ``auth-FL``, ``auth-LF`` and ``subject``; or the string ``"Error"``
        when ``y['status']`` is not ``"ok"``.

    Raises:
        KeyError: If the message lacks one of the fields read
            unconditionally (``reference-count``, ``publisher``, ``DOI``,
            ``type``, ``created``, ``source``, ``is-referenced-by-count``).
    """
    if y['status'] != "ok":
        return "Error"

    message = y['message']

    # 'title' and 'link' may be absent or empty lists; fall back to ""
    # instead of failing on the [0] lookup.
    titles = message.get('title') or []
    links = message.get('link') or []

    if 'author' in message:
        # Only authors carrying both name parts are listed; entries without
        # them are skipped.
        named = [
            author for author in message['author']
            if 'given' in author and 'family' in author
        ]
        auth_fl = ", ".join(a['given'] + ' ' + a['family'] for a in named)
        auth_lf = ", ".join(a['family'] + ";" + a['given'] for a in named)
    else:
        auth_fl = auth_lf = "Unknown"

    x = {}
    x['ref-count'] = message['reference-count']
    x['publisher'] = message['publisher']
    x['DOI'] = message['DOI']
    x['type'] = message['type']
    x['created'] = message['created']['timestamp']
    # "0" marks a record with no page information.
    x['length'] = string_to_length(message['page']) if 'page' in message else "0"
    x['source'] = message['source']
    x['ref-by'] = message['is-referenced-by-count']
    x['title'] = titles[0] if titles else ""
    x['url'] = links[0]['URL'] if links else ""
    x['auth-FL'] = auth_fl
    x['auth-LF'] = auth_lf
    x['subject'] = ", ".join(message['subject']) if 'subject' in message else ""
    return x
# uploads to airtable
def upload_to_airtable(line, id):
    """Write the collected fields of one article back to its Airtable record.

    Args:
        line: Dict providing the keys ``DOI``, ``title``, ``created``,
            ``publisher``, ``source``, ``subject``, ``auth-FL`` and ``apa``.
        id: Identifier of the Airtable record to update.

    Raises:
        KeyError: If ``line`` lacks one of the keys listed above.
    """
    client = Airtable(baseurl, tableid, apikey)

    # (Airtable column, key in ``line``) pairs, in the order they are sent.
    copied_columns = (
        ("DOI or ISBN", 'DOI'),
        ("Title", 'title'),
        ("sys-timestamp", 'created'),
        ("Sponsor", 'publisher'),
        ("Hosting site", 'source'),
        ("Tags", 'subject'),
    )
    payload = {}
    for column, key in copied_columns:
        payload[column] = line[key]
    # The "sys-fixit" column is always written as False with the update.
    payload["sys-fixit"] = False
    payload["Creator_FirstLast"] = line['auth-FL']
    payload["APA Reference"] = line['apa']

    client.update(id, payload)
def addReference(doi, url):
    """Fetch an APA-style citation for ``url`` and store it in ``doi['apa']``.

    The request asks for a formatted bibliography entry through the Accept
    header; the response body is decoded as UTF-8, HTML entities are
    unescaped, and the result is stored under the ``'apa'`` key. The raw
    response text is also printed.

    Args:
        doi: Dict that receives the citation under the ``'apa'`` key
            (mutated in place).
        url: Address to request; presumably a DOI resolver URL that supports
            content negotiation -- verify against the Airtable data.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # The citation style is a parameter of the Accept media type, not a
    # separate HTTP header.
    headers = {'Accept': 'text/bibliography; style=apa'}
    # A timeout prevents one unresponsive server from stalling the whole run.
    r = requests.get(url, headers=headers, timeout=30)
    r.encoding = 'utf-8'
    # html.unescape replaces HTMLParser().unescape, which was removed in
    # Python 3.9; the file's ``import html.parser`` already binds ``html``.
    doi['apa'] = html.unescape(r.text)
    print(r.text)
# MAIN program
# Pull every record from the configured Airtable view, look each flagged
# article up on Crossref, attach an APA reference and write the result back.
airtable = Airtable(baseurl, tableid, apikey)
articles = airtable.get_all(view=view)
cr = Crossref()
# NOTE(review): this address looks like an obfuscation artifact; presumably
# it should be replaced with a real contact e-mail before running.
cr.mailto="[email protected]"
for article in articles:
    fields = article['fields']
    # Records may lack 'Kind of resource' (or hold it empty); treat those as
    # non-articles instead of failing on the lookup.
    kinds = fields.get('Kind of resource') or []
    if 'sys-fixit' not in fields or not kinds or kinds[0] != 'Article':
        continue
    if 'URL' not in fields:
        print("Skipping record without URL:", article['id'])
        continue
    x = cr.works(fields['URL'])
    y = doi_to_line(x)
    if y == "Error":
        # doi_to_line returns the string "Error" for a non-ok response;
        # there is nothing to annotate or upload in that case.
        print("Skipping record with failed Crossref lookup:", article['id'])
        continue
    addReference(y, fields['URL'])
    pprint.pprint(y)
    upload_to_airtable(y, article['id'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment