Skip to content

Instantly share code, notes, and snippets.

@judell
Created November 14, 2017 17:29
Show Gist options
  • Save judell/443c77c16590aa6311ee53c05fa43bd6 to your computer and use it in GitHub Desktop.
Save judell/443c77c16590aa6311ee53c05fa43bd6 to your computer and use it in GitHub Desktop.
genius to wapo import
# using https://github.com/judell/Hypothesis
# Types a decoded JSON string may have: ``basestring`` on Python 2 (the
# dialect this script was written in), plain ``str`` on Python 3.
try:
    _TEXT_TYPES = (basestring,)
except NameError:
    _TEXT_TYPES = (str,)


def complex_node(node):
    """Return the 'tag', 'attributes' and 'children' entries of *node*.

    *node* is a dict describing one element of a Genius annotation DOM.
    Keys that are absent come back as None; any other keys are ignored.
    """
    return {
        'tag': node.get('tag'),
        'attributes': node.get('attributes'),
        'children': node.get('children'),
    }


def _fetch_referents(web_page_id, genius_token):
    """Return every referent the Genius API lists for *web_page_id*.

    Requests pages of 20 from ``/referents`` starting at page 1 and stops at
    the first page that comes back empty.

    Raises whatever ``Response.raise_for_status()`` raises when the API
    answers with an HTTP error status, instead of failing later with a
    KeyError on the missing 'response' key.
    """
    headers = {'Authorization': 'Bearer ' + genius_token, 'Content-Type': 'application/json;charset=utf-8' }
    collected = []
    page = 1
    while True:
        url = 'https://api.genius.com/referents?web_page_id=%s&per_page=20&page=%s' % (web_page_id, page)
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        batch = response.json()['response']['referents']
        if not batch:
            return collected
        collected.extend(batch)
        page += 1


def _body_to_html(dom):
    """Flatten a Genius annotation body DOM into an HTML fragment.

    Only top-level element children that themselves have 'children' are
    rendered. Inside each one, text nodes are copied through and ``a``
    elements become ``<a href=...>`` links; every other nested element is
    dropped. ``p`` elements are wrapped in ``<p>...</p>``.
    """
    parts = []
    for child in dom['children']:
        # A bare text child is a string; testing ``'children' in child`` on
        # it would be a substring search, so check the type first.
        if not isinstance(child, dict) or 'children' not in child:
            continue
        tag = child.get('tag')
        print(tag)
        is_paragraph = tag == 'p'
        if is_paragraph:
            parts.append('<p>')
        for node in child['children']:
            if isinstance(node, _TEXT_TYPES):
                parts.append(node)
                continue
            element = complex_node(node)
            if element['tag'] == 'a':
                # NOTE(review): href and link text are interpolated
                # unescaped; presumably the consumer sanitizes the
                # annotation text on render -- confirm.
                parts.append(' <a href="%s">%s</a> ' % (
                    element['attributes']['href'],
                    element['children'][0]
                ))
        if is_paragraph:
            parts.append('</p>')
    return ''.join(parts)


def _build_payload(item, permissions, group):
    """Build the annotation payload dict for one Genius referent *item*.

    The target URL is the referent's 'path' with its leading ``/<id>/``
    segment removed and ``http://`` prepended. The quote selector uses the
    referent's range content, with at most 30 characters of context on
    either side. *permissions* and *group* are stored in the payload as-is.
    """
    id_segment = '/%s/' % item['id']
    # Strip only the first occurrence so a path that happens to repeat the
    # same segment further along is left intact.
    url = 'http://' + item['path'].replace(id_segment, '', 1)
    quote = item['range']
    return {
        "uri": url,
        "target": [{
            "source": [url],
            "selector": [{
                "type": "TextQuoteSelector",
                "prefix": quote['before'][-30:],
                "exact": quote['content'],
                "suffix": quote['after'][0:30],
            }],
        }],
        "text": _body_to_html(item['annotations'][0]['body']['dom']),
        "document": {
            "title": [item['annotatable']['title']],
        },
        "permissions": permissions,
        "group": group,
    }


def genius_test(web_page_id='551506', annotator_login='byamberphillips',
                genius_token=None):
    """Copy one annotator's Genius annotations on a web page to Hypothesis.

    Fetches all referents for *web_page_id* from the Genius API, keeps those
    whose 'annotator_login' equals *annotator_login*, converts each into a
    TextQuoteSelector annotation payload, posts it with
    ``h.post_annotation`` and prints the resulting status code.

    All parameters are optional; called with no arguments the function uses
    the page id, login and token the script was written with.

    Relies on the names ``requests`` and ``Hypothesis`` being available in
    the module scope; this snippet shows no import for either.
    """
    h = Hypothesis(username="judell",
                   token='6879-... a3df5',
                   group='Pk...EgL')
    if genius_token is None:
        # NOTE(review): credential hard-coded in source; kept only as a
        # backward-compatible fallback. Pass genius_token explicitly and
        # rotate this value.
        genius_token = 'yOalMhaO8KhZBzqRgIe1gDvF0wGqRTF4uWHMxoxMlIrNWksSv0gJTpQM-kQR4MuU'

    for item in _fetch_referents(web_page_id, genius_token):
        if item.get('annotator_login') != annotator_login:
            continue
        payload = _build_payload(item, h.permissions, h.group)
        r = h.post_annotation(payload)
        print(r.status_code)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment