Created
November 23, 2015 16:37
-
-
Save judell/4bd35286c3c29e756295 to your computer and use it in GitHub Desktop.
rrid annotation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json, requests, re, traceback, pyramid, urlparse | |
from lxml import etree | |
try: | |
from urllib.parse import urlencode | |
except ImportError: | |
from urllib import urlencode | |
# Network location used by the WSGI server started at the bottom of this file.
host = 'h.jonudell.info'  # or your own
port = 8081

# Base URL (scheme + host + port) derived from the two settings above.
host_port = 'http://{0}:{1}'.format(host, port)
class HypothesisUtils:
    """Small client for the Hypothesis annotation service.

    Derives the service URLs for a domain, logs in to obtain an API token,
    and posts annotations whose target is anchored by a TextQuoteSelector.
    """

    def __init__(self, username='username', password=None, limit=None, max_results=None, domain=None):
        """Store credentials and derive the service URLs.

        username    -- bare account name; it is wrapped as 'acct:<name>@hypothes.is'.
        password    -- password sent by login().
        limit       -- per-page result limit; 200 when None.
        max_results -- limit for paginated results; 200 when None.
        domain      -- service host name; 'hypothes.is' when None.
        """
        self.domain = 'hypothes.is' if domain is None else domain
        self.app_url = 'https://%s/app' % self.domain
        self.api_url = 'https://%s/api' % self.domain
        self.query_url = 'https://%s/api/search?{query}' % self.domain
        # Built from self.domain (not the raw argument) so the default domain
        # applies here too; the raw argument is None in the default case.
        self.anno_url = 'https://%s/a' % self.domain
        self.via_url = 'https://via.hypothes.is'
        self.username = username
        self.password = password
        # per-page, the api honors limit= up to (currently) 200
        self.single_page_limit = 200 if limit is None else limit
        # limit for paginated results
        self.multi_page_limit = 200 if max_results is None else max_results
        acct = 'acct:' + self.username + '@hypothes.is'
        # World-readable; only the owning account may update/delete/admin.
        self.permissions = {
            "read": ["group:__world__"],
            "update": [acct],
            "delete": [acct],
            "admin": [acct],
        }

    def login(self):
        """Request an assertion, exchange it for an auth token.

        Sets self.csrf_token (from the 'XSRF-TOKEN' cookie of the app page)
        and self.token (the body of the /token response). Raises KeyError if
        the app page does not set the 'XSRF-TOKEN' cookie.
        """
        # https://github.com/rdhyee/hypothesisapi
        cookies = requests.get(self.app_url).cookies
        self.csrf_token = cookies['XSRF-TOKEN']
        headers = {
            'content-type': 'application/json;charset=UTF-8',
            'x-csrf-token': self.csrf_token,
        }
        credentials = json.dumps({"username": self.username, "password": self.password})
        # The login response itself is not inspected; only the follow-up
        # token request is used.
        requests.post(url=self.app_url + "?__formid__=login", data=credentials,
                      cookies=cookies, headers=headers)
        token_url = self.api_url + "/token?" + urlencode({'assertion': self.csrf_token})
        r = requests.get(url=token_url, cookies=cookies, headers=headers)
        # NOTE(review): r.content is bytes on Python 3, while post_annotation
        # concatenates the token with a str -- confirm before porting.
        self.token = r.content

    def make_annotation_payload_with_target_using_only_text_quote(self, url, prefix, exact, suffix, text, tags):
        """Create JSON payload for API call.

        url                   -- annotated page; used as both 'uri' and target scope.
        prefix, exact, suffix -- fields of the single TextQuoteSelector.
        text                  -- annotation body.
        tags                  -- stored under 'tags' unchanged.

        Returns the payload as a dict (not yet serialized).
        """
        selector = {
            "type": "TextQuoteSelector",
            "prefix": prefix,
            "exact": exact,
            "suffix": suffix,
        }
        payload = {
            "uri": url,
            "user": 'acct:' + self.username + '@hypothes.is',
            "permissions": self.permissions,
            #"document": {
            #    "link": [ { "href": url } ]
            #    },
            "target": [{
                "scope": [url],
                "selector": [selector],
            }],
            "tags": tags,
            "text": text,
        }
        return payload

    def create_annotation_with_target_using_only_text_quote(self, url=None, prefix=None,
                                                            exact=None, suffix=None, text=None, tags=None):
        """Call API with token and payload, create annotation (using only text quote).

        Returns the response object from post_annotation().
        """
        payload = self.make_annotation_payload_with_target_using_only_text_quote(
            url, prefix, exact, suffix, text, tags)
        return self.post_annotation(payload)

    def post_annotation(self, payload):
        """POST payload as JSON to <api_url>/annotations using the bearer token.

        Requires login() to have set self.token. Returns the requests response.
        """
        headers = {
            'Authorization': 'Bearer ' + self.token,
            'Content-Type': 'application/json;charset=utf-8',
        }
        # ensure_ascii=False keeps non-ASCII characters literal in the body.
        data = json.dumps(payload, ensure_ascii=False)
        return requests.post(self.api_url + '/annotations', headers=headers, data=data)
def rrid(request):
    """Pyramid view: annotate the first RRID on a page, then redirect to the page.

    Reads the page address from the 'uri' query parameter, fetches it, and
    looks for the first 'RRID:<id>' occurrence that has 10 characters on
    either side on the same line ('.' does not cross newlines). The id is
    looked up at the SciCrunch resolver, the returned name/value pairs are
    rendered as <p> elements, and the result is posted as a Hypothesis
    annotation quoting the id.

    Annotation is best-effort: any failure after the page fetch is printed
    as a traceback and the redirect is still returned.

    Returns a 302 Response to the target page, or a 400 Response when the
    'uri' parameter is missing.
    """
    # Imported here so the view also works when this module is imported
    # instead of being run as a script.
    from pyramid.response import Response

    # e.g. http://www.jneurosci.org/content/34/24/8151.full
    uris = urlparse.parse_qs(request.query_string).get('uri')
    if not uris:
        return Response(status_int=400)
    target_uri = uris[0]

    # NOTE(security): target_uri is caller-supplied, fetched server-side and
    # used as the redirect location; restrict allowed hosts before exposing
    # this endpoint publicly.
    html = requests.get(target_uri).text
    #readability_doc = readability.Document(html)
    #content_html = readability_doc.summary()
    try:
        match = re.search(r'(.{10}?)(RRID:\s*)([\w\-]+)(.{10}?)', html)
        if match is not None:
            # Group 2 (the literal 'RRID:' marker) is not part of the quote.
            prefix, _marker, exact, suffix = match.groups()
            resolver_uri = 'https://scicrunch.org/resolver/%s.xml' % exact
            root = etree.fromstring(requests.get(resolver_uri).content)
            # Children of the first <data> element each carry <name>/<value>.
            data_elements = root.findall('data')[0]
            text = ''.join(
                '<p>%s: %s</p>' % (element.find('name').text, element.find('value').text)
                for element in data_elements
            )
            h = HypothesisUtils(username='*****', password='*****')
            h.login()
            r = h.create_annotation_with_target_using_only_text_quote(
                url=target_uri, prefix=prefix, exact=exact, suffix=suffix, text=text)
            print(r.text)
    except Exception:
        # Best-effort: report the problem but still send the user on.
        traceback.print_exc()
    return Response(status_int=302, location=target_uri)
if __name__ == '__main__':
    # These imports bind module-level names when the file is run as a script.
    from pyramid.config import Configurator
    from pyramid.response import Response
    from wsgiref.simple_server import make_server

    # Register the single '/rrid' route and attach the rrid view to it.
    configurator = Configurator()
    configurator.add_route('rrid', '/rrid')
    configurator.add_view(rrid, route_name='rrid')
    wsgi_app = configurator.make_wsgi_app()

    # Serve on host:port until the process is interrupted.
    make_server(host, port, wsgi_app).serve_forever()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment