Skip to content

Instantly share code, notes, and snippets.

@alexwoolford
Created June 29, 2015 20:15
Show Gist options
  • Select an option

  • Save alexwoolford/26a69ed3746dc7edff32 to your computer and use it in GitHub Desktop.

Select an option

Save alexwoolford/26a69ed3746dc7edff32 to your computer and use it in GitHub Desktop.
A quick & dirty script to get taxonomy data from the Alchemy API for a list of URLs
#!/usr/bin/env python
import urllib2
import urllib
import logging
import sys
import json
# Key sent as the 'apikey' query argument on every Alchemy API request
# (masked placeholder here; substitute a real key before running).
api_key = '****************************************'

# Build the stdout handler completely before attaching it to the root logger,
# so every record from DEBUG upwards is echoed with a timestamp, the logger
# name and the level.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler(sys.stdout)
ch.setFormatter(formatter)
ch.setLevel(logging.DEBUG)

root = logging.getLogger()
root.addHandler(ch)
root.setLevel(logging.DEBUG)
def get_taxonomies(url):
    """Fetch the ranked taxonomy for ``url`` from the Alchemy API.

    Calls the ``URLGetRankedTaxonomy`` endpoint with the module-level
    ``api_key`` and asks for JSON output.

    Args:
        url: Address of the page to classify; it is URL-encoded into the
            query string.

    Returns:
        The API response re-serialized as a JSON string when its ``status``
        field is ``OK``. ``None`` when the status is anything else, or when
        building the request, fetching it or parsing the reply fails; the
        failure is logged in both cases.
    """
    try:
        baseUrl = "http://access.alchemyapi.com/calls/url/URLGetRankedTaxonomy?"
        urlArgs = {'apikey': api_key, 'outputMode': 'json',
                   'extract': 'taxonomy', 'url': url}
        apiCallUrl = baseUrl + urllib.urlencode(urlArgs)

        connection = urllib2.urlopen(apiCallUrl)
        try:
            response = connection.read()
        finally:
            # Release the socket even if the read fails part-way through.
            connection.close()

        # The body is untrusted remote data: parse it as JSON, never eval().
        # eval() would execute arbitrary expressions and also chokes on the
        # JSON literals true/false/null.
        result = json.loads(response)
        status = result['status']
    except Exception:
        # Best-effort per URL: log and carry on with the next one. Catching
        # Exception (not a bare except) lets Ctrl-C and SystemExit through.
        logging.error("Error {0} processing url {1}".format(sys.exc_info()[0], url))
        return None

    if status == u'OK':
        logging.info("Reponse OK getting taxonomy for {0}".format(url))
        return json.dumps(result)

    logging.error("Response status {0} getting taxonomy for {1}".format(status, url))
    return None
if __name__ == "__main__":
    # Read URLs (one per line) from urls.txt and write one JSON document per
    # successfully classified URL to alchemy_url_taxonomy.json.
    with open('urls.txt', 'r') as url_file, open('alchemy_url_taxonomy.json', 'w') as json_out:
        # NOTE(review): only the first two lines of the input are processed;
        # this looks like a leftover test limit - confirm before removing.
        head = url_file.readlines()[:2]
        # Lazy generator: each URL is fetched just before its result is written.
        for result in (get_taxonomies(entry.strip()) for entry in head):
            if not result:
                # get_taxonomies already logged why this URL produced nothing.
                continue
            json_out.write(result + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment