mjlavin80 · December 5, 2016 17:13
diff --git a/worldcat_metadata.py b/worldcat_metadata.py
 # This Python script loops through a list of WorldCat ids, downloads the metadata for each id, and stores the full XML response in a SQLite database (datastore.db) for later parsing.
 # If the daily API key limit is reached, the script terminates; the next time it runs, it checks the database for each WorldCat id and skips the ids that are already stored.
 # The intended way to run this script is therefore as a daily cron job until data has been downloaded for every id.

 #Worldcat ids go here in list format, like this: ids_list = [11111, 22222, 33333]
 ids_list = []

 # Replace the placeholder below with your WorldCat API key.
 KEY = 'Your key here'

 import sqlite3

 # Open the SQLite file used as the local store and get a cursor on it.
 conn = sqlite3.connect('datastore.db')
 c = conn.cursor()

 # Make sure the table exists: one row per record, holding the id and its XML.
 c.execute("CREATE TABLE IF NOT EXISTS raw_data (id INTEGER, xml BLOB)")

 def check_data(_id, cursor=None):
     """Count the rows already stored for a WorldCat id.

     Args:
         _id: WorldCat id to look up (string or integer).
         cursor: Optional DB-API cursor to run the query on. Defaults to
             the module-level cursor ``c``.

     Returns:
         The number of rows in ``raw_data`` whose ``id`` equals ``_id``.
     """
     cur = c if cursor is None else cursor
     # Bind the id as a parameter rather than splicing it into the SQL text,
     # so a malformed id can never alter the statement.
     rows = cur.execute("SELECT * FROM raw_data WHERE id = ?", (_id,)).fetchall()
     return len(rows)

 def insert_data(_id, xml, connection=None):
     """Store one record in ``raw_data`` and commit it.

     Args:
         _id: WorldCat id of the record (string or integer).
         xml: Text payload to store in the ``xml`` column.
         connection: Optional sqlite3 connection to write to. Defaults to
             the module-level connection ``conn``.
     """
     db = conn if connection is None else connection
     # Bound parameters keep quotes in the payload from breaking (or
     # rewriting) the statement.
     db.execute("INSERT INTO raw_data (id, xml) VALUES (?, ?)", (_id, xml))
     # Commit per record so an interrupted run keeps what it has stored.
     db.commit()

 import requests
 import urllib.parse
 import xml.etree.ElementTree as ET
 import time

 def worldcat_record(oclc_id, key, format="atom",
                     schema='info%3Asrw%2Fschema%2Fdc', timeout=30):
     """Request one record from the WorldCat catalog content service.

     Args:
         oclc_id: Id of the record to fetch (string or integer).
         key: API key, sent as the ``wskey`` query parameter.
         format: Value of the ``format`` query parameter.
         schema: Value of the ``recordSchema`` query parameter. It is
             inserted into the URL as given; the default is already
             percent-encoded.
         timeout: Seconds to wait for the server before giving up, so an
             unattended run cannot hang on a stalled connection.

     Returns:
         The ``requests`` response object for the GET request.

     Raises:
         requests.exceptions.Timeout: If the server does not answer within
             ``timeout`` seconds.
     """
     built_url = "".join([
         "http://www.worldcat.org/webservices/catalog/content/",
         str(oclc_id),
         "?wskey=", str(key),
         "&format=", format,
         "&recordSchema=", schema,
     ])
     return requests.get(built_url, timeout=timeout)

 for w_id in ids_list:
     # Ids that already have a row in the database are skipped.
     if check_data(str(w_id)) > 0:
         continue

     # Pause before every request.
     time.sleep(3)
     url_object = worldcat_record(str(w_id), KEY)

     # Any non-OK status ends the run after printing the status code.
     if url_object.status_code != requests.codes.ok:
         print(url_object.status_code)
         break

     xml = url_object.text
     # The response text is stored URL-encoded (quote_plus).
     insert_data(str(w_id), urllib.parse.quote_plus(xml))
	# This Python script loops through a list of WorldCat ids, downloads the metadata for each id, and stores the full XML response in a SQLite database (datastore.db) for later parsing.
	# If the daily API key limit is reached, the script terminates; the next time it runs, it checks the database for each WorldCat id and skips the ids that are already stored.
	# The intended way to run this script is therefore as a daily cron job until data has been downloaded for every id.

	#Worldcat ids go here in list format, like this: ids_list = [11111, 22222, 33333]
	ids_list = []

	# Replace the placeholder below with your WorldCat API key.
	KEY = 'Your key here'

	import sqlite3

	# Open the SQLite file used as the local store and get a cursor on it.
	conn = sqlite3.connect('datastore.db')
	c = conn.cursor()

	# Make sure the table exists: one row per record, holding the id and its XML.
	c.execute("CREATE TABLE IF NOT EXISTS raw_data (id INTEGER, xml BLOB)")

	def check_data(_id, cursor=None):
	    """Count the rows already stored for a WorldCat id.

	    Args:
	        _id: WorldCat id to look up (string or integer).
	        cursor: Optional DB-API cursor to run the query on. Defaults to
	            the module-level cursor ``c``.

	    Returns:
	        The number of rows in ``raw_data`` whose ``id`` equals ``_id``.
	    """
	    cur = c if cursor is None else cursor
	    # Bind the id as a parameter rather than splicing it into the SQL text,
	    # so a malformed id can never alter the statement.
	    rows = cur.execute("SELECT * FROM raw_data WHERE id = ?", (_id,)).fetchall()
	    return len(rows)

	def insert_data(_id, xml, connection=None):
	    """Store one record in ``raw_data`` and commit it.

	    Args:
	        _id: WorldCat id of the record (string or integer).
	        xml: Text payload to store in the ``xml`` column.
	        connection: Optional sqlite3 connection to write to. Defaults to
	            the module-level connection ``conn``.
	    """
	    db = conn if connection is None else connection
	    # Bound parameters keep quotes in the payload from breaking (or
	    # rewriting) the statement.
	    db.execute("INSERT INTO raw_data (id, xml) VALUES (?, ?)", (_id, xml))
	    # Commit per record so an interrupted run keeps what it has stored.
	    db.commit()

	import requests
	import urllib.parse
	import xml.etree.ElementTree as ET
	import time

	def worldcat_record(oclc_id, key, format="atom",
	                    schema='info%3Asrw%2Fschema%2Fdc', timeout=30):
	    """Request one record from the WorldCat catalog content service.

	    Args:
	        oclc_id: Id of the record to fetch (string or integer).
	        key: API key, sent as the ``wskey`` query parameter.
	        format: Value of the ``format`` query parameter.
	        schema: Value of the ``recordSchema`` query parameter. It is
	            inserted into the URL as given; the default is already
	            percent-encoded.
	        timeout: Seconds to wait for the server before giving up, so an
	            unattended run cannot hang on a stalled connection.

	    Returns:
	        The ``requests`` response object for the GET request.

	    Raises:
	        requests.exceptions.Timeout: If the server does not answer within
	            ``timeout`` seconds.
	    """
	    built_url = "".join([
	        "http://www.worldcat.org/webservices/catalog/content/",
	        str(oclc_id),
	        "?wskey=", str(key),
	        "&format=", format,
	        "&recordSchema=", schema,
	    ])
	    return requests.get(built_url, timeout=timeout)

	for w_id in ids_list:
	    # Ids that already have a row in the database are skipped.
	    if check_data(str(w_id)) > 0:
	        continue

	    # Pause before every request.
	    time.sleep(3)
	    url_object = worldcat_record(str(w_id), KEY)

	    # Any non-OK status ends the run after printing the status code.
	    if url_object.status_code != requests.codes.ok:
	        print(url_object.status_code)
	        break

	    xml = url_object.text
	    # The response text is stored URL-encoded (quote_plus).
	    insert_data(str(w_id), urllib.parse.quote_plus(xml))
No results found