mromanello · March 15, 2013 08:25
diff --git a/gistfile1.py b/gistfile1.py
 import json
 import urllib
 import urllib2
 import sys
 import codecs

 """
 Author: Matteo Romanello, <[email protected]>
 """

 def to_KML(place_name, coord, date, desc):
 	"""
 	Build the KML `<Placemark>` element for one dated entry at a place.

 	place_name -- text for `<name>`; XML-escaped here.
 	coord      -- indexable pair; coord[0] and coord[1] are written, in that
 	              order, into `<coordinates>` (KML reads them as
 	              longitude,latitude -- assumes the caller supplies that
 	              order, TODO confirm).
 	date       -- used as both `<begin>` and `<end>` of the `<TimeSpan>`;
 	              XML-escaped here.
 	desc       -- markup for `<description>`, emitted inside a CDATA section.

 	Returns the Placemark as a single string.
 	"""
 	from xml.sax.saxutils import escape

 	# "%s" % (x,) keeps the original tolerance for non-string arguments.
 	safe_name = escape("%s" % (place_name,))
 	safe_date = escape("%s" % (date,))
 	# A literal "]]>" would end the CDATA section early; split it across two sections.
 	safe_desc = ("%s" % (desc,)).replace("]]>", "]]]]><![CDATA[>")

 	template = "\n".join([
 		"<Placemark>",
 		"  <name>%s</name>",
 		"  <Point>",
 		"    <coordinates>%s,%s</coordinates>",
 		"  </Point>",
 		"  <TimeSpan>",
 		"    <begin>%s</begin>",
 		"    <end>%s</end>",
 		"  </TimeSpan>",
 		"  <description><![CDATA[",
 		"%s",
 		"]]></description>",
 		"</Placemark>",
 	])
 	return template % (safe_name, coord[0], coord[1], safe_date, safe_date, safe_desc)

 def get_related_publications(zenon_url = "http://highgrass.uni-koeln.de/elwms-zenon/search", key = None):
 	"""
 	Fetch the publications that Zenon, DAI's OPAC, returns for a search key.

 	The RESTful API at `zenon_url` is queried with `q=f999_1:<key>` and a
 	limit of 1000. In this script the key is the thesaurus subject entry
 	that corresponds to a place in the Limes.

 	Returns a list of dicts with the keys 'id', 'title', 'author', 'date',
 	'zenon_id' and 'link'. The list is empty when `key` is None or when the
 	response carries no 'data' entry.
 	"""
 	from contextlib import closing

 	if key is None:
 		return []

 	zenon_params = {'q': 'f999_1:%s' % key, 'limit': 1000}
 	zenon_full_url = "%s?%s" % (zenon_url, urllib.urlencode(zenon_params))
 	# closing() releases the HTTP response even if the JSON cannot be parsed.
 	with closing(urllib2.urlopen(zenon_full_url)) as zenon_data:
 		js_publications = json.load(zenon_data)

 	result = []
 	# "or []" also covers a response whose 'data' is null.
 	for pub in js_publications.get('data') or []:
 		temp = {}
 		temp['id'] = pub["id"]
 		temp['title'] = pub["title"]
 		temp['author'] = pub["author"]
 		temp['date'] = pub['imprint'].replace('.','') # here we need to have some more sophisticated normalization
 		temp['zenon_id'] = pub['id']
 		# '&#35;' is the HTML entity for '#'; the link ends up inside an HTML
 		# description (presumably escaped for the viewer -- TODO confirm).
 		temp['link'] = "http://testopac.dainst.org/&#35;book/%s"%temp['zenon_id']
 		result.append(temp)
 	return result

 def get_places(url = "http://gazetteer.dainst.org/search.json"):
 	"""
 	Retrieve geo-data for Limes locations from DAI's gazetteer via its RESTful API.

 	The gazetteer is queried with `q=tags:limes` and a limit of 1000.
 	Entries without a 'prefLocation' are reported on stderr and skipped.

 	Returns a list of dicts with the keys 'coord', 'name', 'id',
 	'subject_headings' (values of the identifiers whose context is
 	'zenon-thesaurus') and 'gazlink' (the id with '#' HTML-escaped).
 	"""
 	from contextlib import closing

 	# limit param is very important here to get the full results
 	params = {'limit': 1000, 'q': 'tags:limes'}
 	full_url = "%s?%s" % (url, urllib.urlencode(params))
 	# closing() releases the HTTP response even if the JSON cannot be parsed.
 	with closing(urllib2.urlopen(full_url)) as data:
 		js_places = json.load(data)

 	result = []
 	for pl in js_places['result']:
 		if 'prefLocation' not in pl:
 			# Report the id of the place being skipped, not of the previously kept one.
 			print >> sys.stderr, "Skipped %s as it has no coordinates" % pl.get('@id')
 			continue
 		temp = {}
 		temp['coord'] = pl['prefLocation']['coordinates']
 		temp['name'] = pl['prefName']['title'] # there could be multiple names here in diff languages
 		temp['id'] = pl['@id']
 		# A place without any identifiers simply has no subject headings.
 		temp['subject_headings'] = [ident['value'] for ident in pl.get('identifiers', []) if ident['context']==u'zenon-thesaurus']
 		temp['gazlink'] = temp["id"].replace("#","&#35;")
 		result.append(temp)
 	return result

 def main(ofname="output.kml"):
 	"""
 	Write a KML document of Limes-related publications to `ofname`.

 	Places come from get_places(); for every subject heading of a place the
 	related publications are fetched with get_related_publications(). Each
 	publication whose date is at most four characters long becomes one
 	Placemark (see to_KML) located at the place's coordinates. Progress is
 	reported on stderr. The file is written as UTF-8.
 	"""
 	places = get_places()

 	# NOTE(review): the dc:rights value looks like a placeholder -- confirm.
 	metadata = """
 	<ExtendedData xmlns:dc="http://purl.org/dc/elements/1.1/">
 		<dc:description>
 		This dataset contains data about publications related to the Limes grouped by the place in the Limes they relate to.
 		The main purpose is the display within the DARIAH Geo-browser, a tool to visualize data in time and space.
 		</dc:description>
 		<dc:subject>Archaeology, geo-date, Limes</dc:subject>
 		<dc:identifier>http://de.digitalclassicist.org/berlin/files/output.kml</dc:identifier>
 		<dc:language>de</dc:language>
 		<dc:date>2013-03-15</dc:date>
 		<dc:rights>Copyright 2000, O'Reilly Network</dc:rights>
 		<dc:publisher>German Archaeological Institute</dc:publisher>
 	</ExtendedData>
 	"""

 	intro = """<?xml version="1.0" encoding="utf-8"?>
 	<kml xmlns="http://www.opengis.net/kml/2.2">
 	  <Document>%s
 	"""%metadata
 	outro = """
 	  </Document>
 	</kml>"""

 	# The with-block closes the output file even if a request fails midway.
 	with codecs.open(ofname,'w','utf-8') as out:
 		out.write(intro)
 		for place in places:
 			for key in place["subject_headings"]:
 				found = get_related_publications(key = key.encode('utf-8'))
 				if not found:
 					continue
 				print >> sys.stderr,"Found %i related publications in Zenon related to %s"%(len(found),place['name'])
 				for pub in found:
 					# Longer date strings are skipped (presumably anything that is
 					# not a bare year -- TODO confirm).
 					if len(pub['date']) > 4:
 						continue
 					desc = "<p>%s <em>%s</em> (%s) [<a target=\"_blank\" href='%s'>View in Zenon</a> ; <a href='%s' target=\"_blank\">View in Gazetteer</a> ] </p>"%(pub['author'],pub['title'],pub['date'],pub['link'],place['gazlink'])
 					out.write(to_KML(place['name'],place['coord'],pub['date'],desc))
 		out.write(outro)
 	
 	
if __name__ == "__main__":
 	# Script entry point: generate the KML using main()'s default file name.
 	main()
 	# To write the output to another destination, call main() with an
 	# explicit file name instead, e.g.:
 	#main(ofname="thefilenameidecide.kml")
	import json
	import urllib
	import urllib2
	import sys
	import codecs

	"""
	Author: Matteo Romanello, <[email protected]>
	"""

	def to_KML(place_name, coord, date, desc):
		"""
		Build the KML `<Placemark>` element for one dated entry at a place.

		place_name -- text for `<name>`; XML-escaped here.
		coord      -- indexable pair; coord[0] and coord[1] are written, in that
		              order, into `<coordinates>` (KML reads them as
		              longitude,latitude -- assumes the caller supplies that
		              order, TODO confirm).
		date       -- used as both `<begin>` and `<end>` of the `<TimeSpan>`;
		              XML-escaped here.
		desc       -- markup for `<description>`, emitted inside a CDATA section.

		Returns the Placemark as a single string.
		"""
		from xml.sax.saxutils import escape

		# "%s" % (x,) keeps the original tolerance for non-string arguments.
		safe_name = escape("%s" % (place_name,))
		safe_date = escape("%s" % (date,))
		# A literal "]]>" would end the CDATA section early; split it across two sections.
		safe_desc = ("%s" % (desc,)).replace("]]>", "]]]]><![CDATA[>")

		template = "\n".join([
			"<Placemark>",
			"  <name>%s</name>",
			"  <Point>",
			"    <coordinates>%s,%s</coordinates>",
			"  </Point>",
			"  <TimeSpan>",
			"    <begin>%s</begin>",
			"    <end>%s</end>",
			"  </TimeSpan>",
			"  <description><![CDATA[",
			"%s",
			"]]></description>",
			"</Placemark>",
		])
		return template % (safe_name, coord[0], coord[1], safe_date, safe_date, safe_desc)

	def get_related_publications(zenon_url = "http://highgrass.uni-koeln.de/elwms-zenon/search", key = None):
		"""
		Fetch the publications that Zenon, DAI's OPAC, returns for a search key.

		The RESTful API at `zenon_url` is queried with `q=f999_1:<key>` and a
		limit of 1000. In this script the key is the thesaurus subject entry
		that corresponds to a place in the Limes.

		Returns a list of dicts with the keys 'id', 'title', 'author', 'date',
		'zenon_id' and 'link'. The list is empty when `key` is None or when the
		response carries no 'data' entry.
		"""
		from contextlib import closing

		if key is None:
			return []

		zenon_params = {'q': 'f999_1:%s' % key, 'limit': 1000}
		zenon_full_url = "%s?%s" % (zenon_url, urllib.urlencode(zenon_params))
		# closing() releases the HTTP response even if the JSON cannot be parsed.
		with closing(urllib2.urlopen(zenon_full_url)) as zenon_data:
			js_publications = json.load(zenon_data)

		result = []
		# "or []" also covers a response whose 'data' is null.
		for pub in js_publications.get('data') or []:
			temp = {}
			temp['id'] = pub["id"]
			temp['title'] = pub["title"]
			temp['author'] = pub["author"]
			temp['date'] = pub['imprint'].replace('.','') # here we need to have some more sophisticated normalization
			temp['zenon_id'] = pub['id']
			# '#' is written as the HTML entity '&#35;', matching the escaping
			# get_places applies to its gazetteer links; the link ends up inside
			# an HTML description (presumably escaped for the viewer -- TODO confirm).
			temp['link'] = "http://testopac.dainst.org/&#35;book/%s"%temp['zenon_id']
			result.append(temp)
		return result

	def get_places(url = "http://gazetteer.dainst.org/search.json"):
		"""
		Retrieve geo-data for Limes locations from DAI's gazetteer via its RESTful API.

		The gazetteer is queried with `q=tags:limes` and a limit of 1000.
		Entries without a 'prefLocation' are reported on stderr and skipped.

		Returns a list of dicts with the keys 'coord', 'name', 'id',
		'subject_headings' (values of the identifiers whose context is
		'zenon-thesaurus') and 'gazlink' (the id with '#' HTML-escaped).
		"""
		from contextlib import closing

		# limit param is very important here to get the full results
		params = {'limit': 1000, 'q': 'tags:limes'}
		full_url = "%s?%s" % (url, urllib.urlencode(params))
		# closing() releases the HTTP response even if the JSON cannot be parsed.
		with closing(urllib2.urlopen(full_url)) as data:
			js_places = json.load(data)

		result = []
		for pl in js_places['result']:
			if 'prefLocation' not in pl:
				# Report the id of the place being skipped, not of the previously kept one.
				print >> sys.stderr, "Skipped %s as it has no coordinates" % pl.get('@id')
				continue
			temp = {}
			temp['coord'] = pl['prefLocation']['coordinates']
			temp['name'] = pl['prefName']['title'] # there could be multiple names here in diff languages
			temp['id'] = pl['@id']
			# A place without any identifiers simply has no subject headings.
			temp['subject_headings'] = [ident['value'] for ident in pl.get('identifiers', []) if ident['context']==u'zenon-thesaurus']
			# Replacing '#' with itself was a no-op; escape it as the HTML
			# entity so the link can be embedded in an HTML description.
			temp['gazlink'] = temp["id"].replace("#","&#35;")
			result.append(temp)
		return result

	def main(ofname="output.kml"):
		"""
		Write a KML document of Limes-related publications to `ofname`.

		Places come from get_places(); for every subject heading of a place the
		related publications are fetched with get_related_publications(). Each
		publication whose date is at most four characters long becomes one
		Placemark (see to_KML) located at the place's coordinates. Progress is
		reported on stderr. The file is written as UTF-8.
		"""
		places = get_places()

		# NOTE(review): the dc:rights value looks like a placeholder -- confirm.
		metadata = """
		<ExtendedData xmlns:dc="http://purl.org/dc/elements/1.1/">
			<dc:description>
			This dataset contains data about publications related to the Limes grouped by the place in the Limes they relate to.
			The main purpose is the display within the DARIAH Geo-browser, a tool to visualize data in time and space.
			</dc:description>
			<dc:subject>Archaeology, geo-date, Limes</dc:subject>
			<dc:identifier>http://de.digitalclassicist.org/berlin/files/output.kml</dc:identifier>
			<dc:language>de</dc:language>
			<dc:date>2013-03-15</dc:date>
			<dc:rights>Copyright 2000, O'Reilly Network</dc:rights>
			<dc:publisher>German Archaeological Institute</dc:publisher>
		</ExtendedData>
		"""

		intro = """<?xml version="1.0" encoding="utf-8"?>
		<kml xmlns="http://www.opengis.net/kml/2.2">
		<Document>%s
		"""%metadata
		outro = """
		</Document>
		</kml>"""

		# The with-block closes the output file even if a request fails midway.
		with codecs.open(ofname,'w','utf-8') as out:
			out.write(intro)
			for place in places:
				for key in place["subject_headings"]:
					found = get_related_publications(key = key.encode('utf-8'))
					if not found:
						continue
					print >> sys.stderr,"Found %i related publications in Zenon related to %s"%(len(found),place['name'])
					for pub in found:
						# Longer date strings are skipped (presumably anything that is
						# not a bare year -- TODO confirm).
						if len(pub['date']) > 4:
							continue
						desc = "<p>%s <em>%s</em> (%s) [<a target=\"_blank\" href='%s'>View in Zenon</a> ; <a href='%s' target=\"_blank\">View in Gazetteer</a> ] </p>"%(pub['author'],pub['title'],pub['date'],pub['link'],place['gazlink'])
						out.write(to_KML(place['name'],place['coord'],pub['date'],desc))
			out.write(outro)


	if __name__ == "__main__":
		# Script entry point: generate the KML using main()'s default file name.
		main()
		# To write the output to another destination, call main() with an
		# explicit file name instead, e.g.:
		#main(ofname="thefilenameidecide.kml")