Created
March 15, 2013 08:25
-
-
Save mromanello/5168303 to your computer and use it in GitHub Desktop.
Python script to create an example of data that can be displayed in the DARIAH geo-browser <http://dev2.dariah.eu/e4d/>, a tool to visualize data in time and space. Data are drawn from the Gazetteer and the Opac (library catalogue) of the German Archaeological Institute (DAI). This dataset contains data about publications related to the Limes grouped…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import urllib | |
import urllib2 | |
import sys | |
import codecs | |
""" | |
Author: Matteo Romanello, <[email protected]> | |
""" | |
def to_KML(place_name, coord, date, desc):
    """
    Render one place/publication pair as a KML ``<Placemark>`` fragment.

    Args:
        place_name: Text for the ``<name>`` element. It is XML-escaped so
            that names containing ``&``, ``<`` or ``>`` keep the document
            well-formed.
        coord: Indexable pair; ``coord[0]`` and ``coord[1]`` are written, in
            that order, into ``<coordinates>``.
        date: Value used for both ``<begin>`` and ``<end>`` of the
            ``<TimeSpan>``. It is XML-escaped as well.
        desc: Markup for ``<description>``; it is wrapped in a CDATA section
            and therefore not escaped.

    Returns:
        The ``<Placemark>`` fragment as a string.
    """
    from xml.sax.saxutils import escape

    def as_xml_text(value):
        # Apply the same "%s" coercion the template would, then escape it.
        return escape("%s" % (value,))

    # A literal "]]>" inside the description would terminate the CDATA
    # section early; split it so it ends one section and starts the next.
    safe_desc = ("%s" % (desc,)).replace("]]>", "]]]]><![CDATA[>")

    template = """<Placemark>
<name>%s</name>
<Point>
<coordinates>%s,%s</coordinates>
</Point>
<TimeSpan>
<begin>%s</begin>
<end>%s</end>
</TimeSpan>
<description><![CDATA[
%s
]]></description>
</Placemark>"""
    escaped_date = as_xml_text(date)
    return template % (
        as_xml_text(place_name),
        coord[0],
        coord[1],
        escaped_date,
        escaped_date,
        safe_desc,
    )
def get_related_publications(zenon_url="http://highgrass.uni-koeln.de/elwms-zenon/search", key=None):
    """
    Fetch publications from Zenon, DAI's OPAC, by querying its RESTful API.

    In this script the search key is the thesaurus subject entry that
    corresponds to a given place in the Limes.

    Args:
        zenon_url: Base URL of the Zenon search endpoint.
        key: Search key, sent as ``q=f999_1:<key>``. When ``None`` no request
            is made.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``author``, ``date``,
        ``zenon_id`` and ``link``. The list is empty when ``key`` is ``None``
        or when the response has no ``data`` entry.
    """
    if key is None:
        return []

    zenon_params = {'q': 'f999_1:%s' % key, 'limit': 1000}
    zenon_full_url = "%s?%s" % (zenon_url, urllib.urlencode(zenon_params))

    # urllib2 responses are not context managers in Python 2, so close the
    # connection explicitly even if the JSON cannot be parsed.
    response = urllib2.urlopen(zenon_full_url)
    try:
        js_publications = json.load(response)
    finally:
        response.close()

    result = []
    if 'data' in js_publications:
        for pub in js_publications['data']:
            record = {
                'id': pub["id"],
                'title': pub["title"],
                'author': pub["author"],
                # here we need to have some more sophisticated normalization
                'date': pub['imprint'].replace('.', ''),
                'zenon_id': pub['id'],
            }
            record['link'] = "http://testopac.dainst.org/#book/%s" % record['zenon_id']
            result.append(record)
    return result
def get_places(url="http://gazetteer.dainst.org/search.json"):
    """
    Retrieve geo-data for Limes locations from DAI's gazetteer via its RESTful API.

    Args:
        url: URL of the gazetteer JSON search endpoint.

    Returns:
        A list of dicts, one per returned place that has a ``prefLocation``,
        with the keys ``coord``, ``name``, ``id``, ``subject_headings`` and
        ``gazlink``. Places without a ``prefLocation`` are skipped and
        reported on stderr.
    """
    # Query the gazetteer for the places tagged as belonging to the Limes.
    # The limit param is very important here to get the full results.
    params = {'limit': 1000, 'q': 'tags:limes'}
    full_url = "%s?%s" % (url, urllib.urlencode(params))

    # Close the connection explicitly; urllib2 responses do not support
    # the ``with`` statement in Python 2.
    response = urllib2.urlopen(full_url)
    try:
        js_places = json.load(response)
    finally:
        response.close()

    result = []
    for pl in js_places['result']:
        if 'prefLocation' not in pl:
            # Report the id of the record being skipped (not of a previously
            # processed place, which may not even exist yet).
            print >> sys.stderr, "Skipped %s as it has no coordinates" % pl.get('@id')
            continue
        place = {}
        place['coord'] = pl['prefLocation']['coordinates']
        # there could be multiple names here in diff languages
        place['name'] = pl['prefName']['title']
        place['id'] = pl['@id']
        place['subject_headings'] = [
            ident['value']
            for ident in pl['identifiers']
            if ident['context'] == u'zenon-thesaurus'
        ]
        # NOTE(review): as written this replace is a no-op ("#" -> "#"); the
        # intended search string may have been lost -- confirm against the
        # original script.
        place['gazlink'] = place["id"].replace("#", "#")
        result.append(place)
    return result
def main(ofname="output.kml"):
    """
    Build a KML file of Limes-related publications grouped by place.

    Places come from ``get_places()``. For every subject heading of every
    place, the related publications are fetched with
    ``get_related_publications()``, and one placemark is written for each
    publication whose date string is at most four characters long.

    Args:
        ofname: Path of the KML file to write (UTF-8 encoded).
    """
    places = get_places()
    # Dublin Core metadata embedded in the document. Every element opened
    # here is also closed here, so the output stays well-formed XML.
    metadata = """
<ExtendedData xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:description>
This dataset contains data about publications related to the Limes grouped by the place in the Limes they relate to.
The main purpose is the display within the DARIAH Geo-browser, a tool to visualize data in time and space.
</dc:description>
<dc:subject>Archaeology, geo-date, Limes</dc:subject>
<dc:identifier>http://de.digitalclassicist.org/berlin/files/output.kml</dc:identifier>
<dc:language>de</dc:language>
<dc:date>2013-03-15</dc:date>
<dc:rights>Copyright 2000, O'Reilly Network</dc:rights>
<dc:publisher>German Archaeological Institute</dc:publisher>
</ExtendedData>
"""
    intro = """<?xml version="1.0" encoding="utf-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
<Document>%s
""" % metadata
    outro = """
</Document>
</kml>"""
    # The context manager closes the file even if a request or a write fails.
    with codecs.open(ofname, 'w', 'utf-8') as out:
        out.write(intro)
        for place in places:
            for key in place["subject_headings"]:
                found = get_related_publications(key=key.encode('utf-8'))
                if not found:
                    continue
                print >> sys.stderr, "Found %i related publications in Zenon related to %s" % (len(found), place['name'])
                for pub in found:
                    # Only dates of at most four characters are exported;
                    # longer imprint strings are left out.
                    if len(pub['date']) <= 4:
                        desc = "<p>%s <em>%s</em> (%s) [<a target=\"_blank\" href='%s'>View in Zenon</a> ; <a href='%s' target=\"_blank\">View in Gazetteer</a> ] </p>" % (pub['author'], pub['title'], pub['date'], pub['link'], place['gazlink'])
                        out.write(to_KML(place['name'], place['coord'], pub['date'], desc))
        out.write(outro)
# Script entry point: when run directly, write the KML to the default output file.
if __name__ == "__main__":
    main()
    # To write the output to another destination, pass the file name instead:
    #main(ofname="thefilenameidecide.kml")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment