Skip to content

Instantly share code, notes, and snippets.

@sgillies
Last active December 13, 2015 18:38
Show Gist options
  • Save sgillies/4956284 to your computer and use it in GitHub Desktop.
Save sgillies/4956284 to your computer and use it in GitHub Desktop.
Digging into GANE data
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "Digging into GANE data"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from collections import defaultdict\n",
"import glob\n",
"import json\n",
"import pprint"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"gane_names = []\n",
"\n",
"for fname in glob.glob(\"names-0*.json\"):\n",
"    f = open(fname)\n",
"    text = f.read()\n",
"    gane_names.extend(json.loads(text))\n",
"    f.close()\n",
"    \n",
"print len(gane_names)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"600\n"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"gane_tree = defaultdict(dict)\n",
"\n",
"for n in gane_names:\n",
"    try:\n",
"        placeURI = n.get('placeURI')\n",
"        if \"pleiades.stoa.org\" in placeURI:\n",
"            branch = int(n.get('GANEid', -1))\n",
"        else:\n",
"            branch = int(n.get('placeURI').split(\"placeID=\")[1])\n",
"    except:\n",
"        print n \n",
"        raise\n",
"    leaf = int(n.get('GANEid', -1))\n",
"    gane_tree[branch][leaf] = n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print len(gane_tree) # the number of GANE places"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"470\n"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print gane_tree.items()[0]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(17067, {17067: {u'externalURIs': False, u'placeURI': u'http://pleiades.stoa.org/places/746700', u'reference': {u'text': u'A place name in the T\\xfcbingen Atlas Index (Vol. 1, pp. 463)', u'index-page': 463, u'index-volume': 1}, u'title': u'\\u1eb8\\u0304lat', u'maxDate': 2000, u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig, F. Deblauwe, E. Kansa', u'GANEid': 17067, u'periods': [u'Modern Middle East'], u'extent': {u'type': u'Point', u'coordinates': [34.55, 29.3]}, u'nameTransliterated': False, u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig', u'minDate': 1918}})\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def in_pleiades(args):\n",
"    # is a GANE place in Pleiades?\n",
"    k, v = args\n",
"    return \"pleiades.stoa.org\" in v[k]['placeURI']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x = gane_tree[13]\n",
"print in_pleiades((13, x))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"False\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pprint.pprint(x)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{12: {u'GANEid': 12,\n",
" u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig, F. Deblauwe, E. Kansa',\n",
" u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig',\n",
" u'extent': {u'coordinates': [48, 30], u'type': u'Point'},\n",
" u'externalURIs': False,\n",
" u'maxDate': 1950,\n",
" u'minDate': 1900,\n",
" u'nameTransliterated': False,\n",
" u'periods': [u'Ottoman Decline-Mandate Middle East'],\n",
" u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',\n",
" u'reference': {u'index-page': 1,\n",
" u'index-volume': 1,\n",
" u'text': u'A place name in the T\\xfcbingen Atlas Index (Vol. 1, pp. 1)'},\n",
" u'title': u'\\u02bfAba\\u0304d\\u0101n'},\n",
" 13: {u'GANEid': 13,\n",
" u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig, F. Deblauwe, E. Kansa',\n",
" u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig',\n",
" u'extent': {u'coordinates': [48.1, 30.2], u'type': u'Point'},\n",
" u'externalURIs': [u'http://en.wikipedia.org/wiki/Abadan,_Iran',\n",
" u'http://www.iranicaonline.org/articles/abadan'],\n",
" u'maxDate': 2000,\n",
" u'minDate': 819,\n",
" u'nameTransliterated': [u'Abadan'],\n",
" u'periods': [u'Samanid-Ghaznavid Iran',\n",
" u'Safavid Middle East',\n",
" u'Ottoman Decline-Mandate Middle East',\n",
" u'Modern Middle East'],\n",
" u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',\n",
" u'reference': {u'index-page': 1,\n",
" u'index-volume': 1,\n",
" u'text': u'A place name in the T\\xfcbingen Atlas Index (Vol. 1, pp. 1)'},\n",
" u'title': u'\\u0100b\\u0101d\\u0101n'},\n",
" 78: {u'GANEid': 78,\n",
" u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig, F. Deblauwe, E. Kansa',\n",
" u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig',\n",
" u'extent': {u'coordinates': [48.15, 30.15], u'type': u'Point'},\n",
" u'externalURIs': False,\n",
" u'maxDate': 1950,\n",
" u'minDate': 750,\n",
" u'nameTransliterated': [u'Abbadan', u\"'Abbadan\"],\n",
" u'periods': [u'Abassid Middle East',\n",
" u'Samanid-Ghaznavid Iran',\n",
" u'Seljuq-Khwarezmian Middle East',\n",
" u'Khwarezmian Middle East',\n",
" u'1200 BC Middle East',\n",
" u'Mongol Middle East',\n",
" u'Ilkhanate Middle East',\n",
" u'Timurid Middle East',\n",
" u'Ottoman Decline-Mandate Middle East'],\n",
" u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',\n",
" u'reference': {u'index-page': 3,\n",
" u'index-volume': 1,\n",
" u'text': u'A place name in the T\\xfcbingen Atlas Index (Vol. 1, pp. 3)'},\n",
" u'title': u'\\u02bfAbb\\u0101d\\u0101n'}}\n"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for k, v in x.items():\n",
"    print k, v['title'], v['nameTransliterated']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"12 \u02bfAba\u0304d\u0101n False\n",
"13 \u0100b\u0101d\u0101n [u'Abadan']\n",
"78 \u02bfAbb\u0101d\u0101n [u'Abbadan', u\"'Abbadan\"]\n"
]
}
],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment