Last active
December 13, 2015 18:38
-
-
Save sgillies/4956284 to your computer and use it in GitHub Desktop.
Digging into GANE data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "metadata": { | |
| "name": "Digging into GANE data" | |
| }, | |
| "nbformat": 3, | |
| "nbformat_minor": 0, | |
| "worksheets": [ | |
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "from collections import defaultdict\n", | |
| "import glob\n", | |
| "import json\n", | |
| "import pprint" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 9 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gane_names = []\n", | |
| "\n", | |
| "for fname in glob.glob(\"names-0*.json\"):\n", | |
| " f = open(fname)\n", | |
| " text = f.read()\n", | |
| " gane_names.extend(json.loads(text))\n", | |
| " f.close()\n", | |
| " \n", | |
| "print len(gane_names)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "600\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 10 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gane_tree = defaultdict(dict)\n", | |
| "\n", | |
| "for n in gane_names:\n", | |
| " try:\n", | |
| " placeURI = n.get('placeURI')\n", | |
| " if \"pleiades.stoa.org\" in placeURI:\n", | |
| " branch = int(n.get('GANEid', -1))\n", | |
| " else:\n", | |
| " branch = int(n.get('placeURI').split(\"placeID=\")[1])\n", | |
| " except:\n", | |
| " print n \n", | |
| " raise\n", | |
| " leaf = int(n.get('GANEid', -1))\n", | |
| " gane_tree[branch][leaf] = n" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 11 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "print len(gane_tree) # the number of GANE places" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "470\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 12 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "print gane_tree.items()[0]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "(17067, {17067: {u'externalURIs': False, u'placeURI': u'http://pleiades.stoa.org/places/746700', u'reference': {u'text': u'A place name in the T\\xfcbingen Atlas Index (Vol. 1, pp. 463)', u'index-page': 463, u'index-volume': 1}, u'title': u'\\u1eb8\\u0304lat', u'maxDate': 2000, u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig, F. Deblauwe, E. Kansa', u'GANEid': 17067, u'periods': [u'Modern Middle East'], u'extent': {u'type': u'Point', u'coordinates': [34.55, 29.3]}, u'nameTransliterated': False, u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig', u'minDate': 1918}})\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 13 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "def in_pleiades(args):\n", | |
| " # is a GANE place in Pleiades?\n", | |
| " k, v = args\n", | |
| " return \"pleiades.stoa.org\" in v[k]['placeURI']" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 14 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "x = gane_tree[13]\n", | |
| "print in_pleiades((13, x))" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "False\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 15 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pprint.pprint(x)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "{12: {u'GANEid': 12,\n", | |
| " u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig, F. Deblauwe, E. Kansa',\n", | |
| " u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig',\n", | |
| " u'extent': {u'coordinates': [48, 30], u'type': u'Point'},\n", | |
| " u'externalURIs': False,\n", | |
| " u'maxDate': 1950,\n", | |
| " u'minDate': 1900,\n", | |
| " u'nameTransliterated': False,\n", | |
| " u'periods': [u'Ottoman Decline-Mandate Middle East'],\n", | |
| " u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',\n", | |
| " u'reference': {u'index-page': 1,\n", | |
| " u'index-volume': 1,\n", | |
| " u'text': u'A place name in the T\\xfcbingen Atlas Index (Vol. 1, pp. 1)'},\n", | |
| " u'title': u'\\u02bfAba\\u0304d\\u0101n'},\n", | |
| " 13: {u'GANEid': 13,\n", | |
| " u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig, F. Deblauwe, E. Kansa',\n", | |
| " u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig',\n", | |
| " u'extent': {u'coordinates': [48.1, 30.2], u'type': u'Point'},\n", | |
| " u'externalURIs': [u'http://en.wikipedia.org/wiki/Abadan,_Iran',\n", | |
| " u'http://www.iranicaonline.org/articles/abadan'],\n", | |
| " u'maxDate': 2000,\n", | |
| " u'minDate': 819,\n", | |
| " u'nameTransliterated': [u'Abadan'],\n", | |
| " u'periods': [u'Samanid-Ghaznavid Iran',\n", | |
| " u'Safavid Middle East',\n", | |
| " u'Ottoman Decline-Mandate Middle East',\n", | |
| " u'Modern Middle East'],\n", | |
| " u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',\n", | |
| " u'reference': {u'index-page': 1,\n", | |
| " u'index-volume': 1,\n", | |
| " u'text': u'A place name in the T\\xfcbingen Atlas Index (Vol. 1, pp. 1)'},\n", | |
| " u'title': u'\\u0100b\\u0101d\\u0101n'},\n", | |
| " 78: {u'GANEid': 78,\n", | |
| " u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig, F. Deblauwe, E. Kansa',\n", | |
| " u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\\xf6llig',\n", | |
| " u'extent': {u'coordinates': [48.15, 30.15], u'type': u'Point'},\n", | |
| " u'externalURIs': False,\n", | |
| " u'maxDate': 1950,\n", | |
| " u'minDate': 750,\n", | |
| " u'nameTransliterated': [u'Abbadan', u\"'Abbadan\"],\n", | |
| " u'periods': [u'Abassid Middle East',\n", | |
| " u'Samanid-Ghaznavid Iran',\n", | |
| " u'Seljuq-Khwarezmian Middle East',\n", | |
| " u'Khwarezmian Middle East',\n", | |
| " u'1200 BC Middle East',\n", | |
| " u'Mongol Middle East',\n", | |
| " u'Ilkhanate Middle East',\n", | |
| " u'Timurid Middle East',\n", | |
| " u'Ottoman Decline-Mandate Middle East'],\n", | |
| " u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',\n", | |
| " u'reference': {u'index-page': 3,\n", | |
| " u'index-volume': 1,\n", | |
| " u'text': u'A place name in the T\\xfcbingen Atlas Index (Vol. 1, pp. 3)'},\n", | |
| " u'title': u'\\u02bfAbb\\u0101d\\u0101n'}}\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 16 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "for k, v in x.items():\n", | |
| " print k, v['title'], v['nameTransliterated']" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "12 \u02bfAba\u0304d\u0101n False\n", | |
| "13 \u0100b\u0101d\u0101n [u'Abadan']\n", | |
| "78 \u02bfAbb\u0101d\u0101n [u'Abbadan', u\"'Abbadan\"]\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 19 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": {} | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment