Last active
June 3, 2018 05:44
-
-
Save willismonroe/e3dbc9ba0ee834befae82fb641535783 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json, pprint\n", | |
"from oracc_reader import ORACC_text_reader" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"o \n", | |
"o 1 𒁹 𒀸 𒌗𒉈 𒀭𒅎 𒅗𒋙 𒊒𒈠\n", | |
" 1 ina {iti}NE {d}IM GU₃-šu₂ ŠUB-ma\n", | |
" 1 ina Abi Adad rigimšu iddīma\n", | |
"o 2 𒌓 𒋙 𒀭 𒋩𒉣 𒉏𒄈 𒅁𒍮\n", | |
" UD ŠU₂ AN ŠUR-nun NIM.GIR₂ ib-riq₂\n", | |
" ūmu erpi šamû iznun birqu ibriq\n", | |
"o 3 𒀀𒈨𒌍 𒀸 𒅂 𒇲𒈨𒌍\n", | |
" A-MEŠ ina IDIM LA₂-MEŠ\n", | |
" mê ina nagbi imaṭṭû\n", | |
"o 4 𒁹 𒀸 𒌓 𒆷 𒅕𒁉 𒀭𒅎 𒄑𒋛\n", | |
" 1 ina UD la er-pi₂ {d}IM is-si\n", | |
" 1 ina ūme lā erpi Adad issi\n", | |
"o 5 𒁕𒌝𒈠𒌅 𒆠𒈫 𒋢𒅥 𒀸 𒆳 𒅅\n", | |
" da-um-ma-tu KI.MIN SU.KU₂ ina KUR GAL₂\n", | |
" daʾummatu KI.MIN bubūtu ina māti ibašši\n", | |
"o 6 𒀸 𒌋𒅗 𒆷 𒂅𒌒 𒍜 𒀭𒉌𒄿\n", | |
" ina UGU la ṭu-ub UZU an-ni-i\n", | |
" ina muhhi lā ṭūb šīri annî\n", | |
"o 7 𒈗 𒁁𒉌 𒋬 𒊮𒁉𒋙 𒆷 𒄿𒁕𒁍𒌒\n", | |
" LUGAL be-li₂ TA@v ŠA₃-bi-šu₂ la i-da-bu-ub\n", | |
" šarru bēlī issi libbīšu lā idabbub\n", | |
"o 8 𒄯𒍮 𒈬𒀭𒈾 𒋗𒌑\n", | |
" mur-ṣu MU.AN.NA šu-u₂\n", | |
" murṣu šattu šû\n", | |
"o 9 𒌦𒈨𒌍 𒄠𒈥 𒈥𒍮𒌋𒉌\n", | |
" UN-MEŠ am-mar mar-ṣu-u-ni\n", | |
" nišē ammar marṣūni\n", | |
"o 10 𒃮𒁍 𒂄𒈬\n", | |
" gab-bu šul-mu\n", | |
" gabbu šulmu\n", | |
"o 11 𒌅𒊏𒈠 𒈗 𒁁𒉌\n", | |
" tu-ra-ma LUGAL be-li₂\n", | |
" tūrāma šarru bēlī\n", | |
"o 12 𒊭 𒉺𒈛 𒀭𒈨𒌍 𒋗𒌅𒌋𒉌\n", | |
" ša pa-lih₃ DINGIR-MEŠ šu-tu-u-ni\n", | |
" ša pālih ilāni šūtūni\n", | |
"o 13 𒌓𒈬 𒅇 𒈬𒋙 𒀭𒈨𒌍 𒌑𒉌𒇻𒌋𒉌\n", | |
" UD-mu u₃ mu-šu₂ DINGIR-MEŠ u₂-ṣal-lu-u-ni\n", | |
" ūmu u mūšu ilāni uṣallûni\n", | |
"r \n", | |
"r 1 𒆤𒌅𒌑 𒈨𒈨𒉌\n", | |
" kit-tu-u₂ me-me-ni\n", | |
" kittû memmēni\n", | |
"r 2 𒀀𒈾 𒈗 𒂗𒅀 𒅇 𒆰𒋙 𒅋𒆷𒅗\n", | |
" a-na LUGAL EN-ia u₃ NUMUN-šu₂ il-la-ka\n", | |
" ana šarri bēlīya u zarʾīšu[seed(s)]N illāka\n", | |
"r 3 𒄿𒇻𒌓𒌅 𒀀𒋓𒌓\n", | |
" i-lu-ut-tu a-šir-tu₂\n", | |
" iluttu aširtu\n", | |
"r 4 𒁲𒂊𒄿𒆥\n", | |
" de-e-i-qi\n", | |
" deʾīqi\n", | |
"r 5 𒋗𒌑 𒆠𒄿 𒀭𒉌𒄿 𒋡𒀀𒁉\n", | |
" šu-u₂ ki-i an-ni-i qa-a-bi\n", | |
" šû kî annî qābi\n", | |
"r 6 𒈠𒃲 𒀀𒊺𒅕 𒌓𒈨𒌍𒋙 𒆸𒁕𒈨𒌍\n", | |
" ma-gal a-še-er UD-MEŠ-šu₂ LUGUD₂.DA-MEŠ\n", | |
" magal ašer ūmēšu kurûti\n", | |
"r 7 𒅎𒁕𒈾𒊏𒊍 𒌓𒈨𒌍𒋙 𒁍𒁕𒈨𒌍\n", | |
" im-da-na-ra-aṣ UD-MEŠ-šu₂ GID₂.DA-MEŠ\n", | |
" imdanarraṣ ūmēšu arkūti\n", | |
"r 8 𒊭 𒁹𒀭𒌋𒐊𒈬𒄰𒌍\n", | |
" ša {1}{d}15--MU-KAM-eš\n", | |
" ša Issar-šumu-ereš\n" | |
] | |
} | |
], | |
"source": [ | |
"with open('P336558.json') as f:\n", | |
" json_text = f.read()\n", | |
"tr = ORACC_text_reader(json_text)\n", | |
"for c, t, n in zip(tr.output_cuneiform(),\n", | |
" tr.output_translit(with_line_headers=False),\n", | |
" tr.output_norm(with_line_headers=False)):\n", | |
" line_header = ' '.join(c.split()[:2])\n", | |
" spacing = ' ' * len(line_header)\n", | |
" print(line_header + ' ' + ' '.join(c.split()[2:]))\n", | |
" if t != '': print(spacing + t)\n", | |
" if n != '': print(spacing + n)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json, pprint | |
class ORACC_text_reader:
    """Render an ORACC JSON text as lists of display lines.

    The constructor parses the JSON string and stores the node list found at
    ``data['cdl'][0]['cdl'][i]['cdl'][0]['cdl']`` as ``self.text``.  Each
    ``output_*`` method walks that list and returns a list of strings:

    * a node whose ``'node'`` is ``'d'`` and which carries a ``'label'``
      closes the current line and starts a new one (headed by that label
      when ``with_line_headers`` is true);
    * a node whose ``'node'`` is ``'l'`` contributes one space-prefixed word
      to the current line.

    The line in progress before the first labelled ``'d'`` node (``'o'`` with
    headers, ``''`` without) is always emitted as the first entry, so the
    lists returned by the different ``output_*`` methods stay index-aligned
    and can be zipped together.
    """

    def __init__(self, json_string, DEBUG=False):
        """Parse *json_string* and locate the text's node list.

        Args:
            json_string: JSON document as a string; ``json.loads`` errors
                propagate to the caller.
            DEBUG: when true, print diagnostics (including a dump of the
                parsed data if the expected ``cdl`` nesting is absent).
        """
        self.DEBUG = DEBUG
        self.data = json.loads(json_string)
        # Start from an empty text so the output_* methods still work (and
        # return just the initial line) when the expected nesting is missing,
        # instead of failing later with AttributeError.
        self.text = []
        try:
            for node in self.data['cdl'][0]['cdl']:
                if 'cdl' in node:
                    # No break on purpose: the last node holding a 'cdl'
                    # list wins.
                    self.text = node['cdl'][0]['cdl']
        except (KeyError, IndexError, TypeError):
            # Only structural mismatches are tolerated; anything else is a
            # real error and should surface.
            if self.DEBUG:
                pprint.pprint(self.data)

    def _build_lines(self, with_line_headers, word_text):
        """Assemble output lines, asking *word_text* for each word's text.

        Args:
            with_line_headers: prefix each line with its label (and the
                first line with ``'o'``) when true.
            word_text: callable taking an ``'l'`` node and returning the
                string to append, or ``None`` to append nothing.

        Returns:
            List of line strings; words are each preceded by one space.
        """
        output = []
        line = 'o' if with_line_headers else ''
        for node in self.text:
            kind = node['node']
            if kind == 'd' and 'label' in node:
                output.append(line)
                line = node['label'] if with_line_headers else ''
            elif kind == 'l':
                text = word_text(node)
                if text is not None:
                    line += ' ' + text
        output.append(line)
        return output

    @staticmethod
    def _field_or_form(node, field):
        """Return ``node['f'][field]``, falling back to ``node['f']['form']``."""
        features = node['f']
        return features[field] if field in features else features['form']

    @staticmethod
    def _collect_signs(element, unresolved):
        """Concatenate every ``gdl_utf8`` value reachable from *element*.

        An element either carries ``gdl_utf8`` itself or nests further
        elements under ``'seq'`` or ``'group'``; nesting is followed to any
        depth.  Elements with none of those keys are appended to
        *unresolved* and contribute an empty string.
        """
        if 'gdl_utf8' in element:
            return element['gdl_utf8']
        for key in ('seq', 'group'):
            if key in element:
                return ''.join(
                    ORACC_text_reader._collect_signs(child, unresolved)
                    for child in element[key]
                )
        unresolved.append(element)
        return ''

    def _cuneiform_word(self, node):
        """Return the cuneiform string for one ``'l'`` node, or ``None``.

        ``None`` is returned when the node's ``gdl`` list is empty, in which
        case nothing (not even a separating space) is added to the line.
        """
        translit = node['frag']
        gdl = node['f']['gdl']
        if not gdl:
            if self.DEBUG:
                print("Can't process ✗")
            # Dumped regardless of DEBUG so an unsupported word is never
            # dropped silently.
            pprint.pprint(node)
            print()
            return None

        unresolved = []
        sign = ''.join(self._collect_signs(el, unresolved) for el in gdl)
        if unresolved:
            if self.DEBUG:
                print("Error with multi-gdl ✗ {} = {}".format(translit, sign))
            # Dumped regardless of DEBUG, for the same reason as above.
            pprint.pprint(gdl)
        elif self.DEBUG:
            if len(gdl) == 1:
                print("Single gdl ✓ {} = {}".format(translit, sign))
            else:
                print("Multi-gdl ✓ {} = {}".format(translit, sign))
        return sign

    def output_translit(self, with_line_headers=True):
        """Return the lines built from each word's ``'frag'`` value."""
        return self._build_lines(with_line_headers, lambda node: node['frag'])

    def output_norm(self, with_line_headers=True):
        """Return the lines built from each word's ``f['norm']``.

        Words without a ``'norm'`` entry fall back to ``f['form']``.
        """
        return self._build_lines(
            with_line_headers,
            lambda node: self._field_or_form(node, 'norm'),
        )

    def output_sense(self, with_line_headers=True):
        """Return the lines built from each word's ``f['sense']``.

        Words without a ``'sense'`` entry fall back to ``f['form']``.
        """
        return self._build_lines(
            with_line_headers,
            lambda node: self._field_or_form(node, 'sense'),
        )

    def output_cuneiform(self, with_line_headers=True):
        """Return the lines built from the ``gdl_utf8`` signs of each word.

        Signs are gathered from ``f['gdl']``, following nested ``'seq'`` and
        ``'group'`` lists.  Words whose ``gdl`` list is empty are skipped
        (and dumped to stdout).
        """
        return self._build_lines(with_line_headers, self._cuneiform_word)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Compare with: http://oracc.museum.upenn.edu/saao/saa08/P336558/html