Created
February 11, 2019 15:53
-
-
Save adam704a/3c20dec9194d7fef4598c077664fc90a to your computer and use it in GitHub Desktop.
Generate a GML file to import to DHIS2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Generate a GML file to import to DHIS2\n", | |
"This notebook takes care of creating GML files to import to DHIS2, starting from existing GML files. The source GML files are generated from a shapefile we got from geoconnect. To generate them we used **ogr2ogr** as described [here](https://docs.dhis2.org/2.22/en/user/html/ch18s02.html). To get the UIDs of the organizations in DHIS2 we exported the organization unit metadata using DHIS2's import/export module. In this notebook, this file is named by `org_file_name` (*ntd2-orgs.json*; *ntddb_orgs.json* is the commented-out alternative). \n", | |
"\n", | |
"\n", | |
"Running through this entire notebook will generate 3 GML files, one for each administrative level that we have geographic information for. These files are called admin1-ethiopia.xml for region level boundaries, admin2-ethiopia.xml for district level boundaries, and admin3-ethiopia.xml for woreda level boundaries." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# use this to quickly look up an ORG ID, we'll use this quite a bit\n", | |
"def admin_org_search(name, orgs):\n", | |
" return [element['id'] for element in orgs if element['name'].strip() == name]\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Generate Region level GML\n", | |
"where admin1s is used to generate the admin1 (region) GML file and admin1_ids is used for building out the admin2 (district) file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"\n", | |
"\n", | |
"parent = \"tfrwwy49VMx\"\n", | |
"#org_file_name = \"ntddb_orgs.json\" # https://ntddb.callawaywilson.com/\n", | |
"org_file_name = \"ntd2-orgs.json\" \n", | |
"\n", | |
"#parent = \"iuGjpnxnFbI\" # https://ntddb.callawaywilson.com/\n", | |
"\n", | |
"data = json.load(open(org_file_name))\n", | |
"admin1s = []\n", | |
"admin1_ids = []\n", | |
"for org in data[\"organisationUnits\"]:\n", | |
" if \"parent\" in org and org[\"parent\"][\"id\"] == parent:\n", | |
" admin1 = {\"name\": org[\"name\"], \"id\": org[\"id\"]}\n", | |
" admin1s.append(admin1)\n", | |
" admin1_ids.append(org[\"id\"])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Get the features for Ethiopia regions in the GML file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import xml.etree.ElementTree as ET\n", | |
"tree1 = ET.parse('/Users/apreston/Ethiopia/06092017_update/admin1.gml')\n", | |
"root1 = tree1.getroot()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'id': 'ORG00000001', 'name': 'Addis Ababa'},\n", | |
" {'id': 'ORG00000013', 'name': 'Afar'},\n", | |
" {'id': 'ORG00000053', 'name': 'Amhara'},\n", | |
" {'id': 'ORG00000218', 'name': 'Beneshangul Gumuz'},\n", | |
" {'id': 'ORG00000256', 'name': 'Dire Dawa'},\n", | |
" {'id': 'ORG00000267', 'name': 'Gambella'},\n", | |
" {'id': 'ORG00000299', 'name': 'Harare'},\n", | |
" {'id': 'ORG00000310', 'name': 'Oromia'},\n", | |
" {'id': 'ORG00000667', 'name': 'SNNPR'},\n", | |
" {'id': 'ORG00000842', 'name': 'Somali'},\n", | |
" {'id': 'ORG00000933', 'name': 'Tigray'}]" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"admin1s" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Build out list of admin1 elements (with special UID attribute)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"ethiopia_admin1_orgs = []\n", | |
"\n", | |
"for orgxml in root1.iter('{http://www.opengis.net/gml}featureMember'):\n", | |
" \n", | |
"\n", | |
" # filter on country = Ethopia\n", | |
" if (orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN0\").text == \"Ethiopia\"):\n", | |
" \n", | |
" org_id = admin_org_search(orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN1\").text, admin1s)[0]\n", | |
" ET.SubElement(orgxml[0], \"rti:UID\").text = org_id\n", | |
" \n", | |
" ethiopia_admin1_orgs.append(orgxml)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Write out XML" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import xml.etree.cElementTree as ET\n", | |
"\n", | |
"ET.register_namespace('ogr', \"http://ogr.maptools.org/\")\n", | |
"ET.register_namespace('gml', \"http://www.opengis.net/gml\")\n", | |
"ET.register_namespace('rti', \"https://www.rti.org/\")\n", | |
"\n", | |
"\n", | |
"root = ET.Element(\"ogr:FeatureCollection\")\n", | |
"root.set(\"xmlns:xsi\", \"http://www.w3.org/2001/XMLSchema-instance\")\n", | |
"root.set(\"xsi:schemaLocation\", \"http://ogr.maptools.org/ Admin1.xsd\")\n", | |
"root.set(\"xmlns:rti\", \"https://www.rti.org/\")\n", | |
"\n", | |
"# go through and add feature members\n", | |
"for element in ethiopia_admin1_orgs:\n", | |
" root.append(element)\n", | |
"\n", | |
"tree = ET.ElementTree(root)\n", | |
"\n", | |
"tree.write(\"admin1-ethiopia.xml\", encoding='utf-8', xml_declaration=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['ORG00000001',\n", | |
" 'ORG00000013',\n", | |
" 'ORG00000053',\n", | |
" 'ORG00000218',\n", | |
" 'ORG00000256',\n", | |
" 'ORG00000267',\n", | |
" 'ORG00000299',\n", | |
" 'ORG00000310',\n", | |
" 'ORG00000667',\n", | |
" 'ORG00000842',\n", | |
" 'ORG00000933']" | |
] | |
}, | |
"execution_count": 43, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"admin1_ids" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Generate district level GML\n", | |
"where admin2s is used to generate the admin2 (district) GML file and admin2_ids is used for building out the admin3 (woreda) file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'ntd2-orgs.json'" | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"org_file_name" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"\n", | |
"\n", | |
"data = json.load(open(org_file_name))\n", | |
"admin2s = []\n", | |
"admin2_ids = []\n", | |
"\n", | |
"for org in data[\"organisationUnits\"]:\n", | |
" \n", | |
" if \"parent\" in org and org[\"parent\"][\"id\"] in admin1_ids:\n", | |
"\n", | |
" admin2 = {\"name\": org[\"name\"], \"id\": org[\"id\"]}\n", | |
" # print(admin2)\n", | |
" admin2s.append(admin2)\n", | |
" admin2_ids.append(org[\"id\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#admin2s" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Get the features for Ethiopia districts in the GML file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import xml.etree.ElementTree as ET\n", | |
"tree2 = ET.parse('/Users/apreston/Ethiopia/06092017_update/Admin2.gml')\n", | |
"root2 = tree2.getroot()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Build out list of admin2 elements (with special UID attribute)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ethiopia_admin2_orgs = []\n", | |
"\n", | |
"for orgxml in root2.iter('{http://www.opengis.net/gml}featureMember'):\n", | |
" \n", | |
" # filter on country = Ethopia\n", | |
" if (orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN0\").text == \"Ethiopia\"):\n", | |
" org_id = admin_org_search(orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN2\").text, admin2s)\n", | |
" org_name = orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN2\").text\n", | |
" region_name = orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN1\").text\n", | |
" \n", | |
" if (len(org_id) == 0):\n", | |
" print(org_name + \" is not found as an admin2 in DHIS2 in region \" \n", | |
" + region_name + \". Skipping.\")\n", | |
" else:\n", | |
" ET.SubElement(orgxml[0], \"rti:UID\").text = org_id[0]\n", | |
" ethiopia_admin2_orgs.append(orgxml)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Write out XML" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import xml.etree.cElementTree as ET\n", | |
"\n", | |
"ET.register_namespace('ogr', \"http://ogr.maptools.org/\")\n", | |
"ET.register_namespace('gml', \"http://www.opengis.net/gml\")\n", | |
"ET.register_namespace('rti', \"https://www.rti.org/\")\n", | |
"\n", | |
"root = ET.Element(\"ogr:FeatureCollection\")\n", | |
"root.set(\"xmlns:xsi\", \"http://www.w3.org/2001/XMLSchema-instance\")\n", | |
"root.set(\"xsi:schemaLocation\", \"http://ogr.maptools.org/ Admin1.xsd\")\n", | |
"root.set(\"xmlns:rti\", \"https://www.rti.org/\")\n", | |
"\n", | |
"# go through and add feature members\n", | |
"for element in ethiopia_admin2_orgs:\n", | |
" root.append(element)\n", | |
"\n", | |
"tree = ET.ElementTree(root)\n", | |
"\n", | |
"tree.write(\"admin2-ethiopia.xml\", encoding='utf-8', xml_declaration=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Generate woreda level GML\n", | |
"where *admin3s* is used to generate the admin3 (woreda) GML file " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"\n", | |
"data = json.load(open(org_file_name))\n", | |
"admin3s = []\n", | |
"\n", | |
"for org in data[\"organisationUnits\"]:\n", | |
" if \"parent\" in org and org[\"parent\"][\"id\"] in admin2_ids: # make sure a parent is one of the districts\n", | |
" admin3 = {\"name\": org[\"name\"], \"id\": org[\"id\"]}\n", | |
" admin3s.append(admin3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#admin3s" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Get the features for Ethiopia woredas in the GML file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import xml.etree.ElementTree as ET\n", | |
"tree3 = ET.parse('/Users/apreston/Ethiopia/06092017_update/Admin3.gml')\n", | |
"root3 = tree3.getroot()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Build out list of admin3 elements (with special UID attribute)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Harare is not found as an admin3 in DHIS2 in district/region Harare/Harare. Skipping.\n", | |
"Halaba is not found as an admin3 in DHIS2 in district/region Special/SNNPR. Skipping.\n", | |
"Qerca is not found as an admin3 in DHIS2 in district/region Guji/Oromia. Skipping.\n", | |
"Jijiga Rural is not found as an admin3 in DHIS2 in district/region Faafame/Somali. Skipping.\n" | |
] | |
} | |
], | |
"source": [ | |
"ethiopia_admin3_orgs = []\n", | |
"\n", | |
"for orgxml in root3.iter('{http://www.opengis.net/gml}featureMember'):\n", | |
" \n", | |
" if (orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN0\").text == \"Ethiopia\"):\n", | |
"\n", | |
" org_name = orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN3\").text\n", | |
" parent_name = orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN2\").text\n", | |
" region_name = orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN1\").text\n", | |
" \n", | |
" org_id = admin_org_search(orgxml[0].find(\"{http://ogr.maptools.org/}ADMIN3\").text, admin3s)\n", | |
" \n", | |
" if (len(org_id) == 0):\n", | |
" print(org_name + \" is not found as an admin3 in DHIS2 in district/region \" \n", | |
" + parent_name + \"/\"\n", | |
" + region_name + \". Skipping.\")\n", | |
" else:\n", | |
" ET.SubElement(orgxml[0], \"rti:UID\").text = org_id[0]\n", | |
" ethiopia_admin3_orgs.append(orgxml)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import xml.etree.cElementTree as ET\n", | |
"\n", | |
"ET.register_namespace('ogr', \"http://ogr.maptools.org/\")\n", | |
"ET.register_namespace('gml', \"http://www.opengis.net/gml\")\n", | |
"ET.register_namespace('rti', \"https://www.rti.org/\")\n", | |
"\n", | |
"root = ET.Element(\"ogr:FeatureCollection\")\n", | |
"root.set(\"xmlns:xsi\", \"http://www.w3.org/2001/XMLSchema-instance\")\n", | |
"root.set(\"xsi:schemaLocation\", \"http://ogr.maptools.org/ Admin1.xsd\")\n", | |
"root.set(\"xmlns:rti\", \"https://www.rti.org/\")\n", | |
"\n", | |
"# go through and add feature members\n", | |
"for element in ethiopia_admin3_orgs:\n", | |
" root.append(element)\n", | |
"\n", | |
"tree = ET.ElementTree(root)\n", | |
"\n", | |
"tree.write(\"admin3-ethiopia.xml\", encoding='utf-8', xml_declaration=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 336, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[]" | |
] | |
}, | |
"execution_count": 336, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# testing\n", | |
"org_id = admin_org_search(\"Harare\", admin3s)\n", | |
"org_id" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 338, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#admin3s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment