Brest - Metadata fixer

# -*- coding: UTF-8 -*-
from __future__ import (absolute_import, print_function, unicode_literals)
# -----------------------------------------------------------------------------
# Name:         Metadata XML fixer
# Purpose:      Check and fix ISO19139 metadata XML files
# Python:       2.7.x
# Author:       Julien Moura (https://github.com/Guts)
# Created:      08/09/2017
# -----------------------------------------------------------------------------
# #############################################################################
# ###### Libraries #########
# ##########################
# Standard library
from datetime import datetime, date
from copy import deepcopy  # advanced copy
import logging
from os import getcwd, listdir, mkdir, path
import sys
# Python 3 backported
from collections import OrderedDict
# XML parsing (standard library)
from xml.etree import ElementTree as ET
# ##############################################################################
# ############ Globals ############
# #################################
ds_creation_date = ""

# #############################################################################
# ########### Classes #############
# #################################
class MetadataXML19139Fixer(object):
    def __init__(self):
        """Batch edit input XML files to enhance compliance to ISO19139."""
        self.check_folders()
        self.ns = self.add_namespaces()
        super(MetadataXML19139Fixer, self).__init__()
    def check_folders(self):
        """Check prerequisites: input/output folders and input XML files."""
        self.fold_in = path.join(getcwd(), "input")
        self.fold_out = path.join(getcwd(), "output")
        # input folder
        if not path.isdir(self.fold_in):
            try:
                mkdir(self.fold_in, 0o777)
            except Exception as e:
                logging.error(e)
                sys.exit()
        else:
            logging.info("Input folder already exists.")
        # input XML files
        if not len(listdir(self.fold_in)):
            logging.error("Input folder is empty: there is no XML file to "
                          "process. Please copy your ISO19139 XML files "
                          "into it.")
            sys.exit()
        else:
            logging.info("Files are present in the input folder.")
        # output folder
        if not path.isdir(self.fold_out):
            try:
                mkdir(self.fold_out, 0o777)
            except Exception as e:
                logging.error(e)
                sys.exit()
        else:
            logging.info("Output folder already exists.")
        # method ending
        return 0
    def add_namespaces(self):
        """Return the ISO19139 namespaces map used for find()/findall() lookups."""
        ns = {"gts": "http://www.isotc211.org/2005/gts",
              "gml": "http://www.opengis.net/gml",
              "xsi": "http://www.w3.org/2001/XMLSchema-instance",
              "gco": "http://www.isotc211.org/2005/gco",
              "gmd": "http://www.isotc211.org/2005/gmd",
              "gmx": "http://www.isotc211.org/2005/gmx",
              "srv": "http://www.isotc211.org/2005/srv"}
        return ns
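
    # A minimal sketch, not part of the original gist: the namespace map above
    # is only used for find()/findall() lookups; to keep the gmd:/gco:/...
    # prefixes when a tree is written back out, the same pairs can be
    # registered globally, as iso19139() below does inline. The method name
    # `register_namespaces` is illustrative.
    def register_namespaces(self):
        """Register the ISO19139 prefixes so serialized output keeps them."""
        for prefix, uri in self.ns.items():
            ET.register_namespace(prefix, uri)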
    def add_ds_creation_date(self):
        """Add the dataset creation date into the metadata XML.

        Under /MD_Metadata/identificationInfo/MD_DataIdentification/citation/CI_Citation/date:
            <date>
                <CI_Date>
                    <date>
                        <gco:Date>2010-07-07Z</gco:Date>
                    </date>
                    <dateType>
                        <CI_DateTypeCode codeList="http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_19139_Schemas/resources/codelist/ML_gmxCodelists.xml#CI_DateTypeCode" codeListValue="creation">creation</CI_DateTypeCode>
                    </dateType>
                </CI_Date>
            </date>
        """
        # exploratory stub: for now it only lists the children of MD_DataIdentification
        for item in self.get_identificationInfo().find('gmd:MD_DataIdentification', self.ns):
            print(item)
        return
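
    # A minimal sketch, not part of the original gist: how the <gmd:date> block
    # documented in the docstring above could be built and appended with
    # ElementTree. The method name `build_ds_creation_date` and its parameters
    # (`ci_citation`, an existing gmd:CI_Citation element, and `creation_date`,
    # an ISO string such as "2010-07-07Z") are illustrative assumptions.
    def build_ds_creation_date(self, ci_citation, creation_date):
        """Append a creation <gmd:date>/<gmd:CI_Date> block to a CI_Citation element."""
        gmd = "{http://www.isotc211.org/2005/gmd}"
        gco = "{http://www.isotc211.org/2005/gco}"
        date_el = ET.SubElement(ci_citation, gmd + "date")
        ci_date = ET.SubElement(date_el, gmd + "CI_Date")
        inner_date = ET.SubElement(ci_date, gmd + "date")
        gco_date = ET.SubElement(inner_date, gco + "Date")
        gco_date.text = creation_date
        date_type = ET.SubElement(ci_date, gmd + "dateType")
        type_code = ET.SubElement(date_type, gmd + "CI_DateTypeCode")
        type_code.set("codeList",
                      "http://standards.iso.org/ittf/PubliclyAvailableStandards/"
                      "ISO_19139_Schemas/resources/codelist/ML_gmxCodelists.xml"
                      "#CI_DateTypeCode")
        type_code.set("codeListValue", "creation")
        type_code.text = "creation"
        return ci_date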
    def get_identificationInfo(self):
        """Return the gmd:identificationInfo element of the parsed XML."""
        for i in self.tpl_root.findall('gmd:identificationInfo', self.ns):
            self.idenfo = i
        return self.idenfo
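
    # A minimal sketch, not part of the original gist: drilling down to the
    # gmd:CI_Citation element that add_ds_creation_date() targets, reusing the
    # same namespace map. The method name `get_ci_citation` is illustrative and
    # it assumes a dataset record (gmd:MD_DataIdentification).
    def get_ci_citation(self):
        """Return the CI_Citation element under MD_DataIdentification."""
        return self.get_identificationInfo().find(
            "gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation", self.ns)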
    def iso19139(self, dest, dico_layer, dico_profil, blabla):
        u"""Export metadata to an XML file according to ISO 19139."""
        # opening the template
        with open(r"data/xml/template_iso19139.xml", 'r') as iso:
            # parser
            template = ET.parse(iso)
            # namespaces (register_namespace returns None, so no assignment needed)
            ET.register_namespace("gts", "http://www.isotc211.org/2005/gts")
            ET.register_namespace("gml", "http://www.opengis.net/gml")
            ET.register_namespace("xsi", "http://www.w3.org/2001/XMLSchema-instance")
            ET.register_namespace("gco", "http://www.isotc211.org/2005/gco")
            ET.register_namespace("gmd", "http://www.isotc211.org/2005/gmd")
            ET.register_namespace("gmx", "http://www.isotc211.org/2005/gmx")
            ET.register_namespace("srv", "http://www.isotc211.org/2005/srv")
            # getting the elements and sub-elements structure
            tpl_root = template.getroot()
            # transform coordinates to WGS84 for catalogs display
            # (Transproj is provided by the surrounding project, not defined in this gist)
            if dico_layer.get('EPSG') != u'None':
                # OGR found the EPSG code
                srs84 = Transproj(epsg=int(dico_layer.get('EPSG')),
                                  Xmin=dico_layer.get('Xmin'),
                                  Ymin=dico_layer.get('Ymin'),
                                  Xmax=dico_layer.get('Xmax'),
                                  Ymax=dico_layer.get('Ymax')).tupwgs84
            else:
                # EPSG code not found: keep the native coordinates
                srs84 = (dico_layer.get('Xmin'),
                         dico_layer.get('Ymin'),
                         dico_layer.get('Xmax'),
                         dico_layer.get('Ymax'))
            # parsing and completing template structure
            for elem in tpl_root.iter():
                # universal identifier to know how the metadata has been created
                if elem.tag == '{http://www.isotc211.org/2005/gmd}fileIdentifier':
                    elem[0].text = "Metadator_" + \
                                   str(datetime.today()).replace(" ", "")\
                                                        .replace(":", "")\
                                                        .replace(".", "-jm-")
                # EPSG code
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}code':
                    elem[0].text = dico_layer.get(u'srs') + u" (EPSG : " \
                                   + unicode(dico_layer.get(u'EPSG')) + ")"
                    continue
                # standard projection registry (EPSG)
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}codeSpace':
                    elem[0].text = 'EPSG'
                    continue
                # title
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}title':
                    elem[0].text = dico_layer.get('title')
                    continue
                # spatial extent
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}westBoundLongitude':
                    elem[0].text = str(srs84[0])
                    ## elem[0].text = str(dico_layer['Xmin'])
                    continue
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}eastBoundLongitude':
                    elem[0].text = str(srs84[2])
                    ## elem[0].text = str(dico_layer['Xmax'])
                    continue
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}southBoundLatitude':
                    elem[0].text = str(srs84[1])
                    ## elem[0].text = str(dico_layer['Ymin'])
                    continue
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}northBoundLatitude':
                    elem[0].text = str(srs84[3])
                    ## elem[0].text = str(dico_layer['Ymax'])
                    continue
                # description
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}abstract':
                    elem[0].text = dico_profil['description']
                    continue
                # update frequency
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}status':
                    elem[0].attrib['codeListValue'] = dico_profil['rythm']
                    continue
                # descriptive information: keep references for the later sections
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}MD_DataIdentification':
                    infos = elem
                    mtc_them = infos[5]   # thematic keywords template
                    mtc_geo = infos[6]    # place keywords template
                    lg_don = infos[10]    # data language
                    continue
                # scale
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}MD_RepresentativeFraction':
                    scale = elem
                    scale[0][0].text = str(dico_profil.get('echelle'))
                    continue
                # distribution
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}MD_DigitalTransferOptions':
                    distrib = elem
                    siteweb = distrib[0][0]
                    siteweb[0][0].text = dico_profil.get('url')
                    siteweb[2][0].text = dico_profil.get('url_label')
                    continue
                # creation date
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}CI_Date' \
                        and elem[1][0].attrib['codeListValue'] == 'creation':
                    elem[0][0].text = dico_layer.get(u'date_crea')
                    ## crea = date.isoformat(datetime(crea[0], crea[1], crea[2]))
                    ## elem[0][0].text = crea
                # last update
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}CI_Date' \
                        and elem[1][0].attrib['codeListValue'] == 'revision':
                    elem[0][0].text = dico_layer.get(u'date_actu')
                    ## reviz = date.isoformat(datetime(reviz[0], reviz[1], reviz[2]))
                    ## elem[0][0].text = reviz
                # format
                elif elem.tag == '{http://www.isotc211.org/2005/gmd}MD_Format':
                    elem[0][0].text = 'ESRI Shapefile'
            tpl_cat = list(tpl_root)
            # metadata language
            lg_met = tpl_cat[1]
            list(lg_met)[0].text = dico_profil[u'lang_md']
            # data language
            list(lg_don)[0].text = dico_profil[u'lang_data']
            # metadata date
            tpl_cat[4][0].text = str(datetime.today())[:-7]
            # contact
            ct_cont = tpl_cat[3][0]
            list(ct_cont)[0][0].text = dico_profil[u'cont_name']
            list(ct_cont)[1][0].text = dico_profil[u'cont_orga']
            list(ct_cont)[2][0].text = dico_profil[u'cont_role']
            cont_info = list(ct_cont)[3][0]
            cont_phone = list(cont_info)[0]
            list(cont_phone)[0][0][0].text = dico_profil['cont_phone']
            cont_adress = list(cont_info)[1]
            adress = list(cont_adress[0])[0]
            adress[0].text = dico_profil['cont_street']
            ville = list(cont_adress[0])[1]
            ville[0].text = dico_profil['cont_city']
            cp = list(cont_adress[0])[2]
            cp[0].text = dico_profil['cont_cp']
            pays = list(cont_adress[0])[3]
            pays[0].text = dico_profil['cont_country']
            mail = list(cont_adress[0])[4]
            mail[0].text = dico_profil['cont_mail']
            fonction = list(ct_cont)[4]
            fonction[0].attrib['codeListValue'] = dico_profil['cont_func']
            # responsible party
            ct_resp = list(infos[3][0])
            ct_resp[0][0].text = dico_profil[u'resp_name']
            ct_resp[1][0].text = dico_profil[u'resp_orga']
            ct_resp[2][0].text = dico_profil[u'resp_role']
            resp_info = list(ct_resp[3][0])
            resp_info[0][0][0][0].text = dico_profil[u'resp_phone']
            resp_adress = list(resp_info[1][0])
            resp_adress[0][0].text = dico_profil['resp_street']
            resp_adress[1][0].text = dico_profil['resp_city']
            resp_adress[2][0].text = dico_profil['resp_cp']
            resp_adress[3][0].text = dico_profil['resp_country']
            resp_adress[4][0].text = dico_profil['resp_mail']
            # thematic keywords
            for i in dico_profil.get('keywords'):
                infos.append(deepcopy(mtc_them))
            infos.remove(mtc_them)
            x = 0
            for tem in list(infos):
                if tem.tag == '{http://www.isotc211.org/2005/gmd}descriptiveKeywords' \
                        and tem[0][1][0].attrib['codeListValue'] == 'theme':
                    tem[0][0][0].text = dico_profil.get('keywords')[x]
                    x = x + 1
            # place keywords
            for i in dico_profil.get('geokeywords'):
                infos.append(deepcopy(mtc_geo))
            infos.remove(mtc_geo)
            y = 0
            for tem in list(infos):
                if tem.tag == '{http://www.isotc211.org/2005/gmd}descriptiveKeywords' \
                        and tem[0][1][0].attrib['codeListValue'] == 'place':
                    tem[0][0][0].text = dico_profil.get('geokeywords')[y]
                    y = y + 1
        # saving the xml file
        template.write(path.join(dest, "{0}_MD.html".format(dico_layer['name'][:-4])),
                       encoding='utf-8',
                       xml_declaration=True,
                       method='xml')
        # End of function
        return template

# #############################################################################
# ### Stand alone execution #######
# #################################
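
# A minimal sketch, not part of the original gist: the script logs with
# logging.info()/logging.error() but never configures the logging module, so
# INFO messages are invisible by default. The format string is an illustrative
# assumption; this call could equally sit just inside the __main__ guard.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")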
if __name__ == '__main__':
    """Test parameters for a stand-alone run."""
    app = MetadataXML19139Fixer()
    print(dir(app))
    for xml in listdir(app.fold_in):
        logging.info(xml)
        print(xml)
        # opening the input file
        with open(path.join(app.fold_in, xml), 'r') as in_xml:
            print(in_xml)
            # parser
            app.tpl = ET.parse(in_xml)
            # getting the elements and sub-elements structure
            app.tpl_root = app.tpl.getroot()
            # creation date
            app.add_ds_creation_date()
            # saving the output xml file
            app.tpl.write(path.join(app.fold_out, xml),
                          encoding='utf-8',
                          xml_declaration=True,
                          # default_namespace=namespaces,
                          method='xml')