Last active
February 2, 2022 14:19
-
-
Save tomkralidis/9d5369b2de2994f55b6c3cd64a149932 to your computer and use it in GitHub Desktop.
GeoCRIS metadata generation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gml="http://www.opengis.net/gml" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:gco="http://www.isotc211.org/2005/gco" xsi:schemaLocation="http://www.isotc211.org/2005/gmd http://www.isotc211.org/2005/gmd/gmd.xsd"> | |
<gmd:fileIdentifier> | |
<gco:CharacterString>{{ record['identifier'] }}</gco:CharacterString> | |
</gmd:fileIdentifier> | |
<gmd:language> | |
<gco:CharacterString>{{ record['language'] }}</gco:CharacterString> | |
</gmd:language> | |
<gmd:characterSet> | |
<gmd:MD_CharacterSetCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_CharacterSetCode" codeListValue="utf8">utf8</gmd:MD_CharacterSetCode> | |
</gmd:characterSet> | |
<gmd:hierarchyLevel> | |
<gmd:MD_ScopeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_ScopeCode" codeListValue="{{ record['type'] }}">{{ record['type'] }}</gmd:MD_ScopeCode> | |
</gmd:hierarchyLevel> | |
<gmd:contact> | |
<gmd:CI_ResponsibleParty> | |
<gmd:organisationName> | |
<gco:CharacterString>{{ record['organization'] }}</gco:CharacterString> | |
</gmd:organisationName> | |
<gmd:role> | |
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="pointOfContact">pointOfContact</gmd:CI_RoleCode> | |
</gmd:role> | |
</gmd:CI_ResponsibleParty> | |
</gmd:contact> | |
<gmd:dateStamp> | |
<gco:DateTime>{{ record['insert_date'] }}</gco:DateTime> | |
</gmd:dateStamp> | |
<gmd:metadataStandardName> | |
<gco:CharacterString>ISO 19115:2003 - Geographic information - Metadata</gco:CharacterString> | |
</gmd:metadataStandardName> | |
<gmd:metadataStandardVersion> | |
<gco:CharacterString>ISO 19115:2003</gco:CharacterString> | |
</gmd:metadataStandardVersion> | |
<gmd:dataSetURI> | |
<gco:CharacterString>{{ record['dataset'] }}</gco:CharacterString> | |
</gmd:dataSetURI> | |
<gmd:referenceSystemInfo> | |
<gmd:MD_ReferenceSystem> | |
<gmd:referenceSystemIdentifier> | |
<gmd:RS_Identifier> | |
<gmd:code> | |
<gco:CharacterString>{{ record['crs'][1] }}</gco:CharacterString> | |
</gmd:code> | |
<gmd:codeSpace> | |
<gco:CharacterString>{{ record['crs'][0] }}</gco:CharacterString> | |
</gmd:codeSpace> | |
<gmd:version> | |
<gco:CharacterString>6.11</gco:CharacterString> | |
</gmd:version> | |
</gmd:RS_Identifier> | |
</gmd:referenceSystemIdentifier> | |
</gmd:MD_ReferenceSystem> | |
</gmd:referenceSystemInfo> | |
<gmd:identificationInfo> | |
<gmd:MD_DataIdentification> | |
<gmd:citation> | |
<gmd:CI_Citation> | |
<gmd:title> | |
<gco:CharacterString>{{ record['title'] }}</gco:CharacterString> | |
</gmd:title> | |
<gmd:date> | |
<gmd:CI_Date> | |
<gmd:date> | |
<gco:Date>{{ record['date_creation'] }}</gco:Date> | |
</gmd:date> | |
<gmd:dateType> | |
<gmd:CI_DateTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode" codeListValue="creation">creation</gmd:CI_DateTypeCode> | |
</gmd:dateType> | |
</gmd:CI_Date> | |
</gmd:date> | |
<gmd:date> | |
<gmd:CI_Date> | |
<gmd:date> | |
<gco:Date>{{ record['date_revision'] }}</gco:Date> | |
</gmd:date> | |
<gmd:dateType> | |
<gmd:CI_DateTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode" codeListValue="revision">revision</gmd:CI_DateTypeCode> | |
</gmd:dateType> | |
</gmd:CI_Date> | |
</gmd:date> | |
<gmd:date> | |
<gmd:CI_Date> | |
<gmd:date> | |
<gco:Date>{{ record['date_publication'] }}</gco:Date> | |
</gmd:date> | |
<gmd:dateType> | |
<gmd:CI_DateTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode" codeListValue="publication">publication</gmd:CI_DateTypeCode> | |
</gmd:dateType> | |
</gmd:CI_Date> | |
</gmd:date> | |
<gmd:presentationForm> | |
<gmd:CI_PresentationFormCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_PresentationFormCode" codeListValue="mapDigital">mapDigital</gmd:CI_PresentationFormCode> | |
</gmd:presentationForm> | |
</gmd:CI_Citation> | |
</gmd:citation> | |
<gmd:abstract> | |
<gco:CharacterString>{{ record['abstract'] }}</gco:CharacterString> | |
</gmd:abstract> | |
<gmd:status> | |
<gmd:MD_ProgressCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_ProgressCode" codeListValue="onGoing">onGoing</gmd:MD_ProgressCode> | |
</gmd:status> | |
<gmd:contact> | |
<gmd:CI_ResponsibleParty> | |
<gmd:organisationName> | |
<gco:CharacterString>{{ record['organization'] }}</gco:CharacterString> | |
</gmd:organisationName> | |
<gmd:role> | |
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="pointOfContact">pointOfContact</gmd:CI_RoleCode> | |
</gmd:role> | |
</gmd:CI_ResponsibleParty> | |
</gmd:contact> | |
<gmd:contact> | |
<gmd:CI_ResponsibleParty> | |
<gmd:organisationName> | |
<gco:CharacterString>{{ record['creator'] }}</gco:CharacterString> | |
</gmd:organisationName> | |
<gmd:role> | |
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="creator">creator</gmd:CI_RoleCode> | |
</gmd:role> | |
</gmd:CI_ResponsibleParty> | |
</gmd:contact> | |
<gmd:contact> | |
<gmd:CI_ResponsibleParty> | |
<gmd:organisationName> | |
<gco:CharacterString>{{ record['publisher'] }}</gco:CharacterString> | |
</gmd:organisationName> | |
<gmd:role> | |
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="publisher">publisher</gmd:CI_RoleCode> | |
</gmd:role> | |
</gmd:CI_ResponsibleParty> | |
</gmd:contact> | |
<gmd:contact> | |
<gmd:CI_ResponsibleParty> | |
<gmd:organisationName> | |
<gco:CharacterString>{{ record['contributor'] }}</gco:CharacterString> | |
</gmd:organisationName> | |
<gmd:role> | |
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="contributor">contributor</gmd:CI_RoleCode> | |
</gmd:role> | |
</gmd:CI_ResponsibleParty> | |
</gmd:contact> | |
<gmd:resourceMaintenance> | |
<gmd:MD_MaintenanceInformation> | |
<gmd:maintenanceAndUpdateFrequency> | |
<gmd:MD_MaintenanceFrequencyCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_MaintenanceFrequencyCode" codeListValue="continual">continual</gmd:MD_MaintenanceFrequencyCode> | |
</gmd:maintenanceAndUpdateFrequency> | |
</gmd:MD_MaintenanceInformation> | |
</gmd:resourceMaintenance> | |
<gmd:resourceFormat> | |
<gmd:MD_Format> | |
<gmd:name> | |
<gco:CharacterString>{{ record['format'] }}</gco:CharacterString> | |
</gmd:name> | |
<gmd:version> | |
<gco:CharacterString gco:nilReason="missing"/> | |
</gmd:version> | |
</gmd:MD_Format> | |
</gmd:resourceFormat> | |
<gmd:descriptiveKeywords> | |
<gmd:MD_Keywords> | |
{% for kw in record['keywords'] %} | |
<gmd:keyword> | |
<gco:CharacterString>{{ kw }}</gco:CharacterString> | |
</gmd:keyword> | |
{% endfor %} | |
<gmd:type> | |
<gmd:MD_KeywordTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_KeywordTypeCode" codeListValue="theme">theme</gmd:MD_KeywordTypeCode> | |
</gmd:type> | |
</gmd:MD_Keywords> | |
</gmd:descriptiveKeywords> | |
<gmd:descriptiveKeywords> | |
<gmd:MD_Keywords> | |
<gmd:keyword> | |
<gco:CharacterString>{{ record['country']['name'] }}</gco:CharacterString> | |
</gmd:keyword> | |
<gmd:type> | |
<gmd:MD_KeywordTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_KeywordTypeCode" codeListValue="place">place</gmd:MD_KeywordTypeCode> | |
</gmd:type> | |
</gmd:MD_Keywords> | |
</gmd:descriptiveKeywords> | |
<gmd:spatialRepresentationType> | |
<gmd:MD_SpatialRepresentationTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_SpatialRepresentationTypeCode" codeListValue="{{ record['data_type'] }}">{{ record['data_type'] }}</gmd:MD_SpatialRepresentationTypeCode> | |
</gmd:spatialRepresentationType> | |
<gmd:language> | |
<gco:CharacterString>{{ record['language'] }}</gco:CharacterString>
</gmd:language> | |
<gmd:characterSet> | |
<gmd:MD_CharacterSetCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_CharacterSetCode" codeListValue="utf8">utf8</gmd:MD_CharacterSetCode> | |
</gmd:characterSet> | |
<gmd:topicCategory> | |
<gmd:MD_TopicCategoryCode>{{ record['category']['name'] }}</gmd:MD_TopicCategoryCode> | |
</gmd:topicCategory> | |
<gmd:extent> | |
<gmd:EX_Extent> | |
<gmd:geographicElement> | |
<gmd:EX_GeographicBoundingBox> | |
<gmd:westBoundLongitude> | |
<gco:Decimal>{{ record['country']['bbox'][0] }}</gco:Decimal> | |
</gmd:westBoundLongitude> | |
<gmd:eastBoundLongitude> | |
<gco:Decimal>{{ record['country']['bbox'][2] }}</gco:Decimal> | |
</gmd:eastBoundLongitude> | |
<gmd:southBoundLatitude> | |
<gco:Decimal>{{ record['country']['bbox'][1] }}</gco:Decimal> | |
</gmd:southBoundLatitude> | |
<gmd:northBoundLatitude> | |
<gco:Decimal>{{ record['country']['bbox'][3] }}</gco:Decimal> | |
</gmd:northBoundLatitude> | |
</gmd:EX_GeographicBoundingBox> | |
</gmd:geographicElement> | |
</gmd:EX_Extent> | |
</gmd:extent> | |
<gmd:extent> | |
<gmd:EX_Extent> | |
<gmd:temporalElement> | |
<gmd:EX_TemporalExtent> | |
<gmd:extent> | |
<gml:TimePeriod gml:id="T_01"> | |
<gml:beginPosition>{{ record['time_begin'] }}</gml:beginPosition> | |
<gml:endPosition>{{ record['time_end'] }}</gml:endPosition> | |
</gml:TimePeriod> | |
</gmd:extent> | |
</gmd:EX_TemporalExtent> | |
</gmd:temporalElement> | |
</gmd:EX_Extent> | |
</gmd:extent> | |
<gmd:supplementalInformation> | |
<gco:CharacterString>{{ record['supplementary_information'] }}</gco:CharacterString>
</gmd:supplementalInformation> | |
</gmd:MD_DataIdentification> | |
</gmd:identificationInfo> | |
<gmd:distributionInfo> | |
<gmd:MD_Distribution> | |
<gmd:transferOptions> | |
<gmd:MD_DigitalTransferOptions> | |
{% for link in record['links'] %} | |
<gmd:onLine> | |
<gmd:CI_OnlineResource> | |
<gmd:linkage> | |
<gmd:URL>{{ link['url'] }}</gmd:URL> | |
</gmd:linkage> | |
<gmd:protocol> | |
<gco:CharacterString>{{ link['protocol'] }}</gco:CharacterString> | |
</gmd:protocol> | |
<gmd:name> | |
<gco:CharacterString>{{ link['name'] }}</gco:CharacterString> | |
</gmd:name> | |
<gmd:description> | |
<gco:CharacterString>{{ link['description'] }}</gco:CharacterString> | |
</gmd:description> | |
</gmd:CI_OnlineResource> | |
</gmd:onLine> | |
{% endfor %} | |
</gmd:MD_DigitalTransferOptions> | |
</gmd:transferOptions> | |
</gmd:MD_Distribution> | |
</gmd:distributionInfo> | |
<gmd:dataQualityInfo> | |
<gmd:DQ_DataQuality> | |
<gmd:scope> | |
<gmd:DQ_Scope> | |
<gmd:level> | |
<gmd:MD_ScopeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_ScopeCode" codeListValue="{{ record['type'] }}">{{ record['type'] }}</gmd:MD_ScopeCode> | |
</gmd:level> | |
</gmd:DQ_Scope> | |
</gmd:scope> | |
<gmd:lineage> | |
<gmd:LI_Lineage> | |
<gmd:statement> | |
<gco:CharacterString>{{ record['data_quality'] }}</gco:CharacterString> | |
</gmd:statement> | |
</gmd:LI_Lineage> | |
</gmd:lineage> | |
</gmd:DQ_DataQuality> | |
</gmd:dataQualityInfo> | |
</gmd:MD_Metadata> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import os | |
import shutil | |
import ssl | |
import sys | |
from urllib.request import urlopen | |
import uuid | |
from jinja2 import Environment, FileSystemLoader | |
import pyexcel | |
# Jinja2 environment used to render the metadata XML.  Templates are looked
# up in the current working directory, and autoescaping is enabled so that
# values coming from the spreadsheet are escaped before being written out.
TEMPLATE_ENV = Environment(
    autoescape=True,
    loader=FileSystemLoader(['.']),
)

# The metadata template, loaded once when the module is imported.
TEMPLATE = TEMPLATE_ENV.get_template('full-metadata.j2')
# Service endpoint templates; each is filled in with str.format().
# Doubled braces ({{z}}, {{x}}, {{y}}) survive formatting as the literal
# {z}/{x}/{y} tile placeholders.

# Vector services: formatted with (country code, dataset).
OAPIF_URL = 'https://geocris2.cdema.org/features/collections/{}.{}/items.json'
MVT_URL = 'https://geocris2.cdema.org/tegola/maps/{}/{}/{{z}}/{{x}}/{{y}}.pbf'

# Raster services.  WMS_URL contains no placeholders, so formatting it
# returns it unchanged; TMS_URL is formatted with the dataset name.
WMS_URL = 'https://geocris2.cdema.org/cgi-bin/mapserv?map=/home/cdemabb/geocris-backend/mapserver/rasters.map'  # noqa
TMS_URL = 'https://geocris2.cdema.org/mapproxy/tiles/1.0.0/{}/webmercator/{{z}}/{{x}}/{{y}}.png'  # noqa

# Documents: formatted with the dataset name.
DOC_URL = 'https://geocris2.cdema.org/documents/{}'
def isempty(value):
    """Tell whether a spreadsheet cell value counts as empty.

    ``None``, the empty string and strings made up only of whitespace are
    empty.  Any other value, including ``0`` and ``False``, is not.

    :param value: cell value of any type
    :returns: `bool`
    """
    if value is None:
        return True
    if isinstance(value, str):
        # A cell holding only spaces or tabs looks blank in the spreadsheet
        # and must not be mistaken for real content.
        return value.strip() == ''
    return False
def render_template(record):
    """Render a single metadata record to an XML string.

    :param record: metadata dict, exposed to the template as ``record``
    :returns: `str` of rendered XML
    """
    # Same call the script's entry point makes for every record.
    return TEMPLATE.render(record=record)
def parse_spreadsheet(filename):
    """Read the lookup tables and the metadata sheet from a spreadsheet.

    The workbook must contain three sheets, each with a header row:
    'Categories' (columns Code, Name, Label), 'Countries' (columns Code,
    Name, West, South, East, North) and 'Metadata'.

    :param filename: path to the spreadsheet file
    :returns: dict with keys 'categories' (code -> {'name', 'label'}),
              'countries' (code -> {'code', 'name', 'bbox'}, with bbox
              as [west, south, east, north]) and 'records' (the
              'Metadata' sheet with its columns named from the header row)
    """
    book = pyexcel.get_book(file_name=filename)

    # name_columns_by_row(0) takes the header row out of the sheet data, so
    # afterwards every index in range(number_of_rows()) is a data row.
    # Stopping one short, as the previous loop bounds did, silently dropped
    # the last entry of each sheet.
    categories = book['Categories']
    categories.name_columns_by_row(0)
    category_index = {}
    for row in range(categories.number_of_rows()):
        category_index[categories[row, 'Code']] = {
            'name': categories[row, 'Name'],
            'label': categories[row, 'Label'],
        }

    countries = book['Countries']
    countries.name_columns_by_row(0)
    country_index = {}
    for row in range(countries.number_of_rows()):
        code = countries[row, 'Code']
        country_index[code] = {
            'code': code,
            'name': countries[row, 'Name'],
            # minx, miny, maxx, maxy order
            'bbox': [
                countries[row, 'West'],
                countries[row, 'South'],
                countries[row, 'East'],
                countries[row, 'North'],
            ],
        }

    # The metadata sheet is handed back as-is; rows are read lazily by
    # generate_metadata().
    metadata_sheet = book['Metadata']
    metadata_sheet.name_columns_by_row(0)

    return {
        'categories': category_index,
        'countries': country_index,
        'records': metadata_sheet,
    }
def generate_metadata(d):
    """Build one metadata dict per populated row of the metadata sheet.

    Rows whose 'dataset' cell is empty are skipped.  Rows without an
    identifier are given a freshly generated UUID (stored as a string).

    :param d: dict shaped like the return value of parse_spreadsheet():
              'categories' and 'countries' lookup dicts plus the
              'records' sheet with named columns
    :returns: list of dicts, each suitable for the template's ``record``
    :raises KeyError: if a row references a country or category code that
                      is missing from the lookup tables
    """
    sheet = d['records']
    records = []

    # The header row is removed from the sheet data when the columns are
    # named, so every index below number_of_rows() is a data row.  The
    # previous bound of number_of_rows()-1 dropped the last record.
    for row in range(sheet.number_of_rows()):
        dataset = sheet[row, 'dataset']
        if isempty(dataset):
            print('Skipping row {}'.format(row))
            continue
        print('Processing {}'.format(dataset))

        m = {key: sheet[row, column] for key, column in _COPIED_COLUMNS}

        identifier = sheet[row, 'identifier']
        if isempty(identifier):
            print(' identifier empty; generating uuid')
            # Keep the record made of plain strings rather than UUID objects.
            identifier = str(uuid.uuid4())
        m['identifier'] = identifier

        m['country'] = d['countries'][sheet[row, 'country']]
        m['category'] = d['categories'][sheet[row, 'category']]

        # Normalise the data type; 'raster' is published as 'grid'.
        data_type = sheet[row, 'data_type']
        if data_type is not None:
            data_type = data_type.lower()
        if data_type == 'raster':
            data_type = 'grid'
        m['data_type'] = data_type

        # Semicolon-separated keywords; blank entries are dropped so the
        # template does not emit empty keyword elements.
        keywords = sheet[row, 'keywords']
        if isempty(keywords):
            m['keywords'] = []
        else:
            m['keywords'] = [
                kw.strip() for kw in keywords.split(';') if kw.strip()
            ]

        # Language is fixed; the sheet's 'language' column is not read.
        m['language'] = 'en'

        # 'authority:code' is split into [authority, code].
        m['crs'] = sheet[row, 'crs'].split(':')

        m['links'] = _build_links(m)
        records.append(m)

    return records


# Record keys filled straight from a spreadsheet column: (key, column name).
_COPIED_COLUMNS = (
    ('type', 'type'),
    ('dataset', 'dataset'),
    ('title', 'title'),
    ('abstract', 'abstract'),
    ('organization', 'organization'),
    ('date', 'date'),
    ('insert_date', 'insert_date'),
    ('date_revision', 'date_revision'),
    ('date_creation', 'date_creation'),
    ('date_publication', 'date_publication'),
    ('date_modified', 'date_modified'),
    ('time_begin', 'time_begin'),
    ('time_end', 'time_end'),
    ('creator', 'creator'),
    ('publisher', 'publisher'),
    ('contributor', 'credit/contributor'),
    ('format', 'format'),
    ('source', 'source'),
    ('linkage', 'linkage'),
    ('supplementary_information', 'suppInfo'),
    ('data_quality', 'DataQual'),
    ('classification', 'Classification Code'),
)

# Seconds to wait for the OAPIF probe before giving up on the link.
_PROBE_TIMEOUT = 30


def _oapif_available(url):
    """Return True if *url* can be opened; report the error and return False otherwise."""
    # NOTE(review): certificate verification is switched off, matching the
    # bare ssl.SSLContext() used before (which is deprecated without an
    # explicit protocol).  Confirm whether the server certificate can be
    # verified and, if so, use ssl.create_default_context() instead.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    try:
        with urlopen(url, context=context, timeout=_PROBE_TIMEOUT):
            return True
    except Exception as err:
        # Best-effort probe: any failure just means the link is left out.
        print(' OAPIF URL error: {}'.format(err))
        return False


def _build_links(m):
    """Return the list of online-resource dicts (name, description, protocol, url) for record *m*."""
    dataset = m['dataset']
    country_code = m['country']['code']
    # Commas are replaced by spaces in the link description.
    description = m['title'].replace(',', ' ')

    def link(protocol, url):
        return {
            'name': dataset,
            'description': description,
            'protocol': protocol,
            'url': url,
        }

    links = []
    if m['data_type'] == 'vector':
        oapif_url = OAPIF_URL.format(country_code, dataset)
        # The OAPIF link is only advertised when the endpoint answers.
        if _oapif_available(oapif_url):
            links.append(link('OGC:OAPIF', oapif_url))
        # NOTE(review): the MVT link is added whether or not the OAPIF
        # probe succeeded - confirm this is the intended behaviour.
        links.append(link('Mapbox:MVT', MVT_URL.format(country_code, dataset)))
    elif m['data_type'] in ('grid', 'raster'):
        links.append(link('OGC:WMS', WMS_URL.format(country_code, dataset)))
        links.append(link('OSGeo:TMS', TMS_URL.format(dataset)))
    elif m['type'] == 'document':
        links.append(link('WWW:LINK', DOC_URL.format(dataset)))

    # An explicit linkage in the sheet is always added as a plain web link.
    if not isempty(m['linkage']):
        links.append(link('WWW:LINK', m['linkage']))

    return links
if __name__ == '__main__':
    # The spreadsheet path is the single required command-line argument.
    arguments = sys.argv[1:]
    if not arguments:
        print('Usage: {} </path/to/spreadsheet.ods>'.format(sys.argv[0]))
        sys.exit(1)

    print('parsing spreadsheet')
    parsed = parse_spreadsheet(arguments[0])

    print('generating XML')
    metadata_records = generate_metadata(parsed)

    print('Writing XML to disk')

    # Every run starts from an empty output directory; anything left over
    # from a previous run is deleted.
    output_dir = 'output'
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # One XML file per record, named after its dataset.  The record is
    # rendered before the file is opened, so a rendering failure does not
    # leave an empty file behind.
    for entry in metadata_records:
        rendered = TEMPLATE.render(record=entry)
        target = 'output/{}.xml'.format(entry['dataset'])
        with io.open(target, 'w', encoding='utf-8') as xml_file:
            xml_file.write(rendered)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Jinja2 | |
pyexcel | |
pyexcel-ods |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment