Skip to content

Instantly share code, notes, and snippets.

@tbnorth
Last active February 18, 2016 18:33
Show Gist options
  • Save tbnorth/003ac740550ba521067d to your computer and use it in GitHub Desktop.
Save tbnorth/003ac740550ba521067d to your computer and use it in GitHub Desktop.
Make ArcMap HTML metadata suitable for general use, e.g. with CKAN

Make ArcMap HTML metadata suitable for general use, e.g. with CKAN

When you view the Description of something in ArcMap or ArcCatalog, that's an HTML page you're looking at. You can right-click -> Properties to see where it's saved, something like file:///C:/Users/tbrown/AppData/Local/Temp/arc5E14/tmpDE3C.tmp.htm, and copy it for use elsewhere. It's in UTF-16 encoding (at least two bytes per character), and the link to the thumbnail will probably break - the simple program below fixes both of those issues. Example usage:

python arcmap2ckan.py tmpBC5A.tmp.htm GLStress5971.metadata.html GLStress5971.view.jpg

The second program, arcmap2reorder.py, can be used to reorder the fields in ArcMap metadata, so they match the order in the attribute table.

Works with python 2.7 or 3.x

"""
arcmap2ckan.py - make ArcMap HTML exported metadata suitable for
general use, e.g. as a CKAN resource.
See https://gist.github.com/tbnorth/003ac740550ba521067d
Terry Brown, [email protected], Wed Oct 28 12:59:19 2015
"""
import sys
from xml.etree import ElementTree
def error_exit():
    """
    error_exit - Show help and exit

    Prints the usage message for arcmap2ckan.py to standard output, then
    terminates the process with exit status 10.

    :raises SystemExit: always, with code 10
    """
    print("\nusage: python arcmap2ckan.py fromfile tofile [imagefile]\n\n"
          "Converts <fromfile> from UTF-16 to UTF-8 and writes to <tofile>\n"
          "Also links <imagefile> as clickable thumbnail if present. I.e.\n"
          "<fromfile> is a export of ArcMap HTML metadata.\n")
    # Use sys.exit rather than the bare `exit` builtin: `exit` is injected by
    # the `site` module for interactive sessions and is not defined when the
    # interpreter starts without site (e.g. `python -S`).
    sys.exit(10)
def link_image(tofile, imagefile):
    """
    link_image - link `imagefile` as thumbnail for ArcMap HTML metadata.

    When `imagefile` is given, the ``<img id='thumbnail'>`` element gets its
    ``src`` set to `imagefile` and is moved inside a new
    ``<a target='_blank' href=imagefile>`` element appended to the image's
    former parent.

    When `imagefile` is None, the element that encloses the
    ``<div class='noThumbnail'>`` placeholder is removed from its own parent;
    if no such placeholder exists the document is left as parsed.

    In both cases the parsed document is written back over `tofile`.

    :param str tofile: path to file containing metadata, will be re-written
    :param str imagefile: path to image, typically just the file name,
        or None to strip the "no thumbnail" placeholder instead
    :raises ValueError: if `imagefile` is given but `tofile` contains no
        ``<img id='thumbnail'>`` element to link
    """
    dom = ElementTree.parse(tofile)
    if imagefile is None:  # just remove the element
        wrapper = dom.find(".//div[@class='noThumbnail']/..")
        outer = dom.find(".//div[@class='noThumbnail']/../..")
        # Test against None explicitly: find() returns None when nothing
        # matches, and an Element with no children is falsy.
        if wrapper is not None and outer is not None:
            outer.remove(wrapper)
    else:
        img = dom.find(".//img[@id='thumbnail']")
        parent = dom.find(".//img[@id='thumbnail']/..")
        if img is None or parent is None:
            raise ValueError(
                "no <img id='thumbnail'> element found in '%s'" % tofile)
        img.set('src', imagefile)
        # Build the anchor as an Element instead of parsing interpolated
        # markup, so characters such as & < ' in the file name are escaped
        # by the serializer rather than breaking the XML parser.
        a = ElementTree.Element('a', {'target': '_blank', 'href': imagefile})
        parent.append(a)
        parent.remove(img)
        a.append(img)
    dom.write(tofile)
def main():
    """
    main - command line entry point for arcmap2ckan.py

    Expects ``fromfile tofile [imagefile]`` in sys.argv; with any other
    argument count the usage message is shown and the program exits.
    Re-encodes `fromfile` from UTF-16 to UTF-8 into `tofile`, then passes
    `tofile` and the optional image path (None when omitted) to link_image().
    """
    if not (3 <= len(sys.argv) <= 4):
        error_exit()

    fromfile = sys.argv[1]
    tofile = sys.argv[2]
    imagefile = sys.argv[3] if len(sys.argv) == 4 else None

    # Re-encoding approach from http://stackoverflow.com/a/8827512
    # Read and decode the whole source *before* opening the destination:
    # opening for writing truncates the file, which would wipe the input when
    # fromfile and tofile are the same path, and would leave an empty tofile
    # behind if the UTF-16 decode failed.
    with open(fromfile, 'rb') as source_file:
        text = source_file.read().decode('utf-16')

    with open(tofile, 'wb') as dest_file:
        dest_file.write(text.encode('utf-8'))

    link_image(tofile, imagefile)


if __name__ == '__main__':
    main()
"""
arcmap2reorder.py - re-order ArcMap metadata fields to match attribute table
Terry Brown, [email protected], Wed Oct 28 12:59:19 2015
"""
import os
import sys
from xml.etree import ElementTree
def error_exit():
    """
    error_exit - Show help and exit

    Prints the usage message for arcmap2reorder.py to standard output, then
    terminates the process with exit status 10.

    :raises SystemExit: always, with code 10
    """
    print("\nusage: python arcmap2reorder.py metadatafile fields...\n\n"
          "Without fields, lists fields in metadata\n"
          "With fields, reorders fields in metadata to match\n\n")
    # Use sys.exit rather than the bare `exit` builtin: `exit` is injected by
    # the `site` module for interactive sessions and is not defined when the
    # interpreter starts without site (e.g. `python -S`).
    sys.exit(10)
def reorder_fields(fromfile, fields):
    """
    reorder_fields - list or reorder the <attr> elements of a metadata file.

    With an empty `fields`, prints the text of every <attrlabl> element on
    one space-separated line and returns without modifying the file.

    Otherwise every <attr> element is detached from the first element that
    contains an <attr>, then re-attached: first those named in `fields`, in
    the order given (a name repeated in `fields` is placed only once), then
    any remaining ones in sorted label order.  A warning is printed for each
    requested name with no matching <attr>, and for each <attr> retained
    without being requested.  The result is written back over `fromfile`.

    :param str fromfile: path to metadata file, may be re-written
    :param list fields: list of field names, may be empty
    """
    dom = ElementTree.parse(fromfile)
    if not fields:
        # An empty <attrlabl/> has text None, which str.join would reject.
        print(' '.join(label.text or ''
                       for label in dom.findall(".//attrlabl")))
        return

    # Map each field label to its <attr> element.
    attribs = {i.find("attrlabl").text: i for i in dom.findall(".//attr")}
    parent = dom.find(".//attr/..")
    for attr in attribs.values():
        parent.remove(attr)

    placed = set()  # labels already re-attached; gives O(1) membership tests
    for field in fields:
        if field in placed:
            # Appending the same Element again would duplicate it in the
            # written file, so a repeated name is ignored.
            continue
        if field in attribs:
            parent.append(attribs[field])
            placed.add(field)
        else:
            print("WARNING: no field '%s' in metadata" % field)

    for field in sorted(attribs):
        if field in placed:
            continue
        print("WARNING: retaining unmentioned field '%s' in metadata" % field)
        parent.append(attribs[field])

    dom.write(fromfile)
def main():
    """
    main - command line entry point for arcmap2reorder.py

    The first argument must name an existing file; otherwise the usage
    message is shown and the program exits.  Any further arguments are the
    field names handed to reorder_fields() along with that file path.
    """
    arguments = sys.argv[1:]
    if not arguments or not os.path.isfile(arguments[0]):
        error_exit()
    metadata_path = arguments[0]
    field_names = arguments[1:]
    reorder_fields(metadata_path, field_names)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment