Skip to content

Instantly share code, notes, and snippets.

@tomkralidis
Created June 22, 2014 14:07
Show Gist options
  • Save tomkralidis/6f2533904c8a1d404d77 to your computer and use it in GitHub Desktop.
Save tomkralidis/6f2533904c8a1d404d77 to your computer and use it in GitHub Desktop.
ISO metadata keywords analyzer
# scan a directory of ISO metadata files
# and generate a keyword frequency count
from glob import glob
import os
from pprint import pprint
import sys
from lxml import etree
from owslib.iso import MD_Metadata
# Require the directory to scan as the first command-line argument.
if len(sys.argv) < 2:
    print('Usage: %s <dirpath>' % sys.argv[0])
    sys.exit(1)

# Maps each keyword to the number of times it occurs across all scanned files.
keyword_counts = {}

# Only *.xml files directly inside the given directory are read (no recursion).
for md_file in glob(os.path.join(sys.argv[1], '*.xml')):
    md = MD_Metadata(etree.parse(md_file))
    # NOTE(review): a record without an identification section presumably
    # leaves this attribute as None; skip it rather than abort the whole
    # scan -- confirm against the installed OWSLib version.
    if md.identification is None:
        continue
    # Each entry of `keywords` is indexed by 'keywords' to get the actual
    # keyword values of that group.
    for keywords in md.identification.keywords:
        for keyword in keywords['keywords']:
            keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1

# Report (keyword, count) pairs, most frequent first.  pprint() writes to
# stdout itself and returns None, so it must not be wrapped in print(),
# which would emit an extra "None" line after the report.
pprint(sorted(keyword_counts.items(),
              key=lambda x: x[1],
              reverse=True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment