Skip to content

Instantly share code, notes, and snippets.

@lukecampbell
Created May 1, 2015 16:49
Show Gist options
  • Save lukecampbell/e67543cb78ebd12768a0 to your computer and use it in GitHub Desktop.
Save lukecampbell/e67543cb78ebd12768a0 to your computer and use it in GitHub Desktop.
Script to generate CBIBS catalog
#!/usr/bin/env python
'''
build_agg.py
Builds the catalog for the CBIBS Stations
'''
import sys
from jinja2 import Template
import os
def main(args):
    '''
    Compiles the catalog
    '''
    # NOTE: the docstring above doubles as the command-line description
    # (the entry point passes main.__doc__ to ArgumentParser), so keep it short.
    #
    # Pipeline: discover the .nc files under args.path, reduce them to the
    # unique stations, turn each station into a template record, render the
    # catalog XML and write it to args.output. Returns 0 as the exit status.
    nc_files = find_files(args.path)
    station_records = build_records(get_stations(nc_files))
    rendered = build_template(station_records)
    with open(args.output, 'w') as out:
        out.write(rendered)
    return 0
def find_files(path):
    '''
    Recursively collects the NetCDF files beneath a directory.

    :param path: root directory handed to os.walk
    :return: list with one entry per file whose name ends in ".nc"; each
             entry is the containing directory joined with the file name,
             in the order os.walk yields them
    '''
    return [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(path)
        for name in names
        if name.endswith('.nc')
    ]
def split_filename(filename):
    '''
    Splits a "/"-separated file path into its catalog components.

    The last four path components are interpreted, in order, as
    category, station_id, year and file name; whatever precedes them is
    re-joined with "/" and returned as the prefix (an empty string when
    the path has exactly four components).

    :param filename: path of the form <prefix>/<category>/<station_id>/<year>/<file>
    :return: tuple (prefix, category, station_id, year, filename)
    :raises ValueError: if the path has fewer than four components
    '''
    pieces = filename.split('/')
    tail = pieces[-4:]
    # Fail with a message that names the offending path instead of dumping
    # the pieces to stdout and re-raising the bare unpacking error.
    if len(tail) != 4:
        raise ValueError(
            'Expected a path ending in <category>/<station_id>/<year>/<file>, '
            'got %r' % (filename,)
        )
    category, station_id, year, basename = tail
    prefix = '/'.join(pieces[:-4])
    return prefix, category, station_id, year, basename
def get_stations(files):
    '''
    Reduces a list of file paths to the unique stations they belong to.

    Each path is split with split_filename; the year and file name are
    discarded and the remaining (prefix, category, station_id) tuple
    identifies a station.

    :param files: iterable of file paths accepted by split_filename
    :return: list of unique (prefix, category, station_id) tuples, in order
             of first appearance
    :raises ValueError: propagated from split_filename for malformed paths
    '''
    stations = []
    # Membership is tracked in a set so each lookup is O(1); the list keeps
    # the first-seen ordering that the set alone would lose.
    seen = set()
    for path in files:
        prefix, category, station_id, _year, _name = split_filename(path)
        key = (prefix, category, station_id)
        if key not in seen:
            seen.add(key)
            stations.append(key)
    return stations
def build_records(stations):
    '''
    Builds the per-station dictionaries consumed by the catalog template.

    :param stations: iterable of (prefix, category, station_id) tuples
    :return: list of dicts, one per station, with the keys
             "title"      - "<station_id> <category> Aggregation"
             "dataset_id" - "<category lower-cased>_<station_id>_agg"
             "url_path"   - same value as "dataset_id"
             "path"       - prefix, category and station_id joined with os.path.join
    '''
    records = []
    for prefix, category, station_id in stations:
        agg_name = "%s_%s_agg" % (category.lower(), station_id)
        record = {
            "title": "%s %s Aggregation" % (station_id, category),
            "dataset_id": agg_name,
            "url_path": agg_name,
            "path": os.path.join(prefix, category, station_id),
        }
        # Each record is echoed to stdout as it is built. The call form of
        # print with a single argument behaves the same on Python 2 and 3.
        print(record)
        records.append(record)
    return records
def build_template(records, template_path='template.xml'):
    '''
    Renders the catalog XML from the station records.

    :param records: list of station dicts; exposed to the template as "stations"
    :param template_path: path of the Jinja2 template to render; defaults to
                          "template.xml", resolved against the current
                          working directory
    :return: the rendered catalog as a string
    '''
    with open(template_path) as f:
        source = f.read()
    # Record values come from directory names and are substituted into XML
    # attributes, so escape them; otherwise a name containing &, < or a
    # quote would produce a malformed catalog.
    template = Template(source, autoescape=True)
    return template.render(stations=records)
if __name__ == '__main__':
    from argparse import ArgumentParser

    # Command-line entry point: one positional argument (the directory to
    # scan) and an optional output file name. The description shown in
    # --help is main's docstring; main's return value is the exit status.
    cli = ArgumentParser(description=main.__doc__)
    cli.add_argument('path', help='Path to files')
    cli.add_argument('-o', '--output', default='catalog.xml', help='Output file')
    sys.exit(main(cli.parse_args()))
<?xml version="1.0" encoding="UTF-8"?>
<!-- THREDDS catalog template. It is rendered with a "stations" list; each entry supplies the title, dataset_id, url_path and path values used below. -->
<catalog name="THREDDS Server Default Catalog : You must change this to fit your server!"
xmlns="http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0"
xmlns:xlink="http://www.w3.org/1999/xlink">
<!-- Compound service "all" groups the OpenDAP, ISO and SOS endpoints; every dataset below refers to it by name in its metadata. -->
<service name="all" base="" serviceType="compound">
<service name="odap" serviceType="OpenDAP" base="/thredds/dodsC/" />
<service name="iso" serviceType="ISO" base="/thredds/iso/" />
<service name="sos" serviceType="SOS" base="/thredds/sos/" />
</service>
<!-- One dataset element is emitted per station: an NcML joinExisting aggregation along the "time" dimension that scans the station's path for files with the ".nc" suffix. -->
{% for station in stations %}
<dataset name="{{station.title}}" ID="{{station.dataset_id}}" urlPath="{{station.url_path}}">
<metadata inherited="true">
<serviceName>all</serviceName>
<dataType>Station</dataType>
<dataFormat>NetCDF</dataFormat>
</metadata>
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
<aggregation dimName="time" type="joinExisting">
<scan location="{{station.path}}" suffix=".nc" />
</aggregation>
</netcdf>
</dataset>
{% endfor %}
</catalog>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment