Last active
September 11, 2017 19:38
-
-
Save kwilcox/28a6d4dd1aa581aae4210efc684e8789 to your computer and use it in GitHub Desktop.
Compare (2) ERDDAP datasets.xml files and output a new one with the active flag set correctly.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
import os | |
import argparse | |
from lxml import etree | |
def main(oldxml, newxml, outfile):
    """Write a datasets.xml that deactivates datasets missing from the new file.

    Every ``dataset`` element whose ``datasetID`` appears in ``oldxml`` but
    not in ``newxml`` is moved into the new tree with ``active="false"``.
    A removed dataset that is already ``active="false"`` in the old file is
    not carried over, so it disappears from the output.

    NOTE: a dataset that was marked ``active="false"`` and later reappears in
    ``newxml`` is not handled specially here.

    Args:
        oldxml: Path to the existing datasets.xml. May be ``None`` or point
            to a missing file, in which case the new file is used as-is.
        newxml: Path to the newly generated datasets.xml. If ``None`` or
            missing, nothing is written.
        outfile: Path the resulting datasets.xml is written to.
    """
    # The CLI declares every argument optional, so a path may be None;
    # os.path.isfile(None) raises TypeError, hence the explicit guard.
    oldtree = etree.parse(oldxml) if oldxml and os.path.isfile(oldxml) else None
    newtree = etree.parse(newxml) if newxml and os.path.isfile(newxml) else None

    # Compare against None explicitly instead of relying on the truthiness
    # of lxml objects.
    if newtree is None:
        print("Not doing anything. No new datasets.xml file.")
        return

    if oldtree is None:
        print("No existing datasets.xml so using the newly generated one")
    else:
        # Find removed datasets
        datasetids = etree.XPath(
            "//erddapDatasets/dataset/@datasetID",
            smart_strings=False
        )
        # Sorted so the appended elements come out in a reproducible order.
        removedids = sorted(set(datasetids(oldtree)) - set(datasetids(newtree)))
        find_dataset = etree.XPath("//erddapDatasets/dataset[@datasetID=$name]")
        root = newtree.getroot()
        for removed_id in removedids:
            dnode = find_dataset(oldtree, name=removed_id)[0]
            if dnode.get('active') == 'false':
                # Already deactivated: ready to be dropped from datasets.xml.
                continue
            # Deactivate the dataset and carry it over into the new tree.
            dnode.set('active', 'false')
            root.append(dnode)

    # Write the serialized bytes directly. Decoding them and writing through
    # a text-mode handle would re-encode with the locale's default encoding
    # and could contradict the ISO-8859-1 XML declaration.
    serialized = etree.tostring(
        newtree,
        encoding='ISO-8859-1',
        pretty_print=True,
        xml_declaration=True
    )
    with open(outfile, 'wb') as f:
        f.write(serialized)
        f.write(b'\n')
if __name__ == "__main__":
    # Command-line entry point: three optional positional paths, in order.
    cli = argparse.ArgumentParser()
    positional_args = (
        ('oldxml', "Old datasets.xml file"),
        ('newxml', "New datasets.xml file"),
        ('outfile', "File to write the final datasets.xml"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help, nargs='?')
    parsed = cli.parse_args()
    main(parsed.oldxml, parsed.newxml, parsed.outfile)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Merge all of the individual dataset XML files in a single datasets.xml
#
# Steps:
#   1. Concatenate the matching XML fragments under $FOLDER into $TMP.
#   2. Run changes.py with the current $MASTER and $TMP to produce $PROCESSED.
#   3. Reformat $PROCESSED with xmllint and write it over $MASTER.
#
# All variable expansions are quoted so paths containing whitespace are not
# split into several words.
FOLDER="./datasets"
OUTPUT_FOLDER="./output"
TMP="$FOLDER/datasets.tmp"
MASTER="$OUTPUT_FOLDER/datasets.xml"
PROCESSED="$FOLDER/datasets.processed"

# Clear file (-f: no error when it does not exist)
rm -f "$TMP"

# Append the contents of every file whose path matches the regex, in sorted
# order. Reading line by line keeps paths with spaces intact.
find "$FOLDER" -regextype posix-extended -regex '.*[0-9]{4}_.*\.xml' -type f | sort |
while IFS= read -r x; do
    cat "$x" >> "$TMP"  # Append contents
done

# $TMP only exists if at least one fragment was found.
if [ -e "$TMP" ]; then
    rm -f "$PROCESSED"  # Clear file
    python changes.py "$MASTER" "$TMP" "$PROCESSED"
    rm "$TMP"
fi

# Only replace the master file when changes.py actually produced output.
if [ -e "$PROCESSED" ]; then
    xmllint --format "$PROCESSED" --noblanks --output "$MASTER"
    rm "$PROCESSED"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment