kwilcox · September 11, 2017 19:38
diff --git a/changes.py b/changes.py
 #! /usr/bin/env python
 import os
 import argparse

 from lxml import etree


 def main(oldxml, newxml, outfile):
    """Merge a newly generated datasets.xml with the previous one.

    Datasets that exist in ``oldxml`` but not in ``newxml`` are appended to
    the new document with ``active="false"``. Datasets that were already
    ``active="false"`` in the old document are not carried over.

    Args:
        oldxml: Path to the previous datasets.xml. May be None or point to a
            file that does not exist, in which case the new document is
            written unchanged.
        newxml: Path to the newly generated datasets.xml. If None or
            missing, nothing is written.
        outfile: Path the resulting document is written to.
    """
    # The CLI declares every argument as optional, so a path may be None;
    # os.path.isfile(None) raises TypeError, hence the explicit guard.
    oldtree = None
    if oldxml and os.path.isfile(oldxml):
        oldtree = etree.parse(oldxml)

    newtree = None
    if newxml and os.path.isfile(newxml):
        newtree = etree.parse(newxml)

    if newtree is None:
        print("Not doing anything. No new datasets.xml file.")
        return

    if oldtree is None:
        print("No existing datasets.xml so using the newly generated one")
    else:
        # Find removed datasets
        datasetids = etree.XPath(
            "//erddapDatasets/dataset/@datasetID",
            smart_strings=False
        )
        oldids = set(datasetids(oldtree))
        newids = set(datasetids(newtree))

        find_dataset = etree.XPath("//erddapDatasets/dataset[@datasetID=$name]")
        newroot = newtree.getroot()

        # Sorted so the appended nodes land in the same order on every run
        # (plain set iteration order is not stable between interpreter runs).
        for removed_id in sorted(oldids - newids):
            # **** NOTE ****
            # This won't handle a dataset that was marked as active=false
            # and then came back into the picture. It won't remove active=false
            # and the datasets will be in a state of purgatory. This shouldn't
            # happen often, if ever.
            dnode = find_dataset(oldtree, name=removed_id)[0]
            if dnode.get('active') == 'false':
                # Already deactivated earlier; let it drop out of datasets.xml
                continue
            # Deactivate the dataset and carry it over into the new document
            dnode.set('active', 'false')
            newroot.append(dnode)

    # tostring() already returns bytes in the declared encoding. Write them
    # as-is so the file content always matches the XML declaration instead
    # of being re-encoded with the platform's default text encoding.
    serialized = etree.tostring(
        newtree,
        encoding='ISO-8859-1',
        pretty_print=True,
        xml_declaration=True
    )
    with open(outfile, 'wb') as f:
        f.write(serialized)
        f.write(b'\n')


 if __name__ == "__main__":
    # Command-line entry point: three optional positional paths that are
    # handed to main() in the order old, new, output.
    cli = argparse.ArgumentParser()

    positional_specs = (
        ('oldxml', "Old datasets.xml file"),
        ('newxml', "New datasets.xml file"),
        ('outfile', "File to write the final datasets.xml"),
    )
    for arg_name, arg_help in positional_specs:
        cli.add_argument(arg_name, help=arg_help, nargs='?')

    parsed = cli.parse_args()
    main(parsed.oldxml, parsed.newxml, parsed.outfile)
diff --git a/combine.sh b/combine.sh
 # Merge all of the individual dataset XML files in a single datasets.xml
 #
 # 1. Concatenate every matching fragment under $FOLDER into $TMP.
 # 2. Run changes.py on the current $MASTER and $TMP, producing $PROCESSED.
 # 3. Format $PROCESSED with xmllint into $MASTER.
 #
 # All expansions are quoted so paths containing spaces or glob characters
 # are passed through unchanged.
 FOLDER="./datasets"
 OUTPUT_FOLDER="./output"
 TMP="$FOLDER/datasets.tmp"
 MASTER="$OUTPUT_FOLDER/datasets.xml"
 PROCESSED="$FOLDER/datasets.processed"

 # rm -f is a no-op when the file is absent, so no existence test is needed.
 rm -f "$TMP"  # Clear file

 # Read the sorted file list line by line instead of word-splitting the
 # output of find, so a path with spaces is not broken into several words.
 # $TMP is only created when at least one fragment matches.
 find "$FOLDER" -regextype posix-extended -regex '.*[0-9]{4}_.*\.xml' -type f | sort |
 while IFS= read -r x; do
    cat "$x" >> "$TMP"  # Append contents
 done

 if [ -e "$TMP" ]; then
    rm -f "$PROCESSED"  # Clear file
    python changes.py "$MASTER" "$TMP" "$PROCESSED"
    rm "$TMP"
 fi

 if [ -e "$PROCESSED" ]; then
    xmllint --format "$PROCESSED" --noblanks --output "$MASTER"
    rm "$PROCESSED"
 fi
	#! /usr/bin/env python
	import os
	import argparse

	from lxml import etree


	def main(oldxml, newxml, outfile):
	    """Merge a newly generated datasets.xml with the previous one.

	    Datasets that exist in ``oldxml`` but not in ``newxml`` are appended to
	    the new document with ``active="false"``. Datasets that were already
	    ``active="false"`` in the old document are not carried over.

	    Args:
	        oldxml: Path to the previous datasets.xml. May be None or point to
	            a file that does not exist, in which case the new document is
	            written unchanged.
	        newxml: Path to the newly generated datasets.xml. If None or
	            missing, nothing is written.
	        outfile: Path the resulting document is written to.
	    """
	    # The CLI declares every argument as optional, so a path may be None;
	    # os.path.isfile(None) raises TypeError, hence the explicit guard.
	    oldtree = None
	    if oldxml and os.path.isfile(oldxml):
	        oldtree = etree.parse(oldxml)

	    newtree = None
	    if newxml and os.path.isfile(newxml):
	        newtree = etree.parse(newxml)

	    if newtree is None:
	        print("Not doing anything. No new datasets.xml file.")
	        return

	    if oldtree is None:
	        print("No existing datasets.xml so using the newly generated one")
	    else:
	        # Find removed datasets
	        datasetids = etree.XPath(
	            "//erddapDatasets/dataset/@datasetID",
	            smart_strings=False
	        )
	        oldids = set(datasetids(oldtree))
	        newids = set(datasetids(newtree))

	        find_dataset = etree.XPath("//erddapDatasets/dataset[@datasetID=$name]")
	        newroot = newtree.getroot()

	        # Sorted so the appended nodes land in the same order on every run
	        # (plain set iteration order is not stable between interpreter runs).
	        for removed_id in sorted(oldids - newids):
	            # **** NOTE ****
	            # This won't handle a dataset that was marked as active=false
	            # and then came back into the picture. It won't remove active=false
	            # and the datasets will be in a state of purgatory. This shouldn't
	            # happen often, if ever.
	            dnode = find_dataset(oldtree, name=removed_id)[0]
	            if dnode.get('active') == 'false':
	                # Already deactivated earlier; let it drop out of datasets.xml
	                continue
	            # Deactivate the dataset and carry it over into the new document
	            dnode.set('active', 'false')
	            newroot.append(dnode)

	    # tostring() already returns bytes in the declared encoding. Write them
	    # as-is so the file content always matches the XML declaration instead
	    # of being re-encoded with the platform's default text encoding.
	    serialized = etree.tostring(
	        newtree,
	        encoding='ISO-8859-1',
	        pretty_print=True,
	        xml_declaration=True
	    )
	    with open(outfile, 'wb') as f:
	        f.write(serialized)
	        f.write(b'\n')


	if __name__ == "__main__":
	    # Command-line entry point: three optional positional paths that are
	    # handed to main() in the order old, new, output.
	    parser = argparse.ArgumentParser()

	    parser.add_argument('oldxml',
	                        help="Old datasets.xml file",
	                        nargs='?')
	    parser.add_argument('newxml',
	                        help="New datasets.xml file",
	                        nargs='?')
	    parser.add_argument('outfile',
	                        help="File to write the final datasets.xml",
	                        nargs='?')
	    args = parser.parse_args()

	    main(args.oldxml, args.newxml, args.outfile)
	# Merge all of the individual dataset XML files in a single datasets.xml
	#
	# 1. Concatenate every matching fragment under $FOLDER into $TMP.
	# 2. Run changes.py on the current $MASTER and $TMP, producing $PROCESSED.
	# 3. Format $PROCESSED with xmllint into $MASTER.
	#
	# All expansions are quoted so paths containing spaces or glob characters
	# are passed through unchanged.
	FOLDER="./datasets"
	OUTPUT_FOLDER="./output"
	TMP="$FOLDER/datasets.tmp"
	MASTER="$OUTPUT_FOLDER/datasets.xml"
	PROCESSED="$FOLDER/datasets.processed"

	# rm -f is a no-op when the file is absent, so no existence test is needed.
	rm -f "$TMP"  # Clear file

	# The pattern needs its `.*` wildcards and a real (unescaped) pipe into
	# sort. Reading the sorted list line by line avoids word-splitting paths
	# that contain spaces. $TMP is only created when a fragment matches.
	find "$FOLDER" -regextype posix-extended -regex '.*[0-9]{4}_.*\.xml' -type f | sort |
	while IFS= read -r x; do
	    cat "$x" >> "$TMP"  # Append contents
	done

	if [ -e "$TMP" ]; then
	    rm -f "$PROCESSED"  # Clear file
	    python changes.py "$MASTER" "$TMP" "$PROCESSED"
	    rm "$TMP"
	fi

	if [ -e "$PROCESSED" ]; then
	    xmllint --format "$PROCESSED" --noblanks --output "$MASTER"
	    rm "$PROCESSED"
	fi