Created
February 4, 2019 21:27
-
-
Save ross-spencer/1a3963579d4e7437883015e460daa953 to your computer and use it in GitHub Desktop.
Minimal METS validation using METSRW
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function

import argparse
import logging
import sys

import lxml
import lxml.etree
import metsrw
# Record layout and timestamp layout handed to logging.basicConfig() in main().
LOGFORMAT = '%(asctime)-15s %(levelname)s: %(message)s'
DATEFORMAT = '%m/%d/%Y %H:%M:%S'

# Fragment of the lxml schema-parse error text that validate_mets() looks for;
# when it is present the script assumes it is running offline and skips
# validation instead of reporting an error.
OFFLINE_ERR = (
    "The QName value '{http://www.w3.org/1999/xlink}simpleLink' does not "
    "resolve to a(n) attribute group definition.")
def validate_mets(mets):
    """Validate a METS document against the XSD and report the outcome.

    The document is serialized and handed to ``metsrw.xsd_validate``, which
    yields a validity flag and a report. The result is printed to stdout;
    schema-parse problems are logged instead.

    :param mets: object exposing ``serialize()`` (presumably the
        ``metsrw.METSDocument`` returned by ``load_mets`` -- confirm
        against callers).
    :returns: None.
    """
    # Keep the try body down to the single call that can raise the
    # schema-parse error; the reporting below cannot.
    try:
        is_valid, report = metsrw.xsd_validate(mets.serialize())
    except lxml.etree.XMLSchemaParseError as err:
        # This particular message is taken to mean the schema could not be
        # fully resolved because we are offline, so it is not an error.
        if OFFLINE_ERR in str(err):
            logging.info("We're likely offline, so ignoring validation")
        else:
            logging.error(err)
        return
    if is_valid is not True:
        print("Validation result. {}".format(is_valid))
        print("Reason: {}".format(report))
    else:
        print("Schema validation via XSD is valid.")
def load_mets(filename):
    """Read a METS file from disk with metsrw.

    :param filename: path of the METS file to read.
    :returns: whatever ``metsrw.METSDocument.fromfile`` returns for the file.
    :raises SystemExit: with status 1 when the file is not well-formed XML
        (``lxml.etree.XMLSyntaxError``) or cannot be read (``IOError``);
        the cause is logged first.
    """
    try:
        mets = metsrw.METSDocument.fromfile(filename)
    except lxml.etree.XMLSyntaxError as err:
        logging.error("METS %s", err)
        sys.exit(1)
    except IOError as err:
        # IOError covers more than a missing file (permissions, directories,
        # ...), so keep the wording generic and let the exception text
        # carry the actual reason.
        logging.error("Unable to read METS file: %s", err)
        sys.exit(1)
    else:
        return mets
def use_mets(filename):
    """Load a METS file and print every entry it contains.

    :param filename: sequence whose first item is the path of the METS file
        (``main`` passes the one-element list argparse builds for
        ``nargs=1``). An empty sequence makes the call a no-op.
    :returns: None.
    """
    # Guard only the indexing. Wrapping the load as well would silently
    # swallow any IndexError raised while the document is being parsed.
    try:
        path = filename[0]
    except IndexError:
        return
    mets = load_mets(path)
    # Schema validation is left switched off; uncomment to enable it.
    # validate_mets(mets)
    for entry in mets.all_files():
        print(entry.label)
        print(entry)
def main():
    """Parse the command line, configure logging and process the METS file.

    Command-line arguments:
        mets       path of the METS file to parse (exactly one).
        --logging  logging level: INFO, DEBUG, WARNING or ERROR. Any other
                   value, including the flag given without a value, falls
                   back to DEBUG.
    """
    parser = argparse.ArgumentParser(
        description='metsrw client to test validation')
    parser.add_argument(
        'mets', metavar='M', type=str, nargs=1, help='a mets file to parse')
    parser.add_argument(
        '--logging', type=str, nargs="?", default="DEBUG",
        help='logging level, INFO, DEBUG, WARNING, ERROR')
    args = parser.parse_args()

    # Resolve the level once so logging is configured in a single place.
    level = args.logging
    if level not in ("INFO", "DEBUG", "WARNING", "ERROR"):
        level = "DEBUG"
    logging.basicConfig(format=LOGFORMAT, datefmt=DATEFORMAT, level=level)

    # args.mets is a one-element list because of nargs=1.
    use_mets(args.mets)
# Run the command-line client only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment