Skip to content

Instantly share code, notes, and snippets.

@ross-spencer
Created February 4, 2019 21:27
Show Gist options
  • Save ross-spencer/1a3963579d4e7437883015e460daa953 to your computer and use it in GitHub Desktop.
Save ross-spencer/1a3963579d4e7437883015e460daa953 to your computer and use it in GitHub Desktop.
Minimal METS validation using METSRW
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import argparse
import lxml
import logging
import metsrw
import sys
# Record layout and timestamp layout handed to ``logging.basicConfig``.
LOGFORMAT = "%(asctime)-15s %(levelname)s: %(message)s"
DATEFORMAT = "%m/%d/%Y %H:%M:%S"

# Fragment looked for in the text of a schema-parse error; ``validate_mets``
# treats a match as a sign that the machine is probably offline.
OFFLINE_ERR = (
    "The QName value '{http://www.w3.org/1999/xlink}simpleLink' "
    "does not resolve to a(n) attribute group definition."
)
def validate_mets(mets):
    """Run XSD validation on a METS document and report the outcome.

    The document is serialized and passed to ``metsrw.xsd_validate``. The
    verdict, and on failure the report that call returned, is printed to
    stdout. A schema-parse error whose text contains ``OFFLINE_ERR`` is
    logged at INFO level as a probable offline condition; any other
    schema-parse error is logged at ERROR level. Nothing is returned.

    :param mets: object exposing ``serialize()``, as returned by
        ``load_mets``.
    """
    try:
        verdict, details = metsrw.xsd_validate(mets.serialize())
    except lxml.etree.XMLSchemaParseError as err:
        if OFFLINE_ERR not in str(err):
            logging.error(err)
            return
        logging.info("We're likely offline, so ignoring validation")
        return

    if verdict is True:
        print("Schema validation via XSD is valid.")
        return

    print("Validation result. {}".format(verdict))
    print("Reason: {}".format(details))
def load_mets(filename):
    """Read a METS file from disk with ``metsrw.METSDocument.fromfile``.

    :param filename: path of the METS file to read.
    :returns: the document object produced by ``fromfile``.
    :raises SystemExit: with status 1, after logging the problem, when
        parsing raises ``XMLSyntaxError`` or reading raises ``IOError``.
    """
    # NOTE(review): only ``import lxml`` is visible at file level, so
    # ``lxml.etree`` presumably resolves because metsrw imports it -- confirm.
    try:
        return metsrw.METSDocument.fromfile(filename)  # Reads a file
    except lxml.etree.XMLSyntaxError as e:
        logging.error("METS %s", e)
    except IOError as e:
        # IOError also covers permission problems, directories and the like,
        # so the message must not claim that the file is missing.
        logging.error("Cannot read METS file: %s", e)
    # Only reached when one of the handlers above ran.
    sys.exit(1)
def use_mets(filename):
    """Load a METS file and print every file entry it contains.

    :param filename: sequence whose first item is the path of the METS file
        (``main`` passes the one-element list argparse builds for
        ``nargs=1``).
    :returns: None. An empty sequence is logged and otherwise ignored.
    """
    # Guard only the indexing, so that an IndexError raised while loading
    # the document is not mistaken for a missing argument.
    try:
        path = filename[0]
    except IndexError:
        logging.warning("No METS file given, nothing to do")
        return

    mets = load_mets(path)
    # Schema validation is switched off here; call validate_mets(mets) to
    # re-enable it.
    for entry in mets.all_files():
        print(entry.label)
        print(entry)
def main():
    """Parse the command line, configure logging and process the METS file.

    Takes one positional argument (the METS file) and an optional
    ``--logging`` level. The level is matched case-insensitively against
    INFO, DEBUG, WARNING and ERROR; anything else, including a bare
    ``--logging`` with no value, falls back to DEBUG.
    """
    valid_levels = ("INFO", "DEBUG", "WARNING", "ERROR")

    parser = argparse.ArgumentParser(
        description='metsrw client to test validation')
    parser.add_argument(
        'mets', metavar='M', type=str, nargs=1, help='a mets file to parse')
    parser.add_argument(
        '--logging', type=str, nargs="?", default="DEBUG",
        help='logging level, INFO, DEBUG, WARNING, ERROR')
    args = parser.parse_args()

    # ``--logging`` given without a value yields None (nargs="?" with no
    # const), hence the ``or ""`` before normalising the case.
    level = (args.logging or "").upper()
    if level not in valid_levels:
        level = "DEBUG"
    logging.basicConfig(format=LOGFORMAT, datefmt=DATEFORMAT, level=level)

    use_mets(args.mets)
# Run the command-line client only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment