Skip to content

Instantly share code, notes, and snippets.

@Avantol13
Created October 5, 2020 18:58
Show Gist options
  • Save Avantol13/ace554bc75b3dfbf441cfd11c7c4cabe to your computer and use it in GitHub Desktop.
Save Avantol13/ace554bc75b3dfbf441cfd11c7c4cabe to your computer and use it in GitHub Desktop.
import argparse
import os
import sys
import sys
import logging
import asyncio
from gen3.tools import metadata
# TODO: Maybe move this script to its own repo so it can be distributed properly
# Debugging:
# $ export LOGLEVEL=DEBUG
# how to run:
# $ python metadata_manifest_qa.py metadata -m 1kG.tsv -e preprod.gen3.biodatacatalyst.nhlbi.nih.gov -n dbgap
# Logging threshold comes from the LOGLEVEL environment variable (upper-cased);
# it falls back to "DEBUG" when the variable is not set.
LOGLEVEL = os.getenv("LOGLEVEL", "DEBUG").upper()

# Configure the root logger with that threshold and a timestamped format.
logging.basicConfig(
    format="%(asctime)-15s [%(levelname)s] %(message)s",
    level=LOGLEVEL,
)

# Additionally attach a stdout stream handler (no formatter set) to the root
# logger, on top of whatever basicConfig installed.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
def make_parser():
    """Build the command-line parser for the manifest QA script.

    The top-level parser exposes a single ``metadata`` subcommand that takes
    ``-m/--manifest`` and ``-e/--env`` (both required) and ``-n/--namespace``
    (optional, falling back to ``"dbgap"``).

    Returns:
        argparse.ArgumentParser: the configured top-level parser. Every
        namespace it produces carries ``func=verify_metadata``.
    """
    parser = argparse.ArgumentParser(
        description="QA'ing data release manifests",
        formatter_class=argparse.RawTextHelpFormatter,
        epilog="""\
This script performs QA operations against metadata manifests.
It leverages the gen3sdk-python module to perform checks against a target
Gen3 Commons environment and make sure the metadata API records for the given namespace
match the information in the manifest, among other formatting checks.
The general syntax for this script is:
metadata_manifest_qa.py <command> <args>
e.g., metadata_manifest_qa.py metadata <manifest_file> <environment> <namespace>
The most commonly used commands are:
metadata Queries the Indexd records from a target environment to make sure the data matches what is in the manifest
e.g. $ python metadata_manifest_qa.py metadata -m 1kG.tsv -e preprod.gen3.biodatacatalyst.nhlbi.nih.gov -n dbgap
""",
    )

    subparsers = parser.add_subparsers()
    parser_metadata = subparsers.add_parser(
        "metadata",
        description="Checks the indexd records to make sure we have matching data",
    )
    parser_metadata.add_argument(
        "-m",
        "--manifest",
        dest="manifest",
        required=True,
        type=str,
        help="path to the manifest file (e.g., /Users/${USER}/Downloads/1kG.tsv)",
    )
    parser_metadata.add_argument(
        "-e",
        "--env",
        dest="env",
        required=True,
        type=str,
        help="name of the environment (e.g., preprod.gen3.biodatacatalyst.nhlbi.nih.gov)",
    )
    # Deliberately NOT required: a required option can never fall back to its
    # default, so combining required=True with default="dbgap" made the
    # default dead code. Command lines that pass -n keep working unchanged.
    parser_metadata.add_argument(
        "-n",
        "--namespace",
        dest="namespace",
        type=str,
        default="dbgap",
        help="namespace of the metadata in the MDS json blob (default: dbgap)",
    )

    # `func` is registered on the top-level parser (not the subparser), so it
    # is present even when no subcommand is given; main() relies on that when
    # it counts the parsed attributes to detect a missing subcommand.
    parser.set_defaults(func=verify_metadata)
    return parser
def main():
    """Parse the command line and dispatch to the handler stored in ``func``.

    When the parsed namespace holds exactly one attribute, the help text is
    printed to stderr and the process exits with status 1. Otherwise the
    callable stored under ``func`` is invoked with the parsed namespace.
    """
    cli = make_parser()
    parsed = cli.parse_args()

    # vars() exposes the same attribute mapping the namespace stores, so its
    # size is the number of parsed attributes.
    attribute_count = len(vars(parsed))
    if attribute_count == 1:
        cli.print_help(sys.stderr)
        sys.exit(1)

    handler = parsed.func
    handler(parsed)
def verify_metadata(args):
    """Verify a metadata manifest against a target environment.

    Logs the three inputs at DEBUG level, then drives
    ``metadata.async_verify_metadata_manifest`` to completion on a freshly
    created event loop, which is closed afterwards.

    Args:
        args: parsed arguments exposing ``manifest``, ``env`` and
            ``namespace`` attributes.
    """
    manifest_file = args.manifest
    target_env = args.env
    namespace = args.namespace

    logging.debug("manifest_file: %s", manifest_file)
    logging.debug("target_env: %s", target_env)
    logging.debug("namespace: %s", namespace)

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(
            metadata.async_verify_metadata_manifest(
                "https://{}".format(target_env),
                manifest_file=manifest_file,
                metadata_source=namespace,
            )
        )
    finally:
        # Release the loop's resources even if verification raises; the
        # loop was created here, so it is this function's job to close it.
        loop.close()
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment