Created
October 5, 2020 18:58
-
-
Save Avantol13/ace554bc75b3dfbf441cfd11c7c4cabe to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import os | |
import sys | |
import sys | |
import logging | |
import asyncio | |
from gen3.tools import metadata | |
# TODO: Maybe move this script to its own repo so it can be distributed properly
# Debugging: | |
# $ export LOGLEVEL=DEBUG | |
# how to run: | |
# $ python metadata_manifest_qa.py metadata -m 1kG.tsv -e preprod.gen3.biodatacatalyst.nhlbi.nih.gov -n dbgap | |
# Log verbosity is taken from the LOGLEVEL environment variable (case-insensitive);
# it falls back to DEBUG when the variable is unset.
LOGLEVEL = os.getenv("LOGLEVEL", "DEBUG").upper()

# Configure the root logger with a timestamped, level-tagged record format.
logging.basicConfig(
    format="%(asctime)-15s [%(levelname)s] %(message)s",
    level=LOGLEVEL,
)

# Additionally attach a handler that writes log records to standard output.
logging.getLogger().addHandler(
    logging.StreamHandler(sys.stdout)
)
def make_parser():
    """Build the command-line parser for the manifest QA script.

    A single ``metadata`` sub-command is registered with three options:

    * ``-m/--manifest``  -- path to the manifest file (required)
    * ``-e/--env``       -- hostname of the target environment (required)
    * ``-n/--namespace`` -- namespace of the metadata; optional, defaults to
      ``"dbgap"``

    Returns:
        argparse.ArgumentParser: the configured top-level parser. Its parsed
        namespace always carries ``func`` (set to ``verify_metadata``), even
        when no sub-command is given.
    """
    parser = argparse.ArgumentParser(
        description="QA'ing data release manifests",
        formatter_class=argparse.RawTextHelpFormatter,
        epilog="""\
This script performs QA operations against metadata manifests.
It leverages the gen3sdk-python module to perform checks against a target
Gen3 Commons environment and make sure the metadata API records for the given namespace
match the information in the manifest, among other formatting checks.
The general syntax for this script is:
metadata_manifest_qa.py <command> <args>
e.g., metadata_manifest_qa.py metadata <manifest_file> <environment> <namespace>
The most commonly used commands are:
metadata Queries the Indexd records from a target environment to make sure the data matches what is in the manifest
e.g. $ python metadata_manifest_qa.py metadata -m 1kG.tsv -e preprod.gen3.biodatacatalyst.nhlbi.nih.gov -n dbgap
""",
    )

    subparsers = parser.add_subparsers()
    parser_metadata = subparsers.add_parser(
        "metadata",
        description="Checks the indexd records to make sure we have matching data",
    )
    # ``dest`` and ``type=str`` are argparse's defaults for these options
    # (dest is derived from the long option name), so they are not repeated.
    parser_metadata.add_argument(
        "-m",
        "--manifest",
        required=True,
        help="path to the manifest file (e.g., /Users/${USER}/Downloads/1kG.tsv)",
    )
    parser_metadata.add_argument(
        "-e",
        "--env",
        required=True,
        help="name of the environment (e.g., preprod.gen3.biodatacatalyst.nhlbi.nih.gov)",
    )
    # Not marked required: a required option never falls back to its default,
    # which previously made ``default="dbgap"`` dead configuration.
    parser_metadata.add_argument(
        "-n",
        "--namespace",
        default="dbgap",
        help="namespace of the metadata in the MDS json blob",
    )

    # Deliberately set on the top-level parser (not the sub-parser): main()
    # detects "no sub-command given" by ``func`` being the only attribute on
    # the parsed namespace.
    parser.set_defaults(func=verify_metadata)
    return parser
def main():
    """Parse the command line and dispatch to the selected command handler.

    When the parsed namespace holds exactly one attribute (i.e. no
    sub-command options were supplied), the help text is printed to stderr
    and the process exits with status 1.
    """
    cli = make_parser()
    options = cli.parse_args()

    # More than one attribute means a sub-command populated its options.
    if len(vars(options)) != 1:
        options.func(options)
        return

    cli.print_help(sys.stderr)
    sys.exit(1)
def verify_metadata(args):
    """Run the metadata manifest verification for the parsed CLI arguments.

    Calls ``metadata.async_verify_metadata_manifest`` from ``gen3.tools`` on a
    dedicated event loop and blocks until it completes.
    NOTE(review): the exact checks performed and where results are reported
    are defined by the gen3 SDK, not by this script.

    Args:
        args: parsed argument namespace providing ``manifest`` (path to the
            manifest file), ``env`` (hostname of the target environment,
            without scheme) and ``namespace`` (passed to the SDK as
            ``metadata_source``).
    """
    manifest_file = args.manifest
    target_env = args.env
    namespace = args.namespace

    logging.debug("manifest_file: %s", manifest_file)
    logging.debug("target_env: %s", target_env)
    logging.debug("namespace: %s", namespace)

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(
            metadata.async_verify_metadata_manifest(
                "https://{}".format(target_env),
                manifest_file=manifest_file,
                metadata_source=namespace,
            )
        )
    finally:
        # Release the loop's resources even if verification raises; the
        # original code left the loop open.
        asyncio.set_event_loop(None)
        loop.close()
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment