Last active
April 3, 2024 18:46
-
-
Save theferrit32/df380899e9974cb885c2780cfa82ce7a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"Notes:\n", | |
"\n", | |
"(1) Numeric type mismatch means graphql cannot map `value` fields to the same query field.\n", | |
"- SequenceLocation.start is a\n", | |
" - DefiniteRange\n", | |
" - IndefiniteRange\n", | |
" - Number\n", | |
"- IndefiniteRange.value is a jsonschema \"number\" (float)\n", | |
"- Number.value is a jsonschema \"integer\" (int)\n", | |
"So one/both of these must be aliased.\n", | |
"e.g. Number.value -> number_value, IndefiniteRange.value -> indefinite_range_value\n", | |
"\n", | |
"(2) CURIE is a complex type instead of a simple string.\n", | |
"- CURIE structure in the GraphQL api nests the string in an object under a .value field.\n", | |
"- e.g. \"17-43044295-T-TG\"\n", | |
"-> {\n", | |
" \"_id\":{\n", | |
" \"value\":\"ga4gh:VA.wN_AHuGDip6BqcNMxK22dUwfVB3A-4hf\"\n", | |
" },\n", | |
" \"type\":\"Allele\",\n", | |
" \"location\":{\n", | |
" \"_id\":{\n", | |
" \"value\":\"ga4gh:VSL.XulT2LDDRynmnS1xxK1BdlVtkIYlF1gt\"\n", | |
" },\n", | |
" ...\n", | |
" },\n", | |
" ...\n", | |
"}\n", | |
"\n", | |
"(3) VAFocusAlleleURI is a complex type instead of a simple string. Similar to CURIE.\n", | |
"\n", | |
"\n", | |
"\n", | |
"Performance:\n", | |
"(3) Slow-start for the scaled down cloud run / hail cluster is inconvenient,\n", | |
"first requests fail for a minute or so." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
" <style>\n", | |
" .bk-notebook-logo {\n", | |
" display: block;\n", | |
" width: 20px;\n", | |
" height: 20px;\n", | |
" background-image: url();\n", | |
" }\n", | |
" </style>\n", | |
" <div>\n", | |
" <a href=\"https://bokeh.org\" target=\"_blank\" class=\"bk-notebook-logo\"></a>\n", | |
" <span id=\"d73cf394-dd5c-4eea-8e46-4d18a9c07d14\">Loading BokehJS ...</span>\n", | |
" </div>\n" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"application/javascript": "'use strict';\n(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n function drop(id) {\n const view = Bokeh.index.get_by_id(id)\n if (view != null) {\n view.model.document.clear()\n Bokeh.index.delete(view)\n }\n }\n\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n\n // Clean up Bokeh references\n if (id != null) {\n drop(id)\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim()\n drop(id)\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n if ((output.output_type != \"display_data\") || 
(!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is output safe? 
*/\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"<div style='background-color: #fdd'>\\n\"+\n \"<p>\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"</p>\\n\"+\n \"<ul>\\n\"+\n \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n \"<li>use INLINE resources instead, as so:</li>\\n\"+\n \"</ul>\\n\"+\n \"<code>\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"</code>\\n\"+\n \"</div>\"}};\n\n function display_loaded(error = null) {\n const el = document.getElementById(\"d73cf394-dd5c-4eea-8e46-4d18a9c07d14\");\n if (el != null) {\n const html = (() => {\n if (typeof root.Bokeh === \"undefined\") {\n if (error == null) {\n return \"BokehJS is loading ...\";\n } else {\n return \"BokehJS failed to load.\";\n }\n } else {\n const prefix = `BokehJS ${root.Bokeh.version}`;\n if (error == null) {\n return `${prefix} successfully loaded.`;\n } else {\n return `${prefix} <b>encountered errors</b> while loading and may not function as expected.`;\n }\n }\n })();\n el.innerHTML = html;\n\n if (error != null) {\n const wrapper = document.createElement(\"div\");\n wrapper.style.overflow = \"auto\";\n wrapper.style.height = \"5em\";\n wrapper.style.resize = 
\"vertical\";\n const content = document.createElement(\"div\");\n content.style.fontFamily = \"monospace\";\n content.style.whiteSpace = \"pre-wrap\";\n content.style.backgroundColor = \"rgb(255, 221, 221)\";\n content.textContent = error.stack ?? error.toString();\n wrapper.append(content);\n el.append(wrapper);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(() => display_loaded(error), 100);\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = 
document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.4.0.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n try {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n\n } catch (error) {display_loaded(error);throw error;\n }if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(\"d73cf394-dd5c-4eea-8e46-4d18a9c07d14\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", | |
"application/vnd.bokehjs_load.v0+json": "" | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import hail as hl\n", | |
"import json\n", | |
"import time" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Initializing Hail with default parameters...\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"gs://gcp-public-data--gnomad/release/4.0/ht/genomes/gnomad.genomes.v4.0.sites.ht\n", | |
"24/03/27 23:22:25 WARN Utils: Your hostname, wm12f-58b resolves to a loopback address: 127.0.0.1; using 192.168.1.7 instead (on interface en0)\n", | |
"24/03/27 23:22:25 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n", | |
"24/03/27 23:22:26 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Setting default log level to \"WARN\".\n", | |
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", | |
"Running on Apache Spark version 3.3.4\n", | |
"SparkUI available at http://192.168.1.7:4040\n", | |
"Welcome to\n", | |
" __ __ <>__\n", | |
" / /_/ /__ __/ /\n", | |
" / __ / _ `/ / /\n", | |
" /_/ /_/\\_,_/_/_/ version 0.2.127-bb535cd096c5\n", | |
"LOGGING: writing to /Users/kferrite/dev/gnomad_methods/notebooks/hail-20240327-2322-0.2.127-bb535cd096c5.log\n" | |
] | |
} | |
], | |
"source": [ | |
"# ht_url can be a gs:// path, or a file:// local path\n", | |
"\n", | |
"ht_data_type = \"genomes\"\n", | |
"# ht_data_type = \"exomes\"\n", | |
"\n", | |
"if ht_data_type == \"genomes\":\n", | |
" ht_url = \"gs://gcp-public-data--gnomad/release/4.0/ht/genomes/gnomad.genomes.v4.0.sites.ht\"\n", | |
"elif ht_data_type == \"exomes\":\n", | |
" ht_url = (\n", | |
" \"gs://gcp-public-data--gnomad/release/4.0/ht/exomes/gnomad.exomes.v4.0.sites.ht\"\n", | |
" )\n", | |
"\n", | |
"ht_version = \"4.0.0\"\n", | |
"\n", | |
"print(ht_url)\n", | |
"ht = hl.read_table(ht_url)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table><thead><tr><td style=\"white-space: nowrap; max-width: 500px; overflow: hidden; text-overflow: ellipsis; \" colspan=\"1\"><div style=\"text-align: left;\"></div></td></tr><tr><td style=\"white-space: nowrap; max-width: 500px; overflow: hidden; text-overflow: ellipsis; \" colspan=\"1\"><div style=\"text-align: left;border-bottom: solid 2px #000; padding-bottom: 5px\"><expr></div></td></tr><tr><td style=\"white-space: nowrap; max-width: 500px; overflow: hidden; text-overflow: ellipsis; text-align: left;\">interval<locus<GRCh38>></td></tr>\n", | |
"</thead><tbody><tr><td style=\"white-space: nowrap; max-width: 500px; overflow: hidden; text-overflow: ellipsis; \">[chr17:43044295-chr17:43170245)</td></tr>\n", | |
"</tbody></table>" | |
], | |
"text/plain": [ | |
"+---------------------------------+\n", | |
"| <expr> |\n", | |
"+---------------------------------+\n", | |
"| interval<locus<GRCh38>> |\n", | |
"+---------------------------------+\n", | |
"| [chr17:43044295-chr17:43170245) |\n", | |
"+---------------------------------+" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[Stage 2:> (0 + 1) / 1]\r" | |
] | |
} | |
], | |
"source": [ | |
"from gnomad.resources.grch38.gnomad import gnomad_gks\n", | |
"\n", | |
"\n", | |
"# BRCA1\n", | |
"# coordinates: 17:43044295-43170245\n", | |
"# https://useast.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000012048;r=17:43044295-43170245\n", | |
"brca1_interval = hl.parse_locus_interval(\n", | |
" \"chr17:43044295-43170245\", reference_genome=\"GRCh38\"\n", | |
")\n", | |
"\n", | |
"brca1_interval.show()\n", | |
"# ht_brca1 = hl.filter_intervals(ht, [brca1_interval])\n", | |
"\n", | |
"brca1_ten_records = hl.filter_intervals(ht, [brca1_interval]).take(10)\n", | |
"\n", | |
"locus_s = brca1_ten_records[0].locus\n", | |
"locus_e = brca1_ten_records[-1].locus\n", | |
"\n", | |
"ivl_10 = hl.locus_interval(\n", | |
" locus_s.contig, locus_s.position, locus_e.position + 1, reference_genome=\"GRCh38\"\n", | |
")\n", | |
"\n", | |
"ht_brca1_10 = hl.filter_intervals(ht, [ivl_10])\n", | |
"\n", | |
"# ht_brca1_10.show()\n", | |
"\n", | |
"# gks_records = gnomad_gks(\n", | |
"# ivl_10,\n", | |
"# version=ht_version,\n", | |
"# data_type=\"exomes\",\n", | |
"# custom_ht=ht_brca1_10,\n", | |
"# skip_checkpoint=True,\n", | |
"# )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"import requests\n", | |
"import jsonschema\n", | |
"\n", | |
"\n", | |
"def get_json_http(url):\n", | |
" r = requests.get(url)\n", | |
" if r.status_code != 200:\n", | |
" raise RuntimeError(f\"Request failed:\\n{r.status_code} {r.content}\")\n", | |
" return json.loads(r.content.decode(\"utf-8\"))\n", | |
"\n", | |
"\n", | |
"schema = get_json_http(\n", | |
" # \"https://raw.githubusercontent.com/ga4gh/va-spec/1.0-alpha/schema/cohortAlleleFreq.json\"\n", | |
" \"https://raw.githubusercontent.com/ga4gh/va-spec/CAF-draft/schema/cohortAlleleFreq.json\"\n", | |
")\n", | |
"\n", | |
"# print(json.dumps(schema, indent=4))\n", | |
"# print(schema)\n", | |
"\n", | |
"# for gks_record in gks_records:\n", | |
"# jsonschema.validate(instance=gks_record[\"gks_va_freq\"], schema=schema)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import os\n", | |
"\n", | |
"# redact url\n", | |
"url = os.getenv(\"GNOMAD_GKS_API_URL\")\n", | |
"assert url is not None, \"Must set GNOMAD_GKS_API_URL\"\n", | |
"\n", | |
"\n", | |
"def make_graphql_query(query: str, variables: dict = {}, url: str = url):\n", | |
" r = requests.post(url, json={\"query\": query, \"variables\": variables})\n", | |
" if r.status_code != 200:\n", | |
" raise RuntimeError(f\"Request failed:\\n{r.status_code} {r.content}\")\n", | |
" return r\n", | |
"\n", | |
"\n", | |
"with open(\"gnomad_vrs_va.graphql\", encoding=\"utf-8\") as f:\n", | |
" query = f.read()\n", | |
"\n", | |
"\n", | |
"def make_va_graphql_query(variantId: str, url: str = url, query: str = query):\n", | |
" return make_graphql_query(query, {\"variantId\": variantId}, url)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['17-43044295-T-TG', '17-43044304-T-G', '17-43044309-A-G', '17-43044314-G-A', '17-43044315-T-A', '17-43044320-T-C', '17-43044322-G-T', '17-43044326-T-G', '17-43044335-CACA-C', '17-43044342-T-C']\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[Stage 4:> (0 + 1) / 1]\r" | |
] | |
} | |
], | |
"source": [ | |
"brca1_10_records = ht_brca1_10.annotate(\n", | |
" variant_id=hl.format(\n", | |
" \"%s-%s-%s-%s\",\n", | |
" ht_brca1_10.locus.contig[3:],\n", | |
" hl.str(ht_brca1_10.locus.position),\n", | |
" ht_brca1_10.alleles[0],\n", | |
" ht_brca1_10.alleles[1],\n", | |
" )\n", | |
")\n", | |
"\n", | |
"brca1_10_records = brca1_10_records.select(\"variant_id\").collect()\n", | |
"print([r.variant_id for r in brca1_10_records])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Querying 17-43044295-T-TG\n", | |
"vrs_obj={'_id': 'ga4gh:VA.wN_AHuGDip6BqcNMxK22dUwfVB3A-4hf', 'type': 'Allele', 'location': {'_id': 'ga4gh:VSL.XulT2LDDRynmnS1xxK1BdlVtkIYlF1gt', 'type': 'SequenceLocation', 'sequence_id': 'ga4gh:SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7', 'interval': {'type': 'SequenceInterval', 'start': {'type': 'Number', 'value': 43044295}, 'end': {'type': 'Number', 'value': 43044297}}}, 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'GGG'}}\n", | |
"va_obj={'id': 'gnomAD-4.0.0-chr17-43044295-T-TG', 'type': 'CohortAlleleFrequency', 'label': 'Overall Cohort Allele Frequency for chr17-43044295-T-TG', 'derivedFrom': {'id': 'gnomAD4.0.0', 'type': 'DataSet', 'label': 'gnomAD v4.0.0', 'version': '4.0.0'}, 'focusAllele': 'ga4gh:VA.wN_AHuGDip6BqcNMxK22dUwfVB3A-4hf', 'focusAlleleCount': 7, 'locusAlleleCount': 314948, 'alleleFrequency': 2.2225891258239457e-05, 'cohort': {'id': 'ALL'}, 'ancillaryResults': {'grpMaxFAF95': {'frequency': 1.9350000000000003e-05, 'confidenceInterval': 0.95, 'groupId': 'chr17-43044295-T-TG.NFE'}, 'jointGrpMaxFAF95': {'frequency': 1.896e-05, 'confidenceInterval': 0.95, 'groupId': 'chr17-43044295-T-TG.NFE'}, 'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.AFR', 'type': 'CohortAlleleFrequency', 'label': 'African/African-American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 9088, 'alleleFrequency': 0, 'cohort': {'id': 'AFR', 'characteristics': [{'name': 'genetic ancestry', 'value': 'African/African-American'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.AFR.XX', 'type': 'CohortAlleleFrequency', 'label': 'African/African-American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 5408, 'alleleFrequency': 0, 'cohort': {'id': 'AFR.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'African/African-American'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.AFR.XY', 'type': 'CohortAlleleFrequency', 'label': 'African/African-American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 3680, 'alleleFrequency': 0, 'cohort': {'id': 'AFR.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'African/African-American'}, {'name': 
'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.AMR', 'type': 'CohortAlleleFrequency', 'label': 'Admixed American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 25794, 'alleleFrequency': 0, 'cohort': {'id': 'AMR', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Admixed American'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.AMR.XX', 'type': 'CohortAlleleFrequency', 'label': 'Admixed American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 14728, 'alleleFrequency': 0, 'cohort': {'id': 'AMR.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Admixed American'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.AMR.XY', 'type': 'CohortAlleleFrequency', 'label': 'Admixed American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 11066, 'alleleFrequency': 0, 'cohort': {'id': 'AMR.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Admixed American'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.ASJ', 'type': 'CohortAlleleFrequency', 'label': 'Ashkenazi Jewish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 11708, 'alleleFrequency': 0, 'cohort': {'id': 'ASJ', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Ashkenazi Jewish'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.ASJ.XX', 'type': 'CohortAlleleFrequency', 'label': 'Ashkenazi Jewish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 
0, 'locusAlleleCount': 5594, 'alleleFrequency': 0, 'cohort': {'id': 'ASJ.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Ashkenazi Jewish'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.ASJ.XY', 'type': 'CohortAlleleFrequency', 'label': 'Ashkenazi Jewish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 6114, 'alleleFrequency': 0, 'cohort': {'id': 'ASJ.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Ashkenazi Jewish'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.EAS', 'type': 'CohortAlleleFrequency', 'label': 'East Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 15250, 'alleleFrequency': 0, 'cohort': {'id': 'EAS', 'characteristics': [{'name': 'genetic ancestry', 'value': 'East Asian'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.EAS.XX', 'type': 'CohortAlleleFrequency', 'label': 'East Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 7464, 'alleleFrequency': 0, 'cohort': {'id': 'EAS.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'East Asian'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.EAS.XY', 'type': 'CohortAlleleFrequency', 'label': 'East Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 7786, 'alleleFrequency': 0, 'cohort': {'id': 'EAS.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'East Asian'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.FIN', 'type': 
'CohortAlleleFrequency', 'label': 'Finnish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 11788, 'alleleFrequency': 0, 'cohort': {'id': 'FIN', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Finnish'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.FIN.XX', 'type': 'CohortAlleleFrequency', 'label': 'Finnish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 6112, 'alleleFrequency': 0, 'cohort': {'id': 'FIN.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Finnish'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.FIN.XY', 'type': 'CohortAlleleFrequency', 'label': 'Finnish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 5676, 'alleleFrequency': 0, 'cohort': {'id': 'FIN.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Finnish'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.MID', 'type': 'CohortAlleleFrequency', 'label': 'Middle Eastern Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 1232, 'alleleFrequency': 0, 'cohort': {'id': 'MID', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Middle Eastern'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.MID.XX', 'type': 'CohortAlleleFrequency', 'label': 'Middle Eastern Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 544, 'alleleFrequency': 0, 'cohort': {'id': 'MID.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Middle Eastern'}, {'name': 'biological sex', 
'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.MID.XY', 'type': 'CohortAlleleFrequency', 'label': 'Middle Eastern Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 688, 'alleleFrequency': 0, 'cohort': {'id': 'MID.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Middle Eastern'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.NFE', 'type': 'CohortAlleleFrequency', 'label': 'Non-Finnish European Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 7, 'locusAlleleCount': 167490, 'alleleFrequency': 4.1793539912830615e-05, 'cohort': {'id': 'NFE', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Non-Finnish European'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.NFE.XX', 'type': 'CohortAlleleFrequency', 'label': 'Non-Finnish European Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 4, 'locusAlleleCount': 80840, 'alleleFrequency': 4.948045522018802e-05, 'cohort': {'id': 'NFE.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Non-Finnish European'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.NFE.XY', 'type': 'CohortAlleleFrequency', 'label': 'Non-Finnish European Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 3, 'locusAlleleCount': 86650, 'alleleFrequency': 3.462204270051933e-05, 'cohort': {'id': 'NFE.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Non-Finnish European'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.REMAINING', 'type': 'CohortAlleleFrequency', 'label': 'Remaining individuals Cohort 
Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 15778, 'alleleFrequency': 0, 'cohort': {'id': 'REMAINING', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Remaining individuals'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.REMAINING.XX', 'type': 'CohortAlleleFrequency', 'label': 'Remaining individuals Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 7962, 'alleleFrequency': 0, 'cohort': {'id': 'REMAINING.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Remaining individuals'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.REMAINING.XY', 'type': 'CohortAlleleFrequency', 'label': 'Remaining individuals Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 7816, 'alleleFrequency': 0, 'cohort': {'id': 'REMAINING.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Remaining individuals'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.SAS', 'type': 'CohortAlleleFrequency', 'label': 'South Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 56820, 'alleleFrequency': 0, 'cohort': {'id': 'SAS', 'characteristics': [{'name': 'genetic ancestry', 'value': 'South Asian'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.SAS.XX', 'type': 'CohortAlleleFrequency', 'label': 'South Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 11170, 'alleleFrequency': 0, 'cohort': {'id': 'SAS.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'South 
Asian'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.SAS.XY', 'type': 'CohortAlleleFrequency', 'label': 'South Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 45650, 'alleleFrequency': 0, 'cohort': {'id': 'SAS.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'South Asian'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}]}\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[Stage 5:> (0 + 1) / 1]\r" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"GraphQL VRS response matches gnomad_methods VRS response\n", | |
"reproduced_va={'id': 'gnomAD-4.0.0-chr17-43044295-T-TG', 'type': 'CohortAlleleFrequency', 'label': 'Overall Cohort Allele Frequency for chr17-43044295-T-TG', 'derivedFrom': {'id': 'gnomAD4.0.0', 'type': 'DataSet', 'label': 'gnomAD v4.0.0', 'version': '4.0.0'}, 'focusAllele': {'_id': 'ga4gh:VA.wN_AHuGDip6BqcNMxK22dUwfVB3A-4hf', 'type': 'Allele', 'location': {'type': 'SequenceLocation', 'sequence_id': 'ga4gh:SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7', 'interval': {'start': {'type': 'Number', 'value': 43044295}, 'end': {'type': 'Number', 'value': 43044297}, 'type': 'SequenceInterval'}, '_id': 'ga4gh:VSL.XulT2LDDRynmnS1xxK1BdlVtkIYlF1gt'}, 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'GGG'}}, 'focusAlleleCount': 2, 'locusAlleleCount': 152166, 'alleleFrequency': 1.3143540606968706e-05, 'cohort': {'id': 'ALL'}, 'ancillaryResults': {'homozygotes': 0, 'grpMaxFAF95': {'frequency': 4.88e-06, 'confidenceInterval': 0.95, 'groupId': 'chr17-43044295-T-TG.NFE'}, 'jointGrpMaxFAF95': {'frequency': 1.896e-05, 'confidenceInterval': 0.95, 'groupId': 'chr17-43044295-T-TG.NFE'}}, 'qualityMeasures': {'qcFilters': [], 'lowComplexityRegion': False, 'heterozygousSkewedAlleleCount': 0, 'meanDepth': 31.329223731555604, 'fractionCoverage20x': 0.9647150635719299, 'monoallelic': False}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.AFR', 'type': 'CohortAlleleFrequency', 'label': 'African/African-American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 41436, 'alleleFrequency': 0.0, 'cohort': {'id': 'AFR', 'characteristics': [{'name': 'genetic ancestry', 'value': 'African/African-American'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.AFR.XX', 'type': 'CohortAlleleFrequency', 'label': 'African/African-American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 22152, 'alleleFrequency': 
0.0, 'cohort': {'id': 'AFR.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'African/African-American'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.AFR.XY', 'type': 'CohortAlleleFrequency', 'label': 'African/African-American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 19284, 'alleleFrequency': 0.0, 'cohort': {'id': 'AFR.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'African/African-American'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.AMR', 'type': 'CohortAlleleFrequency', 'label': 'Admixed American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 15274, 'alleleFrequency': 0.0, 'cohort': {'id': 'AMR', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Admixed American'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.AMR.XX', 'type': 'CohortAlleleFrequency', 'label': 'Admixed American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 6780, 'alleleFrequency': 0.0, 'cohort': {'id': 'AMR.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Admixed American'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.AMR.XY', 'type': 'CohortAlleleFrequency', 'label': 'Admixed American Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 8494, 'alleleFrequency': 0.0, 'cohort': {'id': 'AMR.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Admixed American'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 
'chr17-43044295-T-TG.ASJ', 'type': 'CohortAlleleFrequency', 'label': 'Ashkenazi Jewish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 3468, 'alleleFrequency': 0.0, 'cohort': {'id': 'ASJ', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Ashkenazi Jewish'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.ASJ.XX', 'type': 'CohortAlleleFrequency', 'label': 'Ashkenazi Jewish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 1864, 'alleleFrequency': 0.0, 'cohort': {'id': 'ASJ.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Ashkenazi Jewish'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.ASJ.XY', 'type': 'CohortAlleleFrequency', 'label': 'Ashkenazi Jewish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 1604, 'alleleFrequency': 0.0, 'cohort': {'id': 'ASJ.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Ashkenazi Jewish'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.EAS', 'type': 'CohortAlleleFrequency', 'label': 'East Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 5202, 'alleleFrequency': 0.0, 'cohort': {'id': 'EAS', 'characteristics': [{'name': 'genetic ancestry', 'value': 'East Asian'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.EAS.XX', 'type': 'CohortAlleleFrequency', 'label': 'East Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 2278, 'alleleFrequency': 0.0, 'cohort': {'id': 'EAS.XX', 'characteristics': 
[{'name': 'genetic ancestry', 'value': 'East Asian'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.EAS.XY', 'type': 'CohortAlleleFrequency', 'label': 'East Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 2924, 'alleleFrequency': 0.0, 'cohort': {'id': 'EAS.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'East Asian'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.FIN', 'type': 'CohortAlleleFrequency', 'label': 'Finnish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 10620, 'alleleFrequency': 0.0, 'cohort': {'id': 'FIN', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Finnish'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.FIN.XX', 'type': 'CohortAlleleFrequency', 'label': 'Finnish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 2574, 'alleleFrequency': 0.0, 'cohort': {'id': 'FIN.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Finnish'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.FIN.XY', 'type': 'CohortAlleleFrequency', 'label': 'Finnish Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 8046, 'alleleFrequency': 0.0, 'cohort': {'id': 'FIN.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Finnish'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.MID', 'type': 'CohortAlleleFrequency', 'label': 'Middle Eastern Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': 
'#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 316, 'alleleFrequency': 0.0, 'cohort': {'id': 'MID', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Middle Eastern'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.MID.XX', 'type': 'CohortAlleleFrequency', 'label': 'Middle Eastern Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 164, 'alleleFrequency': 0.0, 'cohort': {'id': 'MID.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Middle Eastern'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.MID.XY', 'type': 'CohortAlleleFrequency', 'label': 'Middle Eastern Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 152, 'alleleFrequency': 0.0, 'cohort': {'id': 'MID.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Middle Eastern'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.NFE', 'type': 'CohortAlleleFrequency', 'label': 'Non-Finnish European Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 2, 'locusAlleleCount': 68014, 'alleleFrequency': 2.9405710588996384e-05, 'cohort': {'id': 'NFE', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Non-Finnish European'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.NFE.XX', 'type': 'CohortAlleleFrequency', 'label': 'Non-Finnish European Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 2, 'locusAlleleCount': 39342, 'alleleFrequency': 5.083625641807737e-05, 'cohort': {'id': 'NFE.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Non-Finnish European'}, {'name': 'biological sex', 
'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.NFE.XY', 'type': 'CohortAlleleFrequency', 'label': 'Non-Finnish European Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 28672, 'alleleFrequency': 0.0, 'cohort': {'id': 'NFE.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Non-Finnish European'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.REMAINING', 'type': 'CohortAlleleFrequency', 'label': 'Remaining individuals Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 2090, 'alleleFrequency': 0.0, 'cohort': {'id': 'REMAINING', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Remaining individuals'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.REMAINING.XX', 'type': 'CohortAlleleFrequency', 'label': 'Remaining individuals Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 1030, 'alleleFrequency': 0.0, 'cohort': {'id': 'REMAINING.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Remaining individuals'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.REMAINING.XY', 'type': 'CohortAlleleFrequency', 'label': 'Remaining individuals Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 1060, 'alleleFrequency': 0.0, 'cohort': {'id': 'REMAINING.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'Remaining individuals'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}, {'id': 'chr17-43044295-T-TG.SAS', 'type': 'CohortAlleleFrequency', 'label': 'South Asian Cohort Allele Frequency 
for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 4834, 'alleleFrequency': 0.0, 'cohort': {'id': 'SAS', 'characteristics': [{'name': 'genetic ancestry', 'value': 'South Asian'}]}, 'ancillaryResults': {'homozygotes': 0}, 'subcohortFrequency': [{'id': 'chr17-43044295-T-TG.SAS.XX', 'type': 'CohortAlleleFrequency', 'label': 'South Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 1166, 'alleleFrequency': 0.0, 'cohort': {'id': 'SAS.XX', 'characteristics': [{'name': 'genetic ancestry', 'value': 'South Asian'}, {'name': 'biological sex', 'value': 'XX'}]}, 'ancillaryResults': {'homozygotes': 0}}, {'id': 'chr17-43044295-T-TG.SAS.XY', 'type': 'CohortAlleleFrequency', 'label': 'South Asian Cohort Allele Frequency for chr17-43044295-T-TG', 'focusAllele': '#/focusAllele', 'focusAlleleCount': 0, 'locusAlleleCount': 3668, 'alleleFrequency': 0.0, 'cohort': {'id': 'SAS.XY', 'characteristics': [{'name': 'genetic ancestry', 'value': 'South Asian'}, {'name': 'biological sex', 'value': 'XY'}]}, 'ancillaryResults': {'homozygotes': 0}}]}]}\n", | |
"GraphQL VA response matches gnomad_methods VA response\n", | |
"Time elapsed: 10.99193525314331s (1.099193525314331 s/record)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/kferrite/dev/gnomad_methods/venv/lib/python3.11/site-packages/python_jsonschema_objects/__init__.py:46: UserWarning:\n", | |
"\n", | |
"Schema version http://json-schema.org/draft-07/schema not recognized. Some keywords and features may not be supported.\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# response = make_va_graphql_query(variantId=\"17-7667874-G-C\")\n", | |
"# response = make_va_graphql_query(variantId=\"1-55051215-G-GA\")\n", | |
"import time\n", | |
"import deepdiff\n", | |
"\n", | |
"start = time.time()\n", | |
"\n", | |
"# Only the first record is queried for now; widen this slice to cover more.\n", | |
"# The per-record timing at the bottom is computed from this same list.\n", | |
"records = brca1_10_records[:1]\n", | |
"\n", | |
"\n", | |
"def iter_frequencies(freq: dict):\n", | |
"    # Yield a cohort allele frequency dict, then every nested\n", | |
"    # subcohortFrequency entry at any depth. A missing or null\n", | |
"    # subcohortFrequency is treated as having no children.\n", | |
"    yield freq\n", | |
"    for sub in freq.get(\"subcohortFrequency\") or []:\n", | |
"        yield from iter_frequencies(sub)\n", | |
"\n", | |
"\n", | |
"def delete_nones(obj: dict) -> None:\n", | |
"    # Remove the optional fields that came back as explicit nulls, so that\n", | |
"    # a null field is simply absent from the object.\n", | |
"    cohort = obj.get(\"cohort\")\n", | |
"    ancillary = obj.get(\"ancillaryResults\")\n", | |
"    nullable = [\n", | |
"        (cohort, \"label\"),\n", | |
"        (cohort, \"characteristics\"),\n", | |
"        (obj, \"derivedFrom\"),\n", | |
"        (ancillary, \"grpMaxFAF95\"),\n", | |
"        (ancillary, \"jointGrpMaxFAF95\"),\n", | |
"        (ancillary, \"hemizygotes\"),\n", | |
"    ]\n", | |
"    for container, key in nullable:\n", | |
"        if isinstance(container, dict) and key in container and container[key] is None:\n", | |
"            del container[key]\n", | |
"\n", | |
"\n", | |
"def pull_up_focus_allele_iri(obj: dict) -> None:\n", | |
"    # Replace a {\"value\": <iri>} focusAllele wrapper with the bare string.\n", | |
"    # The isinstance check keeps an already-unwrapped string untouched.\n", | |
"    focus = obj.get(\"focusAllele\")\n", | |
"    if isinstance(focus, dict) and \"value\" in focus:\n", | |
"        obj[\"focusAllele\"] = focus[\"value\"]\n", | |
"\n", | |
"\n", | |
"def allele_freq_to_float(obj: dict) -> None:\n", | |
"    # just a json casting thing\n", | |
"    # graphql returns json 0 instead of 0.0 when the float value is 0\n", | |
"    # so it's parsed to int and the jsonschema is okay with it, but our DeepDiff isn't\n", | |
"    if obj.get(\"alleleFrequency\") is not None:\n", | |
"        obj[\"alleleFrequency\"] = float(obj[\"alleleFrequency\"])\n", | |
"\n", | |
"\n", | |
"for r in records:\n", | |
"    print(f\"Querying {r.variant_id}\")\n", | |
"    response = make_va_graphql_query(variantId=r.variant_id)\n", | |
"    content = response.content.decode(\"utf-8\")\n", | |
"\n", | |
"    with open(f\"gnomad_va_{r.variant_id}.json\", \"w\", encoding=\"utf-8\") as f:\n", | |
"        f.write(content)\n", | |
"\n", | |
"    j = json.loads(content)\n", | |
"    # print(f\"{j=}\")\n", | |
"\n", | |
"    if j.get(\"errors\"):\n", | |
"        print(f\"GraphQL errors for {r.variant_id}: {j['errors']}\")\n", | |
"\n", | |
"    # A requested field can come back as null (see delete_nones), so test\n", | |
"    # the value itself rather than only whether the key is present.\n", | |
"    va_data = (j.get(\"data\") or {}).get(\"va\") or {}\n", | |
"\n", | |
"    vrs_obj = va_data.get(\"vrs\")\n", | |
"    if not vrs_obj:\n", | |
"        print(f\"No VRS object for {r.variant_id}\")\n", | |
"        continue\n", | |
"    assert len(vrs_obj) == 1\n", | |
"    vrs_obj = vrs_obj[0]\n", | |
"\n", | |
"    # The API nests each identifier string under a .value field; unwrap it\n", | |
"    # to the plain string before validating and comparing.\n", | |
"    vrs_obj[\"_id\"] = vrs_obj[\"_id\"][\"value\"]\n", | |
"    vrs_obj[\"location\"][\"_id\"] = vrs_obj[\"location\"][\"_id\"][\"value\"]\n", | |
"    print(f\"{vrs_obj=}\")\n", | |
"\n", | |
"    allele_schema = {\"$ref\": \"#/definitions/Allele\"}\n", | |
"    jsonschema.validate(\n", | |
"        instance=vrs_obj,\n", | |
"        schema=allele_schema,\n", | |
"        resolver=jsonschema.RefResolver(base_uri=\"\", referrer=schema),\n", | |
"    )\n", | |
"\n", | |
"    va_obj = va_data.get(\"va\")\n", | |
"    if not va_obj:\n", | |
"        print(f\"No VA object for {r.variant_id}\")\n", | |
"        continue\n", | |
"    assert len(va_obj) == 1\n", | |
"    va_obj = va_obj[0]\n", | |
"\n", | |
"    # The top-level focusAllele is a full allele object; keep only its id.\n", | |
"    # Nested levels carry a {\"value\": ...} wrapper, unwrapped in the loop below.\n", | |
"    va_obj[\"focusAllele\"] = va_obj[\"focusAllele\"][\"_id\"][\"value\"]\n", | |
"    for freq in iter_frequencies(va_obj):\n", | |
"        delete_nones(freq)\n", | |
"        pull_up_focus_allele_iri(freq)\n", | |
"\n", | |
"    print(f\"{va_obj=}\")\n", | |
"\n", | |
"    jsonschema.validate(\n", | |
"        instance=va_obj,\n", | |
"        schema=schema,\n", | |
"    )\n", | |
"\n", | |
"    reproduced = gnomad_gks(\n", | |
"        hl.Interval(start=r.locus, end=r.locus, includes_start=True, includes_end=True),\n", | |
"        # hl.locus_interval(r.locus, r.locus, reference_genome=\"GRCh38\"),\n", | |
"        # [r.locus],\n", | |
"        version=ht_version,\n", | |
"        data_type=ht_data_type,\n", | |
"        by_ancestry_group=True,\n", | |
"        by_sex=True,\n", | |
"        custom_ht=ht_brca1_10,\n", | |
"        skip_checkpoint=True,\n", | |
"    )\n", | |
"    assert len(reproduced) == 1\n", | |
"    reproduced = reproduced[0]\n", | |
"    assert reproduced[\"gks_vrs_variant\"] == vrs_obj\n", | |
"    print(\"GraphQL VRS response matches gnomad_methods VRS response\")\n", | |
"\n", | |
"    reproduced_va = reproduced[\"gks_va_freq\"]\n", | |
"    print(f\"{reproduced_va=}\")\n", | |
"    reproduced_va[\"focusAllele\"] = reproduced_va[\"focusAllele\"][\"_id\"]\n", | |
"    # Missing latest updates to gnomad_methods values\n", | |
"    # - qualityMeasures\n", | |
"    reproduced_va.pop(\"qualityMeasures\", None)\n", | |
"\n", | |
"    for freq in iter_frequencies(va_obj):\n", | |
"        allele_freq_to_float(freq)\n", | |
"\n", | |
"    # Report the real comparison result instead of always claiming a match.\n", | |
"    # This stays a printed report rather than an assert so that a mismatch\n", | |
"    # does not abort the remaining records.\n", | |
"    va_diff = deepdiff.DeepDiff(reproduced_va, va_obj)\n", | |
"    if va_diff:\n", | |
"        print(f\"GraphQL VA response differs from gnomad_methods VA response: {va_diff}\")\n", | |
"    else:\n", | |
"        print(\"GraphQL VA response matches gnomad_methods VA response\")\n", | |
"\n", | |
"\n", | |
"end = time.time()\n", | |
"dur = end - start\n", | |
"# Divide by the number of records actually queried, not the full record list.\n", | |
"per_record = dur / len(records) if records else float(\"nan\")\n", | |
"print(f\"Time elapsed: {dur}s ({per_record} s/record)\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Dump the gnomad_methods VA object from the previous cell for manual inspection.\n", | |
"with open(\"garbage.json\", \"w\") as f:\n", | |
"    f.write(json.dumps(reproduced_va, indent=2))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "venv", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Selects the type tag and the min/max bounds of a VADefiniteRange. | |
# One of the three fragments spread into an interval's start/end in | |
# VASequenceLocation. | |
fragment VADefiniteRange on VADefiniteRange { | |
type | |
min | |
max | |
} | |
# Selects a VAIndefiniteRange. `value` is aliased to `indefinite_value` because | |
# this fragment and VANumber are spread into the same start/end selection, and | |
# their `value` fields have different numeric types (float here, integer on | |
# VANumber), so they cannot share one response key. | |
fragment VAIndefiniteRange on VAIndefiniteRange { | |
type | |
indefinite_value: value | |
comparator | |
} | |
# Selects the type tag and `value` of a VANumber. `value` keeps its own name | |
# here; the clashing `value` on VAIndefiniteRange is the one that is aliased. | |
fragment VANumber on VANumber { | |
type | |
value | |
} | |
# Selects a sequence location: its identifier, type, sequence_id and interval. | |
# `_id` is an object with the identifier string under `value`. | |
# The interval's start and end can each be a definite range, an indefinite | |
# range or a number, so all three fragments are spread into both. | |
fragment VASequenceLocation on VASequenceLocation { | |
_id { | |
value | |
} | |
type | |
sequence_id | |
interval { | |
type | |
start { | |
...VADefiniteRange | |
...VAIndefiniteRange | |
...VANumber | |
} | |
end { | |
...VADefiniteRange | |
...VAIndefiniteRange | |
...VANumber | |
} | |
} | |
} | |
# Selects a CURIE. The API models a CURIE as an object that nests the string | |
# under `value`, not as a bare string, so callers must unwrap it. | |
fragment VACURIE on VACURIE { | |
value | |
} | |
# Selects the type tag and the literal `sequence` string of an allele state. | |
fragment VALiteralSequenceExpression on VALiteralSequenceExpression { | |
type | |
sequence | |
} | |
# Selects the ancillary results of a cohort allele frequency: the grpMaxFAF95 | |
# and jointGrpMaxFAF95 objects (each with frequency, confidenceInterval and | |
# groupId) plus the homozygotes and hemizygotes fields. | |
# NOTE(review): grpMaxFAF95, jointGrpMaxFAF95 and hemizygotes appear to be | |
# nullable, so consumers should expect nulls - confirm against the schema. | |
fragment VAAncillaryResults on VAAncillaryResults { | |
grpMaxFAF95 { | |
frequency | |
confidenceInterval | |
groupId | |
} | |
jointGrpMaxFAF95 { | |
frequency | |
confidenceInterval | |
groupId | |
} | |
homozygotes | |
hemizygotes | |
} | |
# Selects a cohort: its id, label, and the name/value pairs that make up its | |
# characteristics. | |
fragment VACohort on VACohort { | |
id | |
label | |
characteristics { | |
name | |
value | |
} | |
} | |
# Selects every field of a cohort allele frequency except its nested | |
# subcohortFrequency list, so the same selection can be reused at each | |
# nesting level (see TopLevelVACohortAlleleFrequency). | |
# focusAllele is one of two types: a VAAllele, from which only the wrapped | |
# `_id` and the type are taken, or a VAFocusAlleleURI, which nests its string | |
# under `value`. | |
fragment VACohortAlleleFrequencyCommonFields on VACohortAlleleFrequency { | |
id | |
type | |
label | |
derivedFrom { | |
id | |
type | |
label | |
version | |
} | |
focusAllele { | |
... on VAAllele { | |
_id { | |
value | |
} | |
type | |
} | |
... on VAFocusAlleleURI { | |
value | |
} | |
} | |
focusAlleleCount | |
locusAlleleCount | |
alleleFrequency | |
cohort { | |
...VACohort | |
} | |
ancillaryResults { | |
...VAAncillaryResults | |
} | |
} | |
# Selects a cohort allele frequency plus two levels of nested | |
# subcohortFrequency, applying the common field selection at every level. | |
# The nesting is written out to a fixed depth because a GraphQL fragment | |
# cannot spread itself recursively; the innermost level selects no further | |
# subcohortFrequency. | |
fragment TopLevelVACohortAlleleFrequency on VACohortAlleleFrequency { | |
...VACohortAlleleFrequencyCommonFields | |
subcohortFrequency { | |
...VACohortAlleleFrequencyCommonFields | |
subcohortFrequency { | |
...VACohortAlleleFrequencyCommonFields | |
# id | |
# type | |
} | |
} | |
} | |
# Fetches the `va` field for one variant, identified by the required | |
# $variantId string. | |
# - vrs: the allele, with its wrapped `_id`, type, location (either a full | |
#   sequence location or a CURIE) and literal sequence state. | |
# - va: the cohort allele frequency with its nested subcohort frequencies. | |
query gnomad_va($variantId: String!) { | |
va(variantId: $variantId) { | |
vrs { | |
_id { | |
value | |
} | |
type | |
location { | |
...VASequenceLocation | |
...VACURIE | |
} | |
state { | |
...VALiteralSequenceExpression | |
} | |
} | |
va { | |
...TopLevelVACohortAlleleFrequency | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment