Created
January 18, 2021 19:25
-
-
Save eric-czech/bdff3402d27b5cccd7d8aacab0957a93 to your computer and use it in GitHub Desktop.
GWAS Simulation Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Sgkit GWAS Workflow Example\n", | |
"\n", | |
"This simulated workflow generates data to emulate large-scale GWAS regressions.\n", | |
"\n", | |
"See https://github.com/pystatgen/sgkit/issues/438 for more details." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import xarray as xr\n", | |
"import dask.array as da\n", | |
"from dask.array import stats\n", | |
"import fsspec\n", | |
"fs = fsspec.filesystem('gs')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'gs://sgkit-dev/optimization/gwas/sim_ds_1000_1000_3.zarr'" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Debug settings\n", | |
"n = 1000 # Number of variants (i.e. genomic locations)\n", | |
"m = 1000 # Number of individuals (i.e. people)\n", | |
"c = 3 # Number of covariates (i.e. confounders)\n", | |
"\n", | |
"# Representative settings for single (small) UK Biobank chromosome:\n", | |
"# n, m, c = 141910, 365941, 25\n", | |
"\n", | |
"path = f\"gs://sgkit-dev/optimization/gwas/sim_ds_{n}_{m}_{c}.zarr\"\n", | |
"path" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Saving simulated data to gs://sgkit-dev/optimization/gwas/sim_ds_1000_1000_3.zarr\n" | |
] | |
} | |
], | |
"source": [ | |
"# Create the dataset on cloud storage if not already present\n", | |
"if not fs.exists(path):\n", | |
" rs = da.random.RandomState(0)\n", | |
" XL, BL = rs.randint(0, 128, size=(n, m), chunks=(5216, 5792)), da.array([1] + [0] * (m - 1))\n", | |
" XC, BC = rs.normal(size=(m, c)), rs.normal(size=(c,))\n", | |
" Y = (XL * BL).sum(axis=0) + XC @ BC + rs.normal(scale=.001, size=m)\n", | |
" ds = xr.Dataset(dict(\n", | |
" # This is a proxy for discretized allele dosages (between 0 and 2)\n", | |
" XL=(('variants', 'samples'), (2 * XL / 127).astype('f2')),\n", | |
" # This value represents covariates for samples, e.g. age, sex, ancestry, etc.\n", | |
" XC=(('samples', 'covariates'), XC.astype('f4')),\n", | |
" # This is the outcome on which all variant data will be regressed separately\n", | |
" Y=(('samples', 'outcomes'), Y[:, np.newaxis].astype('f4')),\n", | |
" ))\n", | |
" print(f'Saving simulated data to {path}')\n", | |
" ds.to_zarr(fsspec.get_mapper(path), mode='w', consolidated=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n", | |
"<defs>\n", | |
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n", | |
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n", | |
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n", | |
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n", | |
"</symbol>\n", | |
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n", | |
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n", | |
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n", | |
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n", | |
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n", | |
"</symbol>\n", | |
"</defs>\n", | |
"</svg>\n", | |
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n", | |
" *\n", | |
" */\n", | |
"\n", | |
":root {\n", | |
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n", | |
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n", | |
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n", | |
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n", | |
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n", | |
" --xr-background-color: var(--jp-layout-color0, white);\n", | |
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n", | |
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", | |
"}\n", | |
"\n", | |
"html[theme=dark],\n", | |
"body.vscode-dark {\n", | |
" --xr-font-color0: rgba(255, 255, 255, 1);\n", | |
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n", | |
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n", | |
" --xr-border-color: #1F1F1F;\n", | |
" --xr-disabled-color: #515151;\n", | |
" --xr-background-color: #111111;\n", | |
" --xr-background-color-row-even: #111111;\n", | |
" --xr-background-color-row-odd: #313131;\n", | |
"}\n", | |
"\n", | |
".xr-wrap {\n", | |
" display: block;\n", | |
" min-width: 300px;\n", | |
" max-width: 700px;\n", | |
"}\n", | |
"\n", | |
".xr-text-repr-fallback {\n", | |
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-header {\n", | |
" padding-top: 6px;\n", | |
" padding-bottom: 6px;\n", | |
" margin-bottom: 4px;\n", | |
" border-bottom: solid 1px var(--xr-border-color);\n", | |
"}\n", | |
"\n", | |
".xr-header > div,\n", | |
".xr-header > ul {\n", | |
" display: inline;\n", | |
" margin-top: 0;\n", | |
" margin-bottom: 0;\n", | |
"}\n", | |
"\n", | |
".xr-obj-type,\n", | |
".xr-array-name {\n", | |
" margin-left: 2px;\n", | |
" margin-right: 10px;\n", | |
"}\n", | |
"\n", | |
".xr-obj-type {\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-sections {\n", | |
" padding-left: 0 !important;\n", | |
" display: grid;\n", | |
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n", | |
"}\n", | |
"\n", | |
".xr-section-item {\n", | |
" display: contents;\n", | |
"}\n", | |
"\n", | |
".xr-section-item input {\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-section-item input + label {\n", | |
" color: var(--xr-disabled-color);\n", | |
"}\n", | |
"\n", | |
".xr-section-item input:enabled + label {\n", | |
" cursor: pointer;\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-section-item input:enabled + label:hover {\n", | |
" color: var(--xr-font-color0);\n", | |
"}\n", | |
"\n", | |
".xr-section-summary {\n", | |
" grid-column: 1;\n", | |
" color: var(--xr-font-color2);\n", | |
" font-weight: 500;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary > span {\n", | |
" display: inline-block;\n", | |
" padding-left: 0.5em;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:disabled + label {\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in + label:before {\n", | |
" display: inline-block;\n", | |
" content: '►';\n", | |
" font-size: 11px;\n", | |
" width: 15px;\n", | |
" text-align: center;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:disabled + label:before {\n", | |
" color: var(--xr-disabled-color);\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:checked + label:before {\n", | |
" content: '▼';\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:checked + label > span {\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary,\n", | |
".xr-section-inline-details {\n", | |
" padding-top: 4px;\n", | |
" padding-bottom: 4px;\n", | |
"}\n", | |
"\n", | |
".xr-section-inline-details {\n", | |
" grid-column: 2 / -1;\n", | |
"}\n", | |
"\n", | |
".xr-section-details {\n", | |
" display: none;\n", | |
" grid-column: 1 / -1;\n", | |
" margin-bottom: 5px;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:checked ~ .xr-section-details {\n", | |
" display: contents;\n", | |
"}\n", | |
"\n", | |
".xr-array-wrap {\n", | |
" grid-column: 1 / -1;\n", | |
" display: grid;\n", | |
" grid-template-columns: 20px auto;\n", | |
"}\n", | |
"\n", | |
".xr-array-wrap > label {\n", | |
" grid-column: 1;\n", | |
" vertical-align: top;\n", | |
"}\n", | |
"\n", | |
".xr-preview {\n", | |
" color: var(--xr-font-color3);\n", | |
"}\n", | |
"\n", | |
".xr-array-preview,\n", | |
".xr-array-data {\n", | |
" padding: 0 5px !important;\n", | |
" grid-column: 2;\n", | |
"}\n", | |
"\n", | |
".xr-array-data,\n", | |
".xr-array-in:checked ~ .xr-array-preview {\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-array-in:checked ~ .xr-array-data,\n", | |
".xr-array-preview {\n", | |
" display: inline-block;\n", | |
"}\n", | |
"\n", | |
".xr-dim-list {\n", | |
" display: inline-block !important;\n", | |
" list-style: none;\n", | |
" padding: 0 !important;\n", | |
" margin: 0;\n", | |
"}\n", | |
"\n", | |
".xr-dim-list li {\n", | |
" display: inline-block;\n", | |
" padding: 0;\n", | |
" margin: 0;\n", | |
"}\n", | |
"\n", | |
".xr-dim-list:before {\n", | |
" content: '(';\n", | |
"}\n", | |
"\n", | |
".xr-dim-list:after {\n", | |
" content: ')';\n", | |
"}\n", | |
"\n", | |
".xr-dim-list li:not(:last-child):after {\n", | |
" content: ',';\n", | |
" padding-right: 5px;\n", | |
"}\n", | |
"\n", | |
".xr-has-index {\n", | |
" font-weight: bold;\n", | |
"}\n", | |
"\n", | |
".xr-var-list,\n", | |
".xr-var-item {\n", | |
" display: contents;\n", | |
"}\n", | |
"\n", | |
".xr-var-item > div,\n", | |
".xr-var-item label,\n", | |
".xr-var-item > .xr-var-name span {\n", | |
" background-color: var(--xr-background-color-row-even);\n", | |
" margin-bottom: 0;\n", | |
"}\n", | |
"\n", | |
".xr-var-item > .xr-var-name:hover span {\n", | |
" padding-right: 5px;\n", | |
"}\n", | |
"\n", | |
".xr-var-list > li:nth-child(odd) > div,\n", | |
".xr-var-list > li:nth-child(odd) > label,\n", | |
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n", | |
" background-color: var(--xr-background-color-row-odd);\n", | |
"}\n", | |
"\n", | |
".xr-var-name {\n", | |
" grid-column: 1;\n", | |
"}\n", | |
"\n", | |
".xr-var-dims {\n", | |
" grid-column: 2;\n", | |
"}\n", | |
"\n", | |
".xr-var-dtype {\n", | |
" grid-column: 3;\n", | |
" text-align: right;\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-var-preview {\n", | |
" grid-column: 4;\n", | |
"}\n", | |
"\n", | |
".xr-var-name,\n", | |
".xr-var-dims,\n", | |
".xr-var-dtype,\n", | |
".xr-preview,\n", | |
".xr-attrs dt {\n", | |
" white-space: nowrap;\n", | |
" overflow: hidden;\n", | |
" text-overflow: ellipsis;\n", | |
" padding-right: 10px;\n", | |
"}\n", | |
"\n", | |
".xr-var-name:hover,\n", | |
".xr-var-dims:hover,\n", | |
".xr-var-dtype:hover,\n", | |
".xr-attrs dt:hover {\n", | |
" overflow: visible;\n", | |
" width: auto;\n", | |
" z-index: 1;\n", | |
"}\n", | |
"\n", | |
".xr-var-attrs,\n", | |
".xr-var-data {\n", | |
" display: none;\n", | |
" background-color: var(--xr-background-color) !important;\n", | |
" padding-bottom: 5px !important;\n", | |
"}\n", | |
"\n", | |
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", | |
".xr-var-data-in:checked ~ .xr-var-data {\n", | |
" display: block;\n", | |
"}\n", | |
"\n", | |
".xr-var-data > table {\n", | |
" float: right;\n", | |
"}\n", | |
"\n", | |
".xr-var-name span,\n", | |
".xr-var-data,\n", | |
".xr-attrs {\n", | |
" padding-left: 25px !important;\n", | |
"}\n", | |
"\n", | |
".xr-attrs,\n", | |
".xr-var-attrs,\n", | |
".xr-var-data {\n", | |
" grid-column: 1 / -1;\n", | |
"}\n", | |
"\n", | |
"dl.xr-attrs {\n", | |
" padding: 0;\n", | |
" margin: 0;\n", | |
" display: grid;\n", | |
" grid-template-columns: 125px auto;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dt, dd {\n", | |
" padding: 0;\n", | |
" margin: 0;\n", | |
" float: left;\n", | |
" padding-right: 10px;\n", | |
" width: auto;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dt {\n", | |
" font-weight: normal;\n", | |
" grid-column: 1;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dt:hover span {\n", | |
" display: inline-block;\n", | |
" background: var(--xr-background-color);\n", | |
" padding-right: 10px;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dd {\n", | |
" grid-column: 2;\n", | |
" white-space: pre-wrap;\n", | |
" word-break: break-all;\n", | |
"}\n", | |
"\n", | |
".xr-icon-database,\n", | |
".xr-icon-file-text2 {\n", | |
" display: inline-block;\n", | |
" vertical-align: middle;\n", | |
" width: 1em;\n", | |
" height: 1.5em !important;\n", | |
" stroke-width: 0;\n", | |
" stroke: currentColor;\n", | |
" fill: currentColor;\n", | |
"}\n", | |
"</style><pre class='xr-text-repr-fallback'><xarray.Dataset>\n", | |
"Dimensions: (covariates: 3, outcomes: 1, samples: 1000, variants: 1000)\n", | |
"Dimensions without coordinates: covariates, outcomes, samples, variants\n", | |
"Data variables:\n", | |
" XC (samples, covariates) float32 dask.array<chunksize=(1000, 3), meta=np.ndarray>\n", | |
" XL (variants, samples) float16 dask.array<chunksize=(1000, 1000), meta=np.ndarray>\n", | |
" Y (samples, outcomes) float32 dask.array<chunksize=(1000, 1), meta=np.ndarray></pre><div class='xr-wrap' hidden><div class='xr-header'><div class='xr-obj-type'>xarray.Dataset</div></div><ul class='xr-sections'><li class='xr-section-item'><input id='section-df9375a3-0f92-426f-97f7-9ce3445c34e6' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-df9375a3-0f92-426f-97f7-9ce3445c34e6' class='xr-section-summary' title='Expand/collapse section'>Dimensions:</label><div class='xr-section-inline-details'><ul class='xr-dim-list'><li><span>covariates</span>: 3</li><li><span>outcomes</span>: 1</li><li><span>samples</span>: 1000</li><li><span>variants</span>: 1000</li></ul></div><div class='xr-section-details'></div></li><li class='xr-section-item'><input id='section-a8e74f22-476b-4b35-af4f-e03b3007657f' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-a8e74f22-476b-4b35-af4f-e03b3007657f' class='xr-section-summary' title='Expand/collapse section'>Coordinates: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'></ul></div></li><li class='xr-section-item'><input id='section-895f9ba7-a717-4082-9895-f438996883db' class='xr-section-summary-in' type='checkbox' checked><label for='section-895f9ba7-a717-4082-9895-f438996883db' class='xr-section-summary' >Data variables: <span>(3)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span>XC</span></div><div class='xr-var-dims'>(samples, covariates)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>dask.array<chunksize=(1000, 3), meta=np.ndarray></div><input id='attrs-92d6f08c-04c4-4e26-9624-f8909fa3edd9' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-92d6f08c-04c4-4e26-9624-f8909fa3edd9' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-b226b7cc-2e00-4ee4-9c37-96fbe154fb2d' class='xr-var-data-in' type='checkbox'><label for='data-b226b7cc-2e00-4ee4-9c37-96fbe154fb2d' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 12.00 kB </td> <td> 12.00 kB </td></tr>\n", | |
" <tr><th> Shape </th><td> (1000, 3) </td> <td> (1000, 3) </td></tr>\n", | |
" <tr><th> Count </th><td> 2 Tasks </td><td> 1 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >3</text>\n", | |
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">1000</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table></div></li><li class='xr-var-item'><div class='xr-var-name'><span>XL</span></div><div class='xr-var-dims'>(variants, samples)</div><div class='xr-var-dtype'>float16</div><div class='xr-var-preview xr-preview'>dask.array<chunksize=(1000, 1000), meta=np.ndarray></div><input id='attrs-683a7a53-7c49-451c-a78f-ce8fc15f7ffb' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-683a7a53-7c49-451c-a78f-ce8fc15f7ffb' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-c546d977-1069-4ff8-9c01-af7f706f7663' class='xr-var-data-in' type='checkbox'><label for='data-c546d977-1069-4ff8-9c01-af7f706f7663' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 2.00 MB </td> <td> 2.00 MB </td></tr>\n", | |
" <tr><th> Shape </th><td> (1000, 1000) </td> <td> (1000, 1000) </td></tr>\n", | |
" <tr><th> Count </th><td> 2 Tasks </td><td> 1 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> float16 </td><td> numpy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"0.000000,0.000000 120.000000,0.000000 120.000000,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1000</text>\n", | |
" <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,140.000000,60.000000)\">1000</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table></div></li><li class='xr-var-item'><div class='xr-var-name'><span>Y</span></div><div class='xr-var-dims'>(samples, outcomes)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>dask.array<chunksize=(1000, 1), meta=np.ndarray></div><input id='attrs-060df05a-4dd1-428e-944c-dcac78ece985' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-060df05a-4dd1-428e-944c-dcac78ece985' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-5a1e128e-22d1-4489-9867-d30a5a870013' class='xr-var-data-in' type='checkbox'><label for='data-5a1e128e-22d1-4489-9867-d30a5a870013' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 4.00 kB </td> <td> 4.00 kB </td></tr>\n", | |
" <tr><th> Shape </th><td> (1000, 1) </td> <td> (1000, 1) </td></tr>\n", | |
" <tr><th> Count </th><td> 2 Tasks </td><td> 1 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1</text>\n", | |
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">1000</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table></div></li></ul></div></li><li class='xr-section-item'><input id='section-94585102-eb79-433b-9aaf-6b17e5ec3557' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-94585102-eb79-433b-9aaf-6b17e5ec3557' class='xr-section-summary' title='Expand/collapse section'>Attributes: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'></dl></div></li></ul></div></div>" | |
], | |
"text/plain": [ | |
"<xarray.Dataset>\n", | |
"Dimensions: (covariates: 3, outcomes: 1, samples: 1000, variants: 1000)\n", | |
"Dimensions without coordinates: covariates, outcomes, samples, variants\n", | |
"Data variables:\n", | |
" XC (samples, covariates) float32 dask.array<chunksize=(1000, 3), meta=np.ndarray>\n", | |
" XL (variants, samples) float16 dask.array<chunksize=(1000, 1000), meta=np.ndarray>\n", | |
" Y (samples, outcomes) float32 dask.array<chunksize=(1000, 1), meta=np.ndarray>" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds = xr.open_zarr(fsspec.get_mapper(path), consolidated=True)\n", | |
"ds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def gwas(XL, XC, Y):\n", | |
" # Add intercept\n", | |
" XC = da.concatenate([da.ones((XC.shape[0], 1), dtype=XC.dtype), XC], axis=1)\n", | |
" \n", | |
" # Rechunk along short axes\n", | |
" XC = XC.rechunk((None, -1))\n", | |
" Y = Y.rechunk((None, -1))\n", | |
" dof = Y.shape[0] - XC.shape[1] - 1\n", | |
" \n", | |
" # Apply orthogonal projection to eliminate core covariates\n", | |
" XLP = XL - XC @ da.linalg.lstsq(XC, XL)[0]\n", | |
" YP = Y - XC @ da.linalg.lstsq(XC, Y)[0]\n", | |
"\n", | |
" # Estimate coefficients for each loop covariate\n", | |
" XLPS = (XLP ** 2).sum(axis=0, keepdims=True).T\n", | |
" B = (XLP.T @ YP) / XLPS\n", | |
"\n", | |
" # Compute residuals for each loop covariate and outcome separately\n", | |
" YR = YP[:, np.newaxis, :] - XLP[..., np.newaxis] * B[np.newaxis, ...]\n", | |
" RSS = (YR ** 2).sum(axis=0)\n", | |
" \n", | |
" # Get t-statistics for coefficient estimates and match to p-values\n", | |
" T = B / np.sqrt(RSS / dof / XLPS)\n", | |
" P = da.map_blocks(\n", | |
" lambda t: 2 * stats.distributions.t.sf(np.abs(t), dof), T, dtype=\"float64\"\n", | |
" )\n", | |
" return xr.Dataset(dict(\n", | |
" beta=(('variants','outcomes'), B), \n", | |
" pval=(('variants','outcomes'), P)\n", | |
" ))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n", | |
"<defs>\n", | |
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n", | |
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n", | |
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n", | |
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n", | |
"</symbol>\n", | |
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n", | |
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n", | |
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n", | |
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n", | |
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n", | |
"</symbol>\n", | |
"</defs>\n", | |
"</svg>\n", | |
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n", | |
" *\n", | |
" */\n", | |
"\n", | |
":root {\n", | |
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n", | |
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n", | |
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n", | |
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n", | |
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n", | |
" --xr-background-color: var(--jp-layout-color0, white);\n", | |
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n", | |
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", | |
"}\n", | |
"\n", | |
"html[theme=dark],\n", | |
"body.vscode-dark {\n", | |
" --xr-font-color0: rgba(255, 255, 255, 1);\n", | |
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n", | |
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n", | |
" --xr-border-color: #1F1F1F;\n", | |
" --xr-disabled-color: #515151;\n", | |
" --xr-background-color: #111111;\n", | |
" --xr-background-color-row-even: #111111;\n", | |
" --xr-background-color-row-odd: #313131;\n", | |
"}\n", | |
"\n", | |
".xr-wrap {\n", | |
" display: block;\n", | |
" min-width: 300px;\n", | |
" max-width: 700px;\n", | |
"}\n", | |
"\n", | |
".xr-text-repr-fallback {\n", | |
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-header {\n", | |
" padding-top: 6px;\n", | |
" padding-bottom: 6px;\n", | |
" margin-bottom: 4px;\n", | |
" border-bottom: solid 1px var(--xr-border-color);\n", | |
"}\n", | |
"\n", | |
".xr-header > div,\n", | |
".xr-header > ul {\n", | |
" display: inline;\n", | |
" margin-top: 0;\n", | |
" margin-bottom: 0;\n", | |
"}\n", | |
"\n", | |
".xr-obj-type,\n", | |
".xr-array-name {\n", | |
" margin-left: 2px;\n", | |
" margin-right: 10px;\n", | |
"}\n", | |
"\n", | |
".xr-obj-type {\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-sections {\n", | |
" padding-left: 0 !important;\n", | |
" display: grid;\n", | |
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n", | |
"}\n", | |
"\n", | |
".xr-section-item {\n", | |
" display: contents;\n", | |
"}\n", | |
"\n", | |
".xr-section-item input {\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-section-item input + label {\n", | |
" color: var(--xr-disabled-color);\n", | |
"}\n", | |
"\n", | |
".xr-section-item input:enabled + label {\n", | |
" cursor: pointer;\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-section-item input:enabled + label:hover {\n", | |
" color: var(--xr-font-color0);\n", | |
"}\n", | |
"\n", | |
".xr-section-summary {\n", | |
" grid-column: 1;\n", | |
" color: var(--xr-font-color2);\n", | |
" font-weight: 500;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary > span {\n", | |
" display: inline-block;\n", | |
" padding-left: 0.5em;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:disabled + label {\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in + label:before {\n", | |
" display: inline-block;\n", | |
" content: '►';\n", | |
" font-size: 11px;\n", | |
" width: 15px;\n", | |
" text-align: center;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:disabled + label:before {\n", | |
" color: var(--xr-disabled-color);\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:checked + label:before {\n", | |
" content: '▼';\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:checked + label > span {\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary,\n", | |
".xr-section-inline-details {\n", | |
" padding-top: 4px;\n", | |
" padding-bottom: 4px;\n", | |
"}\n", | |
"\n", | |
".xr-section-inline-details {\n", | |
" grid-column: 2 / -1;\n", | |
"}\n", | |
"\n", | |
".xr-section-details {\n", | |
" display: none;\n", | |
" grid-column: 1 / -1;\n", | |
" margin-bottom: 5px;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:checked ~ .xr-section-details {\n", | |
" display: contents;\n", | |
"}\n", | |
"\n", | |
".xr-array-wrap {\n", | |
" grid-column: 1 / -1;\n", | |
" display: grid;\n", | |
" grid-template-columns: 20px auto;\n", | |
"}\n", | |
"\n", | |
".xr-array-wrap > label {\n", | |
" grid-column: 1;\n", | |
" vertical-align: top;\n", | |
"}\n", | |
"\n", | |
".xr-preview {\n", | |
" color: var(--xr-font-color3);\n", | |
"}\n", | |
"\n", | |
".xr-array-preview,\n", | |
".xr-array-data {\n", | |
" padding: 0 5px !important;\n", | |
" grid-column: 2;\n", | |
"}\n", | |
"\n", | |
".xr-array-data,\n", | |
".xr-array-in:checked ~ .xr-array-preview {\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-array-in:checked ~ .xr-array-data,\n", | |
".xr-array-preview {\n", | |
" display: inline-block;\n", | |
"}\n", | |
"\n", | |
".xr-dim-list {\n", | |
" display: inline-block !important;\n", | |
" list-style: none;\n", | |
" padding: 0 !important;\n", | |
" margin: 0;\n", | |
"}\n", | |
"\n", | |
".xr-dim-list li {\n", | |
" display: inline-block;\n", | |
" padding: 0;\n", | |
" margin: 0;\n", | |
"}\n", | |
"\n", | |
".xr-dim-list:before {\n", | |
" content: '(';\n", | |
"}\n", | |
"\n", | |
".xr-dim-list:after {\n", | |
" content: ')';\n", | |
"}\n", | |
"\n", | |
".xr-dim-list li:not(:last-child):after {\n", | |
" content: ',';\n", | |
" padding-right: 5px;\n", | |
"}\n", | |
"\n", | |
".xr-has-index {\n", | |
" font-weight: bold;\n", | |
"}\n", | |
"\n", | |
".xr-var-list,\n", | |
".xr-var-item {\n", | |
" display: contents;\n", | |
"}\n", | |
"\n", | |
".xr-var-item > div,\n", | |
".xr-var-item label,\n", | |
".xr-var-item > .xr-var-name span {\n", | |
" background-color: var(--xr-background-color-row-even);\n", | |
" margin-bottom: 0;\n", | |
"}\n", | |
"\n", | |
".xr-var-item > .xr-var-name:hover span {\n", | |
" padding-right: 5px;\n", | |
"}\n", | |
"\n", | |
".xr-var-list > li:nth-child(odd) > div,\n", | |
".xr-var-list > li:nth-child(odd) > label,\n", | |
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n", | |
" background-color: var(--xr-background-color-row-odd);\n", | |
"}\n", | |
"\n", | |
".xr-var-name {\n", | |
" grid-column: 1;\n", | |
"}\n", | |
"\n", | |
".xr-var-dims {\n", | |
" grid-column: 2;\n", | |
"}\n", | |
"\n", | |
".xr-var-dtype {\n", | |
" grid-column: 3;\n", | |
" text-align: right;\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-var-preview {\n", | |
" grid-column: 4;\n", | |
"}\n", | |
"\n", | |
".xr-var-name,\n", | |
".xr-var-dims,\n", | |
".xr-var-dtype,\n", | |
".xr-preview,\n", | |
".xr-attrs dt {\n", | |
" white-space: nowrap;\n", | |
" overflow: hidden;\n", | |
" text-overflow: ellipsis;\n", | |
" padding-right: 10px;\n", | |
"}\n", | |
"\n", | |
".xr-var-name:hover,\n", | |
".xr-var-dims:hover,\n", | |
".xr-var-dtype:hover,\n", | |
".xr-attrs dt:hover {\n", | |
" overflow: visible;\n", | |
" width: auto;\n", | |
" z-index: 1;\n", | |
"}\n", | |
"\n", | |
".xr-var-attrs,\n", | |
".xr-var-data {\n", | |
" display: none;\n", | |
" background-color: var(--xr-background-color) !important;\n", | |
" padding-bottom: 5px !important;\n", | |
"}\n", | |
"\n", | |
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", | |
".xr-var-data-in:checked ~ .xr-var-data {\n", | |
" display: block;\n", | |
"}\n", | |
"\n", | |
".xr-var-data > table {\n", | |
" float: right;\n", | |
"}\n", | |
"\n", | |
".xr-var-name span,\n", | |
".xr-var-data,\n", | |
".xr-attrs {\n", | |
" padding-left: 25px !important;\n", | |
"}\n", | |
"\n", | |
".xr-attrs,\n", | |
".xr-var-attrs,\n", | |
".xr-var-data {\n", | |
" grid-column: 1 / -1;\n", | |
"}\n", | |
"\n", | |
"dl.xr-attrs {\n", | |
" padding: 0;\n", | |
" margin: 0;\n", | |
" display: grid;\n", | |
" grid-template-columns: 125px auto;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dt, dd {\n", | |
" padding: 0;\n", | |
" margin: 0;\n", | |
" float: left;\n", | |
" padding-right: 10px;\n", | |
" width: auto;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dt {\n", | |
" font-weight: normal;\n", | |
" grid-column: 1;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dt:hover span {\n", | |
" display: inline-block;\n", | |
" background: var(--xr-background-color);\n", | |
" padding-right: 10px;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dd {\n", | |
" grid-column: 2;\n", | |
" white-space: pre-wrap;\n", | |
" word-break: break-all;\n", | |
"}\n", | |
"\n", | |
".xr-icon-database,\n", | |
".xr-icon-file-text2 {\n", | |
" display: inline-block;\n", | |
" vertical-align: middle;\n", | |
" width: 1em;\n", | |
" height: 1.5em !important;\n", | |
" stroke-width: 0;\n", | |
" stroke: currentColor;\n", | |
" fill: currentColor;\n", | |
"}\n", | |
"</style><pre class='xr-text-repr-fallback'><xarray.Dataset>\n", | |
"Dimensions: (outcomes: 1, variants: 1000)\n", | |
"Dimensions without coordinates: outcomes, variants\n", | |
"Data variables:\n", | |
" beta (variants, outcomes) float32 dask.array<chunksize=(652, 1), meta=np.ndarray>\n", | |
" pval (variants, outcomes) float64 dask.array<chunksize=(652, 1), meta=np.ndarray></pre><div class='xr-wrap' hidden><div class='xr-header'><div class='xr-obj-type'>xarray.Dataset</div></div><ul class='xr-sections'><li class='xr-section-item'><input id='section-8b8771dc-cc40-4942-b461-0b15191e8f9e' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-8b8771dc-cc40-4942-b461-0b15191e8f9e' class='xr-section-summary' title='Expand/collapse section'>Dimensions:</label><div class='xr-section-inline-details'><ul class='xr-dim-list'><li><span>outcomes</span>: 1</li><li><span>variants</span>: 1000</li></ul></div><div class='xr-section-details'></div></li><li class='xr-section-item'><input id='section-0eab2007-4bc6-4ffb-a7e0-e25b85c65b28' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-0eab2007-4bc6-4ffb-a7e0-e25b85c65b28' class='xr-section-summary' title='Expand/collapse section'>Coordinates: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'></ul></div></li><li class='xr-section-item'><input id='section-c5fb0e35-0611-452f-865d-ab1ed783a352' class='xr-section-summary-in' type='checkbox' checked><label for='section-c5fb0e35-0611-452f-865d-ab1ed783a352' class='xr-section-summary' >Data variables: <span>(2)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span>beta</span></div><div class='xr-var-dims'>(variants, outcomes)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>dask.array<chunksize=(652, 1), meta=np.ndarray></div><input id='attrs-85dbf599-c9ea-4f28-a9b4-d40a91538534' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-85dbf599-c9ea-4f28-a9b4-d40a91538534' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-dec04547-fde4-42e0-9da9-6958e5cd6231' class='xr-var-data-in' type='checkbox'><label for='data-dec04547-fde4-42e0-9da9-6958e5cd6231' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 4.00 kB </td> <td> 2.61 kB </td></tr>\n", | |
" <tr><th> Shape </th><td> (1000, 1) </td> <td> (652, 1) </td></tr>\n", | |
" <tr><th> Count </th><td> 55 Tasks </td><td> 2 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"0\" y1=\"78\" x2=\"25\" y2=\"78\" />\n", | |
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1</text>\n", | |
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">1000</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table></div></li><li class='xr-var-item'><div class='xr-var-name'><span>pval</span></div><div class='xr-var-dims'>(variants, outcomes)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>dask.array<chunksize=(652, 1), meta=np.ndarray></div><input id='attrs-c2201045-0ebd-42ef-86a8-96f28bf79ef7' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-c2201045-0ebd-42ef-86a8-96f28bf79ef7' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-881f3a2a-bb18-4a47-94ea-1f65b0a61e1f' class='xr-var-data-in' type='checkbox'><label for='data-881f3a2a-bb18-4a47-94ea-1f65b0a61e1f' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 8.00 kB </td> <td> 5.22 kB </td></tr>\n", | |
" <tr><th> Shape </th><td> (1000, 1) </td> <td> (652, 1) </td></tr>\n", | |
" <tr><th> Count </th><td> 80 Tasks </td><td> 2 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> float64 </td><td> numpy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"0\" y1=\"78\" x2=\"25\" y2=\"78\" />\n", | |
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1</text>\n", | |
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">1000</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table></div></li></ul></div></li><li class='xr-section-item'><input id='section-79729e83-be6f-45de-a391-ab17871e91fc' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-79729e83-be6f-45de-a391-ab17871e91fc' class='xr-section-summary' title='Expand/collapse section'>Attributes: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'></dl></div></li></ul></div></div>" | |
], | |
"text/plain": [ | |
"<xarray.Dataset>\n", | |
"Dimensions: (outcomes: 1, variants: 1000)\n", | |
"Dimensions without coordinates: outcomes, variants\n", | |
"Data variables:\n", | |
" beta (variants, outcomes) float32 dask.array<chunksize=(652, 1), meta=np.ndarray>\n", | |
" pval (variants, outcomes) float64 dask.array<chunksize=(652, 1), meta=np.ndarray>" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Define the GWAS regressions\n", | |
"dsr = gwas(\n", | |
" # Note: This (the largest) array needs to be rechunked due to scalability \n", | |
" # issues with da.matmul, specifically https://github.com/dask/dask/pull/6924.\n", | |
" # See here for more details:\n", | |
" # https://github.com/pystatgen/sgkit/issues/390#issuecomment-730660134\n", | |
" ds.XL.data.rechunk((652, 5792)).T.astype('f4'), \n", | |
" ds.XC.data, \n", | |
" ds.Y.data\n", | |
")\n", | |
"dsr" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Results saved to gs://sgkit-dev/optimization/gwas/sim_res_1000_1000_3.zarr\n" | |
] | |
} | |
], | |
"source": [ | |
"# Compute and save betas/p-values\n", | |
"output_path = f\"gs://sgkit-dev/optimization/gwas/sim_res_{n}_{m}_{c}.zarr\"\n", | |
"dsr.to_zarr(fsspec.get_mapper(output_path), mode='w', consolidated=True)\n", | |
"print(f'Results saved to {output_path}')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment