Skip to content

Instantly share code, notes, and snippets.

@eric-czech
Created January 18, 2021 19:25
Show Gist options
  • Save eric-czech/bdff3402d27b5cccd7d8aacab0957a93 to your computer and use it in GitHub Desktop.
Save eric-czech/bdff3402d27b5cccd7d8aacab0957a93 to your computer and use it in GitHub Desktop.
GWAS Simulation Example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sgkit GWAS Workflow Example\n",
"\n",
"This simulated workflow generates data to emulate large-scale GWAS regressions.\n",
"\n",
"See https://github.com/pystatgen/sgkit/issues/438 for more details."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import xarray as xr\n",
"import dask.array as da\n",
"from dask.array import stats\n",
"import fsspec\n",
"fs = fsspec.filesystem('gs')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'gs://sgkit-dev/optimization/gwas/sim_ds_1000_1000_3.zarr'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Debug settings\n",
"n = 1000 # Number of variants (i.e. genomic locations)\n",
"m = 1000 # Number of individuals (i.e. people)\n",
"c = 3 # Number of covariates (i.e. confounders)\n",
"\n",
"# Representative settings for single (small) UK Biobank chromosome:\n",
"# n, m, c = 141910, 365941, 25\n",
"\n",
"path = f\"gs://sgkit-dev/optimization/gwas/sim_ds_{n}_{m}_{c}.zarr\"\n",
"path"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saving simulated data to gs://sgkit-dev/optimization/gwas/sim_ds_1000_1000_3.zarr\n"
]
}
],
"source": [
"# Create the dataset on cloud storage if not already present\n",
"if not fs.exists(path):\n",
" rs = da.random.RandomState(0)\n",
" XL, BL = rs.randint(0, 128, size=(n, m), chunks=(5216, 5792)), da.array([1] + [0] * (m - 1))\n",
" XC, BC = rs.normal(size=(m, c)), rs.normal(size=(c,))\n",
" Y = (XL * BL).sum(axis=0) + XC @ BC + rs.normal(scale=.001, size=m)\n",
" ds = xr.Dataset(dict(\n",
" # This is a proxy for discretized allele dosages (between 0 and 2)\n",
" XL=(('variants', 'samples'), (2 * XL / 127).astype('f2')),\n",
" # This value represents covariates for samples, e.g. age, sex, ancestry, etc.\n",
" XC=(('samples', 'covariates'), XC.astype('f4')),\n",
" # This is the outcome on which all variant data will be regressed separately\n",
" Y=(('samples', 'outcomes'), Y[:, np.newaxis].astype('f4')),\n",
" ))\n",
" print(f'Saving simulated data to {path}')\n",
" ds.to_zarr(fsspec.get_mapper(path), mode='w', consolidated=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n",
"<defs>\n",
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n",
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"</symbol>\n",
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n",
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"</symbol>\n",
"</defs>\n",
"</svg>\n",
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n",
" *\n",
" */\n",
"\n",
":root {\n",
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n",
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n",
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n",
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n",
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n",
" --xr-background-color: var(--jp-layout-color0, white);\n",
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n",
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n",
"}\n",
"\n",
"html[theme=dark],\n",
"body.vscode-dark {\n",
" --xr-font-color0: rgba(255, 255, 255, 1);\n",
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n",
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n",
" --xr-border-color: #1F1F1F;\n",
" --xr-disabled-color: #515151;\n",
" --xr-background-color: #111111;\n",
" --xr-background-color-row-even: #111111;\n",
" --xr-background-color-row-odd: #313131;\n",
"}\n",
"\n",
".xr-wrap {\n",
" display: block;\n",
" min-width: 300px;\n",
" max-width: 700px;\n",
"}\n",
"\n",
".xr-text-repr-fallback {\n",
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n",
" display: none;\n",
"}\n",
"\n",
".xr-header {\n",
" padding-top: 6px;\n",
" padding-bottom: 6px;\n",
" margin-bottom: 4px;\n",
" border-bottom: solid 1px var(--xr-border-color);\n",
"}\n",
"\n",
".xr-header > div,\n",
".xr-header > ul {\n",
" display: inline;\n",
" margin-top: 0;\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-obj-type,\n",
".xr-array-name {\n",
" margin-left: 2px;\n",
" margin-right: 10px;\n",
"}\n",
"\n",
".xr-obj-type {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-sections {\n",
" padding-left: 0 !important;\n",
" display: grid;\n",
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n",
"}\n",
"\n",
".xr-section-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-section-item input {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-item input + label {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label {\n",
" cursor: pointer;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label:hover {\n",
" color: var(--xr-font-color0);\n",
"}\n",
"\n",
".xr-section-summary {\n",
" grid-column: 1;\n",
" color: var(--xr-font-color2);\n",
" font-weight: 500;\n",
"}\n",
"\n",
".xr-section-summary > span {\n",
" display: inline-block;\n",
" padding-left: 0.5em;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-summary-in + label:before {\n",
" display: inline-block;\n",
" content: '►';\n",
" font-size: 11px;\n",
" width: 15px;\n",
" text-align: center;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label:before {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label:before {\n",
" content: '▼';\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label > span {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-summary,\n",
".xr-section-inline-details {\n",
" padding-top: 4px;\n",
" padding-bottom: 4px;\n",
"}\n",
"\n",
".xr-section-inline-details {\n",
" grid-column: 2 / -1;\n",
"}\n",
"\n",
".xr-section-details {\n",
" display: none;\n",
" grid-column: 1 / -1;\n",
" margin-bottom: 5px;\n",
"}\n",
"\n",
".xr-section-summary-in:checked ~ .xr-section-details {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-array-wrap {\n",
" grid-column: 1 / -1;\n",
" display: grid;\n",
" grid-template-columns: 20px auto;\n",
"}\n",
"\n",
".xr-array-wrap > label {\n",
" grid-column: 1;\n",
" vertical-align: top;\n",
"}\n",
"\n",
".xr-preview {\n",
" color: var(--xr-font-color3);\n",
"}\n",
"\n",
".xr-array-preview,\n",
".xr-array-data {\n",
" padding: 0 5px !important;\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-array-data,\n",
".xr-array-in:checked ~ .xr-array-preview {\n",
" display: none;\n",
"}\n",
"\n",
".xr-array-in:checked ~ .xr-array-data,\n",
".xr-array-preview {\n",
" display: inline-block;\n",
"}\n",
"\n",
".xr-dim-list {\n",
" display: inline-block !important;\n",
" list-style: none;\n",
" padding: 0 !important;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list li {\n",
" display: inline-block;\n",
" padding: 0;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list:before {\n",
" content: '(';\n",
"}\n",
"\n",
".xr-dim-list:after {\n",
" content: ')';\n",
"}\n",
"\n",
".xr-dim-list li:not(:last-child):after {\n",
" content: ',';\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-has-index {\n",
" font-weight: bold;\n",
"}\n",
"\n",
".xr-var-list,\n",
".xr-var-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-var-item > div,\n",
".xr-var-item label,\n",
".xr-var-item > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-even);\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-var-item > .xr-var-name:hover span {\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-var-list > li:nth-child(odd) > div,\n",
".xr-var-list > li:nth-child(odd) > label,\n",
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-odd);\n",
"}\n",
"\n",
".xr-var-name {\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-var-dims {\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-var-dtype {\n",
" grid-column: 3;\n",
" text-align: right;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-preview {\n",
" grid-column: 4;\n",
"}\n",
"\n",
".xr-var-name,\n",
".xr-var-dims,\n",
".xr-var-dtype,\n",
".xr-preview,\n",
".xr-attrs dt {\n",
" white-space: nowrap;\n",
" overflow: hidden;\n",
" text-overflow: ellipsis;\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-var-name:hover,\n",
".xr-var-dims:hover,\n",
".xr-var-dtype:hover,\n",
".xr-attrs dt:hover {\n",
" overflow: visible;\n",
" width: auto;\n",
" z-index: 1;\n",
"}\n",
"\n",
".xr-var-attrs,\n",
".xr-var-data {\n",
" display: none;\n",
" background-color: var(--xr-background-color) !important;\n",
" padding-bottom: 5px !important;\n",
"}\n",
"\n",
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n",
".xr-var-data-in:checked ~ .xr-var-data {\n",
" display: block;\n",
"}\n",
"\n",
".xr-var-data > table {\n",
" float: right;\n",
"}\n",
"\n",
".xr-var-name span,\n",
".xr-var-data,\n",
".xr-attrs {\n",
" padding-left: 25px !important;\n",
"}\n",
"\n",
".xr-attrs,\n",
".xr-var-attrs,\n",
".xr-var-data {\n",
" grid-column: 1 / -1;\n",
"}\n",
"\n",
"dl.xr-attrs {\n",
" padding: 0;\n",
" margin: 0;\n",
" display: grid;\n",
" grid-template-columns: 125px auto;\n",
"}\n",
"\n",
".xr-attrs dt, dd {\n",
" padding: 0;\n",
" margin: 0;\n",
" float: left;\n",
" padding-right: 10px;\n",
" width: auto;\n",
"}\n",
"\n",
".xr-attrs dt {\n",
" font-weight: normal;\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-attrs dt:hover span {\n",
" display: inline-block;\n",
" background: var(--xr-background-color);\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-attrs dd {\n",
" grid-column: 2;\n",
" white-space: pre-wrap;\n",
" word-break: break-all;\n",
"}\n",
"\n",
".xr-icon-database,\n",
".xr-icon-file-text2 {\n",
" display: inline-block;\n",
" vertical-align: middle;\n",
" width: 1em;\n",
" height: 1.5em !important;\n",
" stroke-width: 0;\n",
" stroke: currentColor;\n",
" fill: currentColor;\n",
"}\n",
"</style><pre class='xr-text-repr-fallback'>&lt;xarray.Dataset&gt;\n",
"Dimensions: (covariates: 3, outcomes: 1, samples: 1000, variants: 1000)\n",
"Dimensions without coordinates: covariates, outcomes, samples, variants\n",
"Data variables:\n",
" XC (samples, covariates) float32 dask.array&lt;chunksize=(1000, 3), meta=np.ndarray&gt;\n",
" XL (variants, samples) float16 dask.array&lt;chunksize=(1000, 1000), meta=np.ndarray&gt;\n",
" Y (samples, outcomes) float32 dask.array&lt;chunksize=(1000, 1), meta=np.ndarray&gt;</pre><div class='xr-wrap' hidden><div class='xr-header'><div class='xr-obj-type'>xarray.Dataset</div></div><ul class='xr-sections'><li class='xr-section-item'><input id='section-df9375a3-0f92-426f-97f7-9ce3445c34e6' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-df9375a3-0f92-426f-97f7-9ce3445c34e6' class='xr-section-summary' title='Expand/collapse section'>Dimensions:</label><div class='xr-section-inline-details'><ul class='xr-dim-list'><li><span>covariates</span>: 3</li><li><span>outcomes</span>: 1</li><li><span>samples</span>: 1000</li><li><span>variants</span>: 1000</li></ul></div><div class='xr-section-details'></div></li><li class='xr-section-item'><input id='section-a8e74f22-476b-4b35-af4f-e03b3007657f' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-a8e74f22-476b-4b35-af4f-e03b3007657f' class='xr-section-summary' title='Expand/collapse section'>Coordinates: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'></ul></div></li><li class='xr-section-item'><input id='section-895f9ba7-a717-4082-9895-f438996883db' class='xr-section-summary-in' type='checkbox' checked><label for='section-895f9ba7-a717-4082-9895-f438996883db' class='xr-section-summary' >Data variables: <span>(3)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span>XC</span></div><div class='xr-var-dims'>(samples, covariates)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>dask.array&lt;chunksize=(1000, 3), meta=np.ndarray&gt;</div><input id='attrs-92d6f08c-04c4-4e26-9624-f8909fa3edd9' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-92d6f08c-04c4-4e26-9624-f8909fa3edd9' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-b226b7cc-2e00-4ee4-9c37-96fbe154fb2d' class='xr-var-data-in' type='checkbox'><label for='data-b226b7cc-2e00-4ee4-9c37-96fbe154fb2d' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 12.00 kB </td> <td> 12.00 kB </td></tr>\n",
" <tr><th> Shape </th><td> (1000, 3) </td> <td> (1000, 3) </td></tr>\n",
" <tr><th> Count </th><td> 2 Tasks </td><td> 1 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >3</text>\n",
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">1000</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table></div></li><li class='xr-var-item'><div class='xr-var-name'><span>XL</span></div><div class='xr-var-dims'>(variants, samples)</div><div class='xr-var-dtype'>float16</div><div class='xr-var-preview xr-preview'>dask.array&lt;chunksize=(1000, 1000), meta=np.ndarray&gt;</div><input id='attrs-683a7a53-7c49-451c-a78f-ce8fc15f7ffb' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-683a7a53-7c49-451c-a78f-ce8fc15f7ffb' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-c546d977-1069-4ff8-9c01-af7f706f7663' class='xr-var-data-in' type='checkbox'><label for='data-c546d977-1069-4ff8-9c01-af7f706f7663' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 2.00 MB </td> <td> 2.00 MB </td></tr>\n",
" <tr><th> Shape </th><td> (1000, 1000) </td> <td> (1000, 1000) </td></tr>\n",
" <tr><th> Count </th><td> 2 Tasks </td><td> 1 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float16 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 120.000000,0.000000 120.000000,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1000</text>\n",
" <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,140.000000,60.000000)\">1000</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table></div></li><li class='xr-var-item'><div class='xr-var-name'><span>Y</span></div><div class='xr-var-dims'>(samples, outcomes)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>dask.array&lt;chunksize=(1000, 1), meta=np.ndarray&gt;</div><input id='attrs-060df05a-4dd1-428e-944c-dcac78ece985' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-060df05a-4dd1-428e-944c-dcac78ece985' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-5a1e128e-22d1-4489-9867-d30a5a870013' class='xr-var-data-in' type='checkbox'><label for='data-5a1e128e-22d1-4489-9867-d30a5a870013' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 4.00 kB </td> <td> 4.00 kB </td></tr>\n",
" <tr><th> Shape </th><td> (1000, 1) </td> <td> (1000, 1) </td></tr>\n",
" <tr><th> Count </th><td> 2 Tasks </td><td> 1 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1</text>\n",
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">1000</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table></div></li></ul></div></li><li class='xr-section-item'><input id='section-94585102-eb79-433b-9aaf-6b17e5ec3557' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-94585102-eb79-433b-9aaf-6b17e5ec3557' class='xr-section-summary' title='Expand/collapse section'>Attributes: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'></dl></div></li></ul></div></div>"
],
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (covariates: 3, outcomes: 1, samples: 1000, variants: 1000)\n",
"Dimensions without coordinates: covariates, outcomes, samples, variants\n",
"Data variables:\n",
" XC (samples, covariates) float32 dask.array<chunksize=(1000, 3), meta=np.ndarray>\n",
" XL (variants, samples) float16 dask.array<chunksize=(1000, 1000), meta=np.ndarray>\n",
" Y (samples, outcomes) float32 dask.array<chunksize=(1000, 1), meta=np.ndarray>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds = xr.open_zarr(fsspec.get_mapper(path), consolidated=True)\n",
"ds"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def gwas(XL, XC, Y):\n",
" # Add intercept\n",
" XC = da.concatenate([da.ones((XC.shape[0], 1), dtype=XC.dtype), XC], axis=1)\n",
" \n",
" # Rechunk along short axes\n",
" XC = XC.rechunk((None, -1))\n",
" Y = Y.rechunk((None, -1))\n",
" dof = Y.shape[0] - XC.shape[1] - 1\n",
" \n",
" # Apply orthogonal projection to eliminate core covariates\n",
" XLP = XL - XC @ da.linalg.lstsq(XC, XL)[0]\n",
" YP = Y - XC @ da.linalg.lstsq(XC, Y)[0]\n",
"\n",
" # Estimate coefficients for each loop covariate\n",
" XLPS = (XLP ** 2).sum(axis=0, keepdims=True).T\n",
" B = (XLP.T @ YP) / XLPS\n",
"\n",
" # Compute residuals for each loop covariate and outcome separately\n",
" YR = YP[:, np.newaxis, :] - XLP[..., np.newaxis] * B[np.newaxis, ...]\n",
" RSS = (YR ** 2).sum(axis=0)\n",
" \n",
" # Get t-statistics for coefficient estimates and match to p-values\n",
" T = B / np.sqrt(RSS / dof / XLPS)\n",
" P = da.map_blocks(\n",
" lambda t: 2 * stats.distributions.t.sf(np.abs(t), dof), T, dtype=\"float64\"\n",
" )\n",
" return xr.Dataset(dict(\n",
" beta=(('variants','outcomes'), B), \n",
" pval=(('variants','outcomes'), P)\n",
" ))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n",
"<defs>\n",
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n",
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"</symbol>\n",
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n",
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"</symbol>\n",
"</defs>\n",
"</svg>\n",
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n",
" *\n",
" */\n",
"\n",
":root {\n",
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n",
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n",
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n",
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n",
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n",
" --xr-background-color: var(--jp-layout-color0, white);\n",
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n",
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n",
"}\n",
"\n",
"html[theme=dark],\n",
"body.vscode-dark {\n",
" --xr-font-color0: rgba(255, 255, 255, 1);\n",
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n",
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n",
" --xr-border-color: #1F1F1F;\n",
" --xr-disabled-color: #515151;\n",
" --xr-background-color: #111111;\n",
" --xr-background-color-row-even: #111111;\n",
" --xr-background-color-row-odd: #313131;\n",
"}\n",
"\n",
".xr-wrap {\n",
" display: block;\n",
" min-width: 300px;\n",
" max-width: 700px;\n",
"}\n",
"\n",
".xr-text-repr-fallback {\n",
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n",
" display: none;\n",
"}\n",
"\n",
".xr-header {\n",
" padding-top: 6px;\n",
" padding-bottom: 6px;\n",
" margin-bottom: 4px;\n",
" border-bottom: solid 1px var(--xr-border-color);\n",
"}\n",
"\n",
".xr-header > div,\n",
".xr-header > ul {\n",
" display: inline;\n",
" margin-top: 0;\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-obj-type,\n",
".xr-array-name {\n",
" margin-left: 2px;\n",
" margin-right: 10px;\n",
"}\n",
"\n",
".xr-obj-type {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-sections {\n",
" padding-left: 0 !important;\n",
" display: grid;\n",
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n",
"}\n",
"\n",
".xr-section-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-section-item input {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-item input + label {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label {\n",
" cursor: pointer;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label:hover {\n",
" color: var(--xr-font-color0);\n",
"}\n",
"\n",
".xr-section-summary {\n",
" grid-column: 1;\n",
" color: var(--xr-font-color2);\n",
" font-weight: 500;\n",
"}\n",
"\n",
".xr-section-summary > span {\n",
" display: inline-block;\n",
" padding-left: 0.5em;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-summary-in + label:before {\n",
" display: inline-block;\n",
" content: '►';\n",
" font-size: 11px;\n",
" width: 15px;\n",
" text-align: center;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label:before {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label:before {\n",
" content: '▼';\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label > span {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-summary,\n",
".xr-section-inline-details {\n",
" padding-top: 4px;\n",
" padding-bottom: 4px;\n",
"}\n",
"\n",
".xr-section-inline-details {\n",
" grid-column: 2 / -1;\n",
"}\n",
"\n",
".xr-section-details {\n",
" display: none;\n",
" grid-column: 1 / -1;\n",
" margin-bottom: 5px;\n",
"}\n",
"\n",
".xr-section-summary-in:checked ~ .xr-section-details {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-array-wrap {\n",
" grid-column: 1 / -1;\n",
" display: grid;\n",
" grid-template-columns: 20px auto;\n",
"}\n",
"\n",
".xr-array-wrap > label {\n",
" grid-column: 1;\n",
" vertical-align: top;\n",
"}\n",
"\n",
".xr-preview {\n",
" color: var(--xr-font-color3);\n",
"}\n",
"\n",
".xr-array-preview,\n",
".xr-array-data {\n",
" padding: 0 5px !important;\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-array-data,\n",
".xr-array-in:checked ~ .xr-array-preview {\n",
" display: none;\n",
"}\n",
"\n",
".xr-array-in:checked ~ .xr-array-data,\n",
".xr-array-preview {\n",
" display: inline-block;\n",
"}\n",
"\n",
".xr-dim-list {\n",
" display: inline-block !important;\n",
" list-style: none;\n",
" padding: 0 !important;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list li {\n",
" display: inline-block;\n",
" padding: 0;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list:before {\n",
" content: '(';\n",
"}\n",
"\n",
".xr-dim-list:after {\n",
" content: ')';\n",
"}\n",
"\n",
".xr-dim-list li:not(:last-child):after {\n",
" content: ',';\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-has-index {\n",
" font-weight: bold;\n",
"}\n",
"\n",
".xr-var-list,\n",
".xr-var-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-var-item > div,\n",
".xr-var-item label,\n",
".xr-var-item > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-even);\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-var-item > .xr-var-name:hover span {\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-var-list > li:nth-child(odd) > div,\n",
".xr-var-list > li:nth-child(odd) > label,\n",
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-odd);\n",
"}\n",
"\n",
".xr-var-name {\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-var-dims {\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-var-dtype {\n",
" grid-column: 3;\n",
" text-align: right;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-preview {\n",
" grid-column: 4;\n",
"}\n",
"\n",
".xr-var-name,\n",
".xr-var-dims,\n",
".xr-var-dtype,\n",
".xr-preview,\n",
".xr-attrs dt {\n",
" white-space: nowrap;\n",
" overflow: hidden;\n",
" text-overflow: ellipsis;\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-var-name:hover,\n",
".xr-var-dims:hover,\n",
".xr-var-dtype:hover,\n",
".xr-attrs dt:hover {\n",
" overflow: visible;\n",
" width: auto;\n",
" z-index: 1;\n",
"}\n",
"\n",
".xr-var-attrs,\n",
".xr-var-data {\n",
" display: none;\n",
" background-color: var(--xr-background-color) !important;\n",
" padding-bottom: 5px !important;\n",
"}\n",
"\n",
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n",
".xr-var-data-in:checked ~ .xr-var-data {\n",
" display: block;\n",
"}\n",
"\n",
".xr-var-data > table {\n",
" float: right;\n",
"}\n",
"\n",
".xr-var-name span,\n",
".xr-var-data,\n",
".xr-attrs {\n",
" padding-left: 25px !important;\n",
"}\n",
"\n",
".xr-attrs,\n",
".xr-var-attrs,\n",
".xr-var-data {\n",
" grid-column: 1 / -1;\n",
"}\n",
"\n",
"dl.xr-attrs {\n",
" padding: 0;\n",
" margin: 0;\n",
" display: grid;\n",
" grid-template-columns: 125px auto;\n",
"}\n",
"\n",
".xr-attrs dt, dd {\n",
" padding: 0;\n",
" margin: 0;\n",
" float: left;\n",
" padding-right: 10px;\n",
" width: auto;\n",
"}\n",
"\n",
".xr-attrs dt {\n",
" font-weight: normal;\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-attrs dt:hover span {\n",
" display: inline-block;\n",
" background: var(--xr-background-color);\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-attrs dd {\n",
" grid-column: 2;\n",
" white-space: pre-wrap;\n",
" word-break: break-all;\n",
"}\n",
"\n",
".xr-icon-database,\n",
".xr-icon-file-text2 {\n",
" display: inline-block;\n",
" vertical-align: middle;\n",
" width: 1em;\n",
" height: 1.5em !important;\n",
" stroke-width: 0;\n",
" stroke: currentColor;\n",
" fill: currentColor;\n",
"}\n",
"</style><pre class='xr-text-repr-fallback'>&lt;xarray.Dataset&gt;\n",
"Dimensions: (outcomes: 1, variants: 1000)\n",
"Dimensions without coordinates: outcomes, variants\n",
"Data variables:\n",
" beta (variants, outcomes) float32 dask.array&lt;chunksize=(652, 1), meta=np.ndarray&gt;\n",
" pval (variants, outcomes) float64 dask.array&lt;chunksize=(652, 1), meta=np.ndarray&gt;</pre><div class='xr-wrap' hidden><div class='xr-header'><div class='xr-obj-type'>xarray.Dataset</div></div><ul class='xr-sections'><li class='xr-section-item'><input id='section-8b8771dc-cc40-4942-b461-0b15191e8f9e' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-8b8771dc-cc40-4942-b461-0b15191e8f9e' class='xr-section-summary' title='Expand/collapse section'>Dimensions:</label><div class='xr-section-inline-details'><ul class='xr-dim-list'><li><span>outcomes</span>: 1</li><li><span>variants</span>: 1000</li></ul></div><div class='xr-section-details'></div></li><li class='xr-section-item'><input id='section-0eab2007-4bc6-4ffb-a7e0-e25b85c65b28' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-0eab2007-4bc6-4ffb-a7e0-e25b85c65b28' class='xr-section-summary' title='Expand/collapse section'>Coordinates: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'></ul></div></li><li class='xr-section-item'><input id='section-c5fb0e35-0611-452f-865d-ab1ed783a352' class='xr-section-summary-in' type='checkbox' checked><label for='section-c5fb0e35-0611-452f-865d-ab1ed783a352' class='xr-section-summary' >Data variables: <span>(2)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span>beta</span></div><div class='xr-var-dims'>(variants, outcomes)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>dask.array&lt;chunksize=(652, 1), meta=np.ndarray&gt;</div><input id='attrs-85dbf599-c9ea-4f28-a9b4-d40a91538534' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-85dbf599-c9ea-4f28-a9b4-d40a91538534' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-dec04547-fde4-42e0-9da9-6958e5cd6231' class='xr-var-data-in' type='checkbox'><label for='data-dec04547-fde4-42e0-9da9-6958e5cd6231' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 4.00 kB </td> <td> 2.61 kB </td></tr>\n",
" <tr><th> Shape </th><td> (1000, 1) </td> <td> (652, 1) </td></tr>\n",
" <tr><th> Count </th><td> 55 Tasks </td><td> 2 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"78\" x2=\"25\" y2=\"78\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1</text>\n",
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">1000</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table></div></li><li class='xr-var-item'><div class='xr-var-name'><span>pval</span></div><div class='xr-var-dims'>(variants, outcomes)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>dask.array&lt;chunksize=(652, 1), meta=np.ndarray&gt;</div><input id='attrs-c2201045-0ebd-42ef-86a8-96f28bf79ef7' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-c2201045-0ebd-42ef-86a8-96f28bf79ef7' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-881f3a2a-bb18-4a47-94ea-1f65b0a61e1f' class='xr-var-data-in' type='checkbox'><label for='data-881f3a2a-bb18-4a47-94ea-1f65b0a61e1f' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 8.00 kB </td> <td> 5.22 kB </td></tr>\n",
" <tr><th> Shape </th><td> (1000, 1) </td> <td> (652, 1) </td></tr>\n",
" <tr><th> Count </th><td> 80 Tasks </td><td> 2 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float64 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"78\" x2=\"25\" y2=\"78\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1</text>\n",
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">1000</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table></div></li></ul></div></li><li class='xr-section-item'><input id='section-79729e83-be6f-45de-a391-ab17871e91fc' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-79729e83-be6f-45de-a391-ab17871e91fc' class='xr-section-summary' title='Expand/collapse section'>Attributes: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'></dl></div></li></ul></div></div>"
],
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (outcomes: 1, variants: 1000)\n",
"Dimensions without coordinates: outcomes, variants\n",
"Data variables:\n",
" beta (variants, outcomes) float32 dask.array<chunksize=(652, 1), meta=np.ndarray>\n",
" pval (variants, outcomes) float64 dask.array<chunksize=(652, 1), meta=np.ndarray>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Define the GWAS regressions\n",
"dsr = gwas(\n",
" # Note: This (the largest) array needs to be rechunked due to scalability \n",
" # issues with da.matmul, specifically https://github.com/dask/dask/pull/6924.\n",
" # See here for more details:\n",
" # https://github.com/pystatgen/sgkit/issues/390#issuecomment-730660134\n",
" ds.XL.data.rechunk((652, 5792)).T.astype('f4'), \n",
" ds.XC.data, \n",
" ds.Y.data\n",
")\n",
"dsr"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Results saved to gs://sgkit-dev/optimization/gwas/sim_res_1000_1000_3.zarr\n"
]
}
],
"source": [
"# Compute and save betas/p-values\n",
"output_path = f\"gs://sgkit-dev/optimization/gwas/sim_res_{n}_{m}_{c}.zarr\"\n",
"dsr.to_zarr(fsspec.get_mapper(output_path), mode='w', consolidated=True)\n",
"print(f'Results saved to {output_path}')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment