Created
August 23, 2023 14:10
-
-
Save nvictus/6acd49d53001feed268cfe3193a017b0 to your computer and use it in GitHub Desktop.
File-likes in pyoxbow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import oxbow as ox\n", | |
"import polars as pl\n", | |
"import smart_open" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## File-like on HTSlib" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><style>\n", | |
".dataframe > thead > tr > th,\n", | |
".dataframe > tbody > tr > td {\n", | |
" text-align: right;\n", | |
"}\n", | |
"</style>\n", | |
"<small>shape: (62_042, 9)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>chrom</th><th>pos</th><th>id</th><th>ref</th><th>alt</th><th>qual</th><th>filter</th><th>info</th><th>format</th></tr><tr><td>cat</td><td>i32</td><td>str</td><td>str</td><td>str</td><td>f32</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>"Y"</td><td>2655180</td><td>"rs11575897"</td><td>"G"</td><td>"A"</td><td>100.0</td><td>"PASS"</td><td>"AA=G;AC=22;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655471</td><td>""</td><td>"A"</td><td>"C"</td><td>100.0</td><td>"PASS"</td><td>"AA=A;AC=5;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655754</td><td>""</td><td>"A"</td><td>"T"</td><td>100.0</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655800</td><td>""</td><td>"A"</td><td>"G"</td><td>100.0</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655989</td><td>""</td><td>"A"</td><td>"G"</td><td>100.0</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655994</td><td>""</td><td>"C"</td><td>"G"</td><td>100.0</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2656126</td><td>""</td><td>"C"</td><td>"T"</td><td>100.0</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2656127</td><td>""</td><td>"G"</td><td>"C"</td><td>100.0</td><td>"PASS"</td><td>"AA=G;AC=14;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2656276</td><td>""</td><td>"G"</td><td>"A"</td><td>100.0</td><td>"PASS"</td><td>"AA=G;AC=2;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2656677</td><td>""</td><td>"A"</td><td>"G"</td><td>100.0</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2657176</td><td>"rs2534636"</td><td>"C"</td><td>"T"</td><td>100.0</td><td>"PASS"</td><td>"AA=T;AC=89;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2657205</td><td>""</td><td>"C"</td><td>"G"</td><td>100.0</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>"Y"</td><td>28769764</td><td>""</td><td>"C"</td><td>"T"</td><td>100.0</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28769939</td><td>""</td><td>"T"</td><td>"C"</td><td>100.0</td><td>"PASS"</td><td>"AA=T;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770057</td><td>""</td><td>"G"</td><td>"A"</td><td>100.0</td><td>"PASS"</td><td>"AA=G;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770125</td><td>""</td><td>"T"</td><td>"C"</td><td>100.0</td><td>"PASS"</td><td>"AA=T;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770156</td><td>""</td><td>"A"</td><td>"C"</td><td>100.0</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770565</td><td>""</td><td>"C"</td><td>"T"</td><td>100.0</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770633</td><td>""</td><td>"T"</td><td>"TA"</td><td>100.0</td><td>"PASS"</td><td>"AA=T;AC=4;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770651</td><td>""</td><td>"G"</td><td>"A"</td><td>100.0</td><td>"PASS"</td><td>"AA=G;AC=14;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770656</td><td>""</td><td>"A"</td><td>"G"</td><td>100.0</td><td>"PASS"</td><td>"AA=A;AC=16;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770756</td><td>""</td><td>"C"</td><td>"G"</td><td>100.0</td><td>"PASS"</td><td>"AA=C;AC=9;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770875</td><td>""</td><td>"C"</td><td>"G"</td><td>100.0</td><td>"PASS"</td><td>"AA=C;AC=3;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770931</td><td>""</td><td>"T"</td><td>"C"</td><td>100.0</td><td>"PASS"</td><td>"AA=T;AC=1;AF=0…</td><td>"GT"</td></tr></tbody></table></div>" | |
], | |
"text/plain": [ | |
"shape: (62_042, 9)\n", | |
"┌───────┬──────────┬────────────┬─────┬───┬───────┬────────┬──────────────────────────────┬────────┐\n", | |
"│ chrom ┆ pos ┆ id ┆ ref ┆ … ┆ qual ┆ filter ┆ info ┆ format │\n", | |
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", | |
"│ cat ┆ i32 ┆ str ┆ str ┆ ┆ f32 ┆ str ┆ str ┆ str │\n", | |
"╞═══════╪══════════╪════════════╪═════╪═══╪═══════╪════════╪══════════════════════════════╪════════╡\n", | |
"│ Y ┆ 2655180 ┆ rs11575897 ┆ G ┆ … ┆ 100.0 ┆ PASS ┆ AA=G;AC=22;AF=0.0178427;AN=1 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n", | |
"│ Y ┆ 2655471 ┆ ┆ A ┆ … ┆ 100.0 ┆ PASS ┆ AA=A;AC=5;AF=0.00405515;AN=1 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n", | |
"│ Y ┆ 2655754 ┆ ┆ A ┆ … ┆ 100.0 ┆ PASS ┆ AA=A;AC=1;AF=0.00081103;AN=1 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n", | |
"│ Y ┆ 2655800 ┆ ┆ A ┆ … ┆ 100.0 ┆ PASS ┆ AA=A;AC=1;AF=0.00081103;AN=1 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n", | |
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", | |
"│ Y ┆ 28770656 ┆ ┆ A ┆ … ┆ 100.0 ┆ PASS ┆ AA=A;AC=16;AF=0.0139616;AN=1 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 146;… ┆ │\n", | |
"│ Y ┆ 28770756 ┆ ┆ C ┆ … ┆ 100.0 ┆ PASS ┆ AA=C;AC=9;AF=0.00729927;AN=1 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n", | |
"│ Y ┆ 28770875 ┆ ┆ C ┆ … ┆ 100.0 ┆ PASS ┆ AA=C;AC=3;AF=0.00243309;AN=1 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n", | |
"│ Y ┆ 28770931 ┆ ┆ T ┆ … ┆ 100.0 ┆ PASS ┆ AA=T;AC=1;AF=0.00081103;AN=1 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n", | |
"└───────┴──────────┴────────────┴─────┴───┴───────┴────────┴──────────────────────────────┴────────┘" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = open(\"../fixtures/ALL.chrY.phase3_integrated_v1a.20130502.genotypes.vcf.gz\", \"rb\")\n", | |
"g = open(\"../fixtures/ALL.chrY.phase3_integrated_v1a.20130502.genotypes.vcf.gz.tbi\", \"rb\")\n", | |
"ipc = ox.read_vcf(f, index=g)\n", | |
"pl.read_ipc(ipc)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><style>\n", | |
".dataframe > thead > tr > th,\n", | |
".dataframe > tbody > tr > td {\n", | |
" text-align: right;\n", | |
"}\n", | |
"</style>\n", | |
"<small>shape: (62_042, 9)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>chrom</th><th>pos</th><th>id</th><th>ref</th><th>alt</th><th>qual</th><th>filter</th><th>info</th><th>format</th></tr><tr><td>cat</td><td>i32</td><td>str</td><td>str</td><td>str</td><td>f32</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>"Y"</td><td>2655180</td><td>"rs11575897"</td><td>"G"</td><td>"A"</td><td>34439.5</td><td>"PASS"</td><td>"AA=G;AC=22;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655471</td><td>""</td><td>"A"</td><td>"C"</td><td>7014.370117</td><td>"PASS"</td><td>"AA=A;AC=5;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655754</td><td>""</td><td>"A"</td><td>"T"</td><td>238.684006</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655800</td><td>""</td><td>"A"</td><td>"G"</td><td>6.08381</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655989</td><td>""</td><td>"A"</td><td>"G"</td><td>71.4925</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2655994</td><td>""</td><td>"C"</td><td>"G"</td><td>4.55831</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2656126</td><td>""</td><td>"C"</td><td>"T"</td><td>42.006199</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2656127</td><td>""</td><td>"G"</td><td>"C"</td><td>539.66803</td><td>"PASS"</td><td>"AA=G;AC=14;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2656276</td><td>""</td><td>"G"</td><td>"A"</td><td>153.227005</td><td>"PASS"</td><td>"AA=G;AC=2;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2656677</td><td>""</td><td>"A"</td><td>"G"</td><td>82.752701</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2657176</td><td>"rs2534636"</td><td>"C"</td><td>"T"</td><td>11169.099609</td><td>"PASS"</td><td>"AA=T;AC=89;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>2657205</td><td>""</td><td>"C"</td><td>"G"</td><td>206.488007</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>"Y"</td><td>28769764</td><td>""</td><td>"C"</td><td>"T"</td><td>1224.76001</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28769939</td><td>""</td><td>"T"</td><td>"C"</td><td>65.424599</td><td>"PASS"</td><td>"AA=T;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770057</td><td>""</td><td>"G"</td><td>"A"</td><td>22.034401</td><td>"PASS"</td><td>"AA=G;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770125</td><td>""</td><td>"T"</td><td>"C"</td><td>51.250301</td><td>"PASS"</td><td>"AA=T;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770156</td><td>""</td><td>"A"</td><td>"C"</td><td>9.07688</td><td>"PASS"</td><td>"AA=A;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770565</td><td>""</td><td>"C"</td><td>"T"</td><td>27.900499</td><td>"PASS"</td><td>"AA=C;AC=1;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770633</td><td>""</td><td>"T"</td><td>"TA"</td><td>388.31601</td><td>"PASS"</td><td>"AA=T;AC=4;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770651</td><td>""</td><td>"G"</td><td>"A"</td><td>152.626999</td><td>"PASS"</td><td>"AA=G;AC=14;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770656</td><td>""</td><td>"A"</td><td>"G"</td><td>239.639999</td><td>"PASS"</td><td>"AA=A;AC=16;AF=…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770756</td><td>""</td><td>"C"</td><td>"G"</td><td>718.411011</td><td>"PASS"</td><td>"AA=C;AC=9;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770875</td><td>""</td><td>"C"</td><td>"G"</td><td>261.834015</td><td>"PASS"</td><td>"AA=C;AC=3;AF=0…</td><td>"GT"</td></tr><tr><td>"Y"</td><td>28770931</td><td>""</td><td>"T"</td><td>"C"</td><td>59.303299</td><td>"PASS"</td><td>"AA=T;AC=1;AF=0…</td><td>"GT"</td></tr></tbody></table></div>" | |
], | |
"text/plain": [ | |
"shape: (62_042, 9)\n", | |
"┌───────┬──────────┬────────────┬─────┬───┬─────────────┬────────┬────────────────────────┬────────┐\n", | |
"│ chrom ┆ pos ┆ id ┆ ref ┆ … ┆ qual ┆ filter ┆ info ┆ format │\n", | |
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", | |
"│ cat ┆ i32 ┆ str ┆ str ┆ ┆ f32 ┆ str ┆ str ┆ str │\n", | |
"╞═══════╪══════════╪════════════╪═════╪═══╪═════════════╪════════╪════════════════════════╪════════╡\n", | |
"│ Y ┆ 2655180 ┆ rs11575897 ┆ G ┆ … ┆ 34439.5 ┆ PASS ┆ AA=G;AC=22;AF=0.017842 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 7;AN=1233;… ┆ │\n", | |
"│ Y ┆ 2655471 ┆ ┆ A ┆ … ┆ 7014.370117 ┆ PASS ┆ AA=A;AC=5;AF=0.0040551 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 5;AN=1233;… ┆ │\n", | |
"│ Y ┆ 2655754 ┆ ┆ A ┆ … ┆ 238.684006 ┆ PASS ┆ AA=A;AC=1;AF=0.0008110 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 3;AN=1233;… ┆ │\n", | |
"│ Y ┆ 2655800 ┆ ┆ A ┆ … ┆ 6.08381 ┆ PASS ┆ AA=A;AC=1;AF=0.0008110 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 3;AN=1233;… ┆ │\n", | |
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", | |
"│ Y ┆ 28770656 ┆ ┆ A ┆ … ┆ 239.639999 ┆ PASS ┆ AA=A;AC=16;AF=0.013961 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 6;AN=1146;… ┆ │\n", | |
"│ Y ┆ 28770756 ┆ ┆ C ┆ … ┆ 718.411011 ┆ PASS ┆ AA=C;AC=9;AF=0.0072992 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 7;AN=1233;… ┆ │\n", | |
"│ Y ┆ 28770875 ┆ ┆ C ┆ … ┆ 261.834015 ┆ PASS ┆ AA=C;AC=3;AF=0.0024330 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 9;AN=1233;… ┆ │\n", | |
"│ Y ┆ 28770931 ┆ ┆ T ┆ … ┆ 59.303299 ┆ PASS ┆ AA=T;AC=1;AF=0.0008110 ┆ GT │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 3;AN=1233;… ┆ │\n", | |
"└───────┴──────────┴────────────┴─────┴───┴─────────────┴────────┴────────────────────────┴────────┘" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = open(\"../fixtures/ALL.chrY.phase3_shapeit2_mvncall_integrated.20130502.genotypes.bcf\", \"rb\")\n", | |
"g = open(\"../fixtures/ALL.chrY.phase3_shapeit2_mvncall_integrated.20130502.genotypes.bcf.csi\", \"rb\")\n", | |
"ipc = ox.read_bcf(f, index=g)\n", | |
"pl.read_ipc(ipc)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><style>\n", | |
".dataframe > thead > tr > th,\n", | |
".dataframe > tbody > tr > td {\n", | |
" text-align: right;\n", | |
"}\n", | |
"</style>\n", | |
"<small>shape: (6, 12)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>qname</th><th>flag</th><th>rname</th><th>pos</th><th>mapq</th><th>cigar</th><th>rnext</th><th>pnext</th><th>tlen</th><th>seq</th><th>qual</th><th>end</th></tr><tr><td>str</td><td>u16</td><td>cat</td><td>i32</td><td>u8</td><td>str</td><td>cat</td><td>i32</td><td>i32</td><td>str</td><td>str</td><td>i32</td></tr></thead><tbody><tr><td>"SOLEXA-1GA-2_2…</td><td>0</td><td>"chr1"</td><td>10145</td><td>25</td><td>"36M"</td><td>null</td><td>null</td><td>0</td><td>"AACCCCTAACCCTA…</td><td>"hhhhHcWhhHTghc…</td><td>10180</td></tr><tr><td>"SOLEXA-1GA-2_2…</td><td>0</td><td>"chr1"</td><td>10148</td><td>25</td><td>"36M"</td><td>null</td><td>null</td><td>0</td><td>"CCCTAACCCTAACC…</td><td>"hbfhhhXUYhT_UL…</td><td>10183</td></tr><tr><td>"SOLEXA-1GA-2_2…</td><td>0</td><td>"chr1"</td><td>999994</td><td>25</td><td>"36M"</td><td>null</td><td>null</td><td>0</td><td>"GGGCCGTGGGCACA…</td><td>"hhhYcaJdhSAMXN…</td><td>1000029</td></tr><tr><td>"SOLEXA-1GA-2_2…</td><td>0</td><td>"chr1"</td><td>1002179</td><td>25</td><td>"36M"</td><td>null</td><td>null</td><td>0</td><td>"CTGCGGGCAAAGAG…</td><td>"hhhhhhcOLTIXJL…</td><td>1002214</td></tr><tr><td>"SOLEXA-1GA-2_2…</td><td>16</td><td>"chr3"</td><td>79027</td><td>25</td><td>"36M"</td><td>null</td><td>null</td><td>0</td><td>"CTCTGCGCCTGGCT…</td><td>"?IMR@RTVP\\N^hh…</td><td>79062</td></tr><tr><td>"SOLEXA-1GA-2_2…</td><td>16</td><td>"chr3"</td><td>86026</td><td>25</td><td>"36M"</td><td>null</td><td>null</td><td>0</td><td>"TGTTAGTGTGTGAT…</td><td>"IKLD>GBKOKJNBB…</td><td>86061</td></tr></tbody></table></div>" | |
], | |
"text/plain": [ | |
"shape: (6, 12)\n", | |
"┌─────────────────┬──────┬───────┬─────────┬───┬──────┬─────────────────┬────────────────┬─────────┐\n", | |
"│ qname ┆ flag ┆ rname ┆ pos ┆ … ┆ tlen ┆ seq ┆ qual ┆ end │\n", | |
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", | |
"│ str ┆ u16 ┆ cat ┆ i32 ┆ ┆ i32 ┆ str ┆ str ┆ i32 │\n", | |
"╞═════════════════╪══════╪═══════╪═════════╪═══╪══════╪═════════════════╪════════════════╪═════════╡\n", | |
"│ SOLEXA-1GA-2_2_ ┆ 0 ┆ chr1 ┆ 10145 ┆ … ┆ 0 ┆ AACCCCTAACCCTAA ┆ hhhhHcWhhHTghc ┆ 10180 │\n", | |
"│ FC20EMB:5:251:9 ┆ ┆ ┆ ┆ ┆ ┆ CCCTAACCCTAACCC ┆ KA_ONhAAEEBZE? ┆ │\n", | |
"│ 79… ┆ ┆ ┆ ┆ ┆ ┆ TA… ┆ H?CB… ┆ │\n", | |
"│ SOLEXA-1GA-2_2_ ┆ 0 ┆ chr1 ┆ 10148 ┆ … ┆ 0 ┆ CCCTAACCCTAACCC ┆ hbfhhhXUYhT_UL ┆ 10183 │\n", | |
"│ FC20EMB:5:102:2 ┆ ┆ ┆ ┆ ┆ ┆ TAACCCTAACCCTAA ┆ ZdLRTKNIMIKGLJ ┆ │\n", | |
"│ 14… ┆ ┆ ┆ ┆ ┆ ┆ CC… ┆ CHFF… ┆ │\n", | |
"│ SOLEXA-1GA-2_2_ ┆ 0 ┆ chr1 ┆ 999994 ┆ … ┆ 0 ┆ GGGCCGTGGGCACAG ┆ hhhYcaJdhSAMXN ┆ 1000029 │\n", | |
"│ FC20EMB:5:116:7 ┆ ┆ ┆ ┆ ┆ ┆ CCTCACCCAGGAAAG ┆ SMAPHBI?ECIBDC ┆ │\n", | |
"│ 91… ┆ ┆ ┆ ┆ ┆ ┆ CA… ┆ =CEA… ┆ │\n", | |
"│ SOLEXA-1GA-2_2_ ┆ 0 ┆ chr1 ┆ 1002179 ┆ … ┆ 0 ┆ CTGCGGGCAAAGAGG ┆ hhhhhhcOLTIXJL ┆ 1002214 │\n", | |
"│ FC20EMB:5:20:52 ┆ ┆ ┆ ┆ ┆ ┆ CAGGGGGAGGCCCCC ┆ QJDQPJ>L=IBDCB ┆ │\n", | |
"│ 3:… ┆ ┆ ┆ ┆ ┆ ┆ GA… ┆ >?A?… ┆ │\n", | |
"│ SOLEXA-1GA-2_2_ ┆ 16 ┆ chr3 ┆ 79027 ┆ … ┆ 0 ┆ CTCTGCGCCTGGCTA ┆ ?IMR@RTVP\\N^hh ┆ 79062 │\n", | |
"│ FC20EMB:5:276:8 ┆ ┆ ┆ ┆ ┆ ┆ ATTTTTGTATTTTTA ┆ bh]hUhhhRhhhhh ┆ │\n", | |
"│ 95… ┆ ┆ ┆ ┆ ┆ ┆ GT… ┆ hhhh… ┆ │\n", | |
"│ SOLEXA-1GA-2_2_ ┆ 16 ┆ chr3 ┆ 86026 ┆ … ┆ 0 ┆ TGTTAGTGTGTGATT ┆ IKLD>GBKOKJNBB ┆ 86061 │\n", | |
"│ FC20EMB:5:229:3 ┆ ┆ ┆ ┆ ┆ ┆ GTGTGTGTGTGTGTG ┆ LORVZehhegV_Qh ┆ │\n", | |
"│ 13… ┆ ┆ ┆ ┆ ┆ ┆ TG… ┆ hhhh… ┆ │\n", | |
"└─────────────────┴──────┴───────┴─────────┴───┴──────┴─────────────────┴────────────────┴─────────┘" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = open(\"../fixtures/sample.bam\", \"rb\")\n", | |
"g = open(\"../fixtures/sample.bam.bai\", \"rb\")\n", | |
"ipc = ox.read_bam(f, index=g)\n", | |
"pl.read_ipc(ipc)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><style>\n", | |
".dataframe > thead > tr > th,\n", | |
".dataframe > tbody > tr > td {\n", | |
" text-align: right;\n", | |
"}\n", | |
"</style>\n", | |
"<small>shape: (160_178, 12)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>qname</th><th>flag</th><th>rname</th><th>pos</th><th>mapq</th><th>cigar</th><th>rnext</th><th>pnext</th><th>tlen</th><th>seq</th><th>qual</th><th>end</th></tr><tr><td>str</td><td>u16</td><td>cat</td><td>i32</td><td>u8</td><td>str</td><td>cat</td><td>i32</td><td>i32</td><td>str</td><td>str</td><td>i32</td></tr></thead><tbody><tr><td>"SRR4435251::::…</td><td>83</td><td>"chr1"</td><td>82736</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82517</td><td>-319</td><td>"TAAAAAAGAATGCA…</td><td>"??????????????…</td><td>82835</td></tr><tr><td>"SRR4435251::::…</td><td>147</td><td>"chr1"</td><td>82742</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82511</td><td>-331</td><td>"AGAATGCAGATATT…</td><td>"??????????????…</td><td>82841</td></tr><tr><td>"SRR4435251::::…</td><td>163</td><td>"chr1"</td><td>82744</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82982</td><td>338</td><td>"AATGCAGATATTAC…</td><td>"??????????????…</td><td>82843</td></tr><tr><td>"SRR4435251::::…</td><td>83</td><td>"chr1"</td><td>82748</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82488</td><td>-360</td><td>"CAGATATTACAAAA…</td><td>"??????????????…</td><td>82847</td></tr><tr><td>"SRR4435251::::…</td><td>147</td><td>"chr1"</td><td>82749</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82506</td><td>-343</td><td>"AGATATTACAAAAC…</td><td>"??????????????…</td><td>82848</td></tr><tr><td>"SRR4435251::::…</td><td>147</td><td>"chr1"</td><td>82757</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82506</td><td>-351</td><td>"CAAAACCAGTTTAC…</td><td>"??????????????…</td><td>82856</td></tr><tr><td>"SRR4435251::::…</td><td>83</td><td>"chr1"</td><td>82759</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82487</td><td>-372</td><td>"AAACCAGTTTACAA…</td><td>"?????????5????…</td><td>82858</td></tr><tr><td>"SRR4435251::::…</td><td>147</td><td>"chr1"</td><td>82771</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82342</td><td>-529</td><td>"AAAAGTTACTAAAC…</td><td>"????????5?5???…</td><td>82870</td></tr><tr><td>"SRR4435251::::…</td><td>83</td><td>"chr1"</td><td>82781</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82360</td><td>-521</td><td>"AAACAAATAAAAAC…</td><td>"??????????????…</td><td>82880</td></tr><tr><td>"SRR4435251::::…</td><td>177</td><td>"chr1"</td><td>82781</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82816</td><td>0</td><td>"AAACAAATAAAAAC…</td><td>"?????5++?5'5+?…</td><td>82880</td></tr><tr><td>"SRR4435251::::…</td><td>83</td><td>"chr1"</td><td>82790</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82492</td><td>-398</td><td>"AAAACTACATCCCA…</td><td>"??????????????…</td><td>82889</td></tr><tr><td>"SRR4435251::::…</td><td>83</td><td>"chr1"</td><td>82790</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>82489</td><td>-401</td><td>"AAAACTACATCCCA…</td><td>"??????????????…</td><td>82889</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>"SRR4435251::::…</td><td>163</td><td>"chr1"</td><td>528781</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>529030</td><td>349</td><td>"TGATCAGTCCTTGT…</td><td>"??????????????…</td><td>528880</td></tr><tr><td>"SRR4435251::::…</td><td>163</td><td>"chr1"</td><td>528781</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>529022</td><td>341</td><td>"TGATCAGTCCTTGT…</td><td>"??????????????…</td><td>528880</td></tr><tr><td>"SRR4435251::::…</td><td>99</td><td>"chr1"</td><td>528786</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>528833</td><td>147</td><td>"AGTCCTTGTCTGGT…</td><td>"??????????????…</td><td>528885</td></tr><tr><td>"SRR4435251::::…</td><td>99</td><td>"chr1"</td><td>528786</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>529077</td><td>391</td><td>"AGTCCTTGTCTGGT…</td><td>"??????????????…</td><td>528885</td></tr><tr><td>"SRR4435251::::…</td><td>83</td><td>"chr1"</td><td>528788</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>528591</td><td>-297</td><td>"TCCTTGTCTGGTCT…</td><td>"??????????????…</td><td>528887</td></tr><tr><td>"SRR4435251::::…</td><td>99</td><td>"chr1"</td><td>528790</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>529022</td><td>332</td><td>"CTTGTCTGGTCTGG…</td><td>"???????????5??…</td><td>528889</td></tr><tr><td>"SRR4435251::::…</td><td>99</td><td>"chr1"</td><td>528790</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>529006</td><td>316</td><td>"CTTGTCTGGTCTGG…</td><td>"??????????????…</td><td>528889</td></tr><tr><td>"SRR4435251::::…</td><td>147</td><td>"chr1"</td><td>528790</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>528599</td><td>-291</td><td>"CTTGTCTGGTCTGG…</td><td>"??????????????…</td><td>528889</td></tr><tr><td>"SRR4435251::::…</td><td>83</td><td>"chr1"</td><td>528792</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>528483</td><td>-409</td><td>"TGTCTGGTCTGGCT…</td><td>"??????????????…</td><td>528891</td></tr><tr><td>"SRR4435251::::…</td><td>83</td><td>"chr1"</td><td>528800</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>528695</td><td>-205</td><td>"CTGGCTCTGCCCCA…</td><td>"??5????????5?5…</td><td>528899</td></tr><tr><td>"SRR4435251::::…</td><td>147</td><td>"chr1"</td><td>528803</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>528567</td><td>-336</td><td>"GCTCTGCCCCACTC…</td><td>"????????5?5???…</td><td>528902</td></tr><tr><td>"SRR4435251::::…</td><td>99</td><td>"chr1"</td><td>528805</td><td>0</td><td>"100M"</td><td>"chr1"</td><td>528978</td><td>268</td><td>"TCTGCCCCACTCTC…</td><td>"???????+??????…</td><td>528904</td></tr></tbody></table></div>" | |
], | |
"text/plain": [ | |
"shape: (160_178, 12)\n", | |
"┌──────────────────┬──────┬───────┬────────┬───┬──────┬─────────────────┬─────────────────┬────────┐\n", | |
"│ qname ┆ flag ┆ rname ┆ pos ┆ … ┆ tlen ┆ seq ┆ qual ┆ end │\n", | |
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", | |
"│ str ┆ u16 ┆ cat ┆ i32 ┆ ┆ i32 ┆ str ┆ str ┆ i32 │\n", | |
"╞══════════════════╪══════╪═══════╪════════╪═══╪══════╪═════════════════╪═════════════════╪════════╡\n", | |
"│ SRR4435251::::31 ┆ 83 ┆ chr1 ┆ 82736 ┆ … ┆ -319 ┆ TAAAAAAGAATGCAG ┆ ??????????????? ┆ 82835 │\n", | |
"│ 3654063 ┆ ┆ ┆ ┆ ┆ ┆ ATATTACAAAACCAG ┆ ??????????????? ┆ │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ TT… ┆ ??… ┆ │\n", | |
"│ SRR4435251::::13 ┆ 147 ┆ chr1 ┆ 82742 ┆ … ┆ -331 ┆ AGAATGCAGATATTA ┆ ??????????????? ┆ 82841 │\n", | |
"│ 56039 ┆ ┆ ┆ ┆ ┆ ┆ CAAAACCAGTTTACA ┆ ??????????????? ┆ │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ AA… ┆ ??… ┆ │\n", | |
"│ SRR4435251::::27 ┆ 163 ┆ chr1 ┆ 82744 ┆ … ┆ 338 ┆ AATGCAGATATTACA ┆ ??????????????? ┆ 82843 │\n", | |
"│ 0890793 ┆ ┆ ┆ ┆ ┆ ┆ AAACCAGTTTACAAA ┆ ??????????????? ┆ │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ AG… ┆ ??… ┆ │\n", | |
"│ SRR4435251::::13 ┆ 83 ┆ chr1 ┆ 82748 ┆ … ┆ -360 ┆ CAGATATTACAAAAC ┆ ??????????????? ┆ 82847 │\n", | |
"│ 56040 ┆ ┆ ┆ ┆ ┆ ┆ CAGTTTACAAAAGTT ┆ ??????????????? ┆ │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ AC… ┆ ??… ┆ │\n", | |
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", | |
"│ SRR4435251::::45 ┆ 83 ┆ chr1 ┆ 528792 ┆ … ┆ -409 ┆ TGTCTGGTCTGGCTC ┆ ??????????????? ┆ 528891 │\n", | |
"│ 5831805 ┆ ┆ ┆ ┆ ┆ ┆ TGCCCCACTCTCCTT ┆ ??????????????? ┆ │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ CT… ┆ ??… ┆ │\n", | |
"│ SRR4435251::::13 ┆ 83 ┆ chr1 ┆ 528800 ┆ … ┆ -205 ┆ CTGGCTCTGCCCCAC ┆ ??5????????5?5? ┆ 528899 │\n", | |
"│ 78464 ┆ ┆ ┆ ┆ ┆ ┆ TCTCCTTCTCTCCTA ┆ ??5'55+???+55?? ┆ │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ GT… ┆ ??… ┆ │\n", | |
"│ SRR4435251::::13 ┆ 147 ┆ chr1 ┆ 528803 ┆ … ┆ -336 ┆ GCTCTGCCCCACTCT ┆ ????????5?5???? ┆ 528902 │\n", | |
"│ 78465 ┆ ┆ ┆ ┆ ┆ ┆ CCTTCTCACCTAGTT ┆ ??????????????? ┆ │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ GG… ┆ ??… ┆ │\n", | |
"│ SRR4435251::::27 ┆ 99 ┆ chr1 ┆ 528805 ┆ … ┆ 268 ┆ TCTGCCCCACTCTCC ┆ ???????+??????? ┆ 528904 │\n", | |
"│ 0862085 ┆ ┆ ┆ ┆ ┆ ┆ TTTTCACCTAGTTGG ┆ ??????????????? ┆ │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ AA… ┆ 5?… ┆ │\n", | |
"└──────────────────┴──────┴───────┴────────┴───┴──────┴─────────────────┴─────────────────┴────────┘" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = smart_open.open(\"https://oxbow-ngs.s3.us-east-2.amazonaws.com/example.bam\", \"rb\")\n", | |
"g = smart_open.open(\"https://oxbow-ngs.s3.us-east-2.amazonaws.com/example.bam.bai\", \"rb\")\n", | |
"ipc = ox.read_bam(f, index=g)\n", | |
"pl.read_ipc(ipc)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## File-like on BBI" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"f = smart_open.open(\n", | |
" \"https://oxbow-ngs.s3.us-east-2.amazonaws.com/valid.bigWig\", \"rb\"\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><style>\n", | |
".dataframe > thead > tr > th,\n", | |
".dataframe > tbody > tr > td {\n", | |
" text-align: right;\n", | |
"}\n", | |
"</style>\n", | |
"<small>shape: (100_000, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>chrom</th><th>start</th><th>end</th><th>value</th></tr><tr><td>cat</td><td>u32</td><td>u32</td><td>f32</td></tr></thead><tbody><tr><td>"chr17"</td><td>59898</td><td>59900</td><td>0.06792</td></tr><tr><td>"chr17"</td><td>59900</td><td>59947</td><td>0.16627</td></tr><tr><td>"chr17"</td><td>59947</td><td>59999</td><td>0.85137</td></tr><tr><td>"chr17"</td><td>59999</td><td>60044</td><td>0.86883</td></tr><tr><td>"chr17"</td><td>60044</td><td>60046</td><td>0.80091</td></tr><tr><td>"chr17"</td><td>60046</td><td>60072</td><td>0.70256</td></tr><tr><td>"chr17"</td><td>60072</td><td>60145</td><td>0.01746</td></tr><tr><td>"chr17"</td><td>60484</td><td>60485</td><td>0.19197</td></tr><tr><td>"chr17"</td><td>60485</td><td>60486</td><td>0.30831</td></tr><tr><td>"chr17"</td><td>60486</td><td>60493</td><td>0.40066</td></tr><tr><td>"chr17"</td><td>60493</td><td>60496</td><td>0.40898</td></tr><tr><td>"chr17"</td><td>60496</td><td>60497</td><td>0.4499</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>"chr17"</td><td>216049</td><td>216050</td><td>283.279999</td></tr><tr><td>"chr17"</td><td>216050</td><td>216051</td><td>277.049011</td></tr><tr><td>"chr17"</td><td>216051</td><td>216052</td><td>272.397003</td></tr><tr><td>"chr17"</td><td>216052</td><td>216053</td><td>265.96701</td></tr><tr><td>"chr17"</td><td>216053</td><td>216054</td><td>257.497986</td></tr><tr><td>"chr17"</td><td>216054</td><td>216055</td><td>251.712997</td></tr><tr><td>"chr17"</td><td>216055</td><td>216056</td><td>245.505005</td></tr><tr><td>"chr17"</td><td>216056</td><td>216057</td><td>238.621994</td></tr><tr><td>"chr17"</td><td>216057</td><td>216058</td><td>233.033005</td></tr><tr><td>"chr17"</td><td>216058</td><td>216059</td><td>226.184006</td></tr><tr><td>"chr17"</td><td>216059</td><td>216060</td><td>221.373993</td></tr><tr><td>"chr17"</td><td>216060</td><td>216061</td><td>216.485001</td></tr></tbody></table></div>" | |
], | |
"text/plain": [ | |
"shape: (100_000, 4)\n", | |
"┌───────┬────────┬────────┬────────────┐\n", | |
"│ chrom ┆ start ┆ end ┆ value │\n", | |
"│ --- ┆ --- ┆ --- ┆ --- │\n", | |
"│ cat ┆ u32 ┆ u32 ┆ f32 │\n", | |
"╞═══════╪════════╪════════╪════════════╡\n", | |
"│ chr17 ┆ 59898 ┆ 59900 ┆ 0.06792 │\n", | |
"│ chr17 ┆ 59900 ┆ 59947 ┆ 0.16627 │\n", | |
"│ chr17 ┆ 59947 ┆ 59999 ┆ 0.85137 │\n", | |
"│ chr17 ┆ 59999 ┆ 60044 ┆ 0.86883 │\n", | |
"│ … ┆ … ┆ … ┆ … │\n", | |
"│ chr17 ┆ 216057 ┆ 216058 ┆ 233.033005 │\n", | |
"│ chr17 ┆ 216058 ┆ 216059 ┆ 226.184006 │\n", | |
"│ chr17 ┆ 216059 ┆ 216060 ┆ 221.373993 │\n", | |
"│ chr17 ┆ 216060 ┆ 216061 ┆ 216.485001 │\n", | |
"└───────┴────────┴────────┴────────────┘" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ipc = ox.read_bigwig(f)\n", | |
"pl.read_ipc(ipc)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"g = smart_open.open(\n", | |
" \"https://oxbow-ngs.s3.us-east-2.amazonaws.com/small.bigBed\", \"rb\"\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><style>\n", | |
".dataframe > thead > tr > th,\n", | |
".dataframe > tbody > tr > td {\n", | |
" text-align: right;\n", | |
"}\n", | |
"</style>\n", | |
"<small>shape: (27, 11)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>chrom</th><th>start</th><th>end</th><th>name</th><th>score</th><th>strand</th><th>thickStart</th><th>thickEnd</th><th>reserved</th><th>ccre</th><th>classification</th></tr><tr><td>cat</td><td>u32</td><td>u32</td><td>str</td><td>u32</td><td>str</td><td>u32</td><td>u32</td><td>u32</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>"chr17"</td><td>118343</td><td>118595</td><td>"EH38E1838787"</td><td>0</td><td>"."</td><td>118343</td><td>118595</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>142029</td><td>142378</td><td>"EH38E1838788"</td><td>0</td><td>"."</td><td>142029</td><td>142378</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>143819</td><td>144169</td><td>"EH38E1838789"</td><td>0</td><td>"."</td><td>143819</td><td>144169</td><td>6218147</td><td>"DNase-only"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>156467</td><td>156703</td><td>"EH38E1838794"</td><td>0</td><td>"."</td><td>156467</td><td>156703</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>163674</td><td>163938</td><td>"EH38E1838796"</td><td>0</td><td>"."</td><td>163674</td><td>163938</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>164413</td><td>164566</td><td>"EH38E1838797"</td><td>0</td><td>"."</td><td>164413</td><td>164566</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>164585</td><td>164908</td><td>"EH38E1838798"</td><td>0</td><td>"."</td><td>164585</td><td>164908</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>177817</td><td>178148</td><td>"EH38E1838801"</td><td>0</td><td>"."</td><td>177817</td><td>178148</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>178300</td><td>178458</td><td>"EH38E1838802"</td><td>0</td><td>"."</td><td>178300</td><td>178458</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>178776</td><td>179115</td><td>"EH38E1838803"</td><td>0</td><td>"."</td><td>178776</td><td>179115</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>179132</td><td>179349</td><td>"EH38E1838804"</td><td>0</td><td>"."</td><td>179132</td><td>179349</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>179796</td><td>180018</td><td>"EH38E1838805"</td><td>0</td><td>"."</td><td>179796</td><td>180018</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>"chr17"</td><td>181270</td><td>181500</td><td>"EH38E1838809"</td><td>0</td><td>"."</td><td>181270</td><td>181500</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>181586</td><td>181802</td><td>"EH38E1838810"</td><td>0</td><td>"."</td><td>181586</td><td>181802</td><td>6218147</td><td>"DNase-only"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>182023</td><td>182199</td><td>"EH38E1838811"</td><td>0</td><td>"."</td><td>182023</td><td>182199</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>182262</td><td>182430</td><td>"EH38E1838812"</td><td>0</td><td>"."</td><td>182262</td><td>182430</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>182459</td><td>182806</td><td>"EH38E1838813"</td><td>0</td><td>"."</td><td>182459</td><td>182806</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>183025</td><td>183365</td><td>"EH38E1838814"</td><td>0</td><td>"."</td><td>183025</td><td>183365</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>185648</td><td>185892</td><td>"EH38E1838816"</td><td>0</td><td>"."</td><td>185648</td><td>185892</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>186808</td><td>187143</td><td>"EH38E1838818"</td><td>0</td><td>"."</td><td>186808</td><td>187143</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>193945</td><td>194105</td><td>"EH38E1838821"</td><td>0</td><td>"."</td><td>193945</td><td>194105</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>194418</td><td>194762</td><td>"EH38E1838822"</td><td>0</td><td>"."</td><td>194418</td><td>194762</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>195326</td><td>195486</td><td>"EH38E1838824"</td><td>0</td><td>"."</td><td>195326</td><td>195486</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr><tr><td>"chr17"</td><td>199123</td><td>199461</td><td>"EH38E1838826"</td><td>0</td><td>"."</td><td>199123</td><td>199461</td><td>225225225</td><td>"Low-DNase"</td><td>"Missing-data/P…</td></tr></tbody></table></div>" | |
], | |
"text/plain": [ | |
"shape: (27, 11)\n", | |
"┌───────┬────────┬────────┬──────────────┬───┬──────────┬───────────┬────────────┬─────────────────┐\n", | |
"│ chrom ┆ start ┆ end ┆ name ┆ … ┆ thickEnd ┆ reserved ┆ ccre ┆ classification │\n", | |
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", | |
"│ cat ┆ u32 ┆ u32 ┆ str ┆ ┆ u32 ┆ u32 ┆ str ┆ str │\n", | |
"╞═══════╪════════╪════════╪══════════════╪═══╪══════════╪═══════════╪════════════╪═════════════════╡\n", | |
"│ chr17 ┆ 118343 ┆ 118595 ┆ EH38E1838787 ┆ … ┆ 118595 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n", | |
"│ chr17 ┆ 142029 ┆ 142378 ┆ EH38E1838788 ┆ … ┆ 142378 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n", | |
"│ chr17 ┆ 143819 ┆ 144169 ┆ EH38E1838789 ┆ … ┆ 144169 ┆ 6218147 ┆ DNase-only ┆ Missing-data/Pa │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n", | |
"│ chr17 ┆ 156467 ┆ 156703 ┆ EH38E1838794 ┆ … ┆ 156703 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n", | |
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", | |
"│ chr17 ┆ 193945 ┆ 194105 ┆ EH38E1838821 ┆ … ┆ 194105 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n", | |
"│ chr17 ┆ 194418 ┆ 194762 ┆ EH38E1838822 ┆ … ┆ 194762 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n", | |
"│ chr17 ┆ 195326 ┆ 195486 ┆ EH38E1838824 ┆ … ┆ 195486 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n", | |
"│ chr17 ┆ 199123 ┆ 199461 ┆ EH38E1838826 ┆ … ┆ 199461 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n", | |
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n", | |
"└───────┴────────┴────────┴──────────────┴───┴──────────┴───────────┴────────────┴─────────────────┘" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ipc = ox.read_bigbed(g)\n", | |
"pl.read_ipc(ipc)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "oxbow", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.4" | |
}, | |
"orig_nbformat": 4 | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment