Skip to content

Instantly share code, notes, and snippets.

@alexpreynolds
Created July 15, 2022 06:51
Show Gist options
  • Save alexpreynolds/549e17359b8749b5dcf706dd6ab7beda to your computer and use it in GitHub Desktop.
Save alexpreynolds/549e17359b8749b5dcf706dd6ab7beda to your computer and use it in GitHub Desktop.
Split scores file by chromosome
#!/usr/bin/env python
import contextlib
import gzip
import sys
# Genome assembly whose chromosome table the script iterates over.
ASSEMBLY = "hg38"

# Per-assembly chromosome table.  Each entry is a dict with the chromosome
# label under "name" (int for the numbered chromosomes, str for X and Y) and
# its size under "size" (presumably the length in base pairs).
chromosome_sizes = {
    "hg38": [
        {"name": label, "size": length}
        for label, length in (
            (1, 248956422),
            (2, 242193529),
            (3, 198295559),
            (4, 190214555),
            (5, 181538259),
            (6, 170805979),
            (7, 159345973),
            (8, 145138636),
            (9, 138394717),
            (10, 133797422),
            (11, 135086622),
            (12, 133275309),
            (13, 114364328),
            (14, 107043718),
            (15, 101991189),
            (16, 90338345),
            (17, 83257441),
            (18, 80373285),
            (19, 58617616),
            (20, 64444167),
            (21, 46709983),
            (22, 50818468),
            ("X", 156040895),
            ("Y", 57227415),
        )
    ],
}
def split_scores_by_chromosome(chrom_names, in_path='scores.txt.gz',
                               out_template='scores.{}.txt.gz'):
    """Split a gzipped, tab-delimited scores file into one file per chromosome.

    One gzip output file is created up front for every entry of
    ``chrom_names`` (so a chromosome without any input line still gets an
    empty archive).  Each input line is then copied verbatim to the output
    whose chromosome matches the line's first tab-separated column.

    Args:
        chrom_names: Iterable of chromosome labels without the ``chr``
            prefix (e.g. ``1``, ``22``, ``"X"``).
        in_path: Path of the gzipped input file.
        out_template: ``str.format`` template for output paths; ``{}`` is
            replaced with the prefixed name, e.g. ``chr1``.

    Returns:
        A dict mapping each first-column value that had no output file to
        the number of lines skipped for it (empty when nothing was skipped).
    """
    skipped = {}

    # ExitStack guarantees that every handle opened so far is closed, even
    # when a later open or a write fails; otherwise the gzip outputs could
    # be left truncated.
    with contextlib.ExitStack() as stack:
        out_handles = {}
        for name in chrom_names:
            chrom = 'chr{}'.format(name)
            handle = stack.enter_context(
                gzip.open(out_template.format(chrom), 'wb'))
            # Keys are bytes so input lines never need to be decoded.
            out_handles[chrom.encode()] = handle

        in_handle = stack.enter_context(gzip.open(in_path, 'rb'))
        for line in in_handle:
            # Only the first column is needed; stop splitting after it.
            chrom = line.rstrip().split(b'\t', 1)[0]
            handle = out_handles.get(chrom)
            if handle is None:
                # Unlisted contig (e.g. chrM, alt scaffolds) or blank line:
                # skip it instead of aborting the whole split.
                label = chrom.decode(errors='replace')
                skipped[label] = skipped.get(label, 0) + 1
                continue
            handle.write(line)

    for label, count in sorted(skipped.items()):
        sys.stderr.write(
            'Warning: skipped {} line(s) for unlisted chromosome {!r}\n'
            .format(count, label))
    return skipped


if __name__ == '__main__':
    split_scores_by_chromosome(
        cse['name'] for cse in chromosome_sizes[ASSEMBLY])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment