Skip to content

Instantly share code, notes, and snippets.

@alexpreynolds
Created February 16, 2021 17:48
Show Gist options
  • Save alexpreynolds/aee9f5d220e13abfe66435dc086178ed to your computer and use it in GitHub Desktop.
Save alexpreynolds/aee9f5d220e13abfe66435dc086178ed to your computer and use it in GitHub Desktop.
Create an indexed tabix file from a Pandas dataframe via subprocess
#!/usr/bin/env python
'''
Create an indexed tabix file from a Pandas dataframe
via temporary intermediate file and subprocess
'''
import os
import io
import pandas as pd
import tempfile
import subprocess
# Example input: five BED intervals (chromosome, start, end).
# The column separators are spelled as explicit "\t" escapes so the data
# stays tab-delimited even if an editor or a copy/paste converts literal
# tabs to spaces; read_csv() below splits on tabs only.
ds = io.StringIO(
    'chr1\t842320\t842327\n'
    'chr1\t842328\t842330\n'
    'chr1\t842328\t842330\n'
    'chr1\t855426\t855427\n'
    'chr1\t855739\t855740'
)
df = pd.read_csv(ds, delimiter='\t', header=None)

# Output archive and the index file that tabix writes next to it.
out_bgz_fn = "test_pd_subprocess.bed.gz"
out_index_fn = "{}.tbi".format(out_bgz_fn)

# bgzip is handed the input by file name, so the dataframe is first written
# to a named temporary file; that file is deleted when the `with` exits.
with tempfile.NamedTemporaryFile() as temp_fh:
    df.to_csv(temp_fh, sep='\t', header=False, index=False)
    # Make sure every row is on disk before a separate process opens the
    # file by name; otherwise bgzip could compress a truncated/empty input.
    temp_fh.flush()
    with open(out_bgz_fn, 'wb') as out_bgz_fh:
        # `bgzip -c` writes the compressed stream to stdout, which is
        # redirected straight into the output archive.
        res = subprocess.call(['bgzip', '-c', temp_fh.name], stdout=out_bgz_fh)

# Validate only after the archive handle is closed.
if res != 0 or os.stat(out_bgz_fn).st_size == 0:
    raise Exception("Error: Could not create bgzip archive")

# Always rebuild the index: the archive was just rewritten, so a .tbi left
# over from an earlier run would describe different data. `-f` overwrites
# an existing index, and the options are placed before the file name so the
# call does not depend on getopt argument permutation.
tabix_res = subprocess.call(['tabix', '-f', '-p', 'bed', out_bgz_fn])
index_missing = (
    not os.path.exists(out_index_fn)
    or os.stat(out_index_fn).st_size == 0
)
if tabix_res != 0 or index_missing:
    raise Exception("Error: Could not create index of bgzip archive")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment