Created
February 16, 2021 17:48
-
-
Save alexpreynolds/aee9f5d220e13abfe66435dc086178ed to your computer and use it in GitHub Desktop.
Create an indexed tabix file from a Pandas dataframe via subprocess
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python

'''
Create an indexed tabix file from a Pandas dataframe
via temporary intermediate file and subprocess

Requires the external `bgzip` and `tabix` executables (htslib) on PATH.
'''

import os
import io
import pandas as pd
import tempfile
import subprocess

# Example BED intervals (chrom, start, end), sorted by position as tabix
# requires. Tabs are written as explicit escapes so the field separator
# survives copy/paste and editors that convert tabs to spaces.
EXAMPLE_BED = (
    "chr1\t842320\t842327\n"
    "chr1\t842328\t842330\n"
    "chr1\t842328\t842330\n"
    "chr1\t855426\t855427\n"
    "chr1\t855739\t855740"
)

# Name of the bgzip-compressed output written by main().
OUT_BGZ_FN = "test_pd_subprocess.bed.gz"


def load_example_intervals():
    '''
    Parse EXAMPLE_BED into a header-less, three-column dataframe.

    Returns:
        pandas.DataFrame with integer column labels 0, 1, 2.
    '''
    return pd.read_csv(io.StringIO(EXAMPLE_BED), delimiter='\t', header=None)


def write_tabix_indexed_bed(df, out_bgz_fn):
    '''
    Write `df` as a bgzip-compressed BED file and build its tabix index.

    The dataframe is dumped (tab-separated, no header, no index) to a
    temporary file, compressed with `bgzip -c` into `out_bgz_fn`, and then
    indexed with `tabix -p bed`.

    Args:
        df: dataframe whose rows are BED records, already position-sorted.
        out_bgz_fn: path of the compressed output file to create.

    Returns:
        Path of the created index file (`out_bgz_fn` + ".tbi").

    Raises:
        RuntimeError: if bgzip or tabix exits non-zero, or leaves behind a
            missing/empty output file.
    '''
    # Text mode: to_csv() emits str. newline='' lets pandas control line
    # endings itself.
    with tempfile.NamedTemporaryFile(mode='w', newline='', suffix='.bed') as temp_fh:
        df.to_csv(temp_fh, sep='\t', header=False, index=False)
        # bgzip re-opens the file by name, so buffered rows must reach the
        # filesystem before the subprocess starts.
        temp_fh.flush()
        with open(out_bgz_fn, 'wb') as out_bgz_fh:
            res = subprocess.call(['bgzip', '-c', temp_fh.name], stdout=out_bgz_fh)
    if res != 0 or os.stat(out_bgz_fn).st_size == 0:
        raise RuntimeError("Error: Could not create bgzip archive")

    out_index_fn = "{}.tbi".format(out_bgz_fn)
    # Always rebuild with -f: an index left over from an earlier run would
    # not describe the archive that was just rewritten. Options precede the
    # filename so parsing does not rely on getopt argument permutation.
    res = subprocess.call(['tabix', '-f', '-p', 'bed', out_bgz_fn])
    if res != 0 or not os.path.exists(out_index_fn) or os.stat(out_index_fn).st_size == 0:
        raise RuntimeError("Error: Could not create index of bgzip archive")
    return out_index_fn


def main():
    '''Build the example dataframe and write it as an indexed tabix file.'''
    df = load_example_intervals()
    write_tabix_indexed_bed(df, OUT_BGZ_FN)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment