Created
October 17, 2020 23:57
-
-
Save moritzschaefer/08e15e840c5ba2e858b4e204874549b9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Download files from SRA (using SRX accessions or anything else that
is queryable by esearch) and rename them according to a yaml file.

Example yaml file:

CXXC1:
  signal:
    - SRX373166
  control:
    - SRX373169
E2F1:
  signal:
    - SRX2897263
    - SRX2897262
  control:
'''
import os | |
import subprocess | |
import yaml | |
# Sample groups looked up under every top-level entry of the YAML file.
SAMPLE_TYPES = ('signal', 'control')


def parse_run_accessions(runinfo_text):
    """Extract SRR run accessions from ``efetch --format runinfo`` output.

    Keeps the first comma-separated field of every line whose first field
    contains ``SRR`` -- the in-process equivalent of the shell pipeline
    ``cut -d "," -f 1 | grep SRR``.

    Args:
        runinfo_text: Text printed by ``efetch --format runinfo``.

    Returns:
        List of stripped first fields containing ``SRR``, in input order.
    """
    first_fields = (
        line.split(',', 1)[0].strip() for line in runinfo_text.splitlines()
    )
    return [field for field in first_fields if 'SRR' in field]


def run_tool(args, stdin_text=''):
    """Run an external command without a shell and wait for it to finish.

    Args:
        args: Command and arguments as a list; no shell parsing happens, so
            values taken from the YAML file cannot inject shell syntax.
        stdin_text: Text fed to the command's stdin (empty by default, so
            the command sees an immediate end-of-file).

    Returns:
        The ``subprocess.CompletedProcess`` with text ``stdout``/``stderr``.

    Raises:
        OSError: If the executable cannot be started (e.g. not installed).
    """
    return subprocess.run(
        args,
        input=stdin_text,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )


def fetch_run_accessions(srx):
    """Look up the SRR runs belonging to one esearch query.

    Args:
        srx: Accession (or any other esearch query) from the YAML file.

    Returns:
        Tuple ``(srrs, stderr_text)``: the SRR accessions found and the
        combined stderr of ``esearch`` and ``efetch`` for error reporting.
    """
    search = run_tool(['esearch', '-db', 'sra', '-query', str(srx)])
    runinfo = run_tool(['efetch', '--format', 'runinfo'],
                       stdin_text=search.stdout)
    return parse_run_accessions(runinfo.stdout), search.stderr + runinfo.stderr


def download_experiment(srx):
    """Resolve one accession to its runs and download them with fasterq-dump.

    Args:
        srx: Accession (or any other esearch query) from the YAML file.

    Returns:
        Tuple ``(srrs, error)``. ``srrs`` lists the runs that were found;
        ``error`` is ``None`` when fasterq-dump exited with status 0 and a
        description of the failure otherwise.
    """
    srrs = []
    try:
        srrs, lookup_errors = fetch_run_accessions(srx)
        if not srrs:
            return srrs, f'no SRR runs found\n\n{lookup_errors}'
        # run_tool blocks until the download has finished, so the files
        # exist before anything tries to rename them.
        dump = run_tool(['fasterq-dump', '--split-files', *srrs])
    except OSError as e:
        return srrs, str(e)
    if dump.returncode != 0:
        return srrs, f'{dump.stdout}\n\n{dump.stderr}'
    return srrs, None


def rename_fastqs(srr, prefix):
    """Rename the fastq file(s) of one run in the current directory.

    A single ``{srr}.fastq`` becomes ``{prefix}_R1.fastq``. If that file
    does not exist, ``{srr}_1.fastq`` / ``{srr}_2.fastq`` become
    ``{prefix}_R1.fastq`` / ``{prefix}_R2.fastq``; a missing mate file is
    skipped instead of aborting the whole script.

    Args:
        srr: Run accession used by fasterq-dump as the file name stem.
        prefix: New file name stem.

    Returns:
        List of the new file names that were created (empty if no fastq
        file for ``srr`` was found).
    """
    try:
        os.rename(f'{srr}.fastq', f'{prefix}_R1.fastq')
    except FileNotFoundError:
        pass  # not a single-file run; try the split mate files below
    else:
        return [f'{prefix}_R1.fastq']

    renamed = []
    for mate in (1, 2):
        target = f'{prefix}_R{mate}.fastq'
        try:
            os.rename(f'{srr}_{mate}.fastq', target)
        except FileNotFoundError:
            continue
        renamed.append(target)
    return renamed


def main(samples_path='samples.yaml'):
    """Download and rename every accession listed in the YAML sample sheet.

    Args:
        samples_path: Path of the YAML file mapping a name to its
            ``signal`` / ``control`` accession lists.
    """
    with open(samples_path) as f:
        # safe_load only builds plain data; yaml.load without a Loader can
        # construct arbitrary objects and is an error on PyYAML 6.
        data = yaml.safe_load(f) or {}

    for tf, samples in data.items():
        for sample_type in SAMPLE_TYPES:
            # A key without entries (e.g. a bare "control:") parses to None.
            accessions = (samples or {}).get(sample_type) or []
            # Numbering runs across all accessions of this group; restarting
            # at 1 per accession would make later downloads overwrite
            # earlier ones.
            replicate = 0
            for srx in accessions:
                print(f'Downloading {srx}')
                srrs, error = download_experiment(srx)
                first_number = replicate + 1
                replicate += len(srrs)
                if error is not None:
                    print(f'downloading {srx} failed: {error}')
                    continue
                print(f'Succesfully downloaded {srx}')
                for number, srr in enumerate(srrs, start=first_number):
                    if not rename_fastqs(srr, f'{tf}_{sample_type}_{number}'):
                        print(f'No fastq files found for {srr}')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment