Skip to content

Instantly share code, notes, and snippets.

@moritzschaefer
Created October 17, 2020 23:57
Show Gist options
  • Save moritzschaefer/08e15e840c5ba2e858b4e204874549b9 to your computer and use it in GitHub Desktop.
Save moritzschaefer/08e15e840c5ba2e858b4e204874549b9 to your computer and use it in GitHub Desktop.
'''
Download files from SRA (using SRX accessions or anything else that
is queryable by esearch) and rename them according to a yaml file.

Example yaml file:

CXXC1:
  signal:
    - SRX373166
  control:
    - SRX373169
E2F1:
  signal:
    - SRX2897263
    - SRX2897262
  control:
'''
import os
import subprocess
import yaml
SAMPLE_TYPES = ('signal', 'control')


def parse_run_accessions(runinfo_csv):
    """Return the run accessions listed in the first column of a runinfo CSV.

    Mirrors the original ``cut -d "," -f 1 | grep SRR`` pipeline: a line is
    kept when its first comma-separated field contains ``SRR``. Duplicates are
    dropped (first occurrence wins) so no run is downloaded or renamed twice.
    """
    runs = []
    for line in runinfo_csv.splitlines():
        first_field = line.split(',', 1)[0].strip()
        if 'SRR' in first_field and first_field not in runs:
            runs.append(first_field)
    return runs


def fetch_run_accessions(query):
    """Resolve an esearch query (e.g. an SRX accession) to its SRR accessions.

    Runs ``esearch`` and feeds its output to ``efetch``; both are invoked with
    argument lists (no shell), so the query string is never shell-interpreted.

    Raises:
        subprocess.CalledProcessError: if either tool exits non-zero.
        OSError: if a tool cannot be started (e.g. not installed).
    """
    search = subprocess.run(
        ['esearch', '-db', 'sra', '-query', str(query)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        check=True,
    )
    fetch = subprocess.run(
        ['efetch', '--format', 'runinfo'],
        input=search.stdout,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        check=True,
    )
    return parse_run_accessions(fetch.stdout)


def download_runs(srrs):
    """Run ``fasterq-dump --split-files`` on *srrs* and wait for it to finish.

    Returns the ``subprocess.CompletedProcess`` (text mode) so the caller can
    inspect ``returncode``, ``stdout`` and ``stderr``.
    """
    return subprocess.run(
        ['fasterq-dump', '--split-files', *srrs],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )


def rename_fastqs(tf, sample_type, srrs, first_index=1):
    """Rename downloaded fastq files to ``{tf}_{sample_type}_{n}_R{r}.fastq``.

    For each run, a single-end file ``{srr}.fastq`` becomes ``..._R1.fastq``;
    otherwise the paired files ``{srr}_1.fastq`` / ``{srr}_2.fastq`` that exist
    become ``..._R1.fastq`` / ``..._R2.fastq``. Numbering starts at
    *first_index* so that several SRX entries of the same sample do not
    overwrite each other's files.

    Returns the index to use for the next run of the same sample.
    """
    for index, srr in enumerate(srrs, start=first_index):
        single_end = f'{srr}.fastq'
        if os.path.exists(single_end):
            os.rename(single_end, f'{tf}_{sample_type}_{index}_R1.fastq')
            continue
        mates = [r for r in (1, 2) if os.path.exists(f'{srr}_{r}.fastq')]
        if not mates:
            print(f'No fastq output found for {srr}')
        for r in mates:
            os.rename(f'{srr}_{r}.fastq',
                      f'{tf}_{sample_type}_{index}_R{r}.fastq')
    return first_index + len(srrs)


def download_sample(tf, sample_type, srx, first_index=1):
    """Download every run of *srx* and rename the results.

    Failures are reported on stdout and skipped (best effort, as in the
    original script). Returns the next free replicate index for this sample.
    """
    print(f'Downloading {srx}')
    try:
        srrs = fetch_run_accessions(srx)
        if not srrs:
            print(f'downloading {srx} failed: no SRR runs found')
            return first_index
        result = download_runs(srrs)
    except subprocess.CalledProcessError as err:
        print(f'downloading {srx} failed: {err.stdout}\n\n{err.stderr}')
        return first_index
    except OSError as err:
        # Raised when esearch/efetch/fasterq-dump cannot be started at all.
        print(f'downloading {srx} failed: {err}')
        return first_index

    if result.returncode != 0:
        print(f'downloading {srx} failed: {result.stdout}\n\n{result.stderr}')
        return first_index

    print(f'Succesfully downloaded {srx}')
    return rename_fastqs(tf, sample_type, srrs, first_index)


def main(config_path='samples.yaml'):
    """Download and rename all samples described in the yaml file."""
    with open(config_path) as f:
        # safe_load: yaml.load() without a Loader is rejected by PyYAML >= 6
        # and can construct arbitrary objects on older versions.
        data = yaml.safe_load(f) or {}

    for tf, samples in data.items():
        samples = samples or {}
        for sample_type in SAMPLE_TYPES:
            next_index = 1
            # An empty "control:" key loads as None; treat it as no samples.
            for srx in samples.get(sample_type) or []:
                next_index = download_sample(tf, sample_type, srx, next_index)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment