Created
February 7, 2020 11:23
-
-
Save thanhleviet/b3d2b096d0781bab74f94a12310253e2 to your computer and use it in GitHub Desktop.
Generate a sample sheet (SampleList.csv) for IRIDA
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Author: thanh le viet 12-2019 | |
import pathlib | |
import click | |
import re | |
# Matches the "_S<number>" marker in a fastq file name; everything before the
# first match is used as the sample ID.
_SAMPLE_INDEX = re.compile(r"_S[0-9]{1,3}")


def _sample_id(fastq_name):
    """Return the part of *fastq_name* before the first ``_S<number>`` marker."""
    return _SAMPLE_INDEX.split(fastq_name)[0]


def _numeric_sort_key(fastq_name):
    """Return a sort key based on the integer after the last ``_`` of the sample ID.

    Names whose sample ID does not end in an integer are placed after all
    numeric ones (keeping their relative order) instead of raising
    ``ValueError`` and aborting the whole run.
    """
    suffix = _sample_id(fastq_name).split("_")[-1]
    try:
        return (0, int(suffix))
    except ValueError:
        return (1, 0)


def _reverse_read_name(forward_name):
    """Return *forward_name* with its last ``_R1`` replaced by ``_R2``.

    Only the last occurrence is replaced so that a sample name which itself
    contains ``_R1`` is left intact. A name without ``_R1`` is returned
    unchanged.
    """
    head, marker, tail = forward_name.rpartition("_R1")
    if not marker:
        return forward_name
    return f"{head}_R2{tail}"


@click.command()
@click.option('--pid', default='', help='Project ID')
@click.option('--pe/--se', default=True, help='Paired-End reads?')
@click.option('--sort/--no-sort', default=False, help='Sort samples [Experiment]')
@click.option('--path', default='', help='Path to fastq files')
@click.option('--pattern', default='*_R1_001.fastq.gz',
              help='Glob pattern matching the forward-read fastq files')
def gen_sample_list(path, pattern, pid, pe, sort):
    """
    Write SampleList.csv into PATH, with one row per fastq file found.

    Files are searched recursively under PATH with PATTERN; only the file
    names (not their sub-directories) are written to the sheet.

    :param sort: Sort samples by the integer that ends the sample ID
    :param path: path to the folder of fastq files
    :param pattern: glob pattern to scan, by default: *_R1_001.fastq.gz
    :param pid: IRIDA project ID, you need to manually create in IRIDA control management
    :param pe: Are the reads paired-end? If not, File_Reverse is left empty
    :return: A csv file SampleList.csv
    """
    root = pathlib.Path(path)
    # Materialise the listing before the output file is created.
    fastq_names = [fq.name for fq in root.rglob(pattern)]
    if sort:
        fastq_names = sorted(fastq_names, key=_numeric_sort_key)

    sample_file = root / 'SampleList.csv'
    with sample_file.open(mode='w') as fh:
        fh.write('[Data]\n')
        fh.write("Sample_Name,Project_ID,File_Forward,File_Reverse\n")
        for forward in fastq_names:
            reverse = _reverse_read_name(forward) if pe else ""
            fh.write(f"{_sample_id(forward)}, {pid}, {forward}, {reverse}\n")
    print("Finish!")
# Script entry point: run the click command, which parses the command line.
if __name__ == "__main__":
    gen_sample_list()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment