Created
May 23, 2017 12:33
-
-
Save geocarvalho/023f87998165f1320e13f6ff0ddb118a to your computer and use it in GitHub Desktop.
Format a sample sheet for use with bcl2fastq (reverse-complements the index2 column)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Bio.Seq import Seq | |
import pandas as pd | |
import csv | |
import sys | |
import os | |
def reverse_complement(sequence):
    """Return the reverse complement of ``sequence`` as a plain string.

    The argument is converted with ``str()`` before being wrapped in a
    Biopython ``Seq``, and the ``Seq`` result is converted back to ``str``
    before it is returned.
    """
    return str(Seq(str(sequence)).reverse_complement())
def main():
    """Write a copy of a sample sheet with ``index2`` reverse-complemented.

    The path of the input CSV is taken from the first command-line argument.
    Its first 20 rows are passed through to the output, the table that
    follows has every value of its ``index2`` column reverse-complemented,
    and the result is written next to the input as ``<name>-new.csv``.

    Exits with a usage message when no input path is given.
    """
    # Number of rows that precede the column-header row of the sample table.
    header_rows = 20

    if len(sys.argv) < 2:
        sys.exit("usage: {} SAMPLESHEET.csv".format(os.path.basename(sys.argv[0])))

    sample_sheet = os.path.abspath(sys.argv[1])
    # splitext removes only the final extension, so a name such as
    # "run.2017.csv" becomes "run.2017-new.csv" rather than "run-new.csv".
    new_arq = os.path.splitext(sample_sheet)[0] + "-new.csv"

    # Read every cell as text and disable NaN conversion so values are written
    # back exactly as they were read (no dropped leading zeros, and empty or
    # "NA" cells are not turned into NaN before being reverse-complemented).
    sample_df = pd.read_csv(
        sample_sheet,
        skiprows=header_rows,
        dtype=str,
        keep_default_na=False,
    )
    sample_df["index2"] = sample_df["index2"].apply(reverse_complement)

    # newline="" is what the csv module requires for files it reads or writes;
    # without it, line endings are translated a second time on Windows.
    with open(sample_sheet, newline="") as master, \
            open(new_arq, "w", newline="") as matched:
        cr = csv.reader(master)
        cw = csv.writer(matched)
        # Pass the leading rows through before appending the edited table.
        for _ in range(header_rows):
            cw.writerow(next(cr))
        # Using Sample_ID as the index keeps pandas from writing its own
        # numeric row index as an extra column.
        sample_df.set_index("Sample_ID").to_csv(matched)
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment