Skip to content

Instantly share code, notes, and snippets.

@nickloman
Created November 20, 2012 15:18
Show Gist options
  • Select an option

  • Save nickloman/4118536 to your computer and use it in GitHub Desktop.

Select an option

Save nickloman/4118536 to your computer and use it in GitHub Desktop.
Extract random pairs from a FASTA file and output fake FASTQ
from Bio import SeqIO
import sys
import random
rec = SeqIO.parse(open(sys.argv[1]), "fasta").next()
insert_size = int(sys.argv[2])
rec_length = len(rec)
read_length = int(sys.argv[3])
number_seqs = int(sys.argv[4])
prefix = sys.argv[5]
fh1 = open("%s_1.fastq" % (prefix,), "w")
fh2 = open("%s_2.fastq" % (prefix,), "w")
for n in xrange(0, number_seqs):
random_pos = random.randint( 0 + insert_size, rec_length - insert_size )
insert = rec[random_pos : random_pos + insert_size]
fh = open("%s_1.fastq" % (prefix,))
print >>fh1, "@read%s/1" % (n,)
print >>fh1, str(insert[0 : read_length].seq)
print >>fh1, "+"
print >>fh1, "A" * read_length
print >>fh2, "@read%s/2" % (n,)
print >>fh2, str(insert[0-read_length :].reverse_complement().seq)
print >>fh2, "+"
print >>fh2, "A" * read_length
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment