Skip to content

Instantly share code, notes, and snippets.

@wdecoster
Created January 8, 2019 21:57
Show Gist options
  • Save wdecoster/bcebd528dea8fbebb435594c85a05b3d to your computer and use it in GitHub Desktop.
Save wdecoster/bcebd528dea8fbebb435594c85a05b3d to your computer and use it in GitHub Desktop.
from argparse import ArgumentParser
from Bio import SeqIO
from collections import Counter, defaultdict
import gzip
def main():
    """Tally pattern-anchored sequences in a gzipped FASTQ and print the top 20.

    For every pattern given on the command line, each read of the FASTQ file
    is passed to ``get_pattern`` with a window length of 22. Every truthy
    result is counted, and the 20 most frequent sequences are printed as
    tab-separated ``sequence`` and ``count`` followed by ``x``.
    """
    args = get_args()
    primers = Counter()
    for pattern in args.pattern:
        # The file is re-read once per pattern; the context manager makes sure
        # each gzip handle is closed instead of being left to the garbage
        # collector.
        with gzip.open(args.fastq, "rt") as handle:
            for read in SeqIO.parse(handle, "fastq"):
                primer = get_pattern(str(read.seq), pattern, length=22)
                if primer:
                    primers[primer] += 1
    for sequence, number in primers.most_common(n=20):
        print("{}\t{}x".format(sequence, number))
def get_pattern(seq, pattern, length):
    """Return the ``length`` characters of ``seq`` ending at the first match.

    Args:
        seq: String to search.
        pattern: Substring to look for; only its first occurrence is used.
        length: Size of the returned window, counted backwards from the last
            character of the match.

    Returns:
        The slice of ``seq`` that is ``length`` characters long and ends on
        the last character of the first ``pattern`` occurrence, or ``None``
        when ``pattern`` does not occur or when the window would start before
        the beginning of ``seq``.
    """
    pos = seq.find(pattern)
    if pos == -1:
        return None
    start = pos - (length - len(pattern))
    if start < 0:
        # A negative start would be read by Python as an offset from the END
        # of seq, producing an empty or unrelated slice instead of the bases
        # upstream of the match.
        return None
    return seq[start:pos + len(pattern)]
def get_args():
    """Parse the command-line options of the script.

    Returns:
        The ``argparse.Namespace`` with ``pattern`` (a list of one or more
        strings) and ``fastq`` (the path given to ``--fastq``).
    """
    parser = ArgumentParser(description="Get primers based on a fastq www.biostars.org/p/356286")
    # nargs="+" rather than "*": a bare `--pattern` with no values would
    # otherwise parse to an empty list and the run would print nothing.
    parser.add_argument("--pattern", help="pattern(s) to search for", nargs="+", required=True)
    parser.add_argument("--fastq", help="gzip compressed fastq file", required=True)
    return parser.parse_args()
# Run the tally only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment