Created
October 14, 2014 14:37
-
-
Save brantfaircloth/0e97c126d0dcbd015cb6 to your computer and use it in GitHub Desktop.
Easy Illumina Index Counter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
(c) 2014 Brant Faircloth || http://faircloth-lab.org/
All rights reserved.
This code is distributed under a 3-clause BSD license. Please see
LICENSE.txt for more information.
Created on 14 October 2014 09:35 CDT (-0500)
"""
import gzip
from collections import Counter

# Input file read when this module is run as a script.
FASTQ_PATH = 'Undetermined_S0_L001_R1_001.fastq.gz'


def count_barcodes(path):
    """Count the barcode field of every read header in a gzipped FASTQ file.

    The barcode is taken as the text after the last ':' in the last
    space-separated field of the header line, e.g. ``ACGT+AGATCTCG`` in
    ``@M00:1:000-A:1:1101:100:200 1:N:0:ACGT+AGATCTCG``.

    Assumes unwrapped, 4-line FASTQ records (header, sequence, '+',
    quality), so only every fourth line is inspected.

    Args:
        path: Path to a gzip-compressed FASTQ file.

    Returns:
        A ``Counter`` mapping each barcode string to its number of reads.
    """
    counts = Counter()
    # Read bytes and compare against bytes literals so the same code runs on
    # Python 2.7 and Python 3 (where 'rb' yields bytes, not str).
    with gzip.open(path, 'rb') as infile:
        for line_number, line in enumerate(infile):
            # Quality strings may legitimately begin with '@', so a bare
            # startswith('@') test would count them as headers; restrict the
            # test to the first line of each 4-line record.
            if line_number % 4 != 0 or not line.startswith(b'@'):
                continue
            header = line.decode('utf-8').strip()
            barcodes = header.split(' ')[-1]
            counts[barcodes.split(':')[-1]] += 1
    return counts


def filter_barcodes(counts, pattern):
    """Return a ``Counter`` of the entries of *counts* whose key contains *pattern*.

    Args:
        counts: Mapping of barcode string to read count.
        pattern: Substring to look for in each barcode, e.g. ``"+AGATCTCG"``
            to select one member of a dual-index pair.

    Returns:
        A new ``Counter``; *counts* is not modified.
    """
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    return Counter({
        barcode: count
        for barcode, count in counts.items()
        if pattern in barcode
    })


if __name__ == "__main__":
    barcode_combo_count = count_barcodes(FASTQ_PATH)
    # get most common 10
    print(barcode_combo_count.most_common(10))
    # get total count of all reads
    print(sum(barcode_combo_count.values()))
    # get a subset of reads matching some barcode pattern - this
    # assumes dual-indexes
    new_counter = filter_barcodes(barcode_combo_count, "+AGATCTCG")
    print(new_counter)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment