Skip to content

Instantly share code, notes, and snippets.

@BenLangmead
Last active December 15, 2015 22:18
Show Gist options
  • Save BenLangmead/5331666 to your computer and use it in GitHub Desktop.
Save BenLangmead/5331666 to your computer and use it in GitHub Desktop.
def neighbors1mm(kmer, alpha):
""" Generate all neighbors at Hamming distance 1 from kmer """
for j in xrange(len(kmer)-1, -1, -1):
for c in alpha:
if c == kmer[j]: continue
yield kmer[:j] + c + kmer[j+1:]
def correct1mm(read, k, kmerhist, alpha, thresh):
""" Return an error-corrected version of read. k = k-mer length
for k-mer count profile. kmerhist maps distinct kmers to their
counts. alpha is the alphabet. thresh is threshold above
which k-mers are considered "frequent". """
# Iterate over k-mers in read
for i in xrange(0, len(read)-(k-1)):
kmer = read[i:i+k]
# If k-mer is infrequent...
if kmerhist.get(kmer, 0) <= thresh:
# Look for a frequent neighbor
for newkmer in neighbors1mm(kmer, alpha):
if kmerhist.get(newkmer, 0) > thresh:
# Found a frequent neighbor; replace old kmer
# with neighbor
read = read[:i] + newkmer + read[i+k:]
break
# Return possibly-corrected read
return read
def kmerHist(reads, k):
""" Return map from distinct k-mer to # times it occurs """
kmerhist = {}
for read in reads:
for kmer in [ read[i:i+k] for i in xrange(0, len(read)-(k-1)) ]:
kmerhist[kmer] = kmerhist.get(kmer, 0) + 1
return kmerhist
def correctAll1mm(reads, k, alpha, thresh=1):
""" Return list of error-corrected reads """
khist = kmerHist(reads, k)
return [ correct1mm(read, k, khist, alpha, thresh) for read in reads ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment