Skip to content

Instantly share code, notes, and snippets.

@Winterflower
Created June 25, 2014 13:07
Show Gist options
  • Save Winterflower/e5f27356c0d88ef18305 to your computer and use it in GitHub Desktop.
Save Winterflower/e5f27356c0d88ef18305 to your computer and use it in GitHub Desktop.
generating a kmer tree with python
# -*- coding: utf-8 -*-
def kmer(sequence, k):
    """Print and return every overlapping k-mer of ``sequence``.

    A k-mer is a window of ``k`` consecutive items; consecutive windows are
    shifted by one position.

    Args:
        sequence: A sliceable sequence (typically a string).
        k: Window size. Must be at least 1 and smaller than ``len(sequence)``.

    Returns:
        The list of k-mers in left-to-right order, or ``None`` when ``k`` is
        rejected (a message is printed in that case).
    """
    length = len(sequence)
    # A window size below 1 would only produce empty slices, so reject it.
    if k < 1:
        print("You specified a kmer size, which is smaller than 1")
        return None
    # If the kmer size is greater than or equal to the length of the sequence.
    # NOTE(review): k == len(sequence) would yield exactly one k-mer (the whole
    # sequence); it is still rejected here to keep the existing behaviour.
    if k >= length:
        print("You specified a kmer size, which if greater or equal to the length of the sequence")
        return None
    # The last window starts at index length - k.
    kmers = [sequence[start:start + k] for start in range(length - k + 1)]
    print(kmers)
    return kmers
# Demo call: k (90) is larger than the 16-character sequence, so kmer only
# prints its size warning and produces no k-mers.
kmer("ABCDGFGFGFGFGFGG",90)
#OUTPUT ALL OF THE POSSIBLE KMERS FROM A PREDEFINED ALPHABET
def all_kmers(alphabet, k):
    """Return every possible k-mer that can be spelled with ``alphabet``.

    Args:
        alphabet: Iterable of single-letter strings (e.g. ``'ACGT'``).
        k: Length of each k-mer.

    Returns:
        A list of ``len(alphabet) ** k`` strings, ordered like nested loops
        over ``alphabet`` (the rightmost position varies fastest). An empty
        list is returned when ``k`` is smaller than 1.
    """
    # Imported locally because the file has no top-level import section.
    from itertools import product

    if k < 1:
        return []
    return [''.join(letters) for letters in product(alphabet, repeat=k)]
#returns a list of lists
def makesubtree(rootnode, number_of_child_nodes):
    """Build a subtree as a list: the root first, then empty child slots.

    Args:
        rootnode: Value stored at index 0 of the returned list.
        number_of_child_nodes: How many empty child lists follow the root.

    Returns:
        ``[rootnode, [], [], ...]`` with ``number_of_child_nodes`` independent
        empty lists.
    """
    # The comprehension creates a distinct list per slot, so filling one child
    # never affects its siblings.
    return [rootnode] + [[] for _ in range(number_of_child_nodes)]
def make_alphabetsubtree(alphabet):
    """Wrap every letter of ``alphabet`` in its own single-element list.

    Args:
        alphabet: Iterable of letters (e.g. the string ``'ACGT'``).

    Returns:
        A list such as ``[['A'], ['C'], ['G'], ['T']]``; each inner list is a
        node whose first element is its letter.
    """
    return [[letter] for letter in alphabet]
# Smoke-test makesubtree: a '/' root requested with four child slots
print(makesubtree('/', 4))
# Build the main tree the same way and show it
maintree = makesubtree('/', 4)
print(maintree)
# Attach a one-letter subtree at the end of the main tree
sub1 = ['A']
maintree += [sub1]
print(maintree)
# Build the alphabet subtree, show it, then put the '/' root marker in front
sub2 = make_alphabetsubtree('ACGT')
print(sub2)
sub2[:0] = ['/']
#append alphabet subtree
def appendtotree(roottree, parent, node):
    """Append ``node`` to the child list stored at ``roottree[parent]``.

    Index 0 of ``roottree`` holds the root label, so valid parent positions
    run from 1 to ``len(roottree) - 1``.

    Args:
        roottree: List-based tree; every entry after index 0 is a list.
        parent: 1-based position of the child list that receives ``node``.
        node: The node to append.

    Returns:
        ``roottree`` itself (mutated in place). When ``parent`` is out of
        range a message is printed and the tree is returned unchanged.
    """
    # Derive the upper bound from the tree so smaller or larger alphabets work.
    max_parent = len(roottree) - 1
    if parent < 1 or parent > max_parent:
        print("Please select a tree level between 1 and %d" % max_parent)
        # Stop here: index 0 is the root label and larger indexes do not exist.
        return roottree
    roottree[parent].append(node)
    return roottree
# Hang sub1 under the first letter node of sub2 and show the result
print(appendtotree(sub2, 1, sub1))
# Make the final root tree: the '/' marker followed by one [letter] node per base
final_tree = ['/'] + make_alphabetsubtree('ACGT')
# Second level: give each of the four first-level nodes one child per letter
for slot in range(1, 5):
    for base in 'ATCG':
        appendtotree(final_tree, slot, [base])
print(final_tree)
# Third level: repeat the same expansion inside every first-level node
for branch in final_tree[1:5]:
    for slot in range(1, 5):
        for base in 'ATCG':
            appendtotree(branch, slot, [base])
print("new tree")
print("")
print(final_tree)
print("=======NEW SECTION=========")
# Make a smaller test tree over the three-letter alphabet 'ACD'
test_tree = ['/'] + make_alphabetsubtree('ACD')
# Give each of the three first-level nodes one child per letter
for slot in range(1, 4):
    for symbol in 'ACD':
        appendtotree(test_tree, slot, [symbol])
print(test_tree)
print(test_tree[1:4])
print("========PARENTNODE===========")
# Walk every first-level node: print its label, then every entry it holds
# (entry 0 is the label itself, so the label appears twice in the output)
for parent_entry in test_tree[1:4]:
    print(parent_entry[0])
    for entry in parent_entry:
        print(entry)
print("=================TESTING CONCANTENATION================")
# Make an empty dictionary to hold the kmers, keyed by first-level letter
kmer_dict = {}
for parentnode in test_tree[1:4]:
    prefix = parentnode[0]
    kmer_dict[prefix] = []
    # Start at index 1: entry 0 is the parent's own label, not a child, and
    # including it would add a duplicate self-pair (e.g. 'AA' twice).
    for childnode in parentnode[1:]:
        # Concatenate the parent letter with the child's letter
        root = prefix + childnode[0]
        print(root)
        kmer_dict[prefix].append(root)
print("==================FINAL KMER DICT===========")
print(kmer_dict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment