Created
June 25, 2014 13:07
-
-
Save Winterflower/e5f27356c0d88ef18305 to your computer and use it in GitHub Desktop.
generating a kmer tree with python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
def kmer(sequence, k):
    """Print and return every overlapping k-mer of ``sequence``.

    A k-mer is a substring of length ``k``; consecutive k-mers are taken
    one position apart, so ``kmer("ABCD", 2)`` gives ``["AB", "BC", "CD"]``.

    Parameters:
        sequence: the string (or other sliceable sequence) to cut up.
        k: the k-mer size; must be at least 1 and strictly smaller than
           ``len(sequence)``.

    Returns:
        The list of k-mers, in order of their start position.  An empty
        list is returned (after printing a message) when ``k`` is out of
        range.
    """
    length = len(sequence)
    # A window of size zero or less would only ever yield empty strings.
    if k < 1:
        print("You specified a kmer size, which is smaller than 1")
        return []
    # if the kmer size is greater than or equal to the length of the sequence
    if k >= length:
        print("You specified a kmer size, which if greater or equal to the length of the sequence")
        return []
    # The last valid start index is length - k, hence length - k + 1 windows.
    kmers = [sequence[start:start + k] for start in range(length - k + 1)]
    print(kmers)
    return kmers
# Demo call: k (90) exceeds the sequence length, so only the warning is printed.
kmer("ABCDGFGFGFGFGFGG", k=90)
# Output all of the possible kmers from a predefined alphabet
def all_kmers(alphabet, k):
    """Return every possible k-mer that can be spelled with ``alphabet``.

    Parameters:
        alphabet: an iterable of single-letter strings, e.g. ``'ACGT'``.
        k: the k-mer size.

    Returns:
        A list of ``len(alphabet) ** k`` strings, ordered like nested loops
        over ``alphabet`` (the last position varies fastest).  An empty
        list is returned when ``k`` is smaller than 1.
    """
    # Imported locally because the file has no top-level import block.
    from itertools import product

    if k < 1:
        return []
    return ["".join(letters) for letters in product(alphabet, repeat=k)]
# returns a list of lists
def makesubtree(rootnode, number_of_child_nodes):
    """Build a subtree: the root label followed by empty child slots.

    Parameters:
        rootnode: the label stored at index 0 of the subtree.
        number_of_child_nodes: how many empty child lists to create.

    Returns:
        ``[rootnode, [], [], ...]`` with ``number_of_child_nodes`` distinct
        empty lists after the label.
    """
    subtree = [rootnode]
    # Each child gets its own list object so they can be filled independently.
    subtree.extend([] for _ in range(number_of_child_nodes))
    return subtree
def make_alphabetsubtree(alphabet):
    """Return a list holding one single-element list per letter of ``alphabet``.

    For example ``'ACGT'`` becomes ``[['A'], ['C'], ['G'], ['T']]``.
    """
    return [[letter] for letter in alphabet]
# Quick check of makesubtree: a '/' root with four empty child slots.
print(makesubtree('/', 4))

# Build the main tree the same way and show it.
maintree = makesubtree('/', 4)
print(maintree)

# Hang a one-letter subtree off the end of the main tree.
sub1 = ['A']
maintree.append(sub1)
print(maintree)

# Build the alphabet subtree, show it, then put the '/' root label in front.
sub2 = make_alphabetsubtree('ACGT')
print(sub2)
sub2.insert(0, '/')
# append alphabet subtree
def appendtotree(roottree, parent, node):
    """Append ``node`` as a child of the ``parent``-th node of ``roottree``.

    ``roottree`` is a list whose index 0 holds the root label and whose
    remaining entries are the child nodes (each itself a list), so valid
    ``parent`` values run from 1 to ``len(roottree) - 1``.

    Parameters:
        roottree: the tree to modify in place.
        parent: 1-based position of the child node to append to.
        node: the node (list) to append.

    Returns:
        ``roottree`` itself.  When ``parent`` is out of range a message is
        printed and the tree is returned unchanged.
    """
    last_parent = len(roottree) - 1
    if parent < 1 or parent > last_parent:
        print("Please select a tree level between 1 and %d" % last_parent)
        # Stop here: index 0 is the label and anything past the end does
        # not exist, so there is nothing valid to append to.
        return roottree
    roottree[parent].append(node)
    return roottree
# Attach sub1 under the first letter node of sub2 and show the result.
print(appendtotree(sub2, 1, sub1))

# Final root tree: the '/' label followed by one node per letter.
final_tree = ['/'] + make_alphabetsubtree('ACGT')

# Second level: give each of the four first-level nodes one child per letter.
for level in (1, 2, 3, 4):
    for letter in 'ATCG':
        appendtotree(final_tree, level, [letter])
print(final_tree)

# Third level: repeat the same expansion inside every first-level node.
for branch in final_tree[1:5]:
    for level in (1, 2, 3, 4):
        for letter in 'ATCG':
            appendtotree(branch, level, [letter])
print("new tree")
print("")
print(final_tree)
print("=======NEW SECTION=========")
# Smaller test tree over the three-letter alphabet 'ACD'.
test_tree = ['/'] + make_alphabetsubtree('ACD')
for slot in (1, 2, 3):
    for letter in 'ACD':
        appendtotree(test_tree, slot, [letter])
print(test_tree)
print(test_tree[1:4])
print("========PARENTNODE===========")
# Walk each first-level node: its label first, then every entry it holds.
for parentnode in test_tree[1:4]:
    print(parentnode[0])
    for childnode in parentnode:
        print(childnode)
print("=================TESTING CONCANTENATION================")
# Dictionary mapping each first-level letter to the 2-mers that start with it.
kmer_dict = {}
for parentnode in test_tree[1:4]:
    prefix = parentnode[0]
    kmer_dict[prefix] = []
    # Index 0 of a node is its own label, not a child; start at index 1 so
    # the label is not concatenated with itself as a spurious extra kmer.
    for childnode in parentnode[1:]:
        root = prefix + childnode[0]
        print(root)
        kmer_dict[prefix].append(root)
print("==================FINAL KMER DICT===========")
print(kmer_dict)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment