Skip to content

Instantly share code, notes, and snippets.

@lmiller1990
Last active December 21, 2024 13:26
Show Gist options
  • Save lmiller1990/03047ffa69cff6a202d0da8a78611e8a to your computer and use it in GitHub Desktop.
Save lmiller1990/03047ffa69cff6a202d0da8a78611e8a to your computer and use it in GitHub Desktop.
debruijn
from typing import Optional
class Node:
    """A vertex in the graph, identified by a string label.

    Attributes:
        label: The text this node stands for.
        edge: The single outgoing edge of this node, or ``None`` until one
            is attached by the caller.
    """

    def __init__(self, label: str):
        """Create a node called ``label`` that has no outgoing edge yet."""
        self.label = label
        # Forward reference: Edge is declared after Node in this file.
        self.edge: Optional["Edge"] = None
class Edge:
    """A labelled, directed link that leaves ``parent`` and may reach ``next``.

    Attributes:
        label: The text carried by this edge.
        parent: The node this edge was created for (its source).
        next: The node this edge leads to, or ``None`` while unlinked.
    """

    def __init__(self, label: str, parent: Node):
        """Create an edge owned by ``parent`` with no destination yet."""
        self.label = label
        self.parent = parent
        # The destination is filled in later by whoever links the chain.
        self.next: Optional[Node] = None
def make_graph(dna: str, k=3) -> Node:
    """Build a linear chain of nodes and edges from the k-mers of ``dna``.

    The string is scanned left to right. For every offset ``i`` the window
    ``dna[i : i + k]`` is taken; a ``Node`` labelled with the window's first
    ``k - 1`` characters is created together with an ``Edge`` labelled with
    the whole window, and the previous node's edge is pointed at the new node.

    The scan deliberately runs one offset past the last full k-mer. That final
    window is only ``k - 1`` characters long and produces the terminal node
    that the last full k-mer's edge leads to. The terminal node still owns an
    edge (labelled with the short window) whose ``next`` stays ``None``.

    Args:
        dna: The sequence to split into k-mers.
        k: Window (k-mer) length. Defaults to 3.

    Returns:
        The first node of the chain; follow ``node.edge.next`` to traverse it.

    Raises:
        AssertionError: If ``dna`` has fewer than ``k - 1`` characters, so no
            node is created (only when assertions are enabled).
    """
    first: Optional[Node] = None
    prev_node: Optional[Node] = None
    # "+ 2" rather than "+ 1": the extra iteration creates the terminal node.
    for i in range(len(dna) - k + 2):
        # Use k for the window width; a hard-coded 3 broke every k != 3.
        kmer = dna[i : i + k]
        node = Node(label=kmer[: k - 1])
        node.edge = Edge(label=kmer, parent=node)
        if prev_node is None:
            first = node
        elif prev_node.edge:
            prev_node.edge.next = node
        prev_node = node
    assert first is not None, "dna is too short to produce any node"
    return first
def group_by_node_labels(node: Node) -> dict[str, list[Node]]:
    """Bucket the nodes of a chain by their label.

    Starting at ``node``, follows ``edge.next`` links and collects every
    visited node that has an edge. Traversal stops at the first node without
    an edge, or after the node whose edge has no ``next``.

    Args:
        node: The node to start walking from.

    Returns:
        A dict mapping each label to the nodes carrying it, in visit order.
    """
    buckets: dict[str, list[Node]] = {}
    current: Optional[Node] = node
    while current and current.edge:
        buckets.setdefault(current.label, []).append(current)
        # A missing successor makes ``current`` None, which ends the loop.
        current = current.edge.next
    return buckets
def print_graph(node: Node):
    """Print one ``source -> edge -> target`` line per linked edge.

    Walks the chain from ``node`` and stops at the first node that has no
    edge or whose edge has no destination; that node is not printed.

    Args:
        node: The node to start printing from.
    """
    current: Optional[Node] = node
    while current:
        edge = current.edge
        if not edge or not edge.next:
            # Nothing further is linked, so there is no complete triple left.
            break
        print(f"{current.label} -> {edge.label} -> {edge.next.label}")
        current = edge.next
# Demo run: build the chain for a sample sequence (default k), print every
# linked edge, then report how many distinct node labels were seen.
dna = "TAATGCCATGGGATGTT"
node = make_graph(dna)
print_graph(node)
groups = group_by_node_labels(node)
print(len(groups))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment