Skip to content

Instantly share code, notes, and snippets.

@rolph-recto
Last active November 14, 2024 22:17
Show Gist options
  • Save rolph-recto/0e1ffb042f690fd09387920d24677ba4 to your computer and use it in GitHub Desktop.
Save rolph-recto/0e1ffb042f690fd09387920d24677ba4 to your computer and use it in GitHub Desktop.
Tarjan's path expressions algorithm
class Regex:
def simplify(self):
raise NotImplementedError
def __add__(self, other):
return RegexPlus(self, other)
def __mul__(self, other):
return RegexTimes(self, other)
class RegexOne(Regex):
def __init__(self):
pass
def simplify(self):
return self
def __str__(self):
return "1"
class RegexZero(Regex):
def __init__(self):
pass
def simplify(self):
return self
def __str__(self):
return "0"
class RegexPlus(Regex):
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def simplify(self):
new_lhs = self.lhs.simplify()
new_rhs = self.rhs.simplify()
if isinstance(new_lhs, RegexZero):
return new_rhs
elif isinstance(new_rhs, RegexZero):
return new_lhs
else:
return RegexPlus(new_lhs, new_rhs)
def __str__(self):
return f"({self.lhs} + {self.rhs})"
class RegexTimes(Regex):
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def simplify(self):
new_lhs = self.lhs.simplify()
new_rhs = self.rhs.simplify()
if isinstance(new_lhs, RegexZero) or isinstance(new_rhs, RegexZero):
return RegexZero()
elif isinstance(new_lhs, RegexOne):
return new_rhs
elif isinstance(new_rhs, RegexOne):
return new_lhs
else:
return RegexTimes(new_lhs, new_rhs)
def __str__(self):
return f"({self.lhs} * {self.rhs})"
class RegexStar(Regex):
def __init__(self, op):
self.op = op
def simplify(self):
new_op = self.op.simplify()
if isinstance(new_op, RegexZero) or isinstance(new_op, RegexOne):
return RegexOne()
else:
return RegexStar(new_op)
def __str__(self):
return f"{self.op}^*"
class RegexAtom(Regex):
def __init__(self, atom):
self.atom = atom
def simplify(self):
return self
def __str__(self):
return str(self.atom)
class DominatorTree:
def __init__(self, tree):
self.tree = tree
def immediateDominator(self, node):
for parent, children in self.tree.items():
if node in children:
return parent
return None
def children(self, node):
return self.tree[node]
def siblings(self, node):
for parent, children in self.tree.items():
if node in children:
return children
return None
def dominates(self, node1, node2):
if node1 == node2:
return True
elif node2 in self.tree[node1]:
return True
else:
for child in self.tree[node1]:
if self.dominates(child, node2):
return True
return False
def getDominatorInSet(self, node, nodeSet):
for curNode in nodeSet:
if self.dominates(curNode, node):
return curNode
return None
def getDominatorPath(self, start, end):
if start == end:
return [start]
else:
idom = self.immediateDominator(end)
return self.getDominatorPath(start, idom) + [end]
# compute derived graph
def computeDerivedGraph(cfg, domtree):
derivedEdges = []
edgeMap = dict()
for edge in cfg["edges"]:
# non-tree edge
if domtree.immediateDominator(edge[1]) != edge[0]:
siblingDom = domtree.getDominatorInSet(edge[0], domtree.siblings(edge[1]))
derivedEdge = (siblingDom, edge[1], edge)
derivedEdges.append(derivedEdge)
edgeMap[edge] = derivedEdge
return derivedEdges, edgeMap
# compute path expr representing paths from start to edge.dest
# that end in edge, using the values of dpath
def edgePaths(edge, start, dpath, domtree):
cur = RegexOne()
for dom in domtree.getDominatorPath(start, edge[0])[1:]:
cur = cur * dpath[dom]
return cur * RegexAtom(edge)
# same "eliminate" procedure from Tarjan's paper, similar in spirit to Kleene's algorithm
# compute a path sequence for the input graph
def eliminate(nodes, edges):
P = dict(((n1,n2), RegexZero()) for n1 in nodes for n2 in nodes)
for edge in edges:
if edge[0] in nodes and edge[1] in nodes:
P[(edge[0],edge[1])] = (P[(edge[0],edge[1])] + RegexAtom(edge)).simplify()
for i, ni in enumerate(nodes):
P[(ni,ni)] = RegexStar(P[(ni,ni)]).simplify()
for nj in nodes[i+1:]:
if not isinstance(P[(nj,ni)], RegexZero):
P[(nj,ni)] = (P[(nj,ni)] * P[(ni,ni)]).simplify()
for nk in nodes[i+1:]:
if not isinstance(P[(ni,nk)], RegexZero):
P[(nj,nk)] = (P[(nj,nk)] + (P[(nj,ni)] * P[(ni,nk)])).simplify()
pathSequence = []
for i, ni in enumerate(nodes):
for nj in nodes[i:]:
pathExpr = P[(ni,nj)]
if not isinstance(pathExpr, RegexOne) and not isinstance(pathExpr, RegexZero):
pathSequence.append((pathExpr, ni, nj))
for i in range(len(nodes)-1,-1,-1):
for j in range(i):
pathExpr = P[(nodes[i],nodes[j])]
if not isinstance(pathExpr, RegexZero):
pathSequence.append((pathExpr, nodes[i], nodes[j]))
return pathSequence
# compute path expressions between nodes
# this uses path sequences from "eliminate" algorithm above
def computePathExpressions(nodes, edges):
path = dict()
pathSequence = eliminate(nodes, edges)
for root in nodes:
for node in nodes:
path[(root,node)] = RegexOne() if root == node else RegexZero()
for pathExpr, src, dst in pathSequence:
if src == dst:
path[(root,src)] = (path[(root,src)] * pathExpr).simplify()
else:
path[(root,dst)] = (path[(root,dst)] + (path[(root,src)] * pathExpr)).simplify()
return path
# map path expr in derived graph to path expr in control flow graph,
# using image of edges in derived graph
def pathImg(imageMap, pathExpr):
if isinstance(pathExpr, RegexAtom):
return imageMap[pathExpr.atom]
elif isinstance(pathExpr, RegexStar):
return RegexStar(pathImg(imageMap, pathExpr.op))
elif isinstance(pathExpr, RegexPlus):
return RegexPlus(pathImg(imageMap, pathExpr.lhs), pathImg(imageMap, pathExpr.rhs))
elif isinstance(pathExpr, RegexTimes):
return RegexTimes(pathImg(imageMap, pathExpr.lhs), pathImg(imageMap, pathExpr.rhs))
elif isinstance(pathExpr, RegexOne) or isinstance(pathExpr, RegexZero):
return pathExpr
# postorder traversal of tree
def traversePostorder(root, tree):
nodes = []
for child in tree[root]:
nodes.extend(traversePostorder(child, tree))
nodes.append(root)
return nodes
# get tree edges for node
def tree(node, cfg, domtree):
edges = []
for edge in cfg["edges"]:
if node == edge[1]:
if domtree.immediateDominator(edge[1]) == edge[0]:
edges.append(edge)
return edges
# get non-tree edges for node
def nontree(node, cfg, domtree):
edges = []
for edge in cfg["edges"]:
if node == edge[1]:
if domtree.immediateDominator(edge[1]) != edge[0]:
edges.append(edge)
return edges
# Tarjan's "decompose and sequence" path expression algorithm
def tarjan(cfg, domtree):
global edgePaths
# returns derivedGraph
# edgeMap maps edges in CFG to edges in derived graph
derivedEdges, edgeMap = computeDerivedGraph(cfg, domtree)
path = dict()
dpath = dict((node, RegexZero()) for node in cfg["nodes"])
dpathTree = dict((node, RegexZero()) for node in cfg["nodes"])
# compute dpath in post-order traversal of domtree
postorderNodes = traversePostorder(cfg["entry"], domtree.tree)
for node in postorderNodes:
children = domtree.children(node)
imageMap = dict()
# compute dpathTree and image of edges in derived graph
for child in children:
dpathTree[child] = RegexZero()
for edge in tree(child, cfg, domtree):
dpathTree[child] = dpathTree[child] + RegexAtom(edge)
for edge in nontree(child, cfg, domtree):
siblingDom = edgeMap[edge][0]
imageMap[edgeMap[edge]] = edgePaths(edge, siblingDom, dpath, domtree)
# compute path expressions in derivedGraph between sibling nodes
derivedPath = computePathExpressions(children, derivedEdges)
# compute dpath
for child in children:
dpath[child] = RegexZero()
for sibling in children:
# compute image of path expr in derived graph
pathImage = pathImg(imageMap, derivedPath[(sibling,child)])
siblingPaths = dpathTree[sibling] * pathImage
dpath[child] = (dpath[child] + siblingPaths).simplify()
# compute path in reverse post-order traversal of domtree
for node in reversed(postorderNodes):
# special case for computing path[entry]
if node == cfg["entry"]:
path[cfg["entry"]] = RegexZero()
for edge in nontree(cfg["entry"], cfg, domtree):
edgePaths = edgePaths(edge, cfg["entry"], dpath, domtree)
path[cfg["entry"]] = path[cfg["entry"]] + edgePaths
path[cfg["entry"]] = RegexStar(path[cfg["entry"]]).simplify()
# for non-entry nodes, compute path by appending dpath[node] to path[idom]
else:
idom = domtree.immediateDominator(node)
path[node] = (path[idom] * dpath[node]).simplify()
return path
cfg = {
"entry": 1,
"edges": [(1,2),(2,3),(3,2),(2,4)],
"nodes": [1,2,3,4]
}
domtree = DominatorTree({
1: [2],
2: [3,4],
3: [],
4: []
})
path = tarjan(cfg, domtree)
for node in cfg["nodes"]:
print(f"path[{node}] = {path[node]}")
# expected output:
# path[1] = 1
# path[2] = ((1, 2) * ((2, 3) * (3, 2))^*)
# path[3] = (((1, 2) * ((2, 3) * (3, 2))^*) * (2, 3))
# path[4] = (((1, 2) * ((2, 3) * (3, 2))^*) * (2, 4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment