Last active
November 14, 2024 22:17
-
-
Save rolph-recto/0e1ffb042f690fd09387920d24677ba4 to your computer and use it in GitHub Desktop.
Tarjan's path expressions algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Regex: | |
def simplify(self): | |
raise NotImplementedError | |
def __add__(self, other): | |
return RegexPlus(self, other) | |
def __mul__(self, other): | |
return RegexTimes(self, other) | |
class RegexOne(Regex): | |
def __init__(self): | |
pass | |
def simplify(self): | |
return self | |
def __str__(self): | |
return "1" | |
class RegexZero(Regex): | |
def __init__(self): | |
pass | |
def simplify(self): | |
return self | |
def __str__(self): | |
return "0" | |
class RegexPlus(Regex): | |
def __init__(self, lhs, rhs): | |
self.lhs = lhs | |
self.rhs = rhs | |
def simplify(self): | |
new_lhs = self.lhs.simplify() | |
new_rhs = self.rhs.simplify() | |
if isinstance(new_lhs, RegexZero): | |
return new_rhs | |
elif isinstance(new_rhs, RegexZero): | |
return new_lhs | |
else: | |
return RegexPlus(new_lhs, new_rhs) | |
def __str__(self): | |
return f"({self.lhs} + {self.rhs})" | |
class RegexTimes(Regex): | |
def __init__(self, lhs, rhs): | |
self.lhs = lhs | |
self.rhs = rhs | |
def simplify(self): | |
new_lhs = self.lhs.simplify() | |
new_rhs = self.rhs.simplify() | |
if isinstance(new_lhs, RegexZero) or isinstance(new_rhs, RegexZero): | |
return RegexZero() | |
elif isinstance(new_lhs, RegexOne): | |
return new_rhs | |
elif isinstance(new_rhs, RegexOne): | |
return new_lhs | |
else: | |
return RegexTimes(new_lhs, new_rhs) | |
def __str__(self): | |
return f"({self.lhs} * {self.rhs})" | |
class RegexStar(Regex): | |
def __init__(self, op): | |
self.op = op | |
def simplify(self): | |
new_op = self.op.simplify() | |
if isinstance(new_op, RegexZero) or isinstance(new_op, RegexOne): | |
return RegexOne() | |
else: | |
return RegexStar(new_op) | |
def __str__(self): | |
return f"{self.op}^*" | |
class RegexAtom(Regex): | |
def __init__(self, atom): | |
self.atom = atom | |
def simplify(self): | |
return self | |
def __str__(self): | |
return str(self.atom) | |
class DominatorTree: | |
def __init__(self, tree): | |
self.tree = tree | |
def immediateDominator(self, node): | |
for parent, children in self.tree.items(): | |
if node in children: | |
return parent | |
return None | |
def children(self, node): | |
return self.tree[node] | |
def siblings(self, node): | |
for parent, children in self.tree.items(): | |
if node in children: | |
return children | |
return None | |
def dominates(self, node1, node2): | |
if node1 == node2: | |
return True | |
elif node2 in self.tree[node1]: | |
return True | |
else: | |
for child in self.tree[node1]: | |
if self.dominates(child, node2): | |
return True | |
return False | |
def getDominatorInSet(self, node, nodeSet): | |
for curNode in nodeSet: | |
if self.dominates(curNode, node): | |
return curNode | |
return None | |
def getDominatorPath(self, start, end): | |
if start == end: | |
return [start] | |
else: | |
idom = self.immediateDominator(end) | |
return self.getDominatorPath(start, idom) + [end] | |
# compute derived graph | |
def computeDerivedGraph(cfg, domtree): | |
derivedEdges = [] | |
edgeMap = dict() | |
for edge in cfg["edges"]: | |
# non-tree edge | |
if domtree.immediateDominator(edge[1]) != edge[0]: | |
siblingDom = domtree.getDominatorInSet(edge[0], domtree.siblings(edge[1])) | |
derivedEdge = (siblingDom, edge[1], edge) | |
derivedEdges.append(derivedEdge) | |
edgeMap[edge] = derivedEdge | |
return derivedEdges, edgeMap | |
# compute path expr representing paths from start to edge.dest | |
# that end in edge, using the values of dpath | |
def edgePaths(edge, start, dpath, domtree): | |
cur = RegexOne() | |
for dom in domtree.getDominatorPath(start, edge[0])[1:]: | |
cur = cur * dpath[dom] | |
return cur * RegexAtom(edge) | |
# same "eliminate" procedure from Tarjan's paper, similar in spirit to Kleene's algorithm | |
# compute a path sequence for the input graph | |
def eliminate(nodes, edges): | |
P = dict(((n1,n2), RegexZero()) for n1 in nodes for n2 in nodes) | |
for edge in edges: | |
if edge[0] in nodes and edge[1] in nodes: | |
P[(edge[0],edge[1])] = (P[(edge[0],edge[1])] + RegexAtom(edge)).simplify() | |
for i, ni in enumerate(nodes): | |
P[(ni,ni)] = RegexStar(P[(ni,ni)]).simplify() | |
for nj in nodes[i+1:]: | |
if not isinstance(P[(nj,ni)], RegexZero): | |
P[(nj,ni)] = (P[(nj,ni)] * P[(ni,ni)]).simplify() | |
for nk in nodes[i+1:]: | |
if not isinstance(P[(ni,nk)], RegexZero): | |
P[(nj,nk)] = (P[(nj,nk)] + (P[(nj,ni)] * P[(ni,nk)])).simplify() | |
pathSequence = [] | |
for i, ni in enumerate(nodes): | |
for nj in nodes[i:]: | |
pathExpr = P[(ni,nj)] | |
if not isinstance(pathExpr, RegexOne) and not isinstance(pathExpr, RegexZero): | |
pathSequence.append((pathExpr, ni, nj)) | |
for i in range(len(nodes)-1,-1,-1): | |
for j in range(i): | |
pathExpr = P[(nodes[i],nodes[j])] | |
if not isinstance(pathExpr, RegexZero): | |
pathSequence.append((pathExpr, nodes[i], nodes[j])) | |
return pathSequence | |
# compute path expressions between nodes | |
# this uses path sequences from "eliminate" algorithm above | |
def computePathExpressions(nodes, edges): | |
path = dict() | |
pathSequence = eliminate(nodes, edges) | |
for root in nodes: | |
for node in nodes: | |
path[(root,node)] = RegexOne() if root == node else RegexZero() | |
for pathExpr, src, dst in pathSequence: | |
if src == dst: | |
path[(root,src)] = (path[(root,src)] * pathExpr).simplify() | |
else: | |
path[(root,dst)] = (path[(root,dst)] + (path[(root,src)] * pathExpr)).simplify() | |
return path | |
# map path expr in derived graph to path expr in control flow graph, | |
# using image of edges in derived graph | |
def pathImg(imageMap, pathExpr): | |
if isinstance(pathExpr, RegexAtom): | |
return imageMap[pathExpr.atom] | |
elif isinstance(pathExpr, RegexStar): | |
return RegexStar(pathImg(imageMap, pathExpr.op)) | |
elif isinstance(pathExpr, RegexPlus): | |
return RegexPlus(pathImg(imageMap, pathExpr.lhs), pathImg(imageMap, pathExpr.rhs)) | |
elif isinstance(pathExpr, RegexTimes): | |
return RegexTimes(pathImg(imageMap, pathExpr.lhs), pathImg(imageMap, pathExpr.rhs)) | |
elif isinstance(pathExpr, RegexOne) or isinstance(pathExpr, RegexZero): | |
return pathExpr | |
# postorder traversal of tree | |
def traversePostorder(root, tree): | |
nodes = [] | |
for child in tree[root]: | |
nodes.extend(traversePostorder(child, tree)) | |
nodes.append(root) | |
return nodes | |
# get tree edges for node | |
def tree(node, cfg, domtree): | |
edges = [] | |
for edge in cfg["edges"]: | |
if node == edge[1]: | |
if domtree.immediateDominator(edge[1]) == edge[0]: | |
edges.append(edge) | |
return edges | |
# get non-tree edges for node | |
def nontree(node, cfg, domtree): | |
edges = [] | |
for edge in cfg["edges"]: | |
if node == edge[1]: | |
if domtree.immediateDominator(edge[1]) != edge[0]: | |
edges.append(edge) | |
return edges | |
# Tarjan's "decompose and sequence" path expression algorithm | |
def tarjan(cfg, domtree): | |
global edgePaths | |
# returns derivedGraph | |
# edgeMap maps edges in CFG to edges in derived graph | |
derivedEdges, edgeMap = computeDerivedGraph(cfg, domtree) | |
path = dict() | |
dpath = dict((node, RegexZero()) for node in cfg["nodes"]) | |
dpathTree = dict((node, RegexZero()) for node in cfg["nodes"]) | |
# compute dpath in post-order traversal of domtree | |
postorderNodes = traversePostorder(cfg["entry"], domtree.tree) | |
for node in postorderNodes: | |
children = domtree.children(node) | |
imageMap = dict() | |
# compute dpathTree and image of edges in derived graph | |
for child in children: | |
dpathTree[child] = RegexZero() | |
for edge in tree(child, cfg, domtree): | |
dpathTree[child] = dpathTree[child] + RegexAtom(edge) | |
for edge in nontree(child, cfg, domtree): | |
siblingDom = edgeMap[edge][0] | |
imageMap[edgeMap[edge]] = edgePaths(edge, siblingDom, dpath, domtree) | |
# compute path expressions in derivedGraph between sibling nodes | |
derivedPath = computePathExpressions(children, derivedEdges) | |
# compute dpath | |
for child in children: | |
dpath[child] = RegexZero() | |
for sibling in children: | |
# compute image of path expr in derived graph | |
pathImage = pathImg(imageMap, derivedPath[(sibling,child)]) | |
siblingPaths = dpathTree[sibling] * pathImage | |
dpath[child] = (dpath[child] + siblingPaths).simplify() | |
# compute path in reverse post-order traversal of domtree | |
for node in reversed(postorderNodes): | |
# special case for computing path[entry] | |
if node == cfg["entry"]: | |
path[cfg["entry"]] = RegexZero() | |
for edge in nontree(cfg["entry"], cfg, domtree): | |
edgePaths = edgePaths(edge, cfg["entry"], dpath, domtree) | |
path[cfg["entry"]] = path[cfg["entry"]] + edgePaths | |
path[cfg["entry"]] = RegexStar(path[cfg["entry"]]).simplify() | |
# for non-entry nodes, compute path by appending dpath[node] to path[idom] | |
else: | |
idom = domtree.immediateDominator(node) | |
path[node] = (path[idom] * dpath[node]).simplify() | |
return path | |
cfg = { | |
"entry": 1, | |
"edges": [(1,2),(2,3),(3,2),(2,4)], | |
"nodes": [1,2,3,4] | |
} | |
domtree = DominatorTree({ | |
1: [2], | |
2: [3,4], | |
3: [], | |
4: [] | |
}) | |
path = tarjan(cfg, domtree) | |
for node in cfg["nodes"]: | |
print(f"path[{node}] = {path[node]}") | |
# expected output: | |
# path[1] = 1 | |
# path[2] = ((1, 2) * ((2, 3) * (3, 2))^*) | |
# path[3] = (((1, 2) * ((2, 3) * (3, 2))^*) * (2, 3)) | |
# path[4] = (((1, 2) * ((2, 3) * (3, 2))^*) * (2, 4)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment