Last active
December 24, 2015 12:19
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Suffix tree built with simple O(m^2)-time algorithm.\n", | |
"class SuffixTree(object):\n", | |
" \n", | |
" class Node(object):\n", | |
" def __init__(self, depth, off, ln, lab=None):\n", | |
" self.depth = depth\n", | |
" self.off = off # offset into T of substring\n", | |
" self.ln = ln # length of substring\n", | |
" self.out = {} # outgoing edges; maps characters to nodes\n", | |
" \n", | |
" def __init__(self, t):\n", | |
" \"\"\" Make suffix tree, without suffix links, from s in quadratic time\n", | |
" and linear space \"\"\"\n", | |
" t += '$'\n", | |
" self.t = t\n", | |
" self.root = self.Node(0, 0, 0, None)\n", | |
" self.root.out[t[0]] = self.Node(len(t), 0, len(t), t)\n", | |
" self.nodes = []\n", | |
" for i in xrange(1, len(t)):\n", | |
" cur = self.root\n", | |
" j = i\n", | |
" while j < len(t):\n", | |
" if t[j] in cur.out:\n", | |
" child = cur.out[t[j]]\n", | |
" lab = t[child.off:child.off+child.ln]\n", | |
" k = j+1 # Walk along edge\n", | |
" while k-j < len(lab) and t[k] == lab[k-j]:\n", | |
" k += 1\n", | |
" if k-j == len(lab):\n", | |
" cur = child # exhausted the edge\n", | |
" j = k\n", | |
" else:\n", | |
" # fell off in middle of edge\n", | |
" cExist, cNew = lab[k-j], t[k]\n", | |
" mid = self.Node(cur.depth + k-j, child.off, k-j, lab[:k-j])\n", | |
" mid.out[cNew] = self.Node(mid.depth + len(t[k:]), k, len(t[k:]), t[k:])\n", | |
" self.nodes.append(mid)\n", | |
" self.nodes.append(mid.out[cNew])\n", | |
" mid.out[cExist] = child\n", | |
" child.off += (k-j)\n", | |
" child.ln -= (k-j)\n", | |
" cur.out[t[j]] = mid\n", | |
" else:\n", | |
" # Create a new edge hanging off of this node\n", | |
" cur.out[t[j]] = self.Node(cur.depth + len(t[j:]), j, len(t[j:]), t[j:])\n", | |
" self.nodes.append(cur.out[t[j]])\n", | |
" \n", | |
" def saLcp(self):\n", | |
" # Return suffix array and an LCP1 array corresponding to this\n", | |
" # suffix tree. self.root is root, self.t is the text.\n", | |
" self.minSinceLeaf = 0\n", | |
" sa, lcp1 = [], []\n", | |
" def __visit(n):\n", | |
" if len(n.out) == 0:\n", | |
" # leaf node, record offset and LCP1 with previous leaf\n", | |
" sa.append(len(self.t) - n.depth)\n", | |
" lcp1.append(self.minSinceLeaf)\n", | |
" # reset LCP1 to depth of this leaf\n", | |
" self.minSinceLeaf = n.depth\n", | |
" # visit children in lexicographical order\n", | |
" for c, child in sorted(n.out.iteritems()):\n", | |
" __visit(child)\n", | |
" # after each child visit, perhaps decrease\n", | |
" # minimum-depth-since-last-leaf value\n", | |
" self.minSinceLeaf = min(self.minSinceLeaf, n.depth)\n", | |
" __visit(self.root)\n", | |
" return sa, lcp1[1:]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"st = SuffixTree('abaaba')\n", | |
"sa, lcp1 = st.saLcp()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"sa, lcp1" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"([6, 5, 2, 3, 0, 4, 1], [0, 1, 1, 3, 0, 2])" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 3 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment