Created
June 9, 2014 21:17
-
-
Save anonymous/cd24f98fad9ad352a889 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Trie.swift | |
// SceneTest | |
// | |
// Created by Greg Titus on 6/7/14. | |
// Copyright (c) 2014 The Omni Group. All rights reserved. | |
// | |
// This is essentially the same data structure as OFTrie in OmniFoundation, except that the OmniFoundation version is mutable and ephemeral, | |
// and this version is immutable and persistent. Every time you add a string to the trie, it replaces all the nodes along the string's path | |
// to the root, and the only mutable change is reassigning the root. Old copies of the trie are unaffected. | |
// 100 or so lines of code here, 600 or so in 3 classes (6 .h or .m files) in the Objective-C version. | |
// Speed improvements at some slight expense of readability: | |
// (1) Taking successively smaller slices of UnicodeScalarView turns out to be slow, so wrap it and a start index in TrieUnicharBuffer. | |
// (2) Dictionaries much slower than fixed length arrays, so add a new .LetterBranch variant that has 'a'..'z' subnode slots. | |
// If you insert a string with non-lowercase-ascii characters, nodes convert themselves to the slower dictionary-based .Branch | |
// Adding all of /usr/share/dict/words is about 3.1s on my system, compared to about 1.5s in Obj-C, still. | |
let asciiLowercaseA : Int = 97 | |
extension UnicodeScalar { | |
var lowercaseLetterIndex : Int { | |
let index = Int(self.value) | |
return index - asciiLowercaseA | |
} | |
var isLowercaseLetter : Bool { | |
let index = self.lowercaseLetterIndex | |
return index >= 0 && index < 26 | |
} | |
} | |
struct TrieUnicharBuffer : Equatable { | |
var index: String.UnicodeScalarView.IndexType | |
var buffer: String.UnicodeScalarView | |
init(_ s: String) { | |
buffer = s.unicodeScalars | |
index = buffer.startIndex | |
} | |
init(index: String.UnicodeScalarView.IndexType, buffer: String.UnicodeScalarView) { | |
self.index = index | |
self.buffer = buffer | |
} | |
var isEmpty : Bool { return index == buffer.endIndex } | |
var head : UnicodeScalar { return buffer[index] } | |
var tail : TrieUnicharBuffer { return TrieUnicharBuffer(index: index.succ(), buffer: buffer) } | |
var emptySuffix : TrieUnicharBuffer { | |
return TrieUnicharBuffer(index: buffer.endIndex, buffer: buffer) | |
} | |
var slice : String.UnicodeScalarView { return buffer[index .. buffer.endIndex] } | |
func isPrefix(other : TrieUnicharBuffer) -> Bool { | |
var a = self | |
var b = other | |
while (!a.isEmpty && !b.isEmpty) { | |
if (a.head != b.head) { | |
return false | |
} | |
a = a.tail | |
b = b.tail | |
} | |
return b.isEmpty | |
} | |
} | |
func ==(lhs: TrieUnicharBuffer, rhs: TrieUnicharBuffer) -> Bool { | |
return lhs.slice == rhs.slice | |
} | |
struct TrieNode { | |
typealias Node = TrieNode | |
enum TrieChildren { | |
// either a single leaf value, with a trailing suffix | |
case Leaf(TrieUnicharBuffer) | |
// common case of possible a-z leaves | |
case LetterBranch(ContiguousArray<Node?>) | |
// or a full branch, mapping any unichar to child node | |
case Branch(Dictionary<UnicodeScalar, Node>) | |
} | |
var value : AnyObject? | |
var children : TrieChildren | |
init(_ val: AnyObject?, prefix: UnicodeScalar, subnode: Node) { | |
value = val | |
if (prefix.isLowercaseLetter) { | |
var array = ContiguousArray<Node?>(count: 26, repeatedValue: nil) | |
var index = prefix.lowercaseLetterIndex | |
array[index] = subnode | |
children = .LetterBranch(array) | |
} else { | |
children = .Branch([prefix: subnode]) | |
} | |
} | |
init(_ val: AnyObject?, suffix: TrieUnicharBuffer) { | |
value = val | |
children = .Leaf(suffix) | |
} | |
init(value: AnyObject?, children: TrieChildren) { | |
self.value = value | |
self.children = children | |
} | |
func nodeByInsertingValue(newValue: AnyObject, forBuffer buffer: TrieUnicharBuffer) -> Node { | |
switch children { | |
case .Leaf(let suffix) where suffix == buffer: | |
return Node(newValue, suffix: suffix) | |
case .Leaf(let suffix) where buffer.isEmpty: | |
let oldHead = suffix.head | |
let oldLeaf = Node(value, suffix: suffix.tail) | |
return Node(newValue, prefix: oldHead, subnode: oldLeaf) | |
case .Leaf(let suffix) where suffix.isEmpty: | |
let newHead = buffer.head | |
let newLeaf = Node(newValue, suffix: buffer.tail) | |
return Node(value, prefix: newHead, subnode: newLeaf) | |
case .Leaf(let suffix): | |
let oldHead = suffix.head | |
let newHead = buffer.head | |
let oldLeaf = Node(value, suffix: suffix.tail) | |
if (oldHead == newHead) { | |
let newChild = oldLeaf.nodeByInsertingValue(newValue, forBuffer: buffer.tail) | |
return Node(nil, prefix: newHead, subnode: newChild) | |
} else { | |
let partial = Node(nil, prefix: oldHead, subnode:oldLeaf) | |
return partial.nodeByInsertingValue(newValue, forBuffer: buffer) | |
} | |
case .LetterBranch(_) where buffer.isEmpty: | |
return Node(value: newValue, children: children) | |
case .LetterBranch(var array) where buffer.head.isLowercaseLetter: | |
let letterIndex = buffer.head.lowercaseLetterIndex | |
if let child = array[letterIndex] { | |
array[letterIndex] = child.nodeByInsertingValue(newValue, forBuffer: buffer.tail) | |
} else { | |
array[letterIndex] = Node(newValue, suffix: buffer.tail) | |
} | |
return Node(value: value, children: .LetterBranch(array)) | |
case .LetterBranch(let array): | |
let newLeaf = Node(newValue, suffix: buffer.tail) | |
var newSubnodes = [buffer.head: newLeaf] | |
for i in 0..26 { | |
if let child = array[i] { | |
newSubnodes[unicodeScalarFromLetterIndex(i)] = child | |
} | |
} | |
return Node(value: value, children: .Branch(newSubnodes)) | |
case .Branch(_) where buffer.isEmpty: | |
return Node(value: newValue, children: children) | |
case .Branch(let subnodes): | |
var newChild : Node | |
let charHead = buffer.head | |
if let child = subnodes[charHead] { | |
newChild = child.nodeByInsertingValue(newValue, forBuffer: buffer.tail) | |
} else { | |
newChild = Node(newValue, suffix: buffer.tail) | |
} | |
var newSubnodes = subnodes | |
newSubnodes[charHead] = newChild | |
return Node(value: value, children: .Branch(newSubnodes)) | |
} | |
} | |
func subnodeForPrefix(buffer : TrieUnicharBuffer, exactMatch: Bool) -> TrieNode? { | |
switch children { | |
case .Leaf(let suffix) where exactMatch: | |
return suffix == buffer ? self : nil | |
case .Leaf(let suffix): | |
return suffix.isPrefix(buffer) ? self : nil | |
case .LetterBranch(let array): | |
if (buffer.isEmpty) { | |
return self | |
} | |
let scalar = buffer.head | |
if (scalar.isLowercaseLetter) { | |
return array[scalar.lowercaseLetterIndex]?.subnodeForPrefix(buffer.tail, exactMatch: exactMatch) | |
} else { | |
return nil | |
} | |
case .Branch(let subnodes): | |
if (buffer.isEmpty) { | |
return self | |
} | |
return subnodes[buffer.head]?.subnodeForPrefix(buffer.tail, exactMatch: exactMatch) | |
} | |
} | |
} | |
class Trie { | |
var root : TrieNode? = nil | |
func insert(value: String, forString: String) { | |
let buffer = TrieUnicharBuffer(forString) | |
if root { | |
root = root!.nodeByInsertingValue(value, forBuffer: buffer) | |
} else { | |
root = TrieNode(value, suffix: buffer) | |
} | |
} | |
func lookup(str : String) -> AnyObject? { | |
let buffer = TrieUnicharBuffer(str) | |
return root?.subnodeForPrefix(buffer, exactMatch: true)?.value | |
} | |
} | |
// Make a TrieNode generate all child nodes, by just making an array of them and returning the array generator. | |
extension TrieNode : Sequence { | |
typealias GeneratorType = Array<TrieNode>.GeneratorType | |
func generate() -> GeneratorType { | |
var nodes: TrieNode[] = [] | |
switch children { | |
case .LetterBranch(let array): | |
for o in array { | |
if let n = o { | |
nodes.append(n) | |
} | |
} | |
case .Branch(let subnodes): | |
nodes.extend(subnodes.values) | |
default: | |
break | |
} | |
return nodes.generate() | |
} | |
} | |
// Traverse the tree of nodes, by keeping a stack of generators for the children of the current node at each level | |
struct TrieGenerator : Generator { | |
typealias Element = AnyObject | |
var generatorStack : TrieNode.GeneratorType[] = [] | |
var currentNode : TrieNode? | |
init(_ n : TrieNode?) { | |
currentNode = n | |
} | |
mutating func next() -> Element? { | |
if let n = currentNode { | |
generatorStack.append(n.generate()) | |
currentNode = nil | |
if let v : AnyObject = n.value { | |
return v | |
} | |
} | |
if (generatorStack.count == 0) { | |
return nil | |
} | |
var g = generatorStack.removeLast() | |
currentNode = g.next() | |
if (currentNode) { | |
generatorStack.append(g) | |
} | |
return next() | |
} | |
} | |
extension Trie : Sequence { | |
typealias GeneratorType = TrieGenerator | |
func generate() -> GeneratorType { | |
return TrieGenerator(root) | |
} | |
func subsequenceForPrefix(str : String) -> SequenceOf<AnyObject> { | |
let buffer = TrieUnicharBuffer(str) | |
let subnode = root?.subnodeForPrefix(buffer, exactMatch: false) | |
let g = TrieGenerator(subnode) | |
return SequenceOf<AnyObject>({g}) | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment