Created
June 8, 2014 01:57
-
-
Save anonymous/5a8bf6fb97399042d464 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // Trie.swift | |
| // SceneTest | |
| // | |
| // Created by Greg Titus on 6/7/14. | |
| // Copyright (c) 2014 The Omni Group. All rights reserved. | |
| // | |
| // This is essentially the same data structure as OFTrie in OmniFoundation, except that the OmniFoundation version is mutable and ephemeral, | |
| // and this version is immutable and persistent. Every time you add a string to the trie, it replaces all the nodes along the string's path | |
| // to the root, and the only mutable change is reassigning the root. Old copies of the trie are unaffected. | |
| // 100 or so lines of code here, 600 or so in 3 classes (6 .h or .m files) in the Objective-C version. | |
| // Speed improvements at the expense of readability: | |
| // (1) Taking successively smaller slices with 'tail' turns out to be slow, so pass around an original string and a starting index instead | |
| // (2) Dictionaries much slower than fixed length arrays, so add a new .LetterBranch variant that has 'a'..'z' subnode slots. | |
| // If you insert a string with non-lowercase-ascii characters, nodes convert themselves to the slower dictionary-based .Branch | |
| // Adding all of /usr/share/dict/words is about 16s on my system, compared to about 1.5s in Obj-C, still. | |
| // some conveniences, to treat the Sliceable UnicodeScalarView of a String as if it's a sort of cons'd list type... | |
| extension String.UnicodeScalarView { | |
| var isEmpty : Bool { return self.startIndex == self.endIndex } | |
| var head : UnicodeScalar { return self[self.startIndex] } | |
| var tail : String.UnicodeScalarView { return self[self.startIndex.succ() .. self.endIndex] } | |
| func substringFrom(i : String.UnicodeScalarView.IndexType) -> String.UnicodeScalarView { | |
| return self[i .. self.endIndex] | |
| } | |
| func isAtEnd(i : String.UnicodeScalarView.IndexType) -> Bool { | |
| return i == self.endIndex | |
| } | |
| } | |
| let asciiLowercaseA : Int = 97 | |
| extension UnicodeScalar { | |
| var lowercaseLetterIndex : Int { | |
| let index = Int(self.value) | |
| return index - asciiLowercaseA | |
| } | |
| var isLowercaseLetter : Bool { | |
| let index = self.lowercaseLetterIndex | |
| return index >= 0 && index < 26 | |
| } | |
| } | |
| func unicodeScalarFromLetterIndex(index : Int) -> UnicodeScalar { | |
| let val = UInt32(index + asciiLowercaseA) | |
| return UnicodeScalar(val) | |
| } | |
| struct TrieNode { | |
| typealias Node = TrieNode | |
| enum TrieChildren { | |
| // either a single leaf value, with a trailing suffix | |
| case Leaf(String.UnicodeScalarView) | |
| // common case of possible a-z leaves | |
| case LetterBranch(ContiguousArray<Node?>) | |
| // or a full branch, mapping any unichar to child node | |
| case Branch(Dictionary<UnicodeScalar, Node>) | |
| } | |
| var value : AnyObject? | |
| var children : TrieChildren | |
| init(_ val: AnyObject?, prefix: UnicodeScalar, subnode: Node) { | |
| value = val | |
| if (prefix.isLowercaseLetter) { | |
| var array = ContiguousArray<Node?>(count: 26, repeatedValue: nil) | |
| var index = prefix.lowercaseLetterIndex | |
| array[index] = subnode | |
| children = .LetterBranch(array) | |
| } else { | |
| children = .Branch([prefix: subnode]) | |
| } | |
| } | |
| init(_ val: AnyObject?, suffix: String.UnicodeScalarView) { | |
| value = val | |
| children = .Leaf(suffix) | |
| } | |
| init(value: AnyObject?, children: TrieChildren) { | |
| self.value = value | |
| self.children = children | |
| } | |
| func nodeByInsertingValue(newValue: AnyObject, forChars: String.UnicodeScalarView, fromIndex: String.UnicodeScalarView.IndexType) -> Node { | |
| switch children { | |
| case .Leaf(let suffix) where suffix == forChars.substringFrom(fromIndex): | |
| return Node(newValue, suffix: suffix) | |
| case .Leaf(let suffix) where forChars.isAtEnd(fromIndex): | |
| let oldHead = suffix.head | |
| let oldLeaf = Node(value, suffix: suffix.tail) | |
| return Node(newValue, prefix: oldHead, subnode: oldLeaf) | |
| case .Leaf(let suffix) where suffix.isEmpty: | |
| let newHead = forChars[fromIndex] | |
| let newLeaf = Node(newValue, suffix: forChars.substringFrom(fromIndex.succ())) | |
| return Node(value, prefix: newHead, subnode: newLeaf) | |
| case .Leaf(let suffix): | |
| let oldHead = suffix.head | |
| let newHead = forChars[fromIndex] | |
| let oldLeaf = Node(value, suffix: suffix.tail) | |
| if (oldHead == newHead) { | |
| let nextIndex = fromIndex.succ() | |
| let newChild = oldLeaf.nodeByInsertingValue(newValue, forChars: forChars, fromIndex: nextIndex) | |
| return Node(nil, prefix: newHead, subnode: newChild) | |
| } else { | |
| let partial = Node(nil, prefix: oldHead, subnode:oldLeaf) | |
| return partial.nodeByInsertingValue(newValue, forChars: forChars, fromIndex: fromIndex) | |
| } | |
| case .LetterBranch(_) where forChars.isAtEnd(fromIndex): | |
| return Node(value: newValue, children: children) | |
| case .LetterBranch(var array) where forChars[fromIndex].isLowercaseLetter: | |
| let letterIndex = forChars[fromIndex].lowercaseLetterIndex | |
| let nextIndex = fromIndex.succ() | |
| if let child = array[letterIndex] { | |
| array[letterIndex] = child.nodeByInsertingValue(newValue, forChars: forChars, fromIndex: nextIndex) | |
| } else { | |
| let newSuffix = forChars.substringFrom(nextIndex) | |
| array[letterIndex] = Node(newValue, suffix: newSuffix) | |
| } | |
| return Node(value: value, children: .LetterBranch(array)) | |
| case .LetterBranch(let array): | |
| let newSuffix = forChars.substringFrom(fromIndex.succ()) | |
| let newLeaf = Node(newValue, suffix: newSuffix) | |
| var newSubnodes = [forChars[fromIndex]: newLeaf] | |
| for i in 0..26 { | |
| if let child = array[i] { | |
| newSubnodes[unicodeScalarFromLetterIndex(i)] = child | |
| } | |
| } | |
| return Node(value: value, children: .Branch(newSubnodes)) | |
| case .Branch(_) where forChars.isAtEnd(fromIndex): | |
| return Node(value: newValue, children: children) | |
| case .Branch(let subnodes): | |
| var newChild : Node | |
| let charHead = forChars[fromIndex] | |
| let nextIndex = fromIndex.succ() | |
| if let child = subnodes[charHead] { | |
| newChild = child.nodeByInsertingValue(newValue, forChars: forChars, fromIndex: nextIndex) | |
| } else { | |
| let newSuffix = forChars.substringFrom(nextIndex) | |
| newChild = Node(newValue, suffix: newSuffix) | |
| } | |
| var newSubnodes = subnodes | |
| newSubnodes[charHead] = newChild | |
| return Node(value: value, children: .Branch(newSubnodes)) | |
| } | |
| } | |
| func lookup (str : String.UnicodeScalarView, fromIndex: String.UnicodeScalarView.IndexType) -> AnyObject? { | |
| if (str.endIndex == fromIndex) { | |
| return value | |
| } | |
| switch children { | |
| case .Leaf(let suffix): | |
| return suffix == str[fromIndex .. str.endIndex] ? value : nil | |
| case .LetterBranch(let array): | |
| let scalar = str[fromIndex] | |
| if (scalar.isLowercaseLetter) { | |
| return array[scalar.lowercaseLetterIndex]?.lookup(str, fromIndex: fromIndex.succ()) | |
| } else { | |
| return nil | |
| } | |
| case .Branch(let subnodes): | |
| return subnodes[str[fromIndex]]?.lookup(str, fromIndex: fromIndex.succ()) | |
| } | |
| } | |
| } | |
| class Trie { | |
| var root : TrieNode? = nil | |
| func insert(value: String, forString: String) { | |
| if root { | |
| let scalars = forString.unicodeScalars | |
| root = root!.nodeByInsertingValue(value, forChars: scalars, fromIndex: scalars.startIndex) | |
| } else { | |
| root = TrieNode(value, suffix: forString.unicodeScalars) | |
| } | |
| } | |
| func lookup(str : String) -> AnyObject? { | |
| let scalars = str.unicodeScalars | |
| return root?.lookup(scalars, fromIndex: scalars.startIndex) | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment