Skip to content

Instantly share code, notes, and snippets.

Created June 8, 2014 01:57
Show Gist options
  • Select an option

  • Save anonymous/5a8bf6fb97399042d464 to your computer and use it in GitHub Desktop.

Select an option

Save anonymous/5a8bf6fb97399042d464 to your computer and use it in GitHub Desktop.
//
// Trie.swift
// SceneTest
//
// Created by Greg Titus on 6/7/14.
// Copyright (c) 2014 The Omni Group. All rights reserved.
//
// This is essentially the same data structure as OFTrie in OmniFoundation, except that the OmniFoundation version is mutable and ephemeral,
// and this version is immutable and persistent. Every time you add a string to the trie, it replaces all the nodes along the string's path
// to the root, and the only mutable change is reassigning the root. Old copies of the trie are unaffected.
// 100 or so lines of code here, 600 or so in 3 classes (6 .h or .m files) in the Objective-C version.
// Speed improvements at the expense of readability:
// (1) Taking successively smaller slices with 'tail' turns out to be slow, so pass around an original string and a starting index instead
// (2) Dictionaries much slower than fixed length arrays, so add a new .LetterBranch variant that has 'a'..'z' subnode slots.
// If you insert a string with non-lowercase-ascii characters, nodes convert themselves to the slower dictionary-based .Branch
// Adding all of /usr/share/dict/words is about 16s on my system, compared to about 1.5s in Obj-C, still.
// some conveniences, to treat the Sliceable UnicodeScalarView of a String as if it's a sort of cons'd list type...
extension String.UnicodeScalarView {
var isEmpty : Bool { return self.startIndex == self.endIndex }
var head : UnicodeScalar { return self[self.startIndex] }
var tail : String.UnicodeScalarView { return self[self.startIndex.succ() .. self.endIndex] }
func substringFrom(i : String.UnicodeScalarView.IndexType) -> String.UnicodeScalarView {
return self[i .. self.endIndex]
}
func isAtEnd(i : String.UnicodeScalarView.IndexType) -> Bool {
return i == self.endIndex
}
}
let asciiLowercaseA : Int = 97
extension UnicodeScalar {
var lowercaseLetterIndex : Int {
let index = Int(self.value)
return index - asciiLowercaseA
}
var isLowercaseLetter : Bool {
let index = self.lowercaseLetterIndex
return index >= 0 && index < 26
}
}
func unicodeScalarFromLetterIndex(index : Int) -> UnicodeScalar {
let val = UInt32(index + asciiLowercaseA)
return UnicodeScalar(val)
}
struct TrieNode {
typealias Node = TrieNode
enum TrieChildren {
// either a single leaf value, with a trailing suffix
case Leaf(String.UnicodeScalarView)
// common case of possible a-z leaves
case LetterBranch(ContiguousArray<Node?>)
// or a full branch, mapping any unichar to child node
case Branch(Dictionary<UnicodeScalar, Node>)
}
var value : AnyObject?
var children : TrieChildren
init(_ val: AnyObject?, prefix: UnicodeScalar, subnode: Node) {
value = val
if (prefix.isLowercaseLetter) {
var array = ContiguousArray<Node?>(count: 26, repeatedValue: nil)
var index = prefix.lowercaseLetterIndex
array[index] = subnode
children = .LetterBranch(array)
} else {
children = .Branch([prefix: subnode])
}
}
init(_ val: AnyObject?, suffix: String.UnicodeScalarView) {
value = val
children = .Leaf(suffix)
}
init(value: AnyObject?, children: TrieChildren) {
self.value = value
self.children = children
}
func nodeByInsertingValue(newValue: AnyObject, forChars: String.UnicodeScalarView, fromIndex: String.UnicodeScalarView.IndexType) -> Node {
switch children {
case .Leaf(let suffix) where suffix == forChars.substringFrom(fromIndex):
return Node(newValue, suffix: suffix)
case .Leaf(let suffix) where forChars.isAtEnd(fromIndex):
let oldHead = suffix.head
let oldLeaf = Node(value, suffix: suffix.tail)
return Node(newValue, prefix: oldHead, subnode: oldLeaf)
case .Leaf(let suffix) where suffix.isEmpty:
let newHead = forChars[fromIndex]
let newLeaf = Node(newValue, suffix: forChars.substringFrom(fromIndex.succ()))
return Node(value, prefix: newHead, subnode: newLeaf)
case .Leaf(let suffix):
let oldHead = suffix.head
let newHead = forChars[fromIndex]
let oldLeaf = Node(value, suffix: suffix.tail)
if (oldHead == newHead) {
let nextIndex = fromIndex.succ()
let newChild = oldLeaf.nodeByInsertingValue(newValue, forChars: forChars, fromIndex: nextIndex)
return Node(nil, prefix: newHead, subnode: newChild)
} else {
let partial = Node(nil, prefix: oldHead, subnode:oldLeaf)
return partial.nodeByInsertingValue(newValue, forChars: forChars, fromIndex: fromIndex)
}
case .LetterBranch(_) where forChars.isAtEnd(fromIndex):
return Node(value: newValue, children: children)
case .LetterBranch(var array) where forChars[fromIndex].isLowercaseLetter:
let letterIndex = forChars[fromIndex].lowercaseLetterIndex
let nextIndex = fromIndex.succ()
if let child = array[letterIndex] {
array[letterIndex] = child.nodeByInsertingValue(newValue, forChars: forChars, fromIndex: nextIndex)
} else {
let newSuffix = forChars.substringFrom(nextIndex)
array[letterIndex] = Node(newValue, suffix: newSuffix)
}
return Node(value: value, children: .LetterBranch(array))
case .LetterBranch(let array):
let newSuffix = forChars.substringFrom(fromIndex.succ())
let newLeaf = Node(newValue, suffix: newSuffix)
var newSubnodes = [forChars[fromIndex]: newLeaf]
for i in 0..26 {
if let child = array[i] {
newSubnodes[unicodeScalarFromLetterIndex(i)] = child
}
}
return Node(value: value, children: .Branch(newSubnodes))
case .Branch(_) where forChars.isAtEnd(fromIndex):
return Node(value: newValue, children: children)
case .Branch(let subnodes):
var newChild : Node
let charHead = forChars[fromIndex]
let nextIndex = fromIndex.succ()
if let child = subnodes[charHead] {
newChild = child.nodeByInsertingValue(newValue, forChars: forChars, fromIndex: nextIndex)
} else {
let newSuffix = forChars.substringFrom(nextIndex)
newChild = Node(newValue, suffix: newSuffix)
}
var newSubnodes = subnodes
newSubnodes[charHead] = newChild
return Node(value: value, children: .Branch(newSubnodes))
}
}
func lookup (str : String.UnicodeScalarView, fromIndex: String.UnicodeScalarView.IndexType) -> AnyObject? {
if (str.endIndex == fromIndex) {
return value
}
switch children {
case .Leaf(let suffix):
return suffix == str[fromIndex .. str.endIndex] ? value : nil
case .LetterBranch(let array):
let scalar = str[fromIndex]
if (scalar.isLowercaseLetter) {
return array[scalar.lowercaseLetterIndex]?.lookup(str, fromIndex: fromIndex.succ())
} else {
return nil
}
case .Branch(let subnodes):
return subnodes[str[fromIndex]]?.lookup(str, fromIndex: fromIndex.succ())
}
}
}
class Trie {
var root : TrieNode? = nil
func insert(value: String, forString: String) {
if root {
let scalars = forString.unicodeScalars
root = root!.nodeByInsertingValue(value, forChars: scalars, fromIndex: scalars.startIndex)
} else {
root = TrieNode(value, suffix: forString.unicodeScalars)
}
}
func lookup(str : String) -> AnyObject? {
let scalars = str.unicodeScalars
return root?.lookup(scalars, fromIndex: scalars.startIndex)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment