Created
April 2, 2016 23:22
-
-
Save kaja47/66a6f6e36ad88e04c0b14904a0cd2f23 to your computer and use it in GitHub Desktop.
quick and dirty sketch of burstsort
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Arrays | |
// BurstSort | |
// Cache-Conscious Sorting of Large Sets of Strings with Dynamic Tries | |
// http://goanna.cs.rmit.edu.au/~jz/fulltext/alenex03.pdf | |
class BurstLeaf(initSize: Int) { | |
var size: Int = 0 | |
var values: Array[String] = new Array[String](initSize) | |
def add(str: String) = { | |
values(size) = str | |
size += 1 | |
} | |
def resize(factor: Int) = { | |
values = Arrays.copyOfRange(values, 0, values.length * factor) | |
} | |
override def toString = values.filter(_ != null).mkString("BurstLeaf(", ",", ")") | |
} | |
class BurstTrie { | |
val initSize = 16 | |
val resizeFactor = 8 | |
val maxSize = 1024 | |
var size = 0 | |
val root: Array[AnyRef] = new Array[AnyRef](257) | |
def leaves = { | |
def leavesOf(node: Array[AnyRef]): Seq[BurstLeaf] = | |
node.flatMap { child => | |
child match { | |
case null => Seq() | |
case leaf: BurstLeaf => Seq(leaf) | |
case node: Array[AnyRef] => leavesOf(node) | |
} | |
} | |
leavesOf(root) | |
} | |
def sorted = { | |
var pos = 0 | |
val res = new Array[String](size) | |
def run(node: Array[AnyRef]): Unit = { | |
doNode(node(256)) | |
for (i <- 0 until 256) { | |
doNode(node(i)) | |
} | |
} | |
def doNode(n: AnyRef) = n match { | |
case null => | |
case leaf: BurstLeaf => | |
Arrays.sort(leaf.values.asInstanceOf[Array[Object]], 0, leaf.size) | |
System.arraycopy(leaf.values, 0, res, pos, leaf.size) | |
pos += leaf.size | |
case node: Array[AnyRef] => run(node) | |
} | |
run(root) | |
res | |
} | |
def ++= (strs: Seq[String]): this.type = { | |
for (str <- strs) this += str | |
this | |
} | |
def += (str: String): this.type = { | |
def add(str: String, pos: Int, node: Array[AnyRef]): Unit = { | |
val char = if (str.length > pos) str.charAt(pos) else 256 | |
node(char) match { | |
case null => | |
val leaf = new BurstLeaf(initSize) | |
leaf.add(str) | |
node(char) = leaf | |
case leaf: BurstLeaf => | |
// add string, possibly resize or burst | |
if (leaf.size == leaf.values.length) { | |
if (leaf.size * resizeFactor <= maxSize || char == 256) { // resize | |
leaf.resize(resizeFactor) | |
leaf.add(str) | |
} else { // burst | |
println("burst") | |
val newNode = new Array[AnyRef](257) | |
node(char) = newNode | |
for (i <- 0 until leaf.size) { | |
add(leaf.values(i), pos+1, newNode) | |
} | |
add(str, pos+1, newNode) | |
} | |
} else { | |
leaf.add(str) | |
} | |
case child: Array[AnyRef] => | |
add(str, pos+1, child) | |
} | |
} | |
add(str, 0, root) | |
size += 1 | |
this | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment