Skip to content

Instantly share code, notes, and snippets.

@KeenS
Last active December 31, 2016 11:33
Show Gist options
  • Save KeenS/0dee7ad26b40d447c07d158d2966841d to your computer and use it in GitHub Desktop.
Save KeenS/0dee7ad26b40d447c07d158d2966841d to your computer and use it in GitHub Desktop.
$ rustup run nightly rustc -O --test suffix_array.rs
$ ./suffix_array.rs --bench
running 8 tests
test tests::test_make_suffix_array_gfx ... ignored
test tests::test_make_suffix_array_keen ... ignored
test tests::test_make_suffix_array_keen2 ... ignored
test tests::test_make_suffix_array_keen_unsafe ... ignored
test tests::bench_make_suffix_array_gfx ... bench: 481,257 ns/iter (+/- 13,803)
test tests::bench_make_suffix_array_keen ... bench: 165,853 ns/iter (+/- 15,270)
test tests::bench_make_suffix_array_keen2 ... bench: 95,576 ns/iter (+/- 3,215)
test tests::bench_make_suffix_array_keen_unsafe ... bench: 88,353 ns/iter (+/- 3,211)
test result: ok. 0 passed; 0 failed; 4 ignored; 4 measured
#![feature(test)]
extern crate test;
// gfxさんの実装
pub fn make_suffix_array(s: &str) -> Vec<i64> {
use std::collections::HashMap;
let mut map = HashMap::new();
for i in 0..s.len() {
map.insert(&s[i..s.len()], i as i64);
}
use std::iter::FromIterator;
let mut suffixes = Vec::from_iter(map.keys());
suffixes.sort();
return suffixes.iter().map(|suffix| *map.get(*suffix).unwrap()).collect();
}
// HashMapをBtreeに置き換えた実装。
// BTreeは自動でキーをソートしてくれるので中間のVecを使うコストが減る。
// Btreeのアロケーションコストはまだある。
pub fn make_suffix_array_btree(s: &str) -> Vec<u64> {
use std::collections::BTreeMap;
let mut map = BTreeMap::new();
for i in 0..s.len() {
map.insert(&s[i..s.len()], i as u64);
}
return map.iter().map(|(_, i)| *i).collect();
}
// どうせsortが律速だしそもそもKVみたいに思い構造使う必要なくねってやつ。
// Suffix Arrayだったらキー被りもないし。
// 毎回Vecをアロケーションしている。
pub fn make_suffix_array_vec(s: &str) -> Vec<u64> {
let mut suffixes = Vec::with_capacity(s.len());
for i in 0..s.len() {
suffixes.push((&s[i..s.len()], i as u64));
}
suffixes.sort_by(|&(ref s1, _), &(ref s2, _)| s1.cmp(s2));
suffixes.into_iter().map(|(_, i)| i).collect()
}
// 完全にアロケーションコストを消したけど危ない実装。
// 同じVecを使い回してアロケーションを減らすというもの。
// ライフタイム制約のせいで普通は便利に使い回せないので
// transmuteを使って強制変換してから使っている。
// 必ずself.0.clear()してから使わないとdangling pointerが残っている(筈)なので本当に危険。
struct MakeSuffix(Vec<(&'static str, u64)>);
impl MakeSuffix {
pub fn new() -> Self {
MakeSuffix(Vec::new())
}
pub fn make_suffix_array<'b>(&mut self, s: &'b str) -> Vec<u64> {
use std::mem::transmute;
self.0.clear();
unsafe {
let suffixes = transmute::<_, &'b mut Vec<(&'b str, u64)>>(&mut self.0);
for i in 0..s.len() {
suffixes.push((&s[i..s.len()], i as u64));
}
suffixes.sort_by(|&(ref s1, _), &(ref s2, _)| s1.cmp(s2));
suffixes.into_iter().map(|&mut (_, i)| i).collect()
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use test::Bencher;
#[test]
fn test_make_suffix_array_keen_unsafe() {
let mut ms = MakeSuffix::new();
let a = ms.make_suffix_array("banana");
assert_eq!(a, vec![5, 3, 1, 0, 4, 2]);
}
#[bench]
fn bench_make_suffix_array_keen_unsafe(b: &mut Bencher) {
let mut ms = MakeSuffix::new();
b.iter(|| {
for _ in 0..test::black_box(1000) {
let _ = ms.make_suffix_array("banana");
}
})
}
#[test]
fn test_make_suffix_array_keen() {
let a = make_suffix_array_btree("banana");
assert_eq!(a, vec![5, 3, 1, 0, 4, 2]);
}
#[bench]
fn bench_make_suffix_array_keen(b: &mut Bencher) {
b.iter(|| {
for _ in 0..test::black_box(1000) {
let _ = make_suffix_array_btree("banana");
}
})
}
#[test]
fn test_make_suffix_array_keen2() {
let a = make_suffix_array_vec("banana");
assert_eq!(a, vec![5, 3, 1, 0, 4, 2]);
}
#[bench]
fn bench_make_suffix_array_keen2(b: &mut Bencher) {
b.iter(|| {
for _ in 0..test::black_box(1000) {
let _ = make_suffix_array_vec("banana");
}
})
}
#[test]
fn test_make_suffix_array_gfx() {
let a = make_suffix_array("banana");
assert_eq!(a, vec![5, 3, 1, 0, 4, 2]);
}
#[bench]
fn bench_make_suffix_array_gfx(b: &mut Bencher) {
b.iter(|| {
for _ in 0..test::black_box(1000) {
let _ = make_suffix_array("banana");
}
})
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment