Last active
December 31, 2016 11:33
-
-
Save KeenS/0dee7ad26b40d447c07d158d2966841d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ rustup run nightly rustc -O --test suffix_array.rs | |
$ ./suffix_array.rs --bench | |
running 8 tests | |
test tests::test_make_suffix_array_gfx ... ignored | |
test tests::test_make_suffix_array_keen ... ignored | |
test tests::test_make_suffix_array_keen2 ... ignored | |
test tests::test_make_suffix_array_keen_unsafe ... ignored | |
test tests::bench_make_suffix_array_gfx ... bench: 481,257 ns/iter (+/- 13,803) | |
test tests::bench_make_suffix_array_keen ... bench: 165,853 ns/iter (+/- 15,270) | |
test tests::bench_make_suffix_array_keen2 ... bench: 95,576 ns/iter (+/- 3,215) | |
test tests::bench_make_suffix_array_keen_unsafe ... bench: 88,353 ns/iter (+/- 3,211) | |
test result: ok. 0 passed; 0 failed; 4 ignored; 4 measured | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(test)] | |
extern crate test; | |
// gfxさんの実装 | |
pub fn make_suffix_array(s: &str) -> Vec<i64> { | |
use std::collections::HashMap; | |
let mut map = HashMap::new(); | |
for i in 0..s.len() { | |
map.insert(&s[i..s.len()], i as i64); | |
} | |
use std::iter::FromIterator; | |
let mut suffixes = Vec::from_iter(map.keys()); | |
suffixes.sort(); | |
return suffixes.iter().map(|suffix| *map.get(*suffix).unwrap()).collect(); | |
} | |
// HashMapをBtreeに置き換えた実装。 | |
// BTreeは自動でキーをソートしてくれるので中間のVecを使うコストが減る。 | |
// Btreeのアロケーションコストはまだある。 | |
pub fn make_suffix_array_btree(s: &str) -> Vec<u64> { | |
use std::collections::BTreeMap; | |
let mut map = BTreeMap::new(); | |
for i in 0..s.len() { | |
map.insert(&s[i..s.len()], i as u64); | |
} | |
return map.iter().map(|(_, i)| *i).collect(); | |
} | |
// どうせsortが律速だしそもそもKVみたいに思い構造使う必要なくねってやつ。 | |
// Suffix Arrayだったらキー被りもないし。 | |
// 毎回Vecをアロケーションしている。 | |
pub fn make_suffix_array_vec(s: &str) -> Vec<u64> { | |
let mut suffixes = Vec::with_capacity(s.len()); | |
for i in 0..s.len() { | |
suffixes.push((&s[i..s.len()], i as u64)); | |
} | |
suffixes.sort_by(|&(ref s1, _), &(ref s2, _)| s1.cmp(s2)); | |
suffixes.into_iter().map(|(_, i)| i).collect() | |
} | |
// 完全にアロケーションコストを消したけど危ない実装。 | |
// 同じVecを使い回してアロケーションを減らすというもの。 | |
// ライフタイム制約のせいで普通は便利に使い回せないので | |
// transmuteを使って強制変換してから使っている。 | |
// 必ずself.0.clear()してから使わないとdangling pointerが残っている(筈)なので本当に危険。 | |
struct MakeSuffix(Vec<(&'static str, u64)>); | |
impl MakeSuffix { | |
pub fn new() -> Self { | |
MakeSuffix(Vec::new()) | |
} | |
pub fn make_suffix_array<'b>(&mut self, s: &'b str) -> Vec<u64> { | |
use std::mem::transmute; | |
self.0.clear(); | |
unsafe { | |
let suffixes = transmute::<_, &'b mut Vec<(&'b str, u64)>>(&mut self.0); | |
for i in 0..s.len() { | |
suffixes.push((&s[i..s.len()], i as u64)); | |
} | |
suffixes.sort_by(|&(ref s1, _), &(ref s2, _)| s1.cmp(s2)); | |
suffixes.into_iter().map(|&mut (_, i)| i).collect() | |
} | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
use test::Bencher; | |
#[test] | |
fn test_make_suffix_array_keen_unsafe() { | |
let mut ms = MakeSuffix::new(); | |
let a = ms.make_suffix_array("banana"); | |
assert_eq!(a, vec![5, 3, 1, 0, 4, 2]); | |
} | |
#[bench] | |
fn bench_make_suffix_array_keen_unsafe(b: &mut Bencher) { | |
let mut ms = MakeSuffix::new(); | |
b.iter(|| { | |
for _ in 0..test::black_box(1000) { | |
let _ = ms.make_suffix_array("banana"); | |
} | |
}) | |
} | |
#[test] | |
fn test_make_suffix_array_keen() { | |
let a = make_suffix_array_btree("banana"); | |
assert_eq!(a, vec![5, 3, 1, 0, 4, 2]); | |
} | |
#[bench] | |
fn bench_make_suffix_array_keen(b: &mut Bencher) { | |
b.iter(|| { | |
for _ in 0..test::black_box(1000) { | |
let _ = make_suffix_array_btree("banana"); | |
} | |
}) | |
} | |
#[test] | |
fn test_make_suffix_array_keen2() { | |
let a = make_suffix_array_vec("banana"); | |
assert_eq!(a, vec![5, 3, 1, 0, 4, 2]); | |
} | |
#[bench] | |
fn bench_make_suffix_array_keen2(b: &mut Bencher) { | |
b.iter(|| { | |
for _ in 0..test::black_box(1000) { | |
let _ = make_suffix_array_vec("banana"); | |
} | |
}) | |
} | |
#[test] | |
fn test_make_suffix_array_gfx() { | |
let a = make_suffix_array("banana"); | |
assert_eq!(a, vec![5, 3, 1, 0, 4, 2]); | |
} | |
#[bench] | |
fn bench_make_suffix_array_gfx(b: &mut Bencher) { | |
b.iter(|| { | |
for _ in 0..test::black_box(1000) { | |
let _ = make_suffix_array("banana"); | |
} | |
}) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment