Created
December 16, 2022 18:36
-
-
Save gleicon/4155f45d6003bd13cac1085abd2a2516 to your computer and use it in GitHub Desktop.
rust and symspell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package]
name = "aho-exploration"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
aho-corasick = "0.7.19"
symspell = "0.4.3"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use aho_corasick::AhoCorasickBuilder;
use symspell::{AsciiStringStrategy, SymSpell, Verbosity};
use std::time::{Duration, Instant};
fn main() { | |
let patterns = &["apple", "maple", "snapple"]; | |
let haystack = "Nobody likes maple in their apple flavored Snapple."; | |
let haystack2 = "Nobody likes mapleapple flavored Snapple."; | |
let ac = AhoCorasickBuilder::new() | |
.ascii_case_insensitive(true) | |
.build(patterns); | |
let mut matches = vec![]; | |
for mat in ac.find_iter(haystack2) { | |
println!("{:?} - pattern {} -> haystack {}", mat, patterns[mat.pattern()], &haystack2[mat.start()..mat.end()]); | |
matches.push((mat.pattern(), mat.start(), mat.end())); | |
} | |
println!("matches: {:?}", matches); | |
// assert_eq!(matches, vec![ | |
// (1, 13, 18), | |
// (0, 28, 33), | |
// (2, 43, 50), | |
// ]); | |
let mut symspell: SymSpell<AsciiStringStrategy> = SymSpell::default(); | |
symspell.load_dictionary("data/frequency_dictionary_en_82_765.txt", 0, 1, " "); | |
symspell.load_bigram_dictionary( | |
"./data/frequency_bigramdictionary_en_243_342.txt", | |
0, | |
2, | |
" " | |
); | |
let start = Instant::now(); | |
let suggestions = symspell.lookup("roket", Verbosity::Top, 2); | |
println!("sentence 0: {:?}", suggestions); | |
let sentence = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixtgrade and ins pired him"; | |
let compound_suggestions = symspell.lookup_compound(sentence, 2); | |
println!("sentence 1: {:?}", compound_suggestions); | |
let sentence = "whereisthelove"; | |
let segmented = symspell.word_segmentation(sentence, 2); | |
println!("sentence 2: {:?}", segmented); | |
let duration = start.elapsed(); | |
println!("Time elapsed in expensive_function() is: {:?}", duration); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment