Skip to content

Instantly share code, notes, and snippets.

@skeptrunedev
Created August 3, 2024 10:25
Show Gist options
  • Save skeptrunedev/1179151db78ed7ce8c0347f8c2d60f64 to your computer and use it in GitHub Desktop.
Save skeptrunedev/1179151db78ed7ce8c0347f8c2d60f64 to your computer and use it in GitHub Desktop.
query-highlighting-system
// TODO: latency optimize this so it can be uncommented
//
// Fallback phrase detection, run only when `phrases` is still empty: build a
// proximity regex from the non-stop-word terms of `potential_query` and store
// every span of `content` it matches as a phrase.
if phrases.is_empty() {
    let query_words = potential_query.split_whitespace().collect_vec();
    // Only queries of at most five whitespace-separated words are handled here.
    if query_words.len() > 5 {
        continue;
    }

    // Reuse the split above instead of tokenizing the query a second time.
    let significant_words = query_words
        .iter()
        .copied()
        .filter(|word| !stop_words.contains(&word.to_lowercase()))
        .collect::<Vec<&str>>();
    // Need at least two significant words, and at least one stop word must
    // have been removed; otherwise this query is skipped.
    if significant_words.len() < 2 || significant_words.len() == query_words.len() {
        continue;
    }

    // Escape every word so regex metacharacters in the query (e.g. `c++`,
    // `(foo`, `a.b`) are matched literally instead of producing an invalid
    // pattern or matching unintended text.
    let escaped_words = significant_words
        .iter()
        .map(|word| regex::escape(word))
        .collect_vec();

    // Between two consecutive query words allow up to six intervening words.
    let gap = "\\W+(?:\\w+\\W+){0,6}?";
    // Shape for two words:
    // \b(?:word1\W+(?:\w+\W+){0,6}?word2|word2\W+(?:\w+\W+){0,6}?word1)\b
    // i.e. the words in query order, or in fully reversed order.
    // NOTE(review): stop-word filtering lowercases the word, but this pattern
    // is case-sensitive -- confirm whether `(?i)` matching is wanted.
    let query_regex = format!(
        "\\b(?:{}|{})\\b",
        escaped_words.join(gap),
        escaped_words.iter().rev().join(gap)
    );

    // A pattern that fails to compile leaves `phrases` untouched.
    if let Ok(re) = regex::Regex::new(&query_regex) {
        phrases = re
            .find_iter(&content)
            .map(|found| found.as_str().to_string())
            .collect_vec();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment