Last active
April 30, 2025 04:43
-
-
Save MikuroXina/cb16242aa80791901f04e8fff8a86956 to your computer and use it in GitHub Desktop.
A trie (prefix tree) implementation in Rust.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// One node of a [`Trie`] over an alphabet of `L` symbols.
///
/// `children[i]` is the subtree reached by following symbol `i`, and `words`
/// counts how many inserted sequences end exactly at this node.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct TrieNode<const L: usize> {
    children: [Option<Box<TrieNode<L>>>; L],
    // invariant: a non-root node without children must have `words > 0`.
    // `Trie::remove` prunes every node that would otherwise violate this.
    words: usize,
}

impl<const L: usize> TrieNode<L> {
    /// Creates a node with no children and no words ending at it.
    fn new() -> Self {
        TrieNode {
            children: std::array::from_fn(|_| None),
            words: 0,
        }
    }

    /// Returns the child for `index`, or `None` if it is absent or `index >= L`.
    fn child(&self, index: usize) -> Option<&Self> {
        self.children.get(index)?.as_deref()
    }

    /// Mutable counterpart of [`TrieNode::child`].
    fn child_mut(&mut self, index: usize) -> Option<&mut Self> {
        self.children.get_mut(index)?.as_deref_mut()
    }

    /// Returns the child for `index`, creating an empty one if it is absent.
    ///
    /// Panics if `index >= L`.
    fn child_or_insert(&mut self, index: usize) -> &mut Self {
        self.children[index].get_or_insert_with(|| Box::new(Self::new()))
    }

    /// Returns `true` if at least one child slot is occupied.
    fn has_children(&self) -> bool {
        self.children.iter().any(|child| child.is_some())
    }

    /// Returns `true` if any child slot other than `index` is occupied.
    fn has_child_other_than(&self, index: usize) -> bool {
        self.children
            .iter()
            .enumerate()
            .any(|(i, child)| i != index && child.is_some())
    }
}

/// Trie (prefix tree) storing sequences of symbol indices in `0..L`.
///
/// The trie is a multiset: inserting the same sequence several times is
/// counted, and each call to [`Trie::remove`] removes a single occurrence.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Trie<const L: usize> {
    root: TrieNode<L>,
}

impl<const L: usize> Default for Trie<L> {
    fn default() -> Self {
        Self::new()
    }
}

impl<const L: usize> Trie<L> {
    /// Creates a new, empty tree.
    pub fn new() -> Self {
        Self {
            root: TrieNode::new(),
        }
    }

    /// Follows `seq` from the root and returns the node it ends at, if the
    /// whole path exists.
    fn get_node(&self, seq: &[usize]) -> Option<&TrieNode<L>> {
        let mut current = &self.root;
        for &idx in seq {
            current = current.child(idx)?;
        }
        Some(current)
    }

    /// Mutable counterpart of [`Trie::get_node`]; never creates nodes.
    fn get_node_mut(&mut self, seq: &[usize]) -> Option<&mut TrieNode<L>> {
        let mut current = &mut self.root;
        for &idx in seq {
            current = current.child_mut(idx)?;
        }
        Some(current)
    }

    /// Follows `seq` from the root, creating missing nodes on the way, and
    /// returns the node it ends at.
    fn dig_node(&mut self, seq: &[usize]) -> &mut TrieNode<L> {
        let mut current = &mut self.root;
        for &idx in seq {
            current = current.child_or_insert(idx);
        }
        current
    }

    /// Checks whether the prefix sequence is contained.
    ///
    /// The empty prefix is always reported as contained, even for an empty
    /// tree. Indices that are `>= L` never match.
    pub fn contains_prefix(&self, seq: &[usize]) -> bool {
        self.get_node(seq).is_some()
    }

    /// Checks whether the exact sequence is contained.
    ///
    /// Indices that are `>= L` never match.
    pub fn contains(&self, seq: &[usize]) -> bool {
        self.get_node(seq).map_or(false, |node| node.words > 0)
    }

    /// Inserts the index sequence made from a string.
    ///
    /// # Panics
    ///
    /// Panics if any index in `seq` is `>= L`. The check happens before any
    /// node is created, so a rejected insertion leaves the tree untouched.
    pub fn insert(&mut self, seq: &[usize]) {
        assert!(
            seq.iter().all(|&idx| idx < L),
            "every index must be less than the alphabet size"
        );
        let node = self.dig_node(seq);
        node.words += 1;
    }

    /// Deletes one occurrence of the index sequence made from a string.
    ///
    /// Returns `true` if the sequence was contained, `false` otherwise. Other
    /// stored sequences, including those that are a prefix or an extension of
    /// `seq`, are left intact.
    pub fn remove(&mut self, seq: &[usize]) -> bool {
        // Read-only walk: verify the path exists and remember the deepest node
        // on it that has to survive the removal. A node survives if a word
        // ends at it or if it has a child that is not on this path. The root
        // (depth 0) always survives, hence the initial value.
        let mut current = &self.root;
        let mut keep_depth = 0;
        for (depth, &idx) in seq.iter().enumerate() {
            if current.words > 0 || current.has_child_other_than(idx) {
                keep_depth = depth;
            }
            current = match current.child(idx) {
                Some(next) => next,
                None => return false,
            };
        }
        if current.words == 0 {
            return false;
        }

        // The target only disappears when this is its last occurrence and
        // nothing hangs below it. The root itself is never pruned.
        let prune = !seq.is_empty() && current.words == 1 && !current.has_children();
        if prune {
            // Cut the whole chain of now-useless nodes below the survivor.
            let survivor = self
                .get_node_mut(&seq[..keep_depth])
                .expect("path was verified by the read-only walk");
            survivor.children[seq[keep_depth]] = None;
        } else {
            let target = self
                .get_node_mut(seq)
                .expect("path was verified by the read-only walk");
            target.words -= 1;
        }
        true
    }
}
/// Maps an ASCII lowercase string (`'a'..='z'`) to alphabet indices, so that
/// `'a'` becomes `0` and `'z'` becomes `25`.
///
/// It is a helper function for building the index sequences that `Trie` takes.
///
/// # Panics
///
/// Panics if `s` contains any character that is not an ASCII lowercase letter.
pub fn ascii_lowercase_indices(s: &str) -> Vec<usize> {
    assert!(s.chars().all(|c| c.is_ascii_lowercase()));
    // After the check above every byte is in `b'a'..=b'z'`, so the
    // subtraction below cannot underflow.
    let mut indices = Vec::with_capacity(s.len());
    for byte in s.bytes() {
        indices.push(usize::from(byte - b'a'));
    }
    indices
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment