Created
June 29, 2022 12:37
-
-
Save pfmoore/f1a696ff231ea38dd8e6e9d3b4c8648b to your computer and use it in GitHub Desktop.
Count words in a file, in Rust
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use clap::Parser; | |
use std::fs::File; | |
use std::io; | |
use std::path::Path; | |
use std::io::BufRead; | |
use std::collections::HashMap; | |
use std::cmp::Reverse; | |
use crossbeam::thread; | |
/// Maps each word to the number of times it has been seen.
type Counts = HashMap<String, u32>;
fn words_in_file<P: AsRef<Path>>(filename: P) -> std::io::Result<Counts> { | |
let file = File::open(filename)?; | |
let buf_reader = io::BufReader::new(file); | |
let mut words = HashMap::new(); | |
for line in buf_reader.lines() { | |
for word in line?.split_whitespace() { | |
if let Some(x) = words.get_mut(word) { | |
*x += 1; | |
} else { | |
words.insert(word.to_owned(), 1); | |
} | |
} | |
} | |
Ok(words) | |
} | |
fn total_counts<I>(counts: I) -> Counts | |
where | |
I: Iterator<Item=Counts> | |
{ | |
let mut total = Counts::new(); | |
for c in counts { | |
for (k, v) in c { | |
if let Some(x) = total.get_mut(&k) { | |
*x += v; | |
} else { | |
total.insert(k, v); | |
} | |
} | |
} | |
total | |
} | |
fn first_n(words: Counts, len: usize) -> Vec<(String, u32)> { | |
if len == 0 { | |
return Vec::new(); | |
} | |
let mut vec: Vec<(String, u32)> = words.into_iter().collect(); | |
if len < vec.len() { | |
// Put the len largest at the start and throw the rest away | |
vec.select_nth_unstable_by_key(len - 1, |(_,n)| Reverse(*n)); | |
vec.truncate(len); | |
} | |
vec.sort_unstable_by_key(|(_,n)| Reverse(*n)); | |
vec | |
} | |
/// Search for a pattern in a file and display the lines that contain it. | |
#[derive(Parser)] | |
struct Args { | |
/// Nunber of words to report | |
count: usize, | |
/// The paths to the files to read | |
paths: Vec<String>, | |
} | |
fn main() { | |
let args = Args::parse(); | |
let results = thread::scope(|s| { | |
let threads = args.paths.iter().map(|p| s.spawn(move |_| { | |
words_in_file(&p) | |
})); | |
total_counts(threads.map(|t| (t.join().unwrap().unwrap()))) | |
}).unwrap(); | |
for x in first_n(results, args.count) { | |
println!("{:?}", x); | |
}; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment