Created
October 20, 2020 02:57
-
-
Save siedentop/cb4d85cabad9ec035512d8c76c98052a to your computer and use it in GitHub Desktop.
Counting words, inspired by http://jmoiron.net/blog/cpp-deserves-its-bad-reputation/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use anyhow::Result; | |
use std::fs::File; | |
use std::io::{BufRead, BufReader}; | |
use walkdir::WalkDir; | |
/// Inspired by http://jmoiron.net/blog/cpp-deserves-its-bad-reputation/ | |
fn main() -> Result<()> { | |
let mut counter = std::collections::BTreeMap::new(); | |
for entry in WalkDir::new(".") | |
.follow_links(true) | |
.into_iter() | |
.filter_map(|e| e.ok()) | |
.filter(|e| e.file_name().to_string_lossy().ends_with(".txt")) | |
{ | |
let reader = BufReader::new(File::open(entry.into_path())?); | |
for line in reader.lines() { | |
for word in line?.split_whitespace() { | |
let w = word.to_lowercase().to_string(); | |
*counter.entry(w).or_insert(0) += 1; | |
} | |
} | |
} | |
let mut tops: Vec<_> = counter.iter().map(|(w, c)| (c, w)).collect(); | |
tops.sort_by_key(|(&c, w)| (std::cmp::Reverse(c), *w)); | |
for (c, w) in tops.iter().take(10) { | |
println!("{} - {}", w, c); | |
} | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment