Last active
January 2, 2024 21:58
-
-
Save Butch78/893561f77de456a096ed6d1e672c4bed to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Implementation for the 1 Billion Row Challenge, set here: https://www.morling.dev/blog/one-billion-row-challenge/ | |
use std::collections::BTreeMap; | |
use std::fmt::{Display, Formatter}; | |
use std::fs::File; | |
use std::io::{BufRead, BufReader}; | |
use std::str::FromStr; | |
use std::time::Instant; | |
/// Running statistics for the measurements recorded under one name.
///
/// `sum` and `measurements` are accumulated while the input is read; `mean`
/// is derived from them once all input has been consumed.
#[derive(Debug, Clone, PartialEq)]
struct Aggregate {
    /// Smallest measurement seen so far.
    min: f64,
    /// Largest measurement seen so far.
    max: f64,
    /// Arithmetic mean (`sum / measurements`), filled in after reading.
    mean: f64,
    /// Sum of all measurements seen so far.
    sum: f64,
    /// Number of measurements seen so far.
    measurements: usize,
}
impl Display for Aggregate { | |
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | |
write!(f, "{:.1}/{:.1}/{:.1}", self.min, self.mean, self.max) | |
} | |
} | |
fn main() { | |
let now = Instant::now(); | |
let f = File::open("../measurements_1b.txt").unwrap(); | |
let reader = BufReader::new(f); | |
let mut res_map = BTreeMap::<String, Aggregate>::new(); | |
for line in reader.lines() { | |
if let Some((name_str, measurement_str)) = line.unwrap().split_once(";") { | |
let name_string = name_str.to_string(); | |
let measurement = f64::from_str(measurement_str.trim()).unwrap(); | |
if let Some(aggr) = res_map.get_mut(&name_string) { | |
if measurement.lt(&aggr.min) { | |
aggr.min = measurement; | |
} | |
if measurement.gt(&aggr.min) { | |
aggr.max = measurement; | |
} | |
// Note: for performance, we calculate the mean at the end | |
aggr.sum += measurement; | |
aggr.measurements += 1; | |
} else { | |
res_map.insert(name_string, Aggregate { | |
min: measurement, | |
max: measurement, | |
mean: measurement, | |
sum: measurement, | |
measurements: 1 | |
}); | |
} | |
} | |
} | |
for aggr in res_map.values_mut() { | |
aggr.mean = aggr.sum / (aggr.measurements as f64) | |
} | |
for (name, aggr) in res_map { | |
println!("{}={}", name, aggr) | |
} | |
println!("Time={} μs", now.elapsed().as_micros()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This allocates for each line. That can be avoided by having a
let mut line: String
and reusing that buffer. Lending iterators will hopefully fix that at some point in the future. You could also avoid the `to_string()` allocation upfront, since the owned name is only needed in the else branch.