-
-
Save djhworld/d49f9acae96e32465e05f9d16930448c to your computer and use it in GitHub Desktop.
| package main | |
| import ( | |
| "bufio" | |
| "fmt" | |
| "os" | |
| ) | |
| type Counter struct { | |
| uniqueItems map[string]int | |
| } | |
| func NewCounter() *Counter { | |
| c := new(Counter) | |
| c.uniqueItems = make(map[string]int) | |
| return c | |
| } | |
| func (c *Counter) Add(item string) { | |
| if _, ok := c.uniqueItems[item]; !ok { | |
| c.uniqueItems[item] = 1 | |
| } else { | |
| c.uniqueItems[item] += 1 | |
| } | |
| } | |
| func (c *Counter) Render() { | |
| for k, v := range c.uniqueItems { | |
| fmt.Printf("%d\t%s\n", v, k) | |
| } | |
| } | |
| func main() { | |
| counter := NewCounter() | |
| scanner := bufio.NewScanner(os.Stdin) | |
| for scanner.Scan() { | |
| counter.Add(scanner.Text()) | |
| } | |
| counter.Render() | |
| } |
| use std::collections::HashMap; | |
| use std::io; | |
| struct Counter { | |
| items: HashMap<String, usize>, | |
| } | |
| impl Counter { | |
| fn new() -> Counter { | |
| return Counter { | |
| items: HashMap::with_capacity(1000), | |
| }; | |
| } | |
| fn add(&mut self, st: String) { | |
| let x = self.items.entry(st).or_insert(0); | |
| *x += 1; | |
| } | |
| fn render(&self) { | |
| for (key, val) in &self.items { | |
| println!("{}\t{}", val, key); | |
| } | |
| } | |
| } | |
| fn main() { | |
| let mut c = Counter::new(); | |
| loop { | |
| let mut line = String::new(); | |
| let r = io::stdin().read_line(&mut line); | |
| match r { | |
| Ok(0) => break, | |
| Ok(_) => { | |
| line.pop(); //strip newline char | |
| c.add(line); | |
| } | |
| Err(err) => { | |
| println!("{}", err); | |
| break; | |
| } | |
| }; | |
| } | |
| c.render(); | |
| } |
It's worth noting that this rust program locks stdin once per iteration when it should probably be locked outside the loop. Reference function: https://doc.rust-lang.org/std/io/struct.Stdin.html#method.lock
After the locking changes rust version should be quicker. But on the next comparison you should give both versions the exact same map capacity and also func (c *Counter) Add(item string) {c.uniqueItems[item]++} is what you want, which should be quicker.
Maybe adding -ldflags "-s -w" could have a minimal positive effect on the runtime of the go version, but I've not tested it.
I've made a follow up gist with the improvements suggested here https://gist.github.com/djhworld/ced7eabca8e0c06d5b65917e87fb8745
I'll look into changing the go code too!
For the Rust version, deriving Default and using the HashMap::entry API might be more idiomatic:
#[derive(Debug, Clone, Default)]
pub struct Counter {
pub items: HashMap<String, usize>,
}
impl Counter {
pub fn count_rdr(&mut self, rdr: &mut impl BufRead) -> io::Result<()> {
let mut buffer = String::new();
loop {
buffer.clear();
match rdr.read_line(&mut buffer) {
Ok(0) => break,
Ok(_) => self.add(buffer.trim_end()),
Err(e) => {
eprintln!("ERR: {}", e);
break;
}
}
}
Ok(())
}
fn add(&mut self, line: &str) {
let count = self.items.entry(line.to_string()).or_insert(0);
*count += 1;
}
}Using the entry API costs an allocation on every lookup which can be a severe performance penalty on something like this.
@Freaky good point! Your mention of allocation gave me the idea to look into refactoring the Counter to take a string slice and use that as the key instead of an owned String. This resulted in about a 34% improvement in counting performance over the get_mut implementation using owned Strings. I haven't looked into the performance implications of reading all of stdin before doing the counting, instead of using buffered reads. I suspect that clearing the buffer on each line read would incur some overhead, though. Interesting stuff.
go version seems to be about 40% quicker than the rust version
after 100 runs
go version takes about 0.10 seconds to run
rust version about 0.17 seconds
measured using script
timeit.shwith a file (file.txt) containing 1,000,000 strings (500,000foo, 500,000bar)executables built with