-
-
Save djhworld/d49f9acae96e32465e05f9d16930448c to your computer and use it in GitHub Desktop.
package main | |
import ( | |
"bufio" | |
"fmt" | |
"os" | |
) | |
type Counter struct { | |
uniqueItems map[string]int | |
} | |
func NewCounter() *Counter { | |
c := new(Counter) | |
c.uniqueItems = make(map[string]int) | |
return c | |
} | |
func (c *Counter) Add(item string) { | |
if _, ok := c.uniqueItems[item]; !ok { | |
c.uniqueItems[item] = 1 | |
} else { | |
c.uniqueItems[item] += 1 | |
} | |
} | |
func (c *Counter) Render() { | |
for k, v := range c.uniqueItems { | |
fmt.Printf("%d\t%s\n", v, k) | |
} | |
} | |
func main() { | |
counter := NewCounter() | |
scanner := bufio.NewScanner(os.Stdin) | |
for scanner.Scan() { | |
counter.Add(scanner.Text()) | |
} | |
counter.Render() | |
} |
use std::collections::HashMap; | |
use std::io; | |
struct Counter { | |
items: HashMap<String, usize>, | |
} | |
impl Counter { | |
fn new() -> Counter { | |
return Counter { | |
items: HashMap::with_capacity(1000), | |
}; | |
} | |
fn add(&mut self, st: String) { | |
let x = self.items.entry(st).or_insert(0); | |
*x += 1; | |
} | |
fn render(&self) { | |
for (key, val) in &self.items { | |
println!("{}\t{}", val, key); | |
} | |
} | |
} | |
fn main() { | |
let mut c = Counter::new(); | |
loop { | |
let mut line = String::new(); | |
let r = io::stdin().read_line(&mut line); | |
match r { | |
Ok(0) => break, | |
Ok(_) => { | |
line.pop(); //strip newline char | |
c.add(line); | |
} | |
Err(err) => { | |
println!("{}", err); | |
break; | |
} | |
}; | |
} | |
c.render(); | |
} |
After the locking changes rust version should be quicker. But on the next comparison you should give both versions the exact same map capacity and also func (c *Counter) Add(item string) {c.uniqueItems[item]++}
is what you want, which should be quicker.
Maybe adding -ldflags "-s -w"
could have a minimal positive effect on the runtime of the go version, but I've not tested it.
I've made a follow up gist with the improvements suggested here https://gist.github.com/djhworld/ced7eabca8e0c06d5b65917e87fb8745
I'll look into changing the go code too!
For the Rust version, deriving Default
and using the HashMap::entry
API might be more idiomatic:
#[derive(Debug, Clone, Default)]
pub struct Counter {
pub items: HashMap<String, usize>,
}
impl Counter {
pub fn count_rdr(&mut self, rdr: &mut impl BufRead) -> io::Result<()> {
let mut buffer = String::new();
loop {
buffer.clear();
match rdr.read_line(&mut buffer) {
Ok(0) => break,
Ok(_) => self.add(buffer.trim_end()),
Err(e) => {
eprintln!("ERR: {}", e);
break;
}
}
}
Ok(())
}
fn add(&mut self, line: &str) {
let count = self.items.entry(line.to_string()).or_insert(0);
*count += 1;
}
}
Using the entry API costs an allocation on every lookup which can be a severe performance penalty on something like this.
@Freaky good point! Your mention of allocation gave me the idea to look into refactoring the Counter to take a string slice and use that as the key instead of an owned String. This resulted in about a 34% improvement in counting performance over the get_mut
implementation using owned Strings. I haven't looked into the performance implications of reading all of stdin
before doing the counting, instead of using buffered reads. I suspect that clearing the buffer on each line read would incur some overhead, though. Interesting stuff.
It's worth noting that this rust program locks
stdin
once per iteration when it should probably be locked outside the loop. Reference function: https://doc.rust-lang.org/std/io/struct.Stdin.html#method.lock