Skip to content

Instantly share code, notes, and snippets.

@Robsteranium
Forked from attractivechaos/get_votes.pl
Created March 26, 2012 16:17
Show Gist options
  • Save Robsteranium/2206278 to your computer and use it in GitHub Desktop.
Save Robsteranium/2206278 to your computer and use it in GitHub Desktop.
Plot HackerNews polls on favorite and disliked programming languages
#!/usr/bin/env perl
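# This script collects poll votes from HackerNews and writes them, tab-separated,
# to votes.txt (sorted by total votes) and votes-alt.txt (sorted by share of the first poll).
# It does not plot anything itself; plot the output files separately (e.g. with gnuplot or R).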
use strict;
use warnings;
use IO::Socket::INET;
# Open a TCP connection and send a minimal HTTP/1.0 GET request.
#
# Arguments:
#   $host  - server name; sent in the Host header and, when no proxy is
#            given, also used as the connection target
#   $path  - request path, e.g. "/item?id=1"
#   $proxy - optional "host[:port]" of an HTTP proxy to connect through
#
# Returns the connected socket. The caller reads the raw response from it
# (nothing is read or parsed here) and is responsible for closing it.
# Dies if the connection cannot be opened or the request cannot be written.
sub http_get {
    my ($host, $path, $proxy) = @_;

    my $server = $proxy ? $proxy : $host;
    $server .= ":80" unless $server =~ /:\d+$/;    # port 80 by default

    # Through a proxy the request line carries the absolute URL;
    # on a direct connection it carries only the path.
    my $url = $proxy ? "http://$host$path" : $path;

    # IO::Socket::INET leaves the reason for a failed connection in $@.
    my $fh = IO::Socket::INET->new($server)
        or die("Fail to connect server: $server: $@\n");

    # The two trailing empty strings produce the blank line ending the header.
    print {$fh} join("\015\012", "GET $url HTTP/1.0", "Host: $host", "", "")
        or die("Fail to send request to $server: $!\n");

    return $fh;
}
# Fetch one poll page and record the score of every poll option on it.
#
# Arguments:
#   $host - web server to contact
#   $path - path of the poll page on that server
#   $hash - hash ref, updated in place: option name => array ref of scores.
#           Each call appends one score per option found, so calling this
#           for two polls leaves two scores under every shared option.
#
# The option named 'Other' is skipped. Every whitespace character in an
# option name is replaced by '-' so the name is a single token.
sub get_poll {
    my ($host, $path, $hash) = @_;
    my $fh = http_get($host, $path);

    # One poll option as it appears in the page markup:
    # capture 1 is the option text, capture 2 its point count.
    my $option_re = qr{<font color=#000000>([^<]+)</font></div></td></tr><tr><td></td><td class="default"><span class="comhead"><span id=score_\d+>(\d+) points<};

    while (my $line = <$fh>) {
        # A single line may hold several options, hence the /g loop.
        while ($line =~ /$option_re/g) {
            my ($lang, $cnt) = ($1, $2);
            next if $lang eq 'Other';
            $lang =~ s/\s/-/g;    # /g: names may contain more than one space
            push @{ $hash->{$lang} }, $cnt;
        }
    }
    close($fh);
}
# Write rows to a tab-separated file, one row per line.
# Each row is an array ref; its elements become the columns.
# Dies if the file cannot be opened or closed.
sub _write_votes {
    my ($file, @rows) = @_;
    open(my $out, '>', $file) or die("Cannot open $file for writing: $!\n");
    for my $row (@rows) {
        print {$out} join("\t", @{$row}), "\n";
    }
    # Buffered write errors only surface at close, so check it.
    close($out) or die("Cannot close $file: $!\n");
    return;
}

# Collect both polls and write the combined counts to two files.
#
# Only options that ended up with exactly two scores (one per poll) are kept.
# Each output row is: name, score from the first poll, score from the second.
#   votes.txt     - rows sorted by descending total of the two scores
#   votes-alt.txt - rows sorted by descending share of the first score in
#                   the total
sub main {
    my %hash;
    get_poll('news.ycombinator.com', '/item?id=3746692', \%hash);
    get_poll('news.ycombinator.com', '/item?id=3748961', \%hash);

    my @rows;
    for my $lang (keys %hash) {
        my @counts = @{ $hash{$lang} };
        push @rows, [$lang, @counts] if @counts == 2;
    }

    @rows = sort { ($b->[1] + $b->[2]) <=> ($a->[1] + $a->[2]) } @rows;
    _write_votes('votes.txt', @rows);

    # Share of the first score in the total. An option with zero points in
    # both polls would otherwise die with "Illegal division by zero".
    my $share = sub {
        my ($row) = @_;
        my $total = $row->[1] + $row->[2];
        return $total ? $row->[1] / $total : 0;
    };
    @rows = sort { $share->($b) <=> $share->($a) } @rows;
    _write_votes('votes-alt.txt', @rows);
}
# Script entry point: run the whole collection when the file is executed.
main();
# Plot the poll results as a log-log scatter of disliked vs. liked counts,
# each scaled by its column maximum, and save the figure as plot.png.
# Reads votes.txt: tab-separated, no header, columns language/liked/disliked.

# Only switch to the author's working directory when it exists, so the script
# also runs elsewhere from whichever directory holds votes.txt.
data_dir <- "/Users/robingower/cshop/2206278"
if (file.exists(data_dir)) setwd(data_dir)

library(ggplot2)

d <- read.csv("votes.txt", sep="\t", header=FALSE)
colnames(d) <- c("language","liked","disliked")

# Scale each count by the largest value in its column.
d$pliked <- d$liked/max(d$liked)
d$pdisliked <- d$disliked/max(d$disliked)

g <- ggplot(d, aes(pdisliked, pliked)) +
  scale_x_log10(name="Count of Dislikes as proportion of max (log base 10)") +
  scale_y_log10(name="Count of Favourite as proportion of max (log base 10)") +
  # Reference line y = x.
  geom_abline(intercept=0,slope=1,alpha=0.25) +
  geom_point(colour="blue", size=1) +
  # Language name above each point, raw "liked : disliked" counts below it.
  geom_text(aes(label=language), size=2, vjust=-1) +
  geom_text(aes(label=paste(liked,":",disliked,sep=" ")), size=1.75, vjust=2, alpha=0.5) +
  # opts(title=) was removed from ggplot2; ggtitle() is its replacement.
  ggtitle("HackerNews polls on favourite/ disliked programming languages")

ggsave("plot.png",g)
# summary(lm(pliked ~ pdisliked, data=d))
# no correlation!
@chewxy
Copy link

chewxy commented Mar 26, 2012

I updated it a little to show the response size:

# Variant of the poll plot in which each point's size reflects the total
# number of responses (liked + disliked) for that language.
# Reads votes.txt: tab-separated, no header, columns language/liked/disliked.
library(ggplot2)
d <- read.csv("votes.txt", sep="\t", header=FALSE)
colnames(d) <- c("language","liked","disliked")

# Scale each count by the largest value in its column.
d$pliked <- d$liked/max(d$liked)
d$pdisliked <- d$disliked/max(d$disliked)
# Was assigned to "respons" but read back as "response", which is NULL,
# so the size mapping was silently lost.
d$response <- d$liked + d$disliked

g <- ggplot(d, aes(pdisliked, pliked)) +
  scale_x_log10(name="Count of Dislikes as proportion of max (log base 10)") +
  scale_y_log10(name="Count of Favourite as proportion of max (log base 10)") +
  # Reference line y = x.
  geom_abline(intercept=0,slope=1,alpha=0.25) +
  # Refer to the column by name inside aes() rather than through d$.
  geom_point(colour="#43A2CA", aes(size=response)) +
  geom_text(aes(label=language), size=2, vjust=-1) +
  geom_text(aes(label=paste(liked,":",disliked,sep=" ")), size=1.75, vjust=2, alpha=0.5) +
  # opts(title=) was removed from ggplot2; ggtitle() is its replacement.
  ggtitle("HackerNews polls on favourite/ disliked programming languages")

ggsave("plot.png",g)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment