-
-
Save Robsteranium/2206278 to your computer and use it in GitHub Desktop.
Plot HackerNews polls on favorite and disliked programming languages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
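# This script collects poll votes from HackerNews and writes them to the
# tab-separated files votes.txt and votes-alt.txt (language, votes per poll).
# No plot is produced by this script itself; plot the output files separately
# (e.g. with gnuplot or R).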
use strict; | |
use warnings; | |
use IO::Socket::INET; | |
# Open a TCP connection and send a minimal HTTP/1.0 GET request.
#
# Arguments:
#   $host  - host name; always sent in the Host header
#   $path  - request path (including any query string)
#   $proxy - optional "host" or "host:port" of an HTTP proxy. When given, the
#            socket connects to the proxy and the request line carries the
#            absolute URL "http://$host$path" instead of the bare path.
#
# Returns the connected socket. The request has already been written; the
# caller reads the raw response (status line and headers included) from it
# and is responsible for closing it.
# Dies if the connection cannot be established.
sub http_get {
    my ($host, $path, $proxy) = @_;

    my ($endpoint, $target);
    if ($proxy) {
        $endpoint = $proxy;
        $target   = "http://$host$path";
    }
    else {
        $endpoint = $host;
        $target   = $path;
    }

    # port 80 by default
    $endpoint = "$endpoint:80" if $endpoint !~ /:\d+$/;

    my $sock = IO::Socket::INET->new($endpoint);
    die("Fail to connect server: $endpoint.\n") unless $sock;

    # Request line, Host header, then an empty line to terminate the headers.
    my @request_lines = ("GET $target HTTP/1.0", "Host: $host", "", "");
    print $sock join("\015\012", @request_lines);

    return $sock;
}
# Fetch one HackerNews poll page and record the score of every poll option.
#
# Arguments:
#   $host - host name passed to http_get()
#   $path - request path of the poll item
#   $hash - hash reference that is updated in place: for each option found,
#           its score is appended to the array stored under the option name,
#           so calling this once per poll yields one score per poll.
#
# The option named 'Other' is skipped. Every whitespace character in an
# option name is replaced by '-' so that the name is a single
# whitespace-free token in the output files.
#
# The response is scanned line by line, so an option is only recognised when
# its name and score markup sit on the same line of the response.
# Returns the result of closing the connection.
sub get_poll {
    my ($host, $path, $hash) = @_;

    my $fh = http_get($host, $path);
    while (my $line = <$fh>) {
        # A single line may hold several options, hence the /g loop.
        while ($line =~ m{<font color=#000000>([^<]+)</font></div></td></tr><tr><td></td><td class="default"><span class="comhead"><span id=score_\d+>(\d+) points<}g) {
            # Copy the captures at once, before any other regex can clobber them.
            my ($lang, $cnt) = ($1, $2);
            next if $lang eq 'Other';
            $lang =~ s/\s/-/g;    # /g: replace ALL whitespace, not just the first
            push @{ $hash->{$lang} }, $cnt;
        }
    }
    return close($fh);
}
# Write rows to $file, one row per line with its fields joined by tabs.
# $rows is a reference to an array of array references.
# Dies with the file name and OS error if the file cannot be opened or if
# the buffered output cannot be flushed on close.
sub _write_rows {
    my ($file, $rows) = @_;

    open(my $out, '>', $file) or die("Fail to open $file for writing: $!\n");
    for my $row (@{$rows}) {
        print {$out} join("\t", @{$row}), "\n";
    }
    close($out) or die("Fail to write $file: $!\n");
    return;
}

# Fraction of a row's votes that came from the first poll.
# Returns 0 for a row with no votes at all instead of dividing by zero.
sub _first_poll_share {
    my ($row) = @_;

    my $total = $row->[1] + $row->[2];
    return $total ? $row->[1] / $total : 0;
}

# Collect the scores of two HackerNews polls and write two tab-separated
# tables with the columns: language, first-poll score, second-poll score.
#
#   votes.txt     - sorted by total score (both polls), highest first
#   votes-alt.txt - sorted by the first poll's share of the total, highest first
#
# Only languages with exactly two recorded scores are written.
sub main {
    my %hash;
    get_poll('news.ycombinator.com', '/item?id=3746692', \%hash);
    get_poll('news.ycombinator.com', '/item?id=3748961', \%hash);

    # One row per language: [name, score in first poll, score in second poll].
    my @rows = map  { [ $_, @{ $hash{$_} } ] }
               grep { @{ $hash{$_} } == 2 }
               keys %hash;

    @rows = sort { ($b->[1] + $b->[2]) <=> ($a->[1] + $a->[2]) } @rows;
    _write_rows('votes.txt', \@rows);

    # Re-sort the already ordered list, as the original script did.
    @rows = sort { _first_poll_share($b) <=> _first_poll_share($a) } @rows;
    _write_rows('votes-alt.txt', \@rows);

    return;
}

main();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the HackerNews poll results: liked vs. disliked votes per language.
#
# Input:  votes.txt - tab-separated, no header row; columns are
#                     language, liked count, disliked count.
# Output: plot.png  - written to the working directory.

# Switch to the original author's data directory only when it exists, so the
# script does not abort on other machines; otherwise votes.txt is read from
# (and plot.png written to) the current working directory.
data_dir <- "/Users/robingower/cshop/2206278"
if (file.exists(data_dir)) setwd(data_dir)

library(ggplot2)

d <- read.csv("votes.txt", sep="\t", header=FALSE)
colnames(d) <- c("language", "liked", "disliked")

# Scale each count by the column maximum so both axes top out at 1.
d$pliked <- d$liked / max(d$liked)
d$pdisliked <- d$disliked / max(d$disliked)

# Both axes are log10-scaled, so a language with a zero count has no finite
# position on that axis (log10(0) is -Inf).
g <- ggplot(d, aes(pdisliked, pliked)) +
  scale_x_log10(name="Count of Dislikes as proportion of max (log base 10)") +
  scale_y_log10(name="Count of Favourite as proportion of max (log base 10)") +
  # Reference diagonal: equal liked and disliked proportions.
  geom_abline(intercept=0, slope=1, alpha=0.25) +
  geom_point(colour="blue", size=1) +
  # Language name above each point, raw "liked : disliked" counts below it.
  geom_text(aes(label=language), size=2, vjust=-1) +
  geom_text(aes(label=paste(liked, ":", disliked, sep=" ")), size=1.75, vjust=2, alpha=0.5) +
  # ggtitle() replaces opts(title=...), which current ggplot2 no longer provides.
  ggtitle("HackerNews polls on favourite/ disliked programming languages")

ggsave("plot.png", g)

# summary(lm(pliked ~ pdisliked, data=d))
# no correlation!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I updated it a little to show the response size: