Created
May 31, 2012 00:50
-
-
Save zackmdavis/2839935 to your computer and use it in GitHub Desktop.
Less Wrong karma histogram creator (author unknown)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'nokogiri' | |
require 'open-uri' | |
# Account whose comment history is scraped. Pass it as the first command-line
# argument, or change the default below.
# Make sure it matches the name in the 'lesswrong.com/user/USERNAME/comments/' URL.
username = ARGV.fetch(0, "Will_Newsome")
# Fetches a comment-listing page, records every comment on it, and keeps
# following the "Next" link until a page has none.
#
# Works entirely through globals that must be initialised before the call:
#   $pages - printed, then incremented, once per page fetched
#   $karma - histogram; $karma[score] is incremented for each comment
#   $links - receives one [score, permalink_href] pair per comment
#   $last  - set to the href of the most recently followed "Next" link
#
# @param url [String] address of the first page to fetch
# @return [nil]
def parse_page(url)
  # Iterates instead of recursing so a long comment history cannot exhaust the stack.
  loop do
    puts $pages
    $pages += 1

    # open-uri no longer patches Kernel#open for URLs (removed in Ruby 3.0),
    # so go through URI.open; the block form closes the response when done.
    page = URI.open(url) { |io| Nokogiri::HTML.parse(io) }

    # This extracts the karma. We get each one twice, though.
    scores = page.xpath('//span[@class="votes "]').children.map do |node|
      node.content[/-?\d+/].to_i
    end

    # This extracts the permalink.
    hrefs = page.xpath('//li[@class="permalink"]').children.map { |node| node["href"] }

    # Update the karma histogram. Scores arrive duplicated (see above), so the
    # y-th permalink is paired with the score at index 2*y.
    hrefs.each_with_index do |href, y|
      score = scores[2 * y]
      $karma[score] += 1
      $links.push([score, href])
    end

    # Move on to the next page, if there is one.
    next_link = page.xpath('//a[text()="Next"]')[0]
    break unless next_link

    $last = next_link["href"]
    url = $last
  end
end
# Global state that parse_page fills in while it walks the comment pages.
$karma = Hash.new(0) # karma score => number of comments with that score
$links = []          # [karma score, permalink href] pairs
$pages = 0           # number of pages fetched so far
$last = ""           # href of the last "Next" link that was followed

parse_page("http://lesswrong.com/user/#{username}/comments/")
puts $last
puts $pages

# Now that we have a karma hash, output it as "score<TAB>count", lowest score first.
# The block form of File.open closes the file even if a write raises.
File.open("karma.tsv", "w") do |outfile|
  $karma.sort.each { |score, count| outfile.puts("#{score}\t#{count}") }
end

# Now that we have the list of links, output it as "score<TAB>href", highest score first.
File.open("links.tsv", "w") do |outfile|
  $links.sort.reverse_each { |score, href| outfile.puts("#{score}\t#{href}") }
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Summarises a karma histogram file (one "score<TAB>count" row per line):
# prints every row, then totals and the proportions of down-/up-voted comments.


def parse_karma_histogram(lines):
    """Parse histogram rows into ``[score, count]`` lists of integers.

    Fields may be separated by any whitespace (tabs or spaces); blank lines
    are skipped. Raises ``ValueError`` if a field is not an integer.
    """
    records = []
    for line in lines:
        fields = line.split()
        if fields:
            records.append([int(field) for field in fields])
    return records


def summarize_karma(records):
    """Aggregate ``[score, count]`` records into summary totals.

    Returns a dict with the keys ``comments`` (total number of comments),
    ``karma`` (sum of score * count), ``negative`` (comments with score < 0),
    ``minus_four`` (score <= -4) and ``plus_four`` (score >= 4).
    """
    summary = {
        "comments": 0,
        "karma": 0,
        "negative": 0,
        "minus_four": 0,
        "plus_four": 0,
    }
    for record in records:
        score, count = record[0], record[1]
        summary["comments"] += count
        summary["karma"] += score * count
        if score < 0:
            summary["negative"] += count
        if score <= -4:
            summary["minus_four"] += count
        if score >= 4:
            summary["plus_four"] += count
    return summary


def main(path='karma.tsv'):
    """Read the histogram at ``path`` and print the records and the summary."""
    # The context manager closes the file once it has been read.
    with open(path) as listing:
        records = parse_karma_histogram(listing)
    for record in records:
        print(record)

    summary = summarize_karma(records)
    comments = summary["comments"]
    print("total comments", comments)
    print("total karma", summary["karma"])
    print("downvoted comments", summary["negative"])
    print("karma <= -4 comments", summary["minus_four"])
    # The original printed the minus-four count under this label.
    print("karma >= 4 comments", summary["plus_four"])

    # Proportions are undefined for an empty histogram; skip them rather than
    # fail with ZeroDivisionError.
    if comments:
        print("proportion downvoted", summary["negative"] / comments)
        print("proportion downvoted to -4 or below", summary["minus_four"] / comments)
        print("proportion upvoted to 4 or above", summary["plus_four"] / comments)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment