Created
June 10, 2013 10:48
-
-
Save sarfata/5747916 to your computer and use it in GitHub Desktop.
Parses the HN homepage and counts articles with low scores so you can get a better idea of when it is a good time to post.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import codecs
import re
import sys

import requests
from bs4 import BeautifulSoup
# Python 2 only: force the implicit str/unicode conversion codec to UTF-8 so
# that printing non-ASCII titles does not fail. On Python 3 `reload` is not a
# builtin (NameError) and the default encoding is already UTF-8, so the hack
# is skipped there.
try:
    reload(sys)
    sys.setdefaultencoding("utf-8")
except NameError:
    pass

# Fetch the front page. The timeout keeps the script from hanging forever on
# a stalled connection, and raise_for_status() turns an HTTP error response
# into an explicit exception instead of an obscure IndexError further down.
page = requests.get('http://news.ycombinator.com/', timeout=30)
page.raise_for_status()

# Name the parser explicitly so the resulting tree does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(page.text, 'html.parser')

# The story list is the second table nested inside the first table on the
# page.
soupPosts = soup.find_all('table')[0].find_all('table')[1]

# Fully parsed posts, in page order.
posts = []
class Post(object):
    """A single story scraped from the front page.

    Attributes:
        position: Rank text of the story as it appears on the page.
        title: Text of the title cell.
        author: Name of the submitter, or '' when none is shown.
        points: Score as an integer; defaults to 0 so that it can always be
            compared against numeric thresholds.
    """

    def __init__(self, position="", title="", author="", points=0):
        # Per-instance attributes; calling Post() with no arguments still
        # works exactly as before.
        self.position = position
        self.title = title
        self.author = author
        self.points = points

    def __str__(self):
        """Return 'position - title - author - points'."""
        return "{} - {} - {} - {}".format(
            self.position, self.title, self.author, self.points)
# Walk the rows of the story table and assemble Post objects.
#
# Rows are classified by how many <td> cells they contain:
#   3 cells -> start of a story (rank in cell 0, title in cell 2)
#   2 cells -> detail row for the story in progress (score and author)
#   0 cells -> separator row; the story in progress is complete
#
# find_all('tr') is used rather than iterating the table directly, because
# direct iteration also yields bare text nodes (e.g. whitespace between
# rows), which have no find_all() method.
current = None
for tr in soupPosts.find_all('tr'):
    tds = tr.find_all('td')
    cell_count = len(tds)

    if cell_count == 3:
        current = Post()
        current.position = tds[0].text
        current.title = tds[2].text

    elif cell_count == 2:
        if current is None:
            # Detail-shaped row with no story in progress: ignore it.
            continue
        detail = tds[1]

        # Score: first run of digits inside the first <span>; 0 when the
        # span is missing or contains no digits.
        span = detail.find('span')
        match = re.search(r'[0-9]+', span.text) if span is not None else None
        current.points = int(match.group(0)) if match else 0

        # Author: text of the first link in the detail cell, if any.
        links = detail.find_all('a')
        current.author = links[0].text if links else ''

    elif cell_count == 0:
        if current is not None:
            posts.append(current)
            current = None
# Pro-tip: A good time to post to HN is when at least 2 front page articles
# have scores less than 15, one of which has a score less than 5.
#
# Single-argument print(...) calls are used so the script produces the same
# output on Python 2 and Python 3.
for p in posts:
    print(p)

# A post scoring below 5 is also counted in the below-15 tally.
lessThan5 = sum(1 for post in posts if post.points < 5)
lessThan15 = sum(1 for post in posts if post.points < 15)

print("Articles with score less than 5 points: {}".format(lessThan5))
print("Articles with score less than 15 points: {}".format(lessThan15))

if lessThan5 >= 1 and lessThan15 >= 2:
    print("It's a good time to post!")
else:
    print("This is not a good time to post!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment