Skip to content

Instantly share code, notes, and snippets.

@sarfata
Created June 10, 2013 10:48
Show Gist options
  • Save sarfata/5747916 to your computer and use it in GitHub Desktop.
Save sarfata/5747916 to your computer and use it in GitHub Desktop.
Parses the Hacker News homepage and counts the articles with low scores, so you can get a better idea of when it is a good time to post.
# -*- coding: utf-8 -*-
import sys
import codecs
import re
from bs4 import BeautifulSoup
import requests
# Python 2-only idiom: reload the `sys` module, then set the interpreter-wide
# default string encoding to UTF-8. The two statements must stay in this
# order (on Python 2, sys.setdefaultencoding is normally not available after
# interpreter start-up until `sys` has been reloaded).
# NOTE(review): presumably here so that printing non-ASCII titles/authors does
# not raise UnicodeEncodeError -- confirm before removing.
reload(sys)
sys.setdefaultencoding("utf-8")
# Download the front page. The timeout keeps the script from hanging forever
# on a stalled connection, and raise_for_status() turns an HTTP error response
# into an exception instead of letting an error page be parsed as the listing.
page = requests.get('https://news.ycombinator.com/', timeout=10)
page.raise_for_status()

# Name the parser explicitly: when it is omitted BeautifulSoup picks whichever
# parser happens to be installed, so the resulting tree can differ between
# machines.
soup = BeautifulSoup(page.text, 'html.parser')

# Second <table> found inside the page's first <table>; the parsing loop
# further down treats its rows as the story listing.
# NOTE(review): this lookup is tied to the page layout -- an IndexError here
# means the markup no longer has that shape.
soupPosts = soup.find_all('table')[0].find_all('table')[1]

# Post objects collected by the parsing loop.
posts = []
class Post(object):
    """One listing entry: its position, title, author and score.

    Attributes:
        position: text of the position cell of the entry.
        title: text of the title cell of the entry.
        author: author name, or '' when none is known.
        points: score as an int; 0 when no score is known.
    """

    # Class-level defaults are kept so the attributes also exist on the class
    # itself, as they did before.
    position = ""
    title = ""
    author = ""
    # An int (not "") so numeric comparisons such as `post.points < 5` are
    # well defined even for a post that never had a score assigned.
    points = 0

    def __init__(self, position="", title="", author="", points=0):
        """Create a post; every field is optional and may be assigned later."""
        self.position = position
        self.title = title
        self.author = author
        self.points = points

    def __str__(self):
        """Return 'position - title - author - points'."""
        return "{} - {} - {} - {}".format(
            self.position, self.title, self.author, self.points)
# Walk the rows of the listing table and assemble Post objects. Rows are
# told apart by how many <td> cells they contain:
#   3 cells -> starts a new Post (cell 0 = position, cell 2 = title)
#   2 cells -> details row for the pending Post (score and author in cell 1)
#   0 cells -> separator row: the pending Post is complete and is stored
score_pattern = re.compile(r'[0-9]+')
current = None

# Visit only the direct <tr> children. Iterating the tag itself can also
# yield non-row children (e.g. text nodes between rows), which are not table
# rows and must not be queried for cells.
for tr in soupPosts.find_all('tr', recursive=False):
    tds = tr.find_all('td')
    cell_count = len(tds)

    if cell_count == 3:
        current = Post()
        current.position = tds[0].text
        current.title = tds[2].text

    elif cell_count == 2 and current is not None:
        details = tds[1]

        # The score is the first run of digits in the first <span>. A missing
        # span, or a span without any digits, counts as 0 points instead of
        # crashing on a failed regex match.
        score_span = details.find('span')
        match = None
        if score_span is not None:
            match = score_pattern.search(score_span.text)
        current.points = int(match.group(0)) if match else 0

        # The author is the text of the first link, when there is one.
        author_link = details.find('a')
        current.author = author_link.text if author_link is not None else ''

    elif cell_count == 0 and current is not None:
        posts.append(current)
        current = None
# Rule of thumb applied below: it is a good moment to post when at least two
# front-page articles score under 15 and at least one of them scores under 5.

# List every parsed post first.
for p in posts:
    print(p)

# Tally the low-scoring articles for each threshold.
lessThan5 = sum(1 for post in posts if post.points < 5)
lessThan15 = sum(1 for post in posts if post.points < 15)

print("Articles with score less than 5 points: {}".format(lessThan5))
print("Articles with score less than 15 points: {}".format(lessThan15))

verdict = (
    "It's a good time to post!"
    if lessThan5 >= 1 and lessThan15 >= 2
    else "This is not a good time to post!"
)
print(verdict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment