Last active
March 2, 2016 01:58
-
-
Save brianv0/74b723f326eea2bb738b to your computer and use it in GitHub Desktop.
Find "Who is Hiring" posts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import bs4 | |
import requests | |
import re | |
import time | |
def get_page(href):
    """Fetch a Hacker News page and parse it with BeautifulSoup.

    ``href`` is resolved against ``https://news.ycombinator.com/``: a
    relative link such as ``item?id=1`` becomes a URL on that site, while an
    absolute URL is kept as it is.  The resolved URL is stored on the
    returned soup object as ``page.href``.

    Returns ``None`` without logging, sleeping or touching the network when
    ``href`` is empty or ``None``.
    """
    # Test for a missing link first.  Logging before this check made a None
    # href fail with TypeError on the string concatenation, and an empty one
    # cost a pointless two second sleep.
    if not href:
        return None

    # Imported locally so the block works on Python 3 and Python 2 alike.
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    print("Getting page:" + href)
    # Pause before every request (presumably to stay friendly to the site).
    time.sleep(2)

    # urljoin instead of os.path.join: the latter inserts the OS path
    # separator and cannot resolve absolute or rooted links against the base.
    href = urljoin("https://news.ycombinator.com/", href)
    resp = requests.get(href)
    page = bs4.BeautifulSoup(resp.content, 'html.parser')
    page.href = href
    return page
def next_href(page):
    """Return the href of the first ``td .title a[rel="nofollow"]`` link.

    ``page`` is any object offering a CSS ``select`` method.  ``None`` is
    returned when the selector matches nothing.
    """
    matches = page.select('td .title a[rel="nofollow"]')
    if not matches:
        return None
    first_link = matches[0]
    return first_link.attrs["href"]
class pagerange(object):
    """Iterator over a chain of pages connected by "next page" links.

    Starting at ``href``, every step fetches the current page with
    ``get_page`` and then moves on to the link reported by ``next_href``.
    Iteration ends once there is no link left to follow.
    """

    def __init__(self, href):
        # Link of the page that the next step will fetch; falsy when done.
        self.href = href

    def __iter__(self):
        return self

    def __next__(self):
        """Fetch and return the next page, or raise ``StopIteration``."""
        if not self.href:
            raise StopIteration()
        page = get_page(self.href)
        self.href = next_href(page)
        return page

    # Python 2 names the iterator-protocol method ``next``; keep that
    # spelling available so existing callers and Python 2 loops still work.
    next = __next__
# Collect every page of the whoishiring submissions listing by following the
# pagination link from one page to the next.
pages = list(pagerange("submitted?id=whoishiring"))

# For each page keep the parent element of its first ".athing" match.
tables = []
for p in pages:
    tables.append(p.select_one(".athing").parent)

# Case-insensitive pattern that recognises the "who is hiring" titles.
all_hiring_regex = re.compile("ask hn.*who is hiring", flags=re.IGNORECASE)

# Receives one result tuple per listed post further down the script.
all_posts = []
def extract(entry, extract_posts_regex):
    """Summarise one listing entry.

    ``entry`` is a pair of rows.  The last anchor of the first row supplies
    the title and the link; the text of the last anchor of the second row is
    cut at its first space to give ``posts``.

    When ``extract_posts_regex`` matches the title, the linked page is
    fetched with ``get_page`` and ``top_level_comments`` becomes the number
    of ``img[width="0"]`` elements found on it.  Otherwise it is ``-1``.

    Returns the tuple ``(title, link, top_level_comments, posts)``.
    """
    headline = entry[0].select("a")[-1]
    title = headline.text
    link = headline.attrs["href"]

    summary_text = entry[1].select("a")[-1].text
    posts = summary_text.split(" ")[0]

    if extract_posts_regex.match(title):
        thread = get_page(link)
        top_level_comments = len(thread.select('img[width="0"]'))
    else:
        top_level_comments = -1

    return title, link, top_level_comments, posts
# Cut every listing table into per-post entries and summarise each of them.
for table in tables:
    rows = table.select("tr")
    # Walk the rows in strides of three and keep the first two of each stride.
    entries = [rows[i:i + 2] for i in range(0, len(rows), 3)]
    # The trailing chunk is dropped as before; the guard only prevents an
    # IndexError when a table has no rows at all.
    # NOTE(review): presumably the last chunk is the pagination row - confirm.
    if entries:
        entries.pop()
    all_posts.extend([extract(e, all_hiring_regex) for e in entries])

# Keep only the posts whose title matches the "who is hiring" pattern.
all_hiring = [post for post in all_posts if all_hiring_regex.match(post[0])]

maxpoints = 850   # count that maps to a completely filled bar
height = 60       # bar width in characters
maxdatelen = 14   # width of the label column

# all_hiring_regex has no capture group, so calling .group(1) on its match
# always raised IndexError.  The label is taken from the first parenthesised
# part of the title instead.
# NOTE(review): assumes titles look like "... (March 2016)" - confirm.
date_regex = re.compile(r"\(([^)]*)\)")

# One rendered chart line per hiring post, in listing order.
results = []
for post in all_hiring:
    found = date_regex.search(post[0])
    date = found.group(1) if found else ""

    # post[2] is the top-level comment count returned by extract().
    points = float(post[2]) / maxpoints * height
    # Clamp to the bar width: a count above maxpoints used to index past the
    # end of the bar and raise IndexError.
    filled = min(height, max(0, int(round(points))))
    bar = "X" * filled + " " * (height - filled)

    # Same layout as before: padded label, one space, then the bar.
    line = date.ljust(maxdatelen) + " " + bar
    results.append(line)
    print(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A bit buggy, but...