Skip to content

Instantly share code, notes, and snippets.

@brianv0
Last active March 2, 2016 01:58
Show Gist options
  • Save brianv0/74b723f326eea2bb738b to your computer and use it in GitHub Desktop.
Save brianv0/74b723f326eea2bb738b to your computer and use it in GitHub Desktop.
Find "Who is Hiring" posts
import os
import bs4
import requests
import re
import time
def get_page(href):
    """Fetch a Hacker News page and return it parsed with BeautifulSoup.

    ``href`` is resolved against ``https://news.ycombinator.com/``.  A falsy
    ``href`` (``None`` or ``""``) returns ``None`` immediately, without
    logging, sleeping or touching the network.  The returned soup object
    carries the resolved URL in an extra ``href`` attribute.
    """
    # Guard first: building the log message from a None href would raise
    # TypeError before the check could ever return None.
    if not href:
        return None

    # Function-scope import so the block works on both Python 2 and 3
    # without touching the file-level imports.
    try:
        from urllib.parse import urljoin
    except ImportError:  # Python 2
        from urlparse import urljoin

    print("Getting page:" + href)
    # Pause before every request (presumably to stay polite to the server).
    time.sleep(2)
    # urljoin always joins with "/" and copes with hrefs that start with "/"
    # or are already absolute, which os.path.join does not.
    url = urljoin("https://news.ycombinator.com/", href)
    resp = requests.get(url)
    page = bs4.BeautifulSoup(resp.content, 'html.parser')
    page.href = url
    return page
def next_href(page):
    """Return the href of the first ``td .title a[rel="nofollow"]`` link.

    ``page`` must offer a CSS ``select`` method.  ``None`` is returned when
    nothing matches the selector.
    """
    # NOTE(review): this looks like the pagination link of the listing --
    # confirm against the live markup.
    candidates = page.select('td .title a[rel="nofollow"]')
    return candidates[0].attrs["href"] if candidates else None
class pagerange(object):
    """Iterator over a chain of pages, each one linking to the next.

    Starting at ``href``, every step fetches the current page with
    ``get_page``, stores the href reported by ``next_href`` for the following
    step, and yields the fetched page.  Iteration stops once the stored href
    is falsy.
    """

    def __init__(self, href):
        self.href = href

    def __iter__(self):
        return self

    def __next__(self):
        """Fetch and return the next page, or raise ``StopIteration``."""
        if not self.href:
            raise StopIteration()
        page = get_page(self.href)
        self.href = next_href(page)
        return page

    # Python 2 spelling of the iterator protocol, kept for existing callers.
    next = __next__
# Matches titles that start with "ask hn" followed later by "who is hiring",
# ignoring case.
all_hiring_regex = re.compile("ask hn.*who is hiring", re.IGNORECASE)

# Filled in further down with one tuple per submission.
all_posts = []

# Follow the submission listing of the "whoishiring" account page by page.
pages = list(pagerange("submitted?id=whoishiring"))

# For every page, keep the parent element of its first ".athing" element.
tables = []
for listing in pages:
    tables.append(listing.select_one(".athing").parent)
def extract(entry, extract_posts_regex):
    """Summarise one listing entry as ``(title, link, top_level_comments, posts)``.

    ``entry`` is an indexable pair of elements.  The title and link come from
    the last anchor of ``entry[0]``; ``posts`` is the first space-separated
    token of the last anchor text in ``entry[1]``.  ``top_level_comments`` is
    ``-1`` unless the title matches ``extract_posts_regex`` (anchored at the
    start), in which case the linked page is fetched with ``get_page`` and
    its ``img[width="0"]`` elements are counted.
    """
    story_anchor = entry[0].select("a")[-1]
    title = story_anchor.text
    link = story_anchor.attrs["href"]

    last_subtext_anchor = entry[1].select("a")[-1]
    posts = last_subtext_anchor.text.split(" ")[0]

    if not extract_posts_regex.match(title):
        return title, link, -1, posts

    # NOTE(review): zero-width images presumably mark top-level comments in
    # the thread markup -- confirm.
    thread = get_page(link)
    return title, link, len(thread.select('img[width="0"]')), posts
# Collect one tuple per submission from every listing table.
for table in tables:
    rows = table.select("tr")
    # Rows are consumed in groups of three, keeping the first two of each
    # group (presumably title row + subtext row, followed by a spacer).
    entries = [rows[i:i + 2] for i in range(0, len(rows), 3)]
    # The trailing group is discarded (presumably the pagination row rather
    # than a submission); guard so a table without rows does not raise.
    if entries:
        entries.pop()
    all_posts.extend(extract(e, all_hiring_regex) for e in entries)

# Keep only the submissions whose title matches the hiring pattern.
all_hiring = [post for post in all_posts if all_hiring_regex.match(post[0])]

maxpoints = 850   # count that maps to a full-width bar
height = 60       # bar width in characters
maxdatelen = 14   # column width reserved for the date label

# all_hiring_regex has no capture group, so asking it for group(1) raises
# "no such group".  The label is taken from a parenthesised part of the title
# instead -- assumes titles look like "... (March 2016)"; confirm.
date_regex = re.compile(r"\(([^)]*)\)")

# One rendered chart line per hiring post.
results = []
for post in all_hiring:
    found = date_regex.search(post[0])
    date = found.group(1) if found else ""
    # post[2] is the third field returned by extract (its comment count).
    filled = int(round(float(post[2]) / maxpoints * height))
    # Clamp so counts above maxpoints cannot overflow the bar.
    filled = max(0, min(height, filled))
    bar = "X" * filled + " " * (height - filled)
    results.append(date.ljust(maxdatelen) + " " + bar)

for line in results:
    print(line)
@brianv0
Copy link
Author

brianv0 commented Mar 2, 2016

A bit buggy, but...


                                                X        
                                                XX       
                                                XXX X    
                                                XXX X  X 
                                               XXXXXX  X 
                                               XXXXXX  X 
                                       X   X XXXXXXXX  XX
                             X      X XX   X XXXXXXXX  XX
                             X     XX XX X X XXXXXXXXXXXX
                       X   X X   XXXXXXX XXX XXXXXXXXXXXX
  X       X      X    XX  XX XXX XXXXXXXXXXXXXXXXXXXXXXXX
XXXXX X  XXXXXXXXXXXXXXX  XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXX X XXXXXXXXXXXXXXXX  XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXX X XXXXXXXXXXXXXXXX  XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXX X XXXXXXXXXXXXXXXX  XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment