Skip to content

Instantly share code, notes, and snippets.

@jmrobles
Created August 13, 2020 14:46
Show Gist options
  • Save jmrobles/6c6868745c9f2da5327c1e7552134d6c to your computer and use it in GitHub Desktop.
Save jmrobles/6c6868745c9f2da5327c1e7552134d6c to your computer and use it in GitHub Desktop.
Hacker News upvoted stories scraper
import requests
from bs4 import BeautifulSoup
# Scrape every story the logged-in user has upvoted on Hacker News.
#
# Logs in with the credentials below, walks the paginated "upvoted" listing
# and collects one dict per story in `items` with the keys 'title', 'url',
# 'points' and 'comments'. 'points' / 'comments' are None when the row does
# not carry a parsable number. The total is printed at the end.
creds = {'acct': 'YOUR_ACCOUNT', 'pw': 'YOUR_PASSWORD'}
sess = requests.Session()  # a single session so the login cookie is reused
resp = sess.post('https://news.ycombinator.com/login', creds)
# Explicit check rather than `assert`: asserts are removed when Python runs
# with -O, which would silently skip this validation.
# NOTE(review): the site may answer 200 even for rejected credentials --
# confirm; the missing-table check inside the loop covers that case.
if resp.status_code != 200:
    raise RuntimeError(f'Login request failed with HTTP status {resp.status_code}')
cnt = 0
items = []
nextUrl = f"https://news.ycombinator.com/upvoted?id={creds['acct']}"
while nextUrl is not None:
    # Per-story state: filled in by consecutive rows, flushed on a spacer row.
    lastTitle = None
    lastLink = None
    lastPoints = None
    lastComments = None
    resp = sess.get(nextUrl)
    nextUrl = None  # stays None (ending the loop) unless a "More" link is found
    dom = BeautifulSoup(resp.content, 'html.parser')
    tbl = dom.find_all("table", attrs={'class': 'itemlist'})
    if not tbl:
        # Fail with a readable message instead of an IndexError on tbl[0].
        raise RuntimeError(
            f'No story table found (HTTP status {resp.status_code}); '
            'the login may have failed or the page markup may have changed'
        )
    for tr in tbl[0].find_all('tr'):
        # First CSS class of the row, or None for a row without a class.
        rowClass = tr['class'][0] if tr.has_attr('class') else None
        if rowClass == 'athing':
            # Title row: holds the story link.
            a = tr.select('td[class="title"] a[class="storylink"]')[0]
            lastTitle = a.get_text()
            lastLink = a['href']
        elif rowClass == 'spacer':
            # Spacer row closes the current story: store it and reset state.
            items.append({'title': lastTitle, 'url': lastLink, 'points': lastPoints, 'comments': lastComments})
            lastTitle = None
            lastLink = None
            lastPoints = None
            lastComments = None
            cnt += 1
        elif rowClass is None:
            # Class-less row: score / comment-count details.
            sel = tr.select('span[class="score"]')
            try:
                # The number is taken as the text before the first space.
                lastPoints = int(sel[0].get_text().split(' ')[0])
            except (IndexError, ValueError):
                # No score span in this row, or its text is not numeric.
                lastPoints = None
            for a in tr.find_all('a'):
                linkText = a.get_text()
                if 'comment' in linkText:
                    try:
                        # Count and label are separated by a non-breaking space.
                        lastComments = int(linkText.split('\xa0')[0])
                    except ValueError:
                        lastComments = None
        # Pagination: look for the "More" link in every row, whatever its
        # class, and run the selector only once.
        moreLinks = tr.select('a[class="morelink"]')
        if moreLinks:
            nextUrl = 'https://news.ycombinator.com/' + moreLinks[0]['href']
print(f'Total upvoted links: {cnt}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment