jon1scr · March 26, 2021 11:01
diff --git a/daily-hackernews.py b/daily-hackernews.py
 from github import Github
 import csv
 import re
 import requests
 from datetime import datetime, timedelta

 # Export the posts listed in the open issues of REPO_NAME to a CSV file,
 # adding submitter and score data fetched from the Hacker News API.
 # NOTE(review): "access_token" looks like a placeholder -- supply a real
 # token before running, and keep it out of version control.
 g = Github("access_token")

 REPO_NAME = 'xueyuanl/daily-hackernews'
 FILE_NAME = "hn25.csv"
 BASE_URL = 'https://hacker-news.firebaseio.com/v0/'
 ITEM_URL = BASE_URL + 'item/'
 USER_URL = BASE_URL + 'user/'

 # An account created fewer than this many days before the submission date
 # is flagged as a new account.
 NOOB_ACCOUNT_TRESHOLD = 15

 # Patterns are compiled once; raw strings avoid invalid-escape warnings.
 TITLE_RE = re.compile(r"\[(.*?)\]")
 # The scheme is matched case-insensitively so the line itself does not have
 # to be lowercased (lowercasing corrupted case-sensitive URL paths).
 LINK_RE = re.compile(r"(?P<url>https?://[^\s]+)", re.IGNORECASE)

 repo = g.get_repo(REPO_NAME)
 issues = repo.get_issues(state='open')

 fields = ['Date', 'Title', 'Link', 'userID', 'userID Age', 'New Account?', 'Number of Points', 'Number of Comments', 'HN Link']
 rows = []

 for issue in issues:
 	date = ''
 	try:
 		# The fourth space-separated token of the issue title is used as the date.
 		date = issue.title.split(' ')[3]
 	except IndexError:
 		print('bad title format: ', issue.title)
 		# TODO
 	# Guard against an issue whose body is None.
 	post = (issue.body or '').split('\n')[3:]  # first 3 elements are empty
 	for line in post:
 		# regex for title
 		all_square_brackets = TITLE_RE.findall(line)
 		# regex for Link, HN Link
 		all_links = LINK_RE.findall(line)
 		# Skip blank or malformed lines instead of failing with IndexError.
 		if not all_square_brackets or len(all_links) < 2:
 			continue
 		title = all_square_brackets[0].strip('*')
 		link = all_links[0].strip(')')
 		hn_link = all_links[1].strip(')')
 		if '=' not in hn_link:
 			continue
 		item_id = hn_link.split('=')[1]

 		# Reset per row so a failed lookup cannot inherit the previous row's values.
 		user_id = user_id_age = is_new = number_of_points = number_of_comments = ''

 		# Parse the response once; a JSON null or an item without 'by'/'score'
 		# yields blank cells rather than an exception.
 		item = requests.get(ITEM_URL + item_id + '.json').json() or {}
 		user_id = item.get('by', '')
 		number_of_points = item.get('score', '')

 		# number_of_comments not outright available, needs more computing.
 		if user_id:
 			user = requests.get(USER_URL + user_id + '.json').json()
 			try:
 				user_id_age = user['created']
 				account_created_date = datetime.fromtimestamp(user_id_age)
 			except TypeError:
 				# Raised when the user lookup returns no usable record.
 				print('Automated submission.')
 			else:
 				try:
 					submission_date = datetime.strptime(date, '%d-%m-%Y')
 					# New account: created after (submission date - threshold days).
 					newest_old_account = submission_date - timedelta(days=NOOB_ACCOUNT_TRESHOLD)
 					is_new = account_created_date > newest_old_account
 				except ValueError:
 					print('date parsing issue: ', line)
 					# TODO
 		row = [date, title, link, user_id, user_id_age, is_new, number_of_points, number_of_comments, hn_link]
 		rows.append(row)

 # newline='' lets the csv module control line endings (no blank rows on Windows).
 with open(FILE_NAME, 'w', encoding='utf-8-sig', newline='') as csvfile:
 	csvwriter = csv.writer(csvfile)
 	csvwriter.writerow(fields)
 	csvwriter.writerows(rows)
	from github import Github
	import csv
	import re
	import requests
	from datetime import datetime, timedelta

	# Export the posts listed in the open issues of REPO_NAME to a CSV file,
	# adding submitter and score data fetched from the Hacker News API.
	# NOTE(review): "access_token" looks like a placeholder -- supply a real
	# token before running, and keep it out of version control.
	g = Github("access_token")

	REPO_NAME = 'xueyuanl/daily-hackernews'
	FILE_NAME = "hn25.csv"
	BASE_URL = 'https://hacker-news.firebaseio.com/v0/'
	ITEM_URL = BASE_URL + 'item/'
	USER_URL = BASE_URL + 'user/'

	# An account created fewer than this many days before the submission date
	# is flagged as a new account.
	NOOB_ACCOUNT_TRESHOLD = 15

	# Patterns are compiled once; raw strings avoid invalid-escape warnings.
	TITLE_RE = re.compile(r"\[(.*?)\]")
	# The scheme is matched case-insensitively so the line itself does not have
	# to be lowercased (lowercasing corrupted case-sensitive URL paths).
	LINK_RE = re.compile(r"(?P<url>https?://[^\s]+)", re.IGNORECASE)

	repo = g.get_repo(REPO_NAME)
	issues = repo.get_issues(state='open')

	fields = ['Date', 'Title', 'Link', 'userID', 'userID Age', 'New Account?', 'Number of Points', 'Number of Comments', 'HN Link']
	rows = []

	for issue in issues:
		date = ''
		try:
			# The fourth space-separated token of the issue title is used as the date.
			date = issue.title.split(' ')[3]
		except IndexError:
			print('bad title format: ', issue.title)
			# TODO
		# Guard against an issue whose body is None.
		post = (issue.body or '').split('\n')[3:]  # first 3 elements are empty
		for line in post:
			# regex for title
			all_square_brackets = TITLE_RE.findall(line)
			# regex for Link, HN Link
			all_links = LINK_RE.findall(line)
			# Skip blank or malformed lines instead of failing with IndexError.
			if not all_square_brackets or len(all_links) < 2:
				continue
			title = all_square_brackets[0].strip('*')
			link = all_links[0].strip(')')
			hn_link = all_links[1].strip(')')
			if '=' not in hn_link:
				continue
			item_id = hn_link.split('=')[1]

			# Reset per row so a failed lookup cannot inherit the previous row's values.
			user_id = user_id_age = is_new = number_of_points = number_of_comments = ''

			# Parse the response once; a JSON null or an item without 'by'/'score'
			# yields blank cells rather than an exception.
			item = requests.get(ITEM_URL + item_id + '.json').json() or {}
			user_id = item.get('by', '')
			number_of_points = item.get('score', '')

			# number_of_comments not outright available, needs more computing.
			if user_id:
				user = requests.get(USER_URL + user_id + '.json').json()
				try:
					user_id_age = user['created']
					account_created_date = datetime.fromtimestamp(user_id_age)
				except TypeError:
					# Raised when the user lookup returns no usable record.
					print('Automated submission.')
				else:
					try:
						submission_date = datetime.strptime(date, '%d-%m-%Y')
						# New account: created after (submission date - threshold days).
						newest_old_account = submission_date - timedelta(days=NOOB_ACCOUNT_TRESHOLD)
						is_new = account_created_date > newest_old_account
					except ValueError:
						print('date parsing issue: ', line)
						# TODO
			row = [date, title, link, user_id, user_id_age, is_new, number_of_points, number_of_comments, hn_link]
			rows.append(row)

	# newline='' lets the csv module control line endings (no blank rows on Windows).
	with open(FILE_NAME, 'w', encoding='utf-8-sig', newline='') as csvfile:
		csvwriter = csv.writer(csvfile)
		csvwriter.writerow(fields)
		csvwriter.writerows(rows)