albinoloverats · June 2, 2024 13:38 · rokesby · May 28, 2024
diff --git a/parkrun.py b/parkrun.py
 #!/usr/bin/python
 # vim: set fileencoding=utf-8 :

 import sys
 import requests
 import re
 from bs4 import BeautifulSoup
 from datetime import date
 from datetime import timedelta
 import dateutil.parser
 import json

 # Help text printed by the script entry point when it is invoked incorrectly.
 USAGE = 'Usage:\n  parkrun.py <runner id>\n\nExample:\n  parkrun.py 690181'
 # Address of one athlete's results-history page; the %(runner)s placeholder
 # is filled in with the runner id by get_results().
 URL   = 'http://www.parkrun.org.uk/results/athleteeventresultshistory/?athleteNumber=%(runner)s&eventNumber=0'

 def parse_datetime(date: str) -> date:
 	'''Parse a day-first date string (e.g. dd/mm/yyyy) into a date object.
 	The text is handed to dateutil with dayfirst = True, so an ambiguous
 	value such as 01/02/2024 is read as 1 February 2024. Any time-of-day
 	part of the parsed value is dropped.'''
 	parsed = dateutil.parser.parse(date, dayfirst = True)
 	return parsed.date()

 def parse_duration(time: str) -> timedelta:
 	'''Convert a run time string [hh:]mm:ss to a timedelta object.
 	Fields are separated by one or more colons. Two fields are read as
 	minutes and seconds, three as hours, minutes and seconds. Any other
 	number of fields, or a field that is not an integer, raises
 	ValueError rather than failing with an IndexError or silently
 	ignoring the extra fields.'''
 	fields = [int(field) for field in re.split(r'[:]+', time)]
 	if len(fields) == 3:
 		hours, minutes, seconds = fields
 	elif len(fields) == 2:
 		hours = 0
 		minutes, seconds = fields
 	else:
 		raise ValueError('Invalid duration "' + time + '"; expected [hh:]mm:ss')
 	return timedelta(hours = hours, minutes = minutes, seconds = seconds)

 def parse_agegrade(grade: str) -> float:
 	'''Turn an age grade such as "54.32%" into the float 54.32 by
 	discarding every character that is neither a digit nor a dot.'''
 	numeric_text = re.sub(r'[^\d.]+', '', grade)
 	return float(numeric_text)

 def get_results(runner: int) -> list:
 	'''Get list of a runner's results from the Parkrun website (extracted
 	from the HTML). NB The Parkrun website does not allow scraping! See
 	http://www.parkrun.com/scraping/ (I actually substituted the
 	downloaded HTML with a local variable.)
 	runner is the athlete number substituted into URL. Returns one
 	Result per data row of the all-results table, in page order.
 	Raises requests.RequestException on a network or HTTP failure,
 	IndexError when the page holds fewer than three results tables, and
 	KeyError or ValueError when a row lacks an expected column or
 	contains text that cannot be parsed.'''
 	response = requests.get(URL % { 'runner': runner },
 		headers = { 'user-agent': 'Chrome/43.0.2357' },
 		timeout = 30)
 	# fail loudly on 4xx/5xx rather than trying to parse an error page
 	response.raise_for_status()
 	# use the decoded text: str() of the raw bytes would yield "b'...'"
 	# with escaped newlines and escaped non-ASCII characters
 	html = re.sub(r'(\s)+', ' ', response.text)
 	# name the parser so the outcome does not depend on what is installed
 	soup = BeautifulSoup(html, 'html.parser')
 	# all results is 3rd table (why do they all have the same ID?)
 	tables = soup.find_all('table', attrs = { 'id': 'results' })
 	if len(tables) < 3:
 		raise IndexError('Expected at least 3 results tables but found %d' % len(tables))
 	table = tables[2]
 	headings = [th.get_text().strip() for th in table.find('tr').find_all('th')]
 	results = []
 	for row in table.find_all('tr')[1:]:
 		cells = [td.get_text().strip() for td in row.find_all('td')]
 		if not cells:
 			# no <td> cells, so this is not a data row
 			continue
 		r = dict(zip(headings, cells))
 		results.append(Result(
 			parse_datetime(r['Run Date']),
 			parse_duration(r['Time']),
 			'PB' in r['PB?'],
 			r['Event'],
 			int(r['Run Number']),
 			int(r['Pos']),
 			parse_agegrade(r['Age Grade'])))
 	return results

 class Result:
 	'''Event result (date, time, pb?, event location and number, finish
 	position, age grading).'''
 	def __init__(self, date: date, time: timedelta, pb: bool, location: str, event_number: int, position: int, grade: float):
 		'''Result constructor; requires date, run time, pb?, event
 		location and number, finishing position, and age grading.'''
 		self.date         = date
 		self.time         = time
 		self.pb           = pb
 		self.location     = location
 		self.event_number = event_number
 		self.position     = position
 		self.grade        = grade
 	def __str__(self):
 		'''Standard str() method; the date is shown as yyyy-mm-dd, the
 		run time as str(timedelta) (h:mm:ss) and the position gets its
 		English ordinal suffix. " PB!" is appended for a personal best.'''
 		# 11, 12 and 13 (also 111, 112, ...) take "th" despite ending
 		# in 1, 2 or 3
 		if 11 <= self.position % 100 <= 13:
 			x = 'th'
 		else:
 			x = { 1: 'st', 2: 'nd', 3: 'rd' }.get(self.position % 10, 'th')
 		mp = {  'l': self.location,
 			'n': self.event_number,
 			'd': self.date.strftime('%Y-%m-%d'),
 			't': self.time,
 			'p': self.position,
 			'x': x,
 			'g': self.grade }
 		return '%(l)-16s (%(n)4d) on %(d)s with %(t)s in %(p)3d%(x)s (%(g).2f%%)' % mp + (' PB!' if self.pb else '')
 	def _json(self):
 		'''Create a dictionary that can be easily converted to a JSON
 		object. NB The time in this object is given in seconds.'''
 		return { 'location'    : self.location,
 			 'event_number': self.event_number,
 			 'date'        : self.date.strftime('%Y-%m-%d'),
 			 # a timedelta is not JSON serialisable, so export seconds
 			 'time'        : self.time.total_seconds(),
 			 'position'    : self.position,
 			 'age_grade'   : self.grade }
 	@staticmethod
 	def json(result_list: list) -> str:
 		'''Export the list of results as JSON string. Raises TypeError
 		if any item of the list is not a Result.'''
 		tmp = []
 		for result in result_list:
 			if not isinstance(result, Result):
 				raise TypeError('Object "' + str(result) + '" is not a Result!')
 			tmp.append(result._json())
 		return json.dumps(tmp)

 def main(runner: int):
 	'''Fetch every result for the given runner, list them all, then
 	print a blank line followed by the summary.'''
 	history = get_results(runner)
 	display_results_all(history)
 	print()
 	display_results_summary(history)
 	# to dump the results as JSON instead:
 	#print(Result.json(history))

 def display_results_all(results: list):
 	'''Print each result on its own line, ordered by run date (earliest
 	first).'''
 	by_date = sorted(results, key = lambda entry: entry.date)
 	for entry in by_date:
 		print(entry)

 def display_results_summary(results: list):
 	'''Display a summary of results: fastest/slowest time, highest/lowest
 	finish position, first/last run date and the average run time
 	(rounded to whole seconds). An empty list only prints a short
 	notice, as there is nothing to rank and no average to compute.'''
 	if not results:
 		print('No results to summarise')
 		return
 	# (label for first item, label for last item, sort key)
 	rankings = (
 		('Fastest = ', 'Slowest = ', lambda result: result.time),
 		('Highest = ', 'Lowest  = ', lambda result: result.position),
 		('First   = ', 'Last    = ', lambda result: result.date))
 	ordered = results
 	for first_label, last_label, key in rankings:
 		# each (stable) sort is applied on top of the previous one, so
 		# ties are broken by the earlier criteria
 		ordered = sorted(ordered, key = key)
 		print(first_label + str(ordered[0]))
 		print(last_label + str(ordered[-1]))
 	# display average run time
 	n = len(ordered)
 	t = sum(result.time.total_seconds() for result in ordered) / n
 	print('Average over {} runs is {}'.format(n, timedelta(seconds = round(t))))

 if __name__ == '__main__':
 	# exactly one argument is expected: the numeric runner id
 	if len(sys.argv) != 2:
 		print(USAGE)
 		sys.exit(1)
 	try:
 		runner_id = int(sys.argv[1])
 	except ValueError:
 		# a non-numeric id is a usage error, not a reason for a traceback
 		print(USAGE)
 		sys.exit(1)
 	main(runner_id)
	#!/usr/bin/python
	# vim: set fileencoding=utf-8 :

	import sys
	import requests
	import re
	from bs4 import BeautifulSoup
	from datetime import date
	from datetime import timedelta
	import dateutil.parser
	import json

	# Help text printed by the script entry point when it is invoked incorrectly.
	USAGE = 'Usage:\n parkrun.py <runner id>\n\nExample:\n parkrun.py 690181'
	# Address of one athlete's results-history page; the %(runner)s placeholder
	# is filled in with the runner id by get_results().
	URL = 'http://www.parkrun.org.uk/results/athleteeventresultshistory/?athleteNumber=%(runner)s&eventNumber=0'

	def parse_datetime(date: str) -> date:
		'''Parse a day-first date string (e.g. dd/mm/yyyy) into a date object.
		The text is handed to dateutil with dayfirst = True, so an ambiguous
		value such as 01/02/2024 is read as 1 February 2024. Any time-of-day
		part of the parsed value is dropped.'''
		parsed = dateutil.parser.parse(date, dayfirst = True)
		return parsed.date()

	def parse_duration(time: str) -> timedelta:
		'''Convert a run time string [hh:]mm:ss to a timedelta object.
		Fields are separated by one or more colons. Two fields are read as
		minutes and seconds, three as hours, minutes and seconds. Any other
		number of fields, or a field that is not an integer, raises
		ValueError rather than failing with an IndexError or silently
		ignoring the extra fields.'''
		fields = [int(field) for field in re.split(r'[:]+', time)]
		if len(fields) == 3:
			hours, minutes, seconds = fields
		elif len(fields) == 2:
			hours = 0
			minutes, seconds = fields
		else:
			raise ValueError('Invalid duration "' + time + '"; expected [hh:]mm:ss')
		return timedelta(hours = hours, minutes = minutes, seconds = seconds)

	def parse_agegrade(grade: str) -> float:
		'''Turn an age grade such as "54.32%" into the float 54.32 by
		discarding every character that is neither a digit nor a dot.'''
		numeric_text = re.sub(r'[^\d.]+', '', grade)
		return float(numeric_text)

	def get_results(runner: int) -> list:
		'''Get list of a runner's results from the Parkrun website (extracted
		from the HTML). NB The Parkrun website does not allow scraping! See
		http://www.parkrun.com/scraping/ (I actually substituted the
		downloaded HTML with a local variable.)
		runner is the athlete number substituted into URL. Returns one
		Result per data row of the all-results table, in page order.
		Raises requests.RequestException on a network or HTTP failure,
		IndexError when the page holds fewer than three results tables, and
		KeyError or ValueError when a row lacks an expected column or
		contains text that cannot be parsed.'''
		response = requests.get(URL % { 'runner': runner },
			headers = { 'user-agent': 'Chrome/43.0.2357' },
			timeout = 30)
		# fail loudly on 4xx/5xx rather than trying to parse an error page
		response.raise_for_status()
		# use the decoded text: str() of the raw bytes would yield "b'...'"
		# with escaped newlines and escaped non-ASCII characters
		html = re.sub(r'(\s)+', ' ', response.text)
		# name the parser so the outcome does not depend on what is installed
		soup = BeautifulSoup(html, 'html.parser')
		# all results is 3rd table (why do they all have the same ID?)
		tables = soup.find_all('table', attrs = { 'id': 'results' })
		if len(tables) < 3:
			raise IndexError('Expected at least 3 results tables but found %d' % len(tables))
		table = tables[2]
		headings = [th.get_text().strip() for th in table.find('tr').find_all('th')]
		results = []
		for row in table.find_all('tr')[1:]:
			cells = [td.get_text().strip() for td in row.find_all('td')]
			if not cells:
				# no <td> cells, so this is not a data row
				continue
			r = dict(zip(headings, cells))
			results.append(Result(
				parse_datetime(r['Run Date']),
				parse_duration(r['Time']),
				'PB' in r['PB?'],
				r['Event'],
				int(r['Run Number']),
				int(r['Pos']),
				parse_agegrade(r['Age Grade'])))
		return results

	class Result:
		'''Event result (date, time, pb?, event location and number, finish
		position, age grading).'''
		def __init__(self, date: date, time: timedelta, pb: bool, location: str, event_number: int, position: int, grade: float):
			'''Result constructor; requires date, run time, pb?, event
			location and number, finishing position, and age grading.'''
			self.date = date
			self.time = time
			self.pb = pb
			self.location = location
			self.event_number = event_number
			self.position = position
			self.grade = grade
		def __str__(self):
			'''Standard str() method; the date is shown as yyyy-mm-dd, the
			run time as str(timedelta) (h:mm:ss) and the position gets its
			English ordinal suffix. " PB!" is appended for a personal best.'''
			# 11, 12 and 13 (also 111, 112, ...) take "th" despite ending
			# in 1, 2 or 3
			if 11 <= self.position % 100 <= 13:
				x = 'th'
			else:
				x = { 1: 'st', 2: 'nd', 3: 'rd' }.get(self.position % 10, 'th')
			mp = { 'l': self.location,
				'n': self.event_number,
				'd': self.date.strftime('%Y-%m-%d'),
				't': self.time,
				'p': self.position,
				'x': x,
				'g': self.grade }
			return '%(l)-16s (%(n)4d) on %(d)s with %(t)s in %(p)3d%(x)s (%(g).2f%%)' % mp + (' PB!' if self.pb else '')
		def _json(self):
			'''Create a dictionary that can be easily converted to a JSON
			object. NB The time in this object is given in seconds.'''
			return { 'location' : self.location,
				'event_number': self.event_number,
				'date' : self.date.strftime('%Y-%m-%d'),
				# a timedelta is not JSON serialisable, so export seconds
				'time' : self.time.total_seconds(),
				'position' : self.position,
				'age_grade' : self.grade }
		@staticmethod
		def json(result_list: list) -> str:
			'''Export the list of results as JSON string. Raises TypeError
			if any item of the list is not a Result.'''
			tmp = []
			for result in result_list:
				if not isinstance(result, Result):
					raise TypeError('Object "' + str(result) + '" is not a Result!')
				tmp.append(result._json())
			return json.dumps(tmp)

	def main(runner: int):
		'''Fetch every result for the given runner, list them all, then
		print a blank line followed by the summary.'''
		history = get_results(runner)
		display_results_all(history)
		print()
		display_results_summary(history)
		# to dump the results as JSON instead:
		#print(Result.json(history))

	def display_results_all(results: list):
		'''Print each result on its own line, ordered by run date (earliest
		first).'''
		by_date = sorted(results, key = lambda entry: entry.date)
		for entry in by_date:
			print(entry)

	def display_results_summary(results: list):
		'''Display a summary of results: fastest/slowest time, highest/lowest
		finish position, first/last run date and the average run time
		(rounded to whole seconds). An empty list only prints a short
		notice, as there is nothing to rank and no average to compute.'''
		if not results:
			print('No results to summarise')
			return
		# (label for first item, label for last item, sort key)
		rankings = (
			('Fastest = ', 'Slowest = ', lambda result: result.time),
			('Highest = ', 'Lowest = ', lambda result: result.position),
			('First = ', 'Last = ', lambda result: result.date))
		ordered = results
		for first_label, last_label, key in rankings:
			# each (stable) sort is applied on top of the previous one, so
			# ties are broken by the earlier criteria
			ordered = sorted(ordered, key = key)
			print(first_label + str(ordered[0]))
			print(last_label + str(ordered[-1]))
		# display average run time
		n = len(ordered)
		t = sum(result.time.total_seconds() for result in ordered) / n
		print('Average over {} runs is {}'.format(n, timedelta(seconds = round(t))))

	if __name__ == '__main__':
		# exactly one argument is expected: the numeric runner id
		if len(sys.argv) != 2:
			print(USAGE)
			sys.exit(1)
		try:
			runner_id = int(sys.argv[1])
		except ValueError:
			# a non-numeric id is a usage error, not a reason for a traceback
			print(USAGE)
			sys.exit(1)
		main(runner_id)