Get running results from Parkrun
#!/usr/bin/python
# vim: set fileencoding=utf-8 :

import sys
import requests
import re
from bs4 import BeautifulSoup
from datetime import date
from datetime import timedelta
import dateutil.parser
import json

USAGE = 'Usage:\n parkrun.py <runner id>\n\nExample:\n parkrun.py 690181'

URL = 'http://www.parkrun.org.uk/results/athleteeventresultshistory/?athleteNumber=%(runner)s&eventNumber=0'

def parse_datetime(date: str) -> date:
    '''Convert date from string dd/mm/yyyy to date object.'''
    return dateutil.parser.parse(date, dayfirst = True).date()

def parse_duration(time: str) -> timedelta:
    '''Convert time from string [hh:]mm:ss to timedelta object.'''
    tm = re.compile('[:]+').split(time)
    if len(tm) == 3:
        return timedelta(hours = int(tm[0]), minutes = int(tm[1]), seconds = int(tm[2]))
    else:
        return timedelta(minutes = int(tm[0]), seconds = int(tm[1]))

def parse_agegrade(grade: str) -> float:
    '''Convert age grade string (with trailing %) to a float.'''
    return float(re.compile(r'[^\d.]+').sub('', grade))

def get_results(runner: int) -> list:
    '''Get list of runners' results from the Parkrun website (extracted
    from the HTML). NB The Parkrun website does not allow scraping! See
    http://www.parkrun.com/scraping/ (I actually substituted the
    downloaded HTML with a local variable.)'''
    html = requests.get(URL % { 'runner': runner }, headers = { 'user-agent': 'Chrome/43.0.2357' }).content
    soup = BeautifulSoup(re.sub(r'(\s)+', ' ', str(html)))
    # all results are in the 3rd table (why do they all have the same ID?)
    table = soup.findAll('table', attrs = { 'id': 'results' })[2]
    headings = [th.get_text() for th in table.find('tr').find_all('th')]
    results = []
    for row in table.find_all('tr')[1:]:
        r = dict(zip(headings, (td.get_text() for td in row.find_all('td'))))
        results.append(Result(parse_datetime(r['Run Date']),
                              parse_duration(r['Time']),
                              'PB' in r['PB?'],
                              r['Event'],
                              int(r['Run Number']),
                              int(r['Pos']),
                              parse_agegrade(r['Age Grade'])))
    return results

class Result:
    '''Event result (date, time, pb?, event location and number, finish
    position, age grading).'''

    def __init__(self, date: date, time: timedelta, pb: bool, location: str, event_number: int, position: int, grade: float):
        '''Result constructor; requires date, run time, pb?, event
        location and number, finishing position, and age grading.'''
        self.date = date
        self.time = time
        self.pb = pb
        self.location = location
        self.event_number = event_number
        self.position = position
        self.grade = grade

    def __str__(self):
        '''Standard str() method; not too much going on, although dates
        are converted to a string in the format yyyy-mm-dd and the run
        time is converted from seconds to hh:mm:ss.'''
        # work out the ordinal suffix for the finishing position
        p = self.position % 10
        x = 'th'
        if self.position % 100 in (11, 12, 13):
            x = 'th'  # 11th, 12th, 13th rather than 11st, 12nd, 13rd
        elif p == 1:
            x = 'st'
        elif p == 2:
            x = 'nd'
        elif p == 3:
            x = 'rd'
        mp = { 'l': self.location,
               'n': self.event_number,
               'd': self.date.strftime('%Y-%m-%d'),
               't': self.time,
               'p': self.position,
               'x': x,
               'g': self.grade }
        return '%(l)-16s (%(n)4d) on %(d)s with %(t)s in %(p)3d%(x)s (%(g).2f%%)' % mp + (' PB!' if self.pb else '')

    def _json(self):
        '''Create a dictionary that can be easily converted to a JSON
        object. NB The time in this object stays in seconds.'''
        return { 'location'    : self.location,
                 'event_number': self.event_number,
                 'date'        : self.date.strftime('%Y-%m-%d'),
                 'time'        : self.time.total_seconds(),
                 'position'    : self.position,
                 'age_grade'   : self.grade }

    @staticmethod
    def json(result_list: list) -> str:
        '''Export the list of results as JSON string.'''
        tmp = []
        for result in result_list:
            if not isinstance(result, Result):
                raise TypeError('Object "' + str(result) + '" is not a Result!')
            tmp.append(result._json())
        return json.dumps(tmp)

def main(runner: int):
    results = get_results(runner)
    display_results_all(results)
    print()
    display_results_summary(results)
    # display results as JSON
    #print(Result.json(results))

def display_results_all(results: list):
    '''Display all results.'''
    for result in sorted(results, key = lambda result: result.date):
        print(result)

def display_results_summary(results: list):
    '''Display a summary of results.'''
    # sort by time
    results = sorted(results, key = lambda result: result.time)
    print('Fastest = ' + str(results[0]))
    print('Slowest = ' + str(results[-1]))
    # sort by finish position
    results = sorted(results, key = lambda result: result.position)
    print('Highest = ' + str(results[0]))
    print('Lowest = ' + str(results[-1]))
    # sort by date
    results = sorted(results, key = lambda result: result.date)
    print('First = ' + str(results[0]))
    print('Last = ' + str(results[-1]))
    # display average run time
    n = len(results)
    t = sum(result.time.total_seconds() for result in results) / n
    print('Average over {} runs is {}'.format(n, timedelta(seconds = round(t))))

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print(USAGE)
        sys.exit(1)
    main(int(sys.argv[1]))
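
The script is normally run from the command line as described in USAGE, but the pieces can also be used directly. A minimal sketch, assuming the file above is saved as parkrun.py somewhere on the import path; the runner ID is just the one from the USAGE example:

# Usage sketch: import the gist as a module and dump one runner's history as
# JSON. The module name 'parkrun' and the ID 690181 are only illustrative.
from parkrun import get_results, Result

results = get_results(690181)  # scrape and parse the athlete's event history
print(Result.json(results))    # the same data as a JSON string (time in seconds)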
Hi there, can you share any advice? I get the following error when running this code:
(parkrun1) ➜ parkrun1 python3 parkrun.py 191748
/Users/reza/Code/parkrun1/parkrun1/lib/python3.9/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: urllib3/urllib3#3020
warnings.warn(
/Users/reza/Code/parkrun1/parkrun.py:38: GuessedAtParserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("html.parser"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.
The code that caused this warning is on line 38 of the file /Users/reza/Code/parkrun1/parkrun.py. To get rid of this warning, pass the additional argument 'features="html.parser"' to the BeautifulSoup constructor.
soup = BeautifulSoup(re.sub(r'(\s)+', ' ', str(html)))
Traceback (most recent call last):
  File "/Users/reza/Code/parkrun1/parkrun.py", line 144, in <module>
    main(int(sys.argv[1]))
  File "/Users/reza/Code/parkrun1/parkrun.py", line 107, in main
    results = get_results(runner)
  File "/Users/reza/Code/parkrun1/parkrun.py", line 40, in get_results
    table = soup.findAll('table', attrs = { 'id': 'results' })[2]
IndexError: list index out of range
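
The warning and the IndexError are separate issues. A minimal sketch of a first thing to try, reusing the names from get_results above: pass an explicit parser (exactly what the GuessedAtParserWarning suggests) and check how many id="results" tables actually came back before indexing the third one. The IndexError means fewer than three were found, typically because parkrun served a block or redirect page rather than the athlete's history.

    # Hedged sketch only: explicit parser plus a clearer failure when the
    # expected results tables are missing from the downloaded page.
    soup = BeautifulSoup(re.sub(r'(\s)+', ' ', str(html)), 'html.parser')
    tables = soup.findAll('table', attrs = { 'id': 'results' })
    if len(tables) < 3:
        raise RuntimeError('expected 3 "results" tables, got %d; parkrun may be '
                           'blocking scripted requests or has changed its markup'
                           % len(tables))
    table = tables[2]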