Skip to content

Instantly share code, notes, and snippets.

@albinoloverats
Last active June 2, 2024 13:38
Show Gist options
  • Save albinoloverats/b4d5c3da779e092a25f4 to your computer and use it in GitHub Desktop.
Save albinoloverats/b4d5c3da779e092a25f4 to your computer and use it in GitHub Desktop.
Get running results from Parkrun
#!/usr/bin/python
# vim: set fileencoding=utf-8 :
import sys
import requests
import re
from bs4 import BeautifulSoup
from datetime import date
from datetime import timedelta
import dateutil.parser
import json
# Help text printed when the script is not given exactly one argument.
USAGE = 'Usage:\n parkrun.py <runner id>\n\nExample:\n parkrun.py 690181'
# Results-history page for one athlete; %-formatted with a mapping that
# supplies the 'runner' key.
URL = 'http://www.parkrun.org.uk/results/athleteeventresultshistory/?athleteNumber=%(runner)s&eventNumber=0'
def parse_datetime(date: str) -> date:
    '''Convert a date string to a date object.

    Ambiguous numeric dates are read day-first (dd/mm/yyyy), because the
    string is handed to dateutil with dayfirst enabled.
    '''
    parsed = dateutil.parser.parse(date, dayfirst=True)
    return parsed.date()
def parse_duration(time: str) -> timedelta:
    '''Convert a run time from a string [hh:]mm:ss to a timedelta object.

    The string is split on runs of colons and must yield exactly two
    fields (minutes, seconds) or three fields (hours, minutes, seconds).

    Raises ValueError if the number of fields is wrong or if any field is
    not an integer.
    '''
    fields = re.split('[:]+', time)
    if len(fields) == 3:
        hours, minutes, seconds = fields
    elif len(fields) == 2:
        hours = 0
        minutes, seconds = fields
    else:
        # Anything else used to be misread as mm:ss or fail with IndexError.
        raise ValueError('Invalid duration "' + time + '"; expected [hh:]mm:ss')
    return timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
def parse_agegrade(grade: str) -> float:
    '''Turn an age-grade string such as "54.32 %" into a float.

    Every character that is not a digit or a dot is discarded before the
    remainder is converted.
    '''
    numeric_part = re.sub(r'[^\d.]+', '', grade)
    return float(numeric_part)
def get_results(runner: int) -> list:
    '''Get the list of a runner's results from the Parkrun website
    (extracted from the HTML). NB The Parkrun website does not allow
    scraping! See http://www.parkrun.com/scraping/ (the original author
    substituted the downloaded HTML with a local variable.)

    runner is the athlete number substituted into URL.

    Returns a list of Result objects, one per data row of the "all
    results" table.

    Raises requests.HTTPError if the server answers with an error status,
    and IndexError if the page does not contain the expected third
    results table (for example when the request was blocked).
    '''
    response = requests.get(URL % {'runner': runner},
                            headers={'user-agent': 'Chrome/43.0.2357'},
                            timeout=30)
    response.raise_for_status()
    # Use the decoded text: str() on the raw bytes would yield the b'...'
    # repr, leaving escape sequences in place of whitespace and non-ASCII
    # characters.
    markup = re.sub(r'\s+', ' ', response.text)
    # Name the parser explicitly so behaviour does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(markup, 'html.parser')
    # all results is 3rd table (they all share the same ID)
    tables = soup.find_all('table', attrs={'id': 'results'})
    if len(tables) < 3:
        raise IndexError('Expected at least 3 tables with id "results" but found '
                         + str(len(tables)) + '; the page layout may have changed '
                         'or the request may have been blocked')
    table = tables[2]
    headings = [th.get_text().strip() for th in table.find('tr').find_all('th')]
    results = []
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if not cells:
            # Not a data row (no <td> cells); nothing to parse.
            continue
        r = dict(zip(headings, (td.get_text().strip() for td in cells)))
        results.append(Result(parse_datetime(r['Run Date']),
                              parse_duration(r['Time']),
                              'PB' in r['PB?'],
                              r['Event'],
                              int(r['Run Number']),
                              int(r['Pos']),
                              parse_agegrade(r['Age Grade'])))
    return results
class Result:
    '''Event result (date, time, pb?, event location and number, finish
    position, age grading).'''

    def __init__(self, date: date, time: timedelta, pb: bool, location: str, event_number: int, position: int, grade: float):
        '''Result constructor; requires date, run time, pb?, event
        location and number, finishing position, and age grading.'''
        self.date = date
        self.time = time
        self.pb = pb
        self.location = location
        self.event_number = event_number
        self.position = position
        self.grade = grade

    def __str__(self):
        '''Standard str() method: a one-line summary. The date is shown
        as yyyy-mm-dd, the run time as the timedelta's own string form,
        the position with its English ordinal suffix, and ' PB!' is
        appended for a personal best.'''
        # 11, 12 and 13 (also 111, 112, ...) take 'th' even though they
        # end in 1, 2 or 3.
        if 11 <= self.position % 100 <= 13:
            x = 'th'
        else:
            x = {1: 'st', 2: 'nd', 3: 'rd'}.get(self.position % 10, 'th')
        mp = {'l': self.location,
              'n': self.event_number,
              'd': self.date.strftime('%Y-%m-%d'),
              't': self.time,
              'p': self.position,
              'x': x,
              'g': self.grade}
        return '%(l)-16s (%(n)4d) on %(d)s with %(t)s in %(p)3d%(x)s (%(g).2f%%)' % mp + (' PB!' if self.pb else '')

    def _json(self):
        '''Create a dictionary that can be easily converted to a JSON
        object. NB The time in this object is in seconds.'''
        return {'location': self.location,
                'event_number': self.event_number,
                'date': self.date.strftime('%Y-%m-%d'),
                # A timedelta is not JSON serialisable; export seconds.
                'time': self.time.total_seconds(),
                'position': self.position,
                'age_grade': self.grade}

    @staticmethod
    def json(result_list: list) -> str:
        '''Export the list of results as JSON string.

        Raises TypeError if any element is not a Result.'''
        tmp = []
        for result in result_list:
            if not isinstance(result, Result):
                raise TypeError('Object "' + str(result) + '" is not a Result!')
            tmp.append(result._json())
        return json.dumps(tmp)
def main(runner: int):
    '''Fetch the results for the given runner, print every result, then
    a blank line, then the summary.'''
    fetched = get_results(runner)
    display_results_all(fetched)
    print()
    display_results_summary(fetched)
    # For JSON output instead, print Result.json(fetched).
def display_results_all(results: list):
    '''Print each result on its own line, ordered by date (earliest
    first).'''
    chronological = sorted(results, key=lambda entry: entry.date)
    for entry in chronological:
        print(entry)
def display_results_summary(results: list):
    '''Display a summary of results: fastest/slowest run, highest/lowest
    finishing position, first/last run by date, and the average run time
    rounded to the nearest second.

    An empty list prints a short notice instead of a summary.'''
    if not results:
        # Nothing to index into or average over.
        print('No results to summarise')
        return
    # sort by time
    results = sorted(results, key=lambda result: result.time)
    print('Fastest = ' + str(results[0]))
    print('Slowest = ' + str(results[-1]))
    # sort by finish position (stable, so ties keep their time order)
    results = sorted(results, key=lambda result: result.position)
    print('Highest = ' + str(results[0]))
    print('Lowest = ' + str(results[-1]))
    # sort by date
    results = sorted(results, key=lambda result: result.date)
    print('First = ' + str(results[0]))
    print('Last = ' + str(results[-1]))
    # display average run time
    n = len(results)
    t = sum(result.time.total_seconds() for result in results) / n
    print('Average over {} runs is {}'.format(n, timedelta(seconds=round(t))))
if __name__ == '__main__':
    # Exactly one argument (the runner id) is required.
    if len(sys.argv) != 2:
        print(USAGE)
        sys.exit(1)
    try:
        runner_id = int(sys.argv[1])
    except ValueError:
        # A non-numeric id is a usage error; show the help text rather
        # than a traceback.
        print(USAGE)
        sys.exit(1)
    main(runner_id)
@rokesby
Copy link

rokesby commented May 28, 2024

Hi there, can you share any advice as I get the following error when running this code:

(parkrun1) ➜ parkrun1 python3 parkrun.py 191748

/Users/reza/Code/parkrun1/parkrun1/lib/python3.9/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020
warnings.warn(
/Users/reza/Code/parkrun1/parkrun.py:38: GuessedAtParserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("html.parser"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.

The code that caused this warning is on line 38 of the file /Users/reza/Code/parkrun1/parkrun.py. To get rid of this warning, pass the additional argument 'features="html.parser"' to the BeautifulSoup constructor.

soup = BeautifulSoup(re.sub(r'(\s)+', ' ', str(html)))
Traceback (most recent call last):
File "/Users/reza/Code/parkrun1/parkrun.py", line 144, in <module>
main(int(sys.argv[1]))
File "/Users/reza/Code/parkrun1/parkrun.py", line 107, in main
results = get_results(runner)
File "/Users/reza/Code/parkrun1/parkrun.py", line 40, in get_results
table = soup.findAll('table', attrs = { 'id': 'results' })[2]
IndexError: list index out of range

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment