Created
April 18, 2024 15:00
-
-
Save matiaskorhonen/fd2b6e84c0986f5c222694838175cffc to your computer and use it in GitHub Desktop.
A rough script to get the Two Oceans 2024 half marathon results as a CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# frozen_string_literal: true

# Scrapes a paginated results listing from results.finishtime.co.za and writes
# the selected columns of every result row to "results.csv" in the current
# working directory.
#
# Pages are requested one after another (PageNo=1, 2, 3, ...) over a single
# persistent HTTP connection. Scraping stops at the first page whose results
# grid contains no data rows.

require 'bundler/inline'

gemfile do
  source "https://rubygems.org"
  gem "csv"
  gem "http"
  gem "nokolexbor"
end

base_path = "https://results.finishtime.co.za"

# Titles written as the first CSV row; one per cell selected by CELL_INDEXES.
CSV_HEADERS = [
  "Pos",
  "Race No",
  "Country",
  "Name",
  "Time",
  "Medal",
  "Club",
  "Gender",
  "Gen Pos",
  "Category",
  "Cat Pos",
  "Start",
  "5km",
  "10.55km",
  "15km"
].freeze

# Positions of the <td> cells (within one grid row) that are kept, in output
# order. 4 single indexes + the 11-element range line up with CSV_HEADERS.
CELL_INDEXES = [2, 4, 5, 6, 8..18].freeze

# Matches a flag image path such as "images/flags/16/za.png" and captures the
# file's base name.
FLAG_SRC = %r{\Aimages/flags/\d+/([a-zA-Z]*)\.png\z}i

# CSS selector for the rows of the results grid (header row included).
GRID_ROWS_SELECTOR = "#ctl00_Content_Main_divGrid table tr"

# Converts one table cell to its CSV value.
#
# A cell containing an <img> yields the base name of the flag image (presumably
# a country code - confirm against the site), or "" when the src is missing or
# does not look like a flag path. Any other cell yields its text content.
def cell_value(cell)
  image = cell.at_css('img')
  return cell.text unless image

  # to_s guards against an <img> without a src attribute.
  image['src'].to_s[FLAG_SRC, 1] || ""
end

begin
  # create HTTP client with persistent connection:
  http = HTTP.persistent(base_path)

  CSV.open("results.csv", "wb") do |csv|
    csv << CSV_HEADERS

    page_number = 0

    loop do
      page_number += 1
      puts "Fetching page #{page_number}..."

      # issue multiple requests using same connection:
      page = http.get("/results.aspx?CId=35&RId=30353&EId=2&dt=0&PageNo=#{page_number}")

      # An error page would otherwise be parsed like a result page and could
      # silently end the scrape with a truncated CSV.
      unless page.status.success?
        raise "Request for page #{page_number} failed with HTTP status #{page.status}"
      end

      # Reading the body (to_s) also drains the response before the next request.
      doc = Nokolexbor::HTML(page.to_s)

      # Drop the header row; what remains are the actual results.
      data_rows = doc.css(GRID_ROWS_SELECTOR).drop(1)

      # Stop on the results grid itself rather than on "any table on the page",
      # so an unrelated table or a header-only grid cannot keep the loop alive.
      break if data_rows.empty?

      data_rows.each do |row|
        row_contents = row.css('td').map { |cell| cell_value(cell) }
        csv << row_contents.values_at(*CELL_INDEXES)
      end
    end
  end
ensure
  # close underlying connection when you don't need it anymore
  http&.close
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment