# Source: GitHub Gist yuxi-liu-wired/2c875817befed5e29665871b076aeb92
# Plots for Competitive Programming with Large Reasoning Models (2025)
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import numpy as np
# URL with all IOI 2024 scores
url = "https://cphof.org/standings/ioi/2024"
# A timeout keeps the script from hanging indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
response.raise_for_status()  # ensure request succeeded
# Parse the page with BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
# Use a CSS selector to locate the table (based on provided HTML snippet)
table = soup.select_one("table.table-bordered.table-striped.table-hover.w-auto")
if table is None:
    raise ValueError("Couldn't find the standings table!")
# Get all rows; the first row is the header
rows = table.find_all("tr")[1:]
scores = []
for row in rows:
    # Each row has 5 columns: Rank, Country, Name, Score, Prize.
    # The score is in the fourth <td>.
    cols = row.find_all("td")
    if len(cols) < 4:
        # Not a contestant row (too few cells); nothing to read.
        continue
    score_text = cols[3].get_text(strip=True)
    try:
        score = float(score_text)
    except ValueError:
        # Skip rows where the score isn't a valid float.
        continue
    scores.append(score)
n = len(scores)
print("Number of contestants scraped:", n)
if n == 0:
    # Every percentile below divides by n, so stop here with a clear message
    # rather than failing later with a ZeroDivisionError.
    raise ValueError("No contestant scores could be parsed from the standings table!")
# Rank the contestants from the highest score down to the lowest.
sorted_scores = list(scores)
sorted_scores.sort(reverse=True)
# Give every rank a midpoint percentile (rank 0 is the top score):
#     percentile = 100 * (n - rank - 0.5) / n
percentiles = []
for rank in range(n):
    percentiles.append(100 * (n - rank - 0.5) / n)
# Draw the score curve, with an 'x' marker on each individual contestant.
plt.figure(figsize=(18, 10))
plt.plot(percentiles, sorted_scores, alpha=0.6)
plt.scatter(
    percentiles,
    sorted_scores,
    marker='x',
    s=4,
    alpha=0.6,
    label='Contestants',
)
# Medal thresholds from IOI 2024 info.
bronze = 216.97
silver = 289.31
gold = 359.71
# One dashed horizontal line per medal cutoff.
for cutoff, line_color, legend_label in (
    (bronze, 'brown', 'Bronze cutoff (216.97)'),
    (silver, 'silver', 'Silver cutoff (289.31)'),
    (gold, 'gold', 'Gold cutoff (359.71)'),
):
    plt.axhline(y=cutoff, color=line_color, linestyle='--', label=legend_label)
# Scores to overlay on the contestant curve, mapped to their annotation text.
special_points = {
    156: "o1, random (156)",
    213: "o1, handcrafted (213)",
    362.14: "o1, 10K submissions (362.14)",
    395.64: "o3 (395.64)",
}
def compute_percentile_for_special(special_score, sorted_scores):
    """
    Compute the percentile at which a special score sits among the contestants.

    The score is ranked by counting how many contestants have a strictly
    higher score, and that rank is mapped to a midpoint percentile:
    100 * (total - rank - 0.5) / total, where total is the number of
    contestants in ``sorted_scores``.

    Args:
        special_score: The score to place among the contestants.
        sorted_scores: The contestants' scores. Only the values matter for
            the count, so the order does not affect the result.

    Returns:
        The percentile as a float. A score tied with or above the best
        contestant gets the top contestant's percentile.

    Raises:
        ValueError: If ``sorted_scores`` is empty.
    """
    # Use the size of the list that was passed in, not a module-level global,
    # so the function gives the right answer for any score list.
    total = len(sorted_scores)
    if total == 0:
        raise ValueError("Cannot compute a percentile without any contestant scores.")
    count_higher = sum(1 for s in sorted_scores if s > special_score)
    return 100 * (total - count_higher - 0.5) / total
# Plot each special point with a star marker and add an annotation.
for sp_score, annotation in special_points.items():
    sp_percentile = compute_percentile_for_special(sp_score, sorted_scores)
    # zorder=5 draws the star on top of the contestant curve and markers.
    plt.scatter(sp_percentile, sp_score, marker='*', s=200, color='red', zorder=5)
    # Shift the label left and up so it does not sit on top of the star.
    plt.text(sp_percentile - 5, sp_score + 20, annotation, fontsize=10,
             verticalalignment='center', color='red')
plt.xlabel('Percentile')
plt.ylabel('Score')
# plt.title('IOI 2024 Contestants: Scores vs. Percentiles')
plt.legend()
plt.grid(True)
plt.show()
# ------------------------------------------------------------
import requests
import ast
import matplotlib.pyplot as plt
# 1) Fetch data from Pastebin
url = 'https://pastebin.com/raw/D6H8qVbF'
# A timeout keeps the script from hanging indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page below.
response.raise_for_status()
raw_data = response.text.strip()
# 2) Convert string dictionary to actual Python dict
# ast.literal_eval only accepts Python literals, so the downloaded text is
# never executed as code.
data_dict = ast.literal_eval(raw_data)  # {rating: count, ...}
if not isinstance(data_dict, dict):
    raise ValueError("Expected the downloaded data to be a {rating: count} dict.")
# 3) Sort by rating
items = sorted(data_dict.items(), key=lambda x: x[0])
ratings = [item[0] for item in items]
counts = [item[1] for item in items]
# 4) Assign colors by Codeforces rating intervals (adjust as desired):
# <1200: gray, 1200–1399: green, 1400–1599: cyan, 1600–1899: blue,
# 1900–2099: purple, 2100–2399: orange, 2400–2999: red, >=3000: maroon
def get_color(rating):
    """Return the bar color for a Codeforces rating.

    Each tier is defined by an exclusive upper bound:
    below 1200 gray, below 1400 green, below 1600 cyan, below 1900 blue,
    below 2100 purple, below 2400 orange, below 3000 red, and maroon for
    3000 and above.

    Args:
        rating: Numeric rating to classify.

    Returns:
        The color name as a string.
    """
    # (exclusive upper bound, color), in ascending order of bound so that the
    # first bound the rating falls under decides the color.
    tiers = (
        (1200, 'gray'),
        (1400, 'green'),
        (1600, 'cyan'),
        (1900, 'blue'),
        (2100, 'purple'),
        (2400, 'orange'),
        (3000, 'red'),
    )
    for upper_bound, color in tiers:
        if rating < upper_bound:
            return color
    # Not below any bound: the top tier.
    return 'maroon'
# One color per bar, chosen from the bar's rating.
colors = [get_color(r) for r in ratings]
# Gray: Newbie (rating below 1200)
# Green: Pupil (1200–1399)
# Cyan: Specialist (1400–1599)
# Blue: Expert (1600–1899)
# Violet/Purple: Candidate Master (1900–2099)
# Orange: Master (2100–2399)
# Red: Grandmaster+ (2400–2999)
# Maroon: 3000 and above
# 5) Draw the histogram of ratings
plt.figure(figsize=(10,6))
plt.bar(ratings, counts, color=colors, width=4.5)
plt.title("Codeforces Rating Distribution (active user, number of participations >= 6)\nMarch 5, 2024")
plt.xlabel("Rating")
plt.ylabel("Number of contestants per bin")
# Optional: define a y-limit for better control of text placement
ymax = max(counts) * 1.05
plt.ylim(0, ymax)
# 6) Add vertical dashed lines for each model and label them
model_ratings = [
    ("gpt-4o", 808),
    ("o1-preview", 1258),
    ("o1", 1673),
    ("o1-ioi", 1807),
    ("o1-ioi (simple filter)", 2092),
    ("o1-ioi (full sampling)", 2214),
    ("o3", 2724),
]
for model, rating in model_ratings:
    # Draw dashed line
    plt.axvline(x=rating, color='k', linestyle='--', linewidth=1)
    # Add model name slightly to the right of the line; adjust text position as needed
    plt.text(
        rating + 20,       # horizontal offset
        ymax * 0.7,        # vertical position (70% of max height)
        model,
        rotation=90,       # rotate text vertically
        va='bottom',       # align text at the bottom
        ha='left',         # horizontal alignment
        color='black'
    )
plt.tight_layout()
plt.show()
# End of gist (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment").