Last active
February 13, 2025 04:15
-
-
Save yuxi-liu-wired/2c875817befed5e29665871b076aeb92 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plots for "Competitive Programming with Large Reasoning Models" (2025)
import requests | |
from bs4 import BeautifulSoup | |
import matplotlib.pyplot as plt | |
import numpy as np | |
# URL with all IOI 2024 scores
url = "https://cphof.org/standings/ioi/2024"
# Bound the request time: without a timeout, requests can wait indefinitely on
# a stalled connection and the script would hang instead of failing.
response = requests.get(url, timeout=30)
response.raise_for_status()  # ensure request succeeded

# Parse the page with BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Use a CSS selector to locate the standings table.
table = soup.select_one("table.table-bordered.table-striped.table-hover.w-auto")
if table is None:
    raise ValueError("Couldn't find the standings table!")

# Get all rows; the first row is the header
rows = table.find_all("tr")[1:]

scores = []
for row in rows:
    # Each row is expected to have 5 columns: Rank, Country, Name, Score, Prize.
    # The score is in the fourth <td>; rows with fewer cells are ignored.
    cols = row.find_all("td")
    if len(cols) < 4:
        continue
    score_text = cols[3].get_text(strip=True)
    try:
        score = float(score_text)
    except ValueError:
        # Skip rows where the score isn't a valid float.
        continue
    scores.append(score)
n = len(scores)
print("Number of contestants scraped:", n)
# Fail fast with a clear message: with no scores, the percentile computation
# for the special points further down would divide by zero.
if n == 0:
    raise ValueError("No contestant scores were parsed from the standings table!")

# Highest score first, so index 0 is the top contestant.
sorted_scores = sorted(scores, reverse=True)

# Compute percentiles for each contestant.
# Percentile for the i-th score (i=0 for highest) is computed as:
#     percentile = 100 * (n - i - 0.5) / n
# The 0.5 places each contestant at the midpoint of their rank slot.
percentiles = [100 * (n - i - 0.5) / n for i in range(n)]
# Draw the contestants' score curve: a faint connecting line plus a small 'x'
# marker per contestant (only the markers get a legend entry).
plt.figure(figsize=(18, 10))
plt.plot(percentiles, sorted_scores, alpha=0.6)
plt.scatter(percentiles, sorted_scores, alpha=0.6, marker='x', s=4, label='Contestants')

# Medal thresholds from IOI 2024 info.
bronze = 216.97
silver = 289.31
gold = 359.71

# One dashed horizontal line per medal cutoff, drawn bronze -> silver -> gold.
for cutoff, line_color, legend_label in (
    (bronze, 'brown', 'Bronze cutoff (216.97)'),
    (silver, 'silver', 'Silver cutoff (289.31)'),
    (gold, 'gold', 'Gold cutoff (359.71)'),
):
    plt.axhline(y=cutoff, color=line_color, linestyle='--', label=legend_label)

# Special points to overlay, keyed by score; each value is the annotation text.
special_points = {
    156: "o1, random (156)",
    213: "o1, handcrafted (213)",
    362.14: "o1, 10K submissions (362.14)",
    395.64: "o3 (395.64)",
}
def compute_percentile_for_special(special_score, sorted_scores):
    """
    Compute the percentile at which a special score sits among contestants.

    The score is ranked by counting how many contestants have a strictly
    higher score, then mapped with the mid-rank formula
    ``100 * (total - count_higher - 0.5) / total``, where ``total`` is the
    number of contestant scores passed in.

    Args:
        special_score: The score to place on the percentile axis.
        sorted_scores: Sized collection of contestant scores. Only counts are
            used, so the ordering does not affect the result.

    Returns:
        The percentile as a float. It is slightly below 0 when every
        contestant scored strictly higher than ``special_score``.

    Raises:
        ValueError: If ``sorted_scores`` is empty.
    """
    # Use the size of the collection actually passed in rather than a
    # module-level count, so the result always matches ``sorted_scores``.
    total = len(sorted_scores)
    if total == 0:
        raise ValueError("sorted_scores must not be empty")
    count_higher = sum(1 for s in sorted_scores if s > special_score)
    return 100 * (total - count_higher - 0.5) / total
# Overlay every special point as a large red star. Its label is offset
# 5 percentile points to the left and 20 score points up from the star.
for model_score, label_text in special_points.items():
    model_percentile = compute_percentile_for_special(model_score, sorted_scores)
    plt.scatter(model_percentile, model_score, marker='*', s=200, color='red', zorder=5)
    plt.text(
        model_percentile - 5,
        model_score + 20,
        label_text,
        fontsize=10,
        verticalalignment='center',
        color='red',
    )

# Axis labels, legend and grid, then display the finished figure.
plt.xlabel('Percentile')
plt.ylabel('Score')
# plt.title('IOI 2024 Contestants: Scores vs. Percentiles')
plt.legend()
plt.grid(True)
plt.show()
# ------------------------------------------------------------ | |
import requests | |
import ast | |
import matplotlib.pyplot as plt | |
# 1) Fetch data from Pastebin
url = 'https://pastebin.com/raw/D6H8qVbF'
# Bound the request time so a stalled connection cannot hang the script.
response = requests.get(url, timeout=30)
# Stop on an HTTP error instead of trying to parse an error page as data.
response.raise_for_status()
raw_data = response.text.strip()

# 2) Convert string dictionary to actual Python dict
# literal_eval only accepts Python literals, so the downloaded text is parsed
# without being executed.
data_dict = ast.literal_eval(raw_data)  # {rating: count, ...}
if not isinstance(data_dict, dict):
    raise ValueError("Expected the downloaded data to be a {rating: count} dict!")

# 3) Sort by rating
items = sorted(data_dict.items(), key=lambda x: x[0])
ratings = [item[0] for item in items]
counts = [item[1] for item in items]
# 4) Assign colors by typical CF rating intervals (adjust as desired).
# The bands used below are:
# <1200: gray, 1200–1399: green, 1400–1599: cyan, 1600–1899: blue,
# 1900–2099: purple, 2100–2399: orange, 2400–2999: red, >=3000: maroon
def get_color(rating):
    """Return the bar color for a rating.

    Bands are scanned in ascending order and the first band whose exclusive
    upper bound is greater than ``rating`` wins; ratings of 3000 and above
    fall through to ``'maroon'``.
    """
    # (exclusive upper bound, color) pairs, lowest band first.
    bands = (
        (1200, 'gray'),
        (1400, 'green'),
        (1600, 'cyan'),
        (1900, 'blue'),
        (2100, 'purple'),
        (2400, 'orange'),
        (3000, 'red'),
    )
    for upper_bound, band_color in bands:
        if rating < upper_bound:
            return band_color
    return 'maroon'
# One color per rating bin, chosen by get_color:
#   Gray: Newbie (rating below 1200)
#   Green: Pupil (1200–1399)
#   Cyan: Specialist (1400–1599)
#   Blue: Expert (1600–1899)
#   Violet/Purple: Candidate Master (1900–2099)
#   Orange: Master (2100–2399)
#   Red: Grandmaster+ (2400-)
colors = list(map(get_color, ratings))

# 5) Draw the histogram as a bar chart, one bar per rating bin.
plt.figure(figsize=(10, 6))
plt.bar(
    ratings,
    counts,
    color=colors,
    width=4.5,
)
plt.title("Codeforces Rating Distribution (active user, number of participations >= 6)\nMarch 5, 2024")
plt.xlabel("Rating")
plt.ylabel("Number of contestants per bin")

# Leave 5% headroom above the tallest bar; ymax is reused later to position
# the model labels.
ymax = 1.05 * max(counts)
plt.ylim(0, ymax)
# 6) Mark each model's rating with a dashed vertical line and a rotated label.
model_ratings = [
    ("gpt-4o", 808),
    ("o1-preview", 1258),
    ("o1", 1673),
    ("o1-ioi", 1807),
    ("o1-ioi (simple filter)", 2092),
    ("o1-ioi (full sampling)", 2214),
    ("o3", 2724),
]

# All labels start at the same height: 70% of the y-axis maximum.
label_height = ymax * 0.7
for model_name, model_rating in model_ratings:
    # Thin black dashed line at the model's rating.
    plt.axvline(x=model_rating, color='k', linestyle='--', linewidth=1)
    # Vertical text anchored at its bottom-left, 20 rating points right of the line.
    plt.text(
        model_rating + 20,
        label_height,
        model_name,
        rotation=90,
        va='bottom',
        ha='left',
        color='black',
    )

plt.tight_layout()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment