yuxi-liu-wired · February 13, 2025 04:15
diff --git a/Plots for Competitive Programming with Large Reasoning Models.py b/Plots for Competitive Programming with Large Reasoning Models.py
 # Plots for Competitive Programming with Large Reasoning Models (2025)

 import requests
 from bs4 import BeautifulSoup
 import matplotlib.pyplot as plt
 import numpy as np

 # Download the IOI 2024 standings page that lists every contestant's score.
 url = "https://cphof.org/standings/ioi/2024"
 # The timeout stops the script from hanging forever on an unresponsive server.
 response = requests.get(url, timeout=30)
 response.raise_for_status()  # ensure request succeeded

 # Parse the page with BeautifulSoup
 soup = BeautifulSoup(response.content, 'html.parser')

 # Use a CSS selector to locate the standings table.
 table = soup.select_one("table.table-bordered.table-striped.table-hover.w-auto")
 if table is None:
    raise ValueError("Couldn't find the standings table!")

 # Get all rows; the first row is the header
 rows = table.find_all("tr")[1:]
 scores = []
 for row in rows:
    # Each row has 5 columns: Rank, Country, Name, Score, Prize.
    # The score is in the fourth <td>.
    cols = row.find_all("td")
    if len(cols) < 4:
        continue
    try:
        scores.append(float(cols[3].get_text(strip=True)))
    except ValueError:
        # Skip rows where the score isn't a valid float.
        continue

 n = len(scores)
 print("Number of contestants scraped:", n)
 if n == 0:
    # The percentile computation further down divides by n, so stop here
    # with a clear message instead of a later ZeroDivisionError.
    raise ValueError("No contestant scores could be parsed from the standings table!")
 sorted_scores = sorted(scores, reverse=True)


 # Percentile of each contestant, best score first. Rank i (0 = highest) is
 # mapped to:
 #    percentile = 100 * (n - i - 0.5) / n
 percentiles = [100 * (n - rank - 0.5) / n for rank in range(n)]

 # Draw the score curve with one small 'x' marker per contestant.
 plt.figure(figsize=(18, 10))
 plt.plot(percentiles, sorted_scores, alpha=0.6)
 plt.scatter(percentiles, sorted_scores, label='Contestants', marker='x', s=4, alpha=0.6)

 # Medal thresholds from IOI 2024 info.
 bronze, silver, gold = 216.97, 289.31, 359.71

 # One dashed horizontal line per medal cutoff: (score, line colour, legend label).
 medal_lines = (
    (bronze, 'brown', 'Bronze cutoff (216.97)'),
    (silver, 'silver', 'Silver cutoff (289.31)'),
    (gold, 'gold', 'Gold cutoff (359.71)'),
 )
 for cutoff, line_color, line_label in medal_lines:
    plt.axhline(y=cutoff, color=line_color, linestyle='--', label=line_label)

 # Model results to overlay, keyed by score with the plot label as value.
 special_points = {
    156: "o1, random (156)",
    213: "o1, handcrafted (213)",
    362.14: "o1, 10K submissions (362.14)",
    395.64: "o3 (395.64)",
 }

 def compute_percentile_for_special(special_score, sorted_scores):
    """
    Return the percentile at which ``special_score`` ranks among ``sorted_scores``.

    The result is ``100 * (total - count_higher - 0.5) / total``, where
    ``count_higher`` is the number of entries strictly greater than
    ``special_score`` and ``total`` is ``len(sorted_scores)``. The total is
    taken from the argument itself rather than from a module-level variable,
    so the function gives the right answer for whatever list it is handed.

    Args:
        special_score: the score to place among the contestants.
        sorted_scores: the contestants' scores (any order works; only
            comparisons against ``special_score`` are made).

    Raises:
        ValueError: if ``sorted_scores`` is empty.
    """
    total = len(sorted_scores)
    if total == 0:
        raise ValueError("sorted_scores must not be empty")
    count_higher = sum(1 for s in sorted_scores if s > special_score)
    return 100 * (total - count_higher - 0.5) / total

 # Overlay every model result as a red star at its (percentile, score)
 # position; the label is offset by (-5, +20) in data coordinates.
 star_style = dict(marker='*', s=200, color='red', zorder=5)
 for sp_score in special_points:
    sp_percentile = compute_percentile_for_special(sp_score, sorted_scores)
    plt.scatter(sp_percentile, sp_score, **star_style)
    text_x = sp_percentile - 5
    text_y = sp_score + 20
    plt.text(text_x, text_y, special_points[sp_score], fontsize=10,
             verticalalignment='center', color='red')

 # Axis labels, legend and grid, then display the figure.
 plt.ylabel('Score')
 plt.xlabel('Percentile')
 # plt.title('IOI 2024 Contestants: Scores vs. Percentiles')
 plt.grid(True)
 plt.legend()
 plt.show()

 # ------------------------------------------------------------

 import requests
 import ast
 import matplotlib.pyplot as plt

 # 1) Fetch data from Pastebin
 url = 'https://pastebin.com/raw/D6H8qVbF'
 # The timeout stops the script from hanging forever on an unresponsive server.
 response = requests.get(url, timeout=30)
 # Fail on HTTP errors; otherwise an error page would be passed to literal_eval.
 response.raise_for_status()
 raw_data = response.text.strip()

 # 2) Convert string dictionary to actual Python dict.
 #    literal_eval only accepts Python literals, so the downloaded text is
 #    parsed as data and never executed.
 data_dict = ast.literal_eval(raw_data)  # {rating: count, ...}

 # 3) Sort by rating and split into parallel lists for plotting
 items = sorted(data_dict.items(), key=lambda x: x[0])
 ratings = [rating_bin for rating_bin, _ in items]
 counts  = [bin_count for _, bin_count in items]

 # 4) Assign colors by typical CF rating intervals (adjust as desired)
 #    Thresholds used below:
 #      <1200: gray, 1200–1399: green, 1400–1599: cyan, 1600–1899: blue,
 #      1900–2099: purple, 2100–2399: orange, 2400–2999: red, >=3000: maroon
 def get_color(rating):
    """
    Return the bar colour for a rating value.

    The bands are checked in ascending order and each upper bound is
    exclusive; a rating that is not below any bound (3000 and above) gets
    'maroon'.
    """
    bands = (
        (1200, 'gray'),
        (1400, 'green'),
        (1600, 'cyan'),
        (1900, 'blue'),
        (2100, 'purple'),
        (2400, 'orange'),
        (3000, 'red'),
    )
    for upper_bound, band_color in bands:
        if rating < upper_bound:
            return band_color
    return 'maroon'

 # One bar colour per rating bin.
 colors = list(map(get_color, ratings))

 # Colour bands applied by get_color (title names follow Codeforces):
 #   gray:   Newbie (rating below 1200)
 #   green:  Pupil (1200–1399)
 #   cyan:   Specialist (1400–1599)
 #   blue:   Expert (1600–1899)
 #   purple: Candidate Master (1900–2099)
 #   orange: Master (2100–2399)
 #   red:    Grandmaster and above (2400–2999)
 #   maroon: 3000 and above

 plt.figure(figsize=(10,6))
 plt.bar(ratings, counts, color=colors, width=4.5)

 plt.xlabel("Rating")
 plt.ylabel("Number of contestants per bin")
 plt.title("Codeforces Rating Distribution (active user, number of participations >= 6)\nMarch 5, 2024")

 # Leave 5% headroom above the tallest bar; ymax also anchors the labels below.
 ymax = max(counts) * 1.05
 plt.ylim(0, ymax)

 # Mark each model's rating with a dashed vertical line and a rotated label.
 model_ratings = [
    ("gpt-4o", 808),
    ("o1-preview", 1258),
    ("o1", 1673),
    ("o1-ioi", 1807),
    ("o1-ioi (simple filter)", 2092),
    ("o1-ioi (full sampling)", 2214),
    ("o3", 2724),
 ]

 label_height = ymax * 0.7  # labels start at 70% of the y-axis limit
 label_style = dict(rotation=90, va='bottom', ha='left', color='black')
 for model, rating in model_ratings:
    plt.axvline(x=rating, color='k', linestyle='--', linewidth=1)
    # Shift the text 20 rating points to the right of its line.
    plt.text(rating + 20, label_height, model, **label_style)

 plt.tight_layout()
 plt.show()
	# Plots for Competitive Programming with Large Reasoning Models (2025)

	import requests
	from bs4 import BeautifulSoup
	import matplotlib.pyplot as plt
	import numpy as np

	# Download the IOI 2024 standings page that lists every contestant's score.
	url = "https://cphof.org/standings/ioi/2024"
	# The timeout stops the script from hanging forever on an unresponsive server.
	response = requests.get(url, timeout=30)
	response.raise_for_status()  # ensure request succeeded

	# Parse the page with BeautifulSoup
	soup = BeautifulSoup(response.content, 'html.parser')

	# Use a CSS selector to locate the standings table.
	table = soup.select_one("table.table-bordered.table-striped.table-hover.w-auto")
	if table is None:
	    raise ValueError("Couldn't find the standings table!")

	# Get all rows; the first row is the header
	rows = table.find_all("tr")[1:]
	scores = []
	for row in rows:
	    # Each row has 5 columns: Rank, Country, Name, Score, Prize.
	    # The score is in the fourth <td>.
	    cols = row.find_all("td")
	    if len(cols) >= 4:
	        score_text = cols[3].get_text(strip=True)
	        try:
	            score = float(score_text)
	            scores.append(score)
	        except ValueError:
	            # Skip rows where the score isn't a valid float.
	            continue

	n = len(scores)
	print("Number of contestants scraped:", n)
	if n == 0:
	    # The percentile computation further down divides by n, so stop here
	    # with a clear message instead of a later ZeroDivisionError.
	    raise ValueError("No contestant scores could be parsed from the standings table!")
	sorted_scores = sorted(scores, reverse=True)


	# Percentile of each contestant, best score first. Rank i (0 = highest) is
	# mapped to:
	#    percentile = 100 * (n - i - 0.5) / n
	percentiles = [100 * (n - rank - 0.5) / n for rank in range(n)]

	# Draw the score curve with one small 'x' marker per contestant.
	plt.figure(figsize=(18, 10))
	plt.plot(percentiles, sorted_scores, alpha=0.6)
	plt.scatter(percentiles, sorted_scores, label='Contestants', marker='x', s=4, alpha=0.6)

	# Medal thresholds from IOI 2024 info.
	bronze, silver, gold = 216.97, 289.31, 359.71

	# One dashed horizontal line per medal cutoff: (score, line colour, legend label).
	medal_lines = (
	    (bronze, 'brown', 'Bronze cutoff (216.97)'),
	    (silver, 'silver', 'Silver cutoff (289.31)'),
	    (gold, 'gold', 'Gold cutoff (359.71)'),
	)
	for cutoff, line_color, line_label in medal_lines:
	    plt.axhline(y=cutoff, color=line_color, linestyle='--', label=line_label)

	# Model results to overlay, keyed by score with the plot label as value.
	special_points = {
	    156: "o1, random (156)",
	    213: "o1, handcrafted (213)",
	    362.14: "o1, 10K submissions (362.14)",
	    395.64: "o3 (395.64)",
	}

	def compute_percentile_for_special(special_score, sorted_scores):
	    """
	    Return the percentile at which ``special_score`` ranks among ``sorted_scores``.

	    The result is ``100 * (total - count_higher - 0.5) / total``, where
	    ``count_higher`` is the number of entries strictly greater than
	    ``special_score`` and ``total`` is ``len(sorted_scores)``. The total is
	    taken from the argument itself rather than from a module-level variable,
	    so the function gives the right answer for whatever list it is handed.

	    Args:
	        special_score: the score to place among the contestants.
	        sorted_scores: the contestants' scores (any order works; only
	            comparisons against ``special_score`` are made).

	    Raises:
	        ValueError: if ``sorted_scores`` is empty.
	    """
	    total = len(sorted_scores)
	    if total == 0:
	        raise ValueError("sorted_scores must not be empty")
	    count_higher = sum(1 for s in sorted_scores if s > special_score)
	    return 100 * (total - count_higher - 0.5) / total

	# Plot each special point with a red star marker and add an annotation
	# offset by (-5, +20) in data coordinates.
	for sp_score, annotation in special_points.items():
	    sp_percentile = compute_percentile_for_special(sp_score, sorted_scores)
	    plt.scatter(sp_percentile, sp_score, marker='*', s=200, color='red', zorder=5)
	    plt.text(sp_percentile - 5, sp_score + 20, annotation, fontsize=10,
	             verticalalignment='center', color='red')

	# Axis labels, legend and grid, then display the figure.
	plt.xlabel('Percentile')
	plt.ylabel('Score')
	# plt.title('IOI 2024 Contestants: Scores vs. Percentiles')
	plt.legend()
	plt.grid(True)
	plt.show()

	# ------------------------------------------------------------

	import requests
	import ast
	import matplotlib.pyplot as plt

	# 1) Fetch data from Pastebin
	url = 'https://pastebin.com/raw/D6H8qVbF'
	# The timeout stops the script from hanging forever on an unresponsive server.
	response = requests.get(url, timeout=30)
	# Fail on HTTP errors; otherwise an error page would be passed to literal_eval.
	response.raise_for_status()
	raw_data = response.text.strip()

	# 2) Convert string dictionary to actual Python dict.
	#    literal_eval only accepts Python literals, so the downloaded text is
	#    parsed as data and never executed.
	data_dict = ast.literal_eval(raw_data)  # {rating: count, ...}

	# 3) Sort by rating and split into parallel lists for plotting
	items = sorted(data_dict.items(), key=lambda x: x[0])
	ratings = [item[0] for item in items]
	counts = [item[1] for item in items]

	# 4) Assign colors by typical CF rating intervals (adjust as desired)
	# Thresholds used below:
	# <1200: gray, 1200–1399: green, 1400–1599: cyan, 1600–1899: blue,
	# 1900–2099: purple, 2100–2399: orange, 2400–2999: red, >=3000: maroon
	def get_color(rating):
	    """
	    Return the bar colour for a rating value.

	    The thresholds are checked in ascending order and each upper bound is
	    exclusive; ratings of 3000 and above fall through to 'maroon'.
	    """
	    if rating < 1200:
	        return 'gray'
	    elif rating < 1400:
	        return 'green'
	    elif rating < 1600:
	        return 'cyan'
	    elif rating < 1900:
	        return 'blue'
	    elif rating < 2100:
	        return 'purple'
	    elif rating < 2400:
	        return 'orange'
	    elif rating < 3000:
	        return 'red'
	    else:
	        return 'maroon'

	# One bar colour per rating bin.
	colors = [get_color(r) for r in ratings]

	# Colour bands applied by get_color (title names follow Codeforces):
	# Gray: Newbie (rating below 1200)
	# Green: Pupil (1200–1399)
	# Cyan: Specialist (1400–1599)
	# Blue: Expert (1600–1899)
	# Violet/Purple: Candidate Master (1900–2099)
	# Orange: Master (2100–2399)
	# Red: Grandmaster and above (2400–2999)
	# Maroon: 3000 and above

	plt.figure(figsize=(10,6))
	plt.bar(ratings, counts, color=colors, width=4.5)

	plt.title("Codeforces Rating Distribution (active user, number of participations >= 6)\nMarch 5, 2024")
	plt.xlabel("Rating")
	plt.ylabel("Number of contestants per bin")

	# Leave 5% headroom above the tallest bar; ymax also anchors the labels below.
	ymax = max(counts) * 1.05
	plt.ylim(0, ymax)

	# Add vertical dashed lines for each model and label them
	model_ratings = [
	    ("gpt-4o", 808),
	    ("o1-preview", 1258),
	    ("o1", 1673),
	    ("o1-ioi", 1807),
	    ("o1-ioi (simple filter)", 2092),
	    ("o1-ioi (full sampling)", 2214),
	    ("o3", 2724),
	]

	for model, rating in model_ratings:
	    # Draw dashed line
	    plt.axvline(x=rating, color='k', linestyle='--', linewidth=1)

	    # Add model name slightly to the right of the line; adjust text position as needed
	    plt.text(
	        rating + 20,   # horizontal offset
	        ymax * 0.7,    # vertical position (70% of the y-axis limit)
	        model,
	        rotation=90,   # rotate text vertically
	        va='bottom',   # align text at the bottom
	        ha='left',     # horizontal alignment
	        color='black'
	    )

	plt.tight_layout()
	plt.show()