Skip to content

Instantly share code, notes, and snippets.

@tukachev
Created February 24, 2025 13:36
Show Gist options
  • Save tukachev/84d5e7445059f904e550283f33074654 to your computer and use it in GitHub Desktop.
Save tukachev/84d5e7445059f904e550283f33074654 to your computer and use it in GitHub Desktop.
# AI Model Benchmark Scores Across Domains
# https://x.com/xai/status/1891699715298730482
library(tidyverse)
# Fill colour (hex) for every model, keyed by the model name used in the data.
# The names must match the `Model` values exactly, because
# `scale_fill_manual()` looks colours up by name.
color_palette <- setNames(
  c("#1b9e77", "#66a61e", "#e7298a", "#7570b3", "#d95f02", "#e6ab02"),
  c(
    "Grok-3", "Grok-3 mini", "Gemini-2 Pro",
    "DeepSeek-V3", "Claude 3.5 Sonnet", "GPT-4o"
  )
)
# Benchmark scores in long format: one row per (benchmark, model) pair.
# Rows are listed benchmark by benchmark, models in the same order each time.
data <- tribble(
  ~Benchmark,             ~Model,              ~Score,
  # Math
  "Math (AIME'24)",       "Grok-3",            52,
  "Math (AIME'24)",       "Grok-3 mini",       40,
  "Math (AIME'24)",       "Gemini-2 Pro",      36,
  "Math (AIME'24)",       "DeepSeek-V3",       40,
  "Math (AIME'24)",       "Claude 3.5 Sonnet", 16,
  "Math (AIME'24)",       "GPT-4o",             9,
  # Science
  "Science (GPQA)",       "Grok-3",            75,
  "Science (GPQA)",       "Grok-3 mini",       65,
  "Science (GPQA)",       "Gemini-2 Pro",      65,
  "Science (GPQA)",       "DeepSeek-V3",       59,
  "Science (GPQA)",       "Claude 3.5 Sonnet", 65,
  "Science (GPQA)",       "GPT-4o",            50,
  # Coding
  "Coding (LCB Oct-Feb)", "Grok-3",            57,
  "Coding (LCB Oct-Feb)", "Grok-3 mini",       41,
  "Coding (LCB Oct-Feb)", "Gemini-2 Pro",      36,
  "Coding (LCB Oct-Feb)", "DeepSeek-V3",       40,
  "Coding (LCB Oct-Feb)", "Claude 3.5 Sonnet", 36,
  "Coding (LCB Oct-Feb)", "GPT-4o",            34
)
# Prepare the data for plotting:
#   * Benchmark_Mean - average score of all models on that benchmark
#   * Order          - rank of the model inside its benchmark (1 = best score)
#   * Benchmark      - factor, levels sorted by descending mean score
#   * Model          - factor with a fixed level order (drives the legend)
data <- data %>%
  group_by(Benchmark) %>%
  mutate(Benchmark_Mean = mean(Score)) %>%
  ungroup() %>%
  # Highest-mean benchmark first; inside a benchmark, highest score first.
  arrange(desc(Benchmark_Mean), Benchmark, desc(Score)) %>%
  group_by(Benchmark) %>%
  # Position of the row inside its (already sorted) benchmark group.
  mutate(Order = seq_len(n())) %>%
  ungroup() %>%
  mutate(
    Benchmark = factor(
      Benchmark,
      levels = unique(Benchmark[order(-Benchmark_Mean)])
    ),
    Model = factor(
      Model,
      levels = c(
        "Grok-3", "Grok-3 mini", "Gemini-2 Pro",
        "DeepSeek-V3", "Claude 3.5 Sonnet", "GPT-4o"
      )
    )
  )
# Grouped bar chart on a black background: one cluster of bars per benchmark,
# one bar per model, with the score printed above each bar.
#
# `group = Order` makes the dodging follow the per-benchmark rank stored in
# `Order` instead of the `Model` factor order, while `fill = Model` keeps the
# colours and the legend tied to the model.
ggplot(data, aes(x = Benchmark, y = Score, fill = Model, group = Order)) +
  geom_col(position = position_dodge(width = 0.9), width = 0.7) +
  # Black rules drawn after the bars: on the black panel they only show up
  # where they cross a bar, marking the 20/40/60 levels.
  geom_hline(yintercept = c(20, 40, 60), color = "black", linewidth = 0.5) +
  # Same dodge width as the bars so every label sits above its own bar.
  geom_text(
    aes(label = round(Score, 1)),
    position = position_dodge(width = 0.9),
    vjust = -0.3,
    size = 4,
    color = "white",
    family = "sans"
  ) +
  scale_fill_manual(values = color_palette, name = "AI Model") +
  # expand = c(0, 0) puts the bars directly on the x axis line.
  scale_y_continuous(
    limits = c(0, 80),
    breaks = c(0, 20, 40, 60, 80),
    expand = c(0, 0)
  ) +
  labs(
    title = "AI Model Benchmark Scores Across Domains",
    subtitle = "Performance on Math, Science, and Coding Tasks, Feb 2025",
    x = "Benchmark Category",
    y = "Benchmark Score",
    # One element per caption line; the empty element yields the blank line
    # that separates the glossary from the source note.
    caption = paste(
      "Note:",
      "AIME'24 = American Invitational Mathematics Examination 2024",
      "GPQA = Graduate-Level Google-Proof Q&A Benchmark",
      "LCB Oct-Feb = LiveCodeBench (Oct 2024 - Feb 2025)",
      "",
      "Source: xAI Grok3 Launch Presentation, Feb 18, 2025",
      sep = "\n"
    )
  ) +
  theme(
    # Canvas
    plot.background = element_rect(fill = "black", color = NA),
    panel.background = element_rect(fill = "black", color = NA),
    plot.margin = margin(20, 15, 20, 15),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Axes: visible x axis line without ticks, y ticks without an axis line
    axis.line.x = element_line(color = "gray30", linewidth = 0.5),
    axis.line.y = element_blank(),
    axis.ticks.length.x = unit(0, "cm"),
    axis.ticks.length.y = unit(.2, "cm"),
    axis.ticks.y = element_line(colour = "gray30"),
    axis.text = element_text(color = "white", size = 12, family = "sans"),
    axis.title = element_text(color = "white", size = 14, family = "sans"),
    # Title, subtitle and caption are aligned to the whole plot area
    plot.title.position = "plot",
    plot.caption.position = "plot",
    plot.title = element_text(
      hjust = 0.5, size = 16, face = "bold", color = "white", family = "sans"
    ),
    plot.subtitle = element_text(
      hjust = 0.5, size = 12, color = "white", family = "sans"
    ),
    plot.caption = element_text(
      hjust = 0, size = 10, color = "gray90", family = "sans"
    ),
    # Legend
    legend.position = "top",
    legend.background = element_blank(),
    legend.box.background = element_blank(),
    legend.text = element_text(color = "white", size = 12, family = "sans"),
    legend.title = element_text(
      color = "white", size = 14, face = "bold", family = "sans"
    )
  )
# Write the most recently created plot to a 7 x 7 inch PNG at 300 dpi
# in the current working directory.
ggsave(
  filename = "grok-3_Benchmark2025.png",
  plot = last_plot(),
  width = 7,
  height = 7,
  units = "in",
  dpi = 300
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment