Last active
October 8, 2022 21:53
-
-
Save tallakt/33882f58bafa2aa531b304fa7ebb9084 to your computer and use it in GitHub Desktop.
Looking for chess cheating in centipawn dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CSV | |
using Plots | |
"""
    load_all_games(path = <default CSV location>)

Read the centipawn-metrics CSV file at `path` into a `DataFrame`.

`path` defaults to the location originally hard-coded in this script, so calling
`load_all_games()` with no arguments behaves as before. `CSV` and `DataFrame` must be in
scope (the latter is not imported by this file itself).
"""
function load_all_games(
    path = "/Users/tallakt/Downloads/15982_games_with_centipawn_metrics/15982_games_with_centipawn_metrics.csv",
)
    return CSV.read(path, DataFrame)
end
"""
    player_games(all_games, player_name)

Collect every game in `all_games` in which `player_name` took part and return them as one
table with colour-independent `player_*` / `opponent_*` columns.

`player_name` is turned into a `Regex` and matched against the "Black Name" and
"White Name" columns, so it may be a partial name or a pattern. Rows where the player had
black and rows where the player had white are filtered separately, annotated, and
concatenated (black rows first).

Added columns: `player_name`, `opponent_name`, `player_elo`, `opponent_elo`,
`player_avg_cp_loss`, `opponent_avg_cp_loss`, `player_cp_loss`, `opponent_cp_loss`,
`evaluations_list`, `player_evaluations`, `opponent_evaluations`,
`player_signed_evaluations` and `player_color` (`:black` or `:white`).

The list-valued source columns are JSON-encoded strings and are decoded with `JSON.parse`.
"""
function player_games(all_games, player_name)
    # Build the pattern once rather than once per row.
    name_pattern = Regex(player_name)
    black = filter(r -> !isnothing(match(name_pattern, r["Black Name"])), all_games)
    white = filter(r -> !isnothing(match(name_pattern, r["White Name"])), all_games)

    # Decode a column of JSON-encoded lists into a vector of vectors.
    parse_lists(column) = JSON.parse.(String.(column))

    black.player_name = black[:, "Black Name"]
    white.player_name = white[:, "White Name"]
    black.opponent_name = black[:, "White Name"]
    white.opponent_name = white[:, "Black Name"]

    black.player_elo = black[:, "Black ELO"]
    white.player_elo = white[:, "White ELO"]
    black.opponent_elo = black[:, "White ELO"]
    white.opponent_elo = white[:, "Black ELO"]

    black.player_avg_cp_loss = black[:, "Black Av CP Loss"]
    white.player_avg_cp_loss = white[:, "White Av CP Loss"]
    black.opponent_avg_cp_loss = black[:, "White Av CP Loss"]
    white.opponent_avg_cp_loss = white[:, "Black Av CP Loss"]

    black.player_cp_loss = parse_lists(black[:, "Black CP Loss List"])
    white.player_cp_loss = parse_lists(white[:, "White CP Loss List"])
    black.opponent_cp_loss = parse_lists(black[:, "White CP Loss List"])
    white.opponent_cp_loss = parse_lists(white[:, "Black CP Loss List"])

    # Parse the evaluation lists a single time and derive every view from that result.
    black_evals = parse_lists(black[:, "Evaluations List"])
    white_evals = parse_lists(white[:, "Evaluations List"])
    black.evaluations_list = black_evals
    white.evaluations_list = white_evals

    # Odd-indexed entries are used for white and even-indexed entries for black; the
    # entries taken for black are negated so they read from black's point of view.
    black.player_evaluations = [-e[2:2:end] for e in black_evals]
    white.player_evaluations = [e[1:2:end] for e in white_evals]
    black.opponent_evaluations = [e[1:2:end] for e in black_evals]
    # Previously `1000 .- x`, which disagreed with the `-x` used for the same (black)
    # side in `black.player_evaluations`; both now use plain negation.
    white.opponent_evaluations = [-e[2:2:end] for e in white_evals]

    # Full evaluation sequence with the sign flipped when the player had black.
    black.player_signed_evaluations = [-el for el in black.evaluations_list]
    white.player_signed_evaluations = white.evaluations_list

    black.player_color .= :black
    white.player_color .= :white

    return vcat(black, white)
end
"""
    plot_player_vs_opponent_moves_histogram(games; firstmove = 1)

Draw two stacked histograms of per-move centipawn loss: the player's moves on top (titled
with the first entry of `games.player_name`) and the opponents' moves below.

Only moves from index `firstmove` onward in each game are included. Both panels use bins
`0:150` and the same y-limit, which is 1.3 times the mean of the two sides' counts of moves
whose loss equals exactly 1.
"""
function plot_player_vs_opponent_moves_histogram(games; firstmove = 1)
    own_losses = vcat([moves[firstmove:end] for moves in games.player_cp_loss]...)
    their_losses = vcat([moves[firstmove:end] for moves in games.opponent_cp_loss]...)

    # Scale both panels from the height of the "loss == 1" bar.
    ones_per_side = [count(==(1), own_losses), count(==(1), their_losses)]
    ymax = mean(ones_per_side)
    shared_ylims = (0, ymax * 1.3)

    own_panel = histogram(
        own_losses; bins = 0:150, ylims = shared_ylims, title = games.player_name[1]
    )
    their_panel = histogram(
        their_losses;
        bins = 0:150,
        ylims = shared_ylims,
        title = "Opponents",
        xlabel = "centipawn loss",
    )
    return plot(own_panel, their_panel; size = (1100, 600), layout = (2, 1), legend = false)
end
"""
    scatter_player_standing_vs_move(games)

Scatter every player move with the entry of `player_evaluations` on the x-axis and the
entry of `player_cp_loss` on the y-axis, coloured by the move's index within its game.

Marker opacity is `0.2 * 15_000 / n` capped at 1.0, where `n` is the number of games, so
larger selections are drawn more transparently. Axes are limited to x in (-1000, 1000)
and y in (0, 500).
"""
function scatter_player_standing_vs_move(games)
    n = length(games.Date)
    evaluations = vcat(games.player_evaluations...)
    cp_losses = vcat(games.player_cp_loss...)
    # Index of each move inside its own game, used for the colour scale.
    move_no = vcat([1:length(g) for g in games.player_evaluations]...)
    return scatter(
        evaluations,
        cp_losses;
        legend = false,
        msw = 0,
        msa = 0,
        ma = min(1.0, (0.2 * 15_000 / n)),
        ms = 2.5,
        ylims = (0, 500),
        xlims = (-1000, 1000),
        size = (1100, 600),
        marker_z = move_no,
        colorbar = true,
    )
end
"""
    scatter_player_move_vs_opponent_move(games)

Scatter the player's centipawn loss (x) against the opponent's centipawn loss (y) for the
same move index of the same game, coloured by that move index.

Within each game the two loss lists are truncated to their common length before being
concatenated. Without this, a game in which one side has one more recorded move than the
other shifts every subsequent pair, so points would combine moves from different games.

Marker opacity is `0.1 * 15_000 / n` capped at 1.0, where `n` is the number of games.
"""
function scatter_player_move_vs_opponent_move(games)
    n = length(games.Date)
    # Number of moves per game for which both sides have a recorded loss.
    paired_counts = [
        min(length(p), length(o)) for
        (p, o) in zip(games.player_cp_loss, games.opponent_cp_loss)
    ]
    player_losses = vcat([p[1:k] for (p, k) in zip(games.player_cp_loss, paired_counts)]...)
    opponent_losses = vcat(
        [o[1:k] for (o, k) in zip(games.opponent_cp_loss, paired_counts)]...
    )
    move_no = vcat([1:k for k in paired_counts]...)
    return scatter(
        player_losses,
        opponent_losses;
        legend = false,
        msw = 0,
        msa = 0,
        ma = min(1.0, (0.1 * 15_000 / n)),
        ms = 2.5,
        xlims = (0, 750),
        ylims = (0, 750),
        aspectratio = :equal,
        size = (1100, 600),
        xlabel = games.player_name[1],
        marker_z = move_no,
    )
end
"""
    scatter_player_avg_move_vs_opponent_move(games)

Plot one point per game: the player's mean centipawn loss (x) against the opponent's mean
centipawn loss (y). Points are coloured by the game's `Date` converted to Unix time.

Both axes span (0, 75) with equal aspect ratio; the x-axis is labelled with the first
entry of `games.player_name`. Marker opacity is `0.4 * 2_000 / n` capped at 1.0.
"""
function scatter_player_avg_move_vs_opponent_move(games)
    game_count = length(games.Date)
    player_means = map(mean, games.player_cp_loss)
    opponent_means = map(mean, games.opponent_cp_loss)
    opacity = min(1.0, (0.4 * 2_000 / game_count))
    axis_range = (0, 75)
    player_label = games.player_name[1]
    timestamps = [datetime2unix(DateTime(d)) for d in games.Date]
    return scatter(
        player_means,
        opponent_means;
        marker_z = timestamps,
        xlabel = player_label,
        size = (1100, 600),
        aspect_ratio = :equal,
        xlims = axis_range,
        ylims = axis_range,
        markersize = 2.5,
        markeralpha = opacity,
        markerstrokewidth = 0,
        markerstrokealpha = 0,
        legend = false,
    )
end
"""
    scatter_player_quantile_move_vs_opponent_move(games, p; lim = 25)

Plot one point per game: the `p`-quantile of the player's centipawn losses (x) against the
`p`-quantile of the opponent's centipawn losses (y). Points are coloured by the game's
`Date` converted to Unix time.

Both axes span `(0, lim)` with equal aspect ratio; the x-axis is labelled with the first
entry of `games.player_name`. Marker opacity is `0.2 * 2_000 / n` capped at 1.0.
"""
function scatter_player_quantile_move_vs_opponent_move(games, p; lim = 25)
    game_count = length(games.Date)
    player_q = map(losses -> quantile(losses, p), games.player_cp_loss)
    opponent_q = map(losses -> quantile(losses, p), games.opponent_cp_loss)
    opacity = min(1.0, (0.2 * 2_000 / game_count))
    axis_range = (0, lim)
    player_label = games.player_name[1]
    timestamps = [datetime2unix(DateTime(d)) for d in games.Date]
    return scatter(
        player_q,
        opponent_q;
        marker_z = timestamps,
        xlabel = player_label,
        size = (1100, 600),
        aspect_ratio = :equal,
        xlims = axis_range,
        ylims = axis_range,
        markersize = 3.5,
        markeralpha = opacity,
        markerstrokewidth = 0,
        markerstrokealpha = 0,
        legend = false,
    )
end
"""
    filter_year(games, from, to)

Keep the entries of `games` whose `Date` lies between 1 January of year `from` and
31 December of year `to`, both inclusive.
"""
function filter_year(games, from, to)
    return filter(games) do game
        played = game.Date
        Date(from, 1, 1) <= played && played <= Date(to, 12, 31)
    end
end
"""
    histogram_per_tournament(games)

For each distinct "Event Name" in `games`, print the event and player name and display
the player-vs-opponent centipawn-loss histogram for that event's games.
"""
function histogram_per_tournament(games)
    for event_games in groupby(games, "Event Name")
        event = event_games[1, "Event Name"]
        player = event_games.player_name[1]
        println("Event: $(event), Player: $(player)")
        display(plot_player_vs_opponent_moves_histogram(event_games))
        println("")
    end
    return nothing
end
"""
    histogram_per_year(games)

For each calendar year in which `games` contains at least one game, print the year and
player name and display the player-vs-opponent centipawn-loss histogram for that year.

Only years that actually occur in `games.Date` are visited. Walking the full range between
the first and last year would hand an empty selection to the plotting function for any
year without games, which fails when it reads the first player name.
"""
function histogram_per_year(games)
    for y in sort(unique(year.(games.Date)))
        yearly_games = filter_year(games, y, y)
        println("Year: $(y), Player: $(games.player_name[1])")
        display(plot_player_vs_opponent_moves_histogram(yearly_games))
        println("")
    end
    return nothing
end
"""
    filter_world_opp_49th(games)

Keep the games whose "Event Name" contains `World op 49th`.

Flagged in the original notes as a "suspicious" tournament for HN.
"""
function filter_world_opp_49th(games)
    return filter(games) do row
        hit = match(r"World op 49th", row."Event Name")
        hit !== nothing
    end
end
"""
    filter_mix_200(games)

Keep the games whose "Event Name" contains `Novi Sad TS Mix 200th`.

Flagged in the original notes as a "suspicious" tournament for HN.
"""
function filter_mix_200(games)
    return filter(games) do row
        hit = match(r"Novi Sad TS Mix 200th", row."Event Name")
        hit !== nothing
    end
end
"""
    filter_cccsa_20(games)

Keep the games whose "Event Name" contains `Charlotte CCCSA Fall GM`.

Flagged in the original notes as a "suspicious" tournament for HN.
"""
function filter_cccsa_20(games)
    return filter(games) do row
        hit = match(r"Charlotte CCCSA Fall GM", row."Event Name")
        hit !== nothing
    end
end
"""
    filter_suspicios(games)

Concatenate the games selected by `filter_world_opp_49th`, `filter_mix_200` and
`filter_cccsa_20`, in that order.
"""
function filter_suspicios(games)
    selections = (
        filter_world_opp_49th(games), filter_mix_200(games), filter_cccsa_20(games)
    )
    return vcat(selections...)
end
"""
    plot_some_players_meadian_cp_loss_per_event(allgames; xlims = (0, 180))

Draw a 2x3 grid with one panel per player (Carlsen, Niemann, Caruana, Erigaisi, Firouzja,
Praggnanandhaa) showing the mean and the median centipawn loss per event.

Each player's games are sorted by "Date" and grouped by "Event Name"; for every event all
of the player's per-move losses are pooled before the mean and median are taken. The
x-axis is the event's position in that grouping.

# Keywords
- `xlims`: x-axis limits applied to every panel. Defaults to the previously fixed
  `(0, 180)`, mirroring the keyword offered by the per-game variant of this plot.
"""
function plot_some_players_meadian_cp_loss_per_event(allgames; xlims = (0, 180))
    names = ["Carlsen", "Niemann", "Caruana", "Erigaisi", "Firouzja", "Praggnanandhaa"]
    six = [player_games(allgames, n) for n in names]

    # One panel: mean and median loss per event for a single player's sorted games.
    function plot_player(games)
        by_event = groupby(games, "Event Name")
        mean_cp_loss = [mean(vcat(ev.player_cp_loss...)) for ev in by_event]
        median_cp_loss = [median(vcat(ev.player_cp_loss...)) for ev in by_event]
        p0 = plot(mean_cp_loss, lab = "mean")
        return plot!(p0, median_cp_loss, lab = "median: $(games.player_name[1])")
    end

    panels = [plot_player(sort(df, "Date")) for df in six]
    return plot(
        panels...; layout = (2, 3), xlims = xlims, ylims = (0, 40), size = (1100, 650)
    )
end
"""
    plot_some_players_meadian_cp_loss_per_game(allgames; xlims = (0, 2000))

Draw a 2x3 grid with one panel per player (Carlsen, Niemann, Caruana, Erigaisi, Firouzja,
Praggnanandhaa) showing the mean and the median centipawn loss of each individual game,
with games ordered by "Date".

# Keywords
- `xlims`: x-axis limits (game index) applied to every panel.
"""
function plot_some_players_meadian_cp_loss_per_game(allgames; xlims = (0, 2000))
    names = ["Carlsen", "Niemann", "Caruana", "Erigaisi", "Firouzja", "Praggnanandhaa"]
    six = [player_games(allgames, n) for n in names]

    # One panel for a single player. Expects games already sorted by date; they are
    # sorted exactly once below instead of again for each statistic.
    function plot_player(sorted_games)
        mean_cp_loss = [mean(losses) for losses in sorted_games.player_cp_loss]
        median_cp_loss = [median(losses) for losses in sorted_games.player_cp_loss]
        p0 = plot(mean_cp_loss, lab = "mean")
        return plot!(p0, median_cp_loss, lab = "median: $(sorted_games.player_name[1])")
    end

    panels = [plot_player(sort(df, "Date")) for df in six]
    return plot(
        panels...; layout = (2, 3), xlims = xlims, ylims = (0, 90), size = (1100, 650)
    )
end
"""
    scatter_median_cp_loss_vs_std(games; firstmove = 1, ms = 8.0)

Plot one point per game: the median of the player's centipawn losses (x) against their
standard deviation (y), labelled with the first entry of `games.player_name`.

# Keywords
- `firstmove`: first move index included from each game. For games with fewer moves than
  `firstmove` the start index is clamped to the last move, so only that move is used.
- `ms`: marker size.

A standard-normal random offset is added to each median so that games sharing the same
median do not overlap exactly. Marker opacity is `0.15 * 2000 / n`, where `n` is the
number of games, capped at 1.0 as in the other scatter plots of this file; uncapped it
exceeds 1 for selections of fewer than 300 games.
"""
function scatter_median_cp_loss_vs_std(games; firstmove = 1, ms = 8.0)
    cp_losses = [
        g.player_cp_loss[min(firstmove, length(g.player_cp_loss)):end] for
        g in eachrow(games)
    ]
    opacity = min(1.0, 0.15 * 2000 / length(games.Date))
    return scatter(
        [median(cpl) + randn() for cpl in cp_losses],  # horizontal jitter
        [std(cpl) for cpl in cp_losses];
        msw = 0,
        msa = 0,
        ma = opacity,
        ms = ms,
        size = (1100, 650),
        xlabel = "median centipawn loss per game",
        ylabel = "std centipawn loss per game",
        ylims = (0, 300),
        xlims = (0, 40),
        label = games.player_name[1],
    )
end
"""
    scatter_median_cp_loss_vs_std_for_some_players(games)

Draw a 3x3 grid of median-vs-standard-deviation scatter plots, one for each of the (up to)
nine names that appear most often in the "White Name" column of `games`.

Each panel uses that player's games from 2018 onward (as white or black), starts at move
15, and is titled with the player's name; ticks, axis labels and legends are suppressed.

The ranking is computed from the `games` argument. It previously read a global named
`all_games`, which ignored the argument and failed when no such global existed.
"""
function scatter_median_cp_loss_vs_std_for_some_players(games)
    # (number of games as white, name), ascending, so the most frequent names come last.
    white_counts = sort([
        (length(g.Date), g[1, "White Name"]) for g in groupby(games, "White Name")
    ])
    # Take the last nine entries, or all of them when there are fewer than nine.
    players = [name for (_, name) in white_counts[max(1, end - 8):end]]

    panels = [
        plot!(
            scatter_median_cp_loss_vs_std(
                filter_year(player_games(games, p), 2018, 2999); firstmove = 15, ms = 4
            );
            title = p,
            legend = false,
            titlefontsize = 8,
            ticks = false,
            xlabel = "",
            ylabel = "",
        ) for p in players
    ]
    return plot(panels...; layout = (3, 3), size = (1100, 650))
end
nothing |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment