Skip to content

Instantly share code, notes, and snippets.

@NeKzor
Last active June 26, 2025 02:07
Show Gist options
  • Save NeKzor/ef166f9d7e48690dabcd712f54f9d1b1 to your computer and use it in GitHub Desktop.
Save NeKzor/ef166f9d7e48690dabcd712f54f9d1b1 to your computer and use it in GitHub Desktop.
Every Mario Kart World Record from 1996-2025. Datasets available at https://www.dolthub.com/repositories/nekz/mkwrs/data/main
// Usage: deno run -E -W=import.sh,data/ -R=data/ -N=mkwrs.com mk.ts --refresh
// Folder holding the dumped .json files, one `<game>.json` per game
export const dataFolder = "./data";
// Path of the generated database import script (rewritten on every run of main)
const importScript = "./import.sh";
// Value sent as the "User-Agent" header via fetchOptions
const userAgent = "ne^";
// deno-lint-ignore no-explicit-any
let _JSDOM: any;
/**
 * Resolves the `JSDOM` constructor exported by "npm:jsdom".
 *
 * The module is imported dynamically the first time this is called and the
 * constructor is cached in `_JSDOM`, so later calls return the cached value.
 */
const JSDOM = async () => {
  if (!_JSDOM) {
    const jsdomModule = await import("npm:jsdom");
    _JSDOM = jsdomModule.JSDOM;
  }
  return _JSDOM;
};
/**
 * Computes the SHA-256 digest of `value` (UTF-8 encoded) and returns the
 * leading `length` characters of its lowercase hexadecimal representation.
 *
 * @param value Text to hash.
 * @param length Number of hex characters to keep (default 8).
 * @returns The truncated hex digest.
 */
async function hash(value: string, length: number = 8): Promise<string> {
  const digest = await crypto.subtle.digest(
    "SHA-256",
    new TextEncoder().encode(value),
  );
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    // Each byte becomes exactly two hex digits.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex.slice(0, length);
}
// A track (or one track/category combination) found on a game's index page.
export type Track = {
// Link target with a leading "display.php?track=" removed, or the raw link when it has no such prefix
id: string;
// Text of the track link / track cell
name: string;
// Category label; only set for some games or page layouts
category?: string;
};
/**
 * One scraped world-record row as stored in `<game>.json`.
 *
 * The scraper fills a different subset of these fields depending on the game
 * (for example the NTSC/PAL pairs are written only for mk64, `ghost` and
 * `player_mii` only for mkwii, `mode` only for mkds), and several values are
 * written as `null` when the source cell has no link or title. Rows may also
 * carry additional per-game columns taken from the table header.
 */
export type Entry = {
  /** Truncated SHA-256 hex id derived from track, player and time. */
  id: string;
  /** Text of the date cell. */
  date: string;
  /** `title` attribute of the first element inside the date cell. */
  note: string;
  /** NTSC time text (mk64). */
  ntscTime: string;
  /** Link found in the NTSC time cell (mk64). */
  ntscVideo: string;
  /** PAL time text (mk64). */
  palTime: string;
  /** Link found in the PAL time cell (mk64). */
  palVideo: string;
  /** Player identifier taken from the player link. */
  player_id: string;
  /** Text of the player cell. */
  player_name: string;
  /** `title` attribute of the element nested in the nation cell. */
  player_nation: string;
  /** Time text (all games except mk64). */
  time: string;
  /** Link found in the time cell (all games except mk64). */
  video: string;
  /** Link found in the last cell of the row (mkwii). */
  ghost: string;
  /** Text of the Mii name cell (mkwii). */
  player_mii: string;
  /** `title` attribute of the second element in the time cell (mk8dx, mk8). */
  device: string;
  /** Text of the mode cell (mkds). */
  mode: string;
  /** Identifier of the track the record belongs to. */
  track_id: string;
  /** Name of the track the record belongs to. */
  track_name: string;
  /**
   * Category of the track, written only when the track has one.
   * (Previously misspelled `track_categor`, which never matched the
   * `track_category` key the scraper actually writes.)
   */
  track_category: string;
};
// Base URL every scrape route is built from
const baseApi = "https://mkwrs.com";
// Request options shared by the scraping fetch calls: sets the custom User-Agent header
const fetchOptions = {
headers: {
"User-Agent": userAgent,
},
};
// Game slugs, used as the URL path segment, the `<game>.json` file name and
// the generated table name. main() iterates them in this order.
export const games = [
"mkworld",
"mk8dx",
"mk8",
"mk7",
"mkwii",
"mkds",
"mkdd",
"mksc",
"mk64",
"smk",
] as const;
// Union of the literal game slugs listed in `games`
export type Game = typeof games[number];
/**
 * Returns the row that has the largest number of own enumerable keys.
 *
 * When several rows share the maximum, the earliest one wins. Throws a
 * `TypeError` when `rows` is empty, because the first row's keys are read
 * unconditionally.
 */
const findMostColumns = (rows: Entry[]) => {
  const first = rows[0];
  const seed = { row: first, size: Object.keys(first).length };
  return rows.reduce((best, row) => {
    const size = Object.keys(row).length;
    // Strictly greater, so ties keep the earlier row.
    return size > best.size ? { row, size } : best;
  }, seed).row;
};
/**
 * Entry point of the scraper.
 *
 * Command-line flags (read from `Deno.args`):
 * - `--refresh`: re-scrape every selected game and overwrite
 *   `<dataFolder>/<game>.json`; without it the existing dump is read instead.
 * - `--game=<slug>`: restrict the run to a single game from `games`; an
 *   unknown slug prints the allowed values and exits with code 1.
 *
 * The import script is recreated on every run and receives, per game, a
 * `CREATE TABLE` statement (one `varchar(255)` column for each key of the row
 * with the most columns) followed by a `dolt table import`.
 */
const main = async () => {
  const refresh = Deno.args.includes("--refresh");
  const gameOnly = Deno.args.find((arg) => arg.startsWith("--game="))?.split(
    "=",
  )
    ?.[1] as Game | undefined;
  if (gameOnly !== undefined && !games.includes(gameOnly)) {
    console.log("Expected one of:", games);
    Deno.exit(1);
  }

  // Start the script from scratch; the per-game sections are appended below.
  Deno.writeTextFileSync(importScript, "#!/bin/bash\n", { create: true });

  for (const game of games) {
    if (gameOnly !== undefined && game !== gameOnly) continue;

    const dataFile = `${dataFolder}/${game}.json`;
    let rows: Entry[] = [];
    if (refresh) {
      const tracks = await scrapeTracks(game);
      rows = await scrapeGame(game, tracks);
      // A first run has no data folder yet; create it instead of failing on
      // the write below. No-op when the folder already exists.
      Deno.mkdirSync(dataFolder, { recursive: true });
      Deno.writeTextFileSync(dataFile, JSON.stringify({ rows }, null, 4));
    } else {
      console.log("importing", game);
      rows = JSON.parse(Deno.readTextFileSync(dataFile)).rows;
    }

    // Without rows there are no columns to derive a schema from, and
    // findMostColumns would throw on the empty array.
    if (rows.length === 0) {
      console.warn("no rows for", game, "- skipping table import");
      continue;
    }

    const columnDefinitions = Object.keys(findMostColumns(rows))
      .map((column) => ` \`${column}\` varchar(255)`)
      .join(",\n");
    const scriptSection = [
      "",
      `dolt sql -q 'CREATE TABLE ${game} (`,
      columnDefinitions,
      ")'",
      `dolt table import -u ${game} ${dataFile}`,
      "",
    ].join("\n");
    Deno.writeTextFileSync(importScript, scriptSection, { append: true });
  }
};
// Scrapes the index page of `game` (`${baseApi}/${game}`) and returns the
// tracks linked from its ".wr" tables.
//
// Two row layouts are handled:
//  - a cell whose first child is a link: one track per row;
//  - a cell whose first child is a nested table: one track per category link
//    inside that nested table.
// Links that start with "http" are skipped in both layouts.
const scrapeTracks = async (game: Game): Promise<Track[]> => {
console.log("scraping", game, "...");
const route = `${baseApi}/${game}`;
const res = await fetch(route, fetchOptions);
console.log("[GET]", route, ":", res.status);
// NOTE(review): the response status is only logged; the body is parsed even for non-2xx responses.
const text = await res.text();
const dom = new (await JSDOM())(text);
const document = dom.window.document;
const tracks: Track[] = [];
const tables = document.querySelectorAll(".wr");
console.log("found", tables.length, "tables");
const isMkwii = game === "mkwii";
// Set once a nested-table row has been seen; from then on plain link rows are ignored.
let tableMode = false;
for (const table of tables) {
// Drop the first row and keep only rows whose grandparent element has the class name "wr"
// (presumably to exclude rows of nested tables - TODO confirm against the page markup).
const rows = [...table.querySelectorAll("tr")]
.slice(1)
.filter((tr) => tr.parentElement.parentElement.className === "wr");
console.log("found", rows.length, "rows");
for (const row of rows) {
const columns = row.querySelectorAll("td");
let track = columns[0];
if (!track || !track.children[0]) {
continue;
}
// When the first cell starts with a <center> element, the track is read from the second cell instead.
if (track.children[0].tagName === "CENTER") {
track = columns[1];
if (!track || !track.children[0]) {
continue;
}
}
if (track.children[0].tagName === "A" && !tableMode) {
// Layout 1: the cell holds a single track link.
const trackA = track.children[0];
const name = trackA.textContent;
const link = trackA.getAttribute("href");
if (link.startsWith("http")) {
continue;
}
console.log(name, link);
const hasId = link.startsWith("display.php?track=");
// 18 is the length of "display.php?track="
const id = hasId ? link.slice(18) : link;
let category: Track["category"];
// Only mkwii gets a category in this layout:
//  - id ending in "&nsc=1"             -> "non-shortcut"
//  - link without the display prefix   -> the link with a trailing ".php" removed
//  - anything else                     -> "unrestricted"
if (isMkwii) {
if (id.endsWith("&nsc=1")) {
category = "non-shortcut";
} else if (!hasId) {
category = link.replace(/\.php$/g, "");
} else {
category = "unrestricted";
}
}
tracks.push({
name,
id,
category,
});
} else if (track.children[0].tagName === "TABLE") {
// Layout 2: the cell holds a nested table; the first <td> is the track name, the remaining ones are category links.
tableMode = true;
if (
!(track.children[0].children[0] &&
track.children[0].children[0].querySelectorAll("td"))
) {
continue;
}
// Only cells spanning exactly two columns are treated as track cells.
if (track.getAttribute("colspan") !== "2") {
continue;
}
const [trackTd, ...tds] = [
...track.children[0].children[0].querySelectorAll("td"),
];
const name = trackTd.textContent;
console.log(name);
for (const td of tds) {
// The link text is used as the category, the href as the track id.
const link = td.firstElementChild.getAttribute("href");
const category = td.firstElementChild.textContent;
if (link.startsWith("http")) {
continue;
}
console.log(link);
tracks.push({
name,
id: link.startsWith("display.php?track=") ? link.slice(18) : link,
category,
});
}
}
}
}
return tracks;
};
/**
 * Scrapes the world-record history table of every track of one game.
 *
 * For each track the page `${baseApi}/${gameName}/<id>` is fetched and parsed,
 * one of its ".wr" tables is read row by row, and every row becomes one record
 * with the fixed leading cells (date, time(s), player, nation, ...) plus the
 * remaining header-named columns. Each record gets an `id` derived from track,
 * player and time; a row whose id was already produced is logged as
 * "HASH COLLISION" and skipped.
 *
 * NOTE: for mkds the passed `tracks` array is modified in place: every track
 * is turned into three variants ("&m1" non-prb, "&m2" non-sc, "&m3" overall).
 *
 * @param gameName Game slug to scrape.
 * @param tracks Tracks returned by `scrapeTracks` for that game.
 * @returns All unique records in scrape order.
 */
const scrapeGame = async (
  gameName: Game,
  tracks: Track[],
): Promise<Entry[]> => {
  const isMkworld = gameName === "mkworld";
  const isMkwii = gameName === "mkwii";
  const isMkds = gameName === "mkds";
  const isMk64 = gameName === "mk64";
  const hasDeviceInfo = gameName === "mk8dx" || gameName === "mk8";
  // Number of leading cells handled explicitly below; mk64 and mkwii have one more.
  const columnOffset = isMk64 || isMkwii ? 5 : 4;
  console.log("scraping tracks for", gameName, "...");
  if (isMkds) {
    // Expand every track into its three mkds variants. The copies for "&m2"
    // and "&m3" are appended first, then the original objects become "&m1".
    const nonPrb = [...tracks];
    tracks.push(
      ...nonPrb.map((track) => ({
        id: track.id + "&m2",
        name: track.name,
        category: "non-sc",
      })),
    );
    tracks.push(
      ...nonPrb.map((track) => ({
        id: track.id + "&m3",
        name: track.name,
        category: "overall",
      })),
    );
    nonPrb.forEach((track) => {
      track.id += "&m1";
      track.category = "non-prb";
    });
  }
  // Text of the cell's first child node only (ignores any following nodes).
  // deno-lint-ignore no-explicit-any
  const extractText = (element: any) => element.firstChild.textContent.trim();
  const entries = new Map<string, Entry>();
  const result: Entry[] = [];
  let lastCategory = "";
  let PHPSESSID = "";
  for (const track of tracks) {
    if (isMkds) {
      // Whenever the category changes, request the mkds index with the mode
      // digit (last character of the id) and keep the session cookie it sets;
      // the cookie is sent with the track requests below.
      // NOTE(review): this request is made without fetchOptions, so it does
      // not carry the custom User-Agent - confirm that is intentional.
      if (lastCategory !== track.category) {
        lastCategory = track.category!;
        const res = await fetch(
          "https://mkwrs.com/mkds/?&m=" + track.id.slice(-1),
        );
        PHPSESSID =
          res.headers.get("set-cookie")?.split("=")[1]?.split(";")[0] ?? "";
      }
    }
    // Ids ending in ".php" are complete page names; all others are display.php track ids.
    const hasNoId = track.id.endsWith(".php");
    const id = hasNoId ? track.id : `display.php?track=${track.id}`;
    const hasSplitTable = isMkwii && hasNoId;
    const route = `${baseApi}/${gameName}/${id}`;
    const options = isMkds
      ? {
        ...fetchOptions,
        headers: {
          ...fetchOptions.headers,
          "Cookie": "PHPSESSID=" + PHPSESSID,
        },
      }
      : fetchOptions;
    const res = await fetch(route, options);
    console.log("[GET]", route, ":", res.status);
    const text = await res.text();
    const dom = new (await JSDOM())(text);
    const document = dom.window.document;
    const wrs = document.querySelectorAll(".wr");
    // The history table is the second ".wr" element, or the third on mkwii ".php" pages.
    const [columnRow, ...rows] = [
      ...wrs[hasSplitTable ? 2 : 1].querySelectorAll("tr"),
    ];
    // Header names after the fixed leading cells, normalised to snake_case-like keys.
    const columnNames = [...columnRow.querySelectorAll("th")].map((th) =>
      th.textContent
    )
      .slice(columnOffset)
      .map((column) => column.replace(/ /g, "_").toLowerCase());
    // With a trailing "combination" header each record spans two table rows.
    const hasCombination =
      columnNames[columnNames.length - 1] === "combination";
    let insertSchroomsAndCombination = false;
    let record = null;
    for (const row of rows) {
      const allTds = [...row.querySelectorAll("td")];
      if (allTds.length <= 1 || allTds[1].getAttribute("colspan")) {
        continue;
      }
      if (hasCombination) {
        if (insertSchroomsAndCombination) {
          // Second row of a pair: fill the fields left as null on the record
          // created (and already stored) from the first row.
          insertSchroomsAndCombination = false;
          record.shrooms = allTds[0].textContent;
          if (isMkworld) {
            record.kart = allTds[1].textContent;
          } else {
            record.tires = allTds[1].textContent;
            record.glider = allTds[2].textContent;
          }
          continue;
        } else {
          // First row of a pair: header-named columns up to the last "lap_*"
          // column, then the combination cells by position.
          insertSchroomsAndCombination = true;
          const tds = allTds.slice(columnOffset);
          const afterLapsColumn = columnNames.findLastIndex((column) =>
            column.startsWith("lap_")
          ) + 1;
          record = columnNames.slice(0, afterLapsColumn).reduce(
            (record, column, idx) => {
              record[column] = tds[idx].textContent;
              return record;
            },
            Object.assign(Object.create(null), { id: null }),
          );
          if (isMkworld) {
            record.coins = tds[afterLapsColumn].textContent;
            record.shrooms = null;
            record.character = tds[afterLapsColumn + 1].textContent;
            record.kart = null;
          } else {
            record.coins = tds[afterLapsColumn].textContent;
            record.shrooms = null;
            record.character = tds[afterLapsColumn + 1].textContent;
            record.tires = null;
            record.kart = tds[afterLapsColumn + 2].textContent;
            record.glider = null;
          }
        }
      } else {
        // Single-row layout: every header-named column maps to one cell.
        const tds = allTds.slice(columnOffset);
        record = columnNames.reduce((record, column, idx) => {
          record[column] = tds[idx].textContent;
          return record;
        }, Object.assign(Object.create(null), { id: null }));
      }
      // Fixed leading cells; their order and meaning differ per game.
      // `.slice(19)` on the player link presumably strips a 19-character
      // prefix such as "profile.php?player=" - TODO confirm against the site.
      if (isMk64) {
        const [dateTd, ntscTimeTd, palTimeTd, playerTd, nationTd] = allTds;
        record.date = extractText(dateTd);
        record.note = dateTd.firstElementChild
          ? dateTd.firstElementChild.getAttribute("title")
          : null;
        record.ntscTime = extractText(ntscTimeTd);
        record.ntscVideo = ntscTimeTd.firstElementChild
          ? ntscTimeTd.firstElementChild.getAttribute("href")
          : null;
        record.palTime = extractText(palTimeTd);
        record.palVideo = palTimeTd.firstElementChild
          ? palTimeTd.firstElementChild.getAttribute("href")
          : null;
        record.player_id = playerTd.firstElementChild
          ? playerTd.firstElementChild.getAttribute("href").slice(19)
          : null;
        record.player_name = extractText(playerTd);
        record.player_nation = nationTd.firstElementChild &&
            nationTd.firstElementChild.firstElementChild
          ? nationTd.firstElementChild.firstElementChild.getAttribute("title")
          : null;
      } else if (isMkwii) {
        const [dateTd, timeTd, playerTd, miiNameTd, nationTd] = allTds;
        const ghostTd = allTds[allTds.length - 1];
        record.date = extractText(dateTd);
        record.note = dateTd.firstElementChild
          ? dateTd.firstElementChild.getAttribute("title")
          : null;
        record.time = extractText(timeTd);
        record.video = timeTd.firstElementChild
          ? timeTd.firstElementChild.getAttribute("href")
          : null;
        record.ghost = ghostTd.firstElementChild
          ? ghostTd.firstElementChild.getAttribute("href")
          : null;
        record.player_id = playerTd.firstElementChild
          ? playerTd.firstElementChild.getAttribute("href").slice(19)
          : null;
        record.player_name = extractText(playerTd);
        record.player_nation = nationTd.firstElementChild &&
            nationTd.firstElementChild.firstElementChild
          ? nationTd.firstElementChild.firstElementChild.getAttribute("title")
          : null;
        record.player_mii = miiNameTd.textContent;
      } else if (isMkds) {
        const [dateTd, timeTd, modeTd, playerTd, nationTd] = allTds;
        record.date = extractText(dateTd);
        record.note = dateTd.firstElementChild
          ? dateTd.firstElementChild.getAttribute("title")
          : null;
        record.time = extractText(timeTd);
        record.video = timeTd.firstElementChild
          ? timeTd.firstElementChild.getAttribute("href")
          : null;
        record.mode = extractText(modeTd);
        record.player_id = playerTd.firstElementChild
          ? playerTd.firstElementChild.getAttribute("href").slice(19)
          : null;
        record.player_name = extractText(playerTd);
        record.player_nation = nationTd.firstElementChild &&
            nationTd.firstElementChild.firstElementChild
          ? nationTd.firstElementChild.firstElementChild.getAttribute("title")
          : null;
      } else {
        const [dateTd, timeTd, playerTd, nationTd] = allTds;
        record.date = extractText(dateTd);
        record.note = dateTd.firstElementChild
          ? dateTd.firstElementChild.getAttribute("title")
          : null;
        record.time = extractText(timeTd);
        record.video = timeTd.firstElementChild
          ? timeTd.firstElementChild.getAttribute("href")
          : null;
        if (hasDeviceInfo) {
          record.device = timeTd.children[1]
            ? timeTd.children[1].getAttribute("title")
            : null;
        }
        record.player_id = playerTd.firstElementChild
          ? playerTd.firstElementChild.getAttribute("href").slice(19)
          : null;
        record.player_name = extractText(playerTd);
        record.player_nation = nationTd.firstElementChild &&
            nationTd.firstElementChild.firstElementChild
          ? nationTd.firstElementChild.firstElementChild.getAttribute("title")
          : null;
      }
      // mkds ids carry a 3-character "&mN" suffix that is not part of the track id.
      record.track_id = isMkds ? track.id.slice(0, -3) : track.id;
      record.track_name = track.name;
      if (track.category) {
        record.track_category = track.category;
      }
      // player_id is null when the player cell has no link. Only real ids are
      // normalised, so such a row no longer throws on the "+" checks.
      if (typeof record.player_id === "string") {
        // A single trailing "+" is repaired ...
        if (record.player_id.endsWith("+")) {
          console.warn("Fixed player id:", record.player_id);
          record.player_id = record.player_id.slice(0, -1);
        }
        // ... anything still starting or ending with "+" aborts the run.
        if (
          record.player_id.startsWith("+") ||
          record.player_id.endsWith("+")
        ) {
          console.error("INVALID PLAYER NAME", record.player_id);
          Deno.exit(1);
        }
      }
      // The id ignores the category for mkds and mkwii (and strips the
      // 6-character "&nsc=1" suffix from mkwii non-shortcut track ids), so the
      // same run listed under several categories hashes to the same id and is
      // kept only once.
      const id = await hash(
        [
          isMkwii && record.track_category === "non-shortcut"
            ? record.track_id.slice(0, -6)
            : record.track_id,
          isMkds || isMkwii ? "" : record.track_category ?? "",
          record.player_id,
          isMk64 ? record.palTime : record.time,
        ].join(";"),
      );
      if (entries.has(id)) {
        console.log("HASH COLLISION");
        const collision = entries.get(id)!;
        console.log(JSON.stringify(collision));
        console.log(JSON.stringify({ id, ...record }));
        console.log("same date?", collision.date === record.date);
        console.log();
        continue;
      }
      record.id = id;
      entries.set(id, record);
      result.push(record);
    }
  }
  return result;
};
// Run the scraper only when this file is the program's entry module.
if (import.meta.main) {
  main()
    .then(() => {
      console.log("[+] done");
    })
    .catch((error) => {
      console.error(error);
    });
}
import { dataFolder, Entry, Game, games } from "./mk.ts";
/**
 * Prints statistics over the dumped `<dataFolder>/<game>.json` files.
 *
 * Command-line flags (read from `Deno.args`):
 * - `--game=<slug>`: only load that game; an unknown slug prints the allowed
 *   values and exits with code 1.
 * - `--limit=<n>`: number of entries shown in the "top" lists. Values below 10
 *   (and a missing or non-numeric value) fall back to 10.
 *
 * Output, in order: record count, note-link origins, top note links,
 * video-link origins, top video links, players by record count, players by
 * number of distinct games.
 */
const main = async () => {
  type Row = Entry & { game: string };

  const gameOnly = Deno.args.find((arg) => arg.startsWith("--game="))?.split(
    "=",
  )
    ?.[1] as Game | undefined;
  if (gameOnly !== undefined && !games.includes(gameOnly)) {
    console.log("Expected one of:", games);
    Deno.exit(1);
  }

  // The flag value is a string; convert it explicitly. A non-numeric value
  // used to turn into NaN, which made every `slice(0, limit)` return nothing.
  const limitArg = Deno.args.find((arg) => arg.startsWith("--limit="))?.split(
    "=",
  )
    ?.[1];
  const requestedLimit = Number(limitArg ?? 0);
  const limit = Number.isNaN(requestedLimit)
    ? 10
    : Math.max(10, requestedLimit);

  const rows: Row[] = [];
  for (const game of games) {
    if (gameOnly !== undefined && game !== gameOnly) continue;
    const dataFile = `${dataFolder}/${game}.json`;
    console.log("importing", game);
    const entries = JSON.parse(await Deno.readTextFile(dataFile))
      .rows as Entry[];
    // Tag each record with its game without mutating the parsed object; a
    // plain loop also avoids spreading a huge array into push() arguments.
    for (const entry of entries) {
      rows.push({ ...entry, game });
    }
  }
  console.log(rows.length, "records");

  // Map entries ordered by descending count.
  const byCountDesc = (counts: Map<string, number>) =>
    [...counts.entries()].sort((a, b) => b[1] - a[1]);

  // Counts the parseable URLs selected by `pickUrl`, grouped by `keyOf`.
  // Values that do not parse, or whose origin is "null", are ignored.
  const countUrls = (
    pickUrl: (entry: Row) => string | null | undefined,
    keyOf: (url: URL) => string,
  ) => {
    const counts = new Map<string, number>();
    for (const entry of rows) {
      const url = URL.parse(pickUrl(entry)?.trim() ?? "");
      if (url && url.origin !== "null") {
        const key = keyOf(url);
        counts.set(key, (counts.get(key) ?? 0) + 1);
      }
    }
    return byCountDesc(counts);
  };

  const noteOf = (entry: Row) => entry.note;
  const videoOf = (entry: Row) =>
    entry.ntscVideo ?? entry.palVideo ?? entry.video;
  const originOf = (url: URL) => url.origin;
  const hrefOf = (url: URL) => url.toString();

  console.log(countUrls(noteOf, originOf));
  console.log(countUrls(noteOf, hrefOf).slice(0, limit));
  console.log(countUrls(videoOf, originOf));
  console.log(countUrls(videoOf, hrefOf).slice(0, limit));

  // Players ranked by number of records.
  const recordsPerPlayer = new Map<string, number>();
  for (const entry of rows) {
    recordsPerPlayer.set(
      entry.player_id,
      (recordsPerPlayer.get(entry.player_id) ?? 0) + 1,
    );
  }
  console.log(byCountDesc(recordsPerPlayer).slice(0, limit));

  // Players ranked by number of distinct games they hold records in.
  const gamesPerPlayer = new Map<string, Set<string>>();
  for (const entry of rows) {
    const playedGames = gamesPerPlayer.get(entry.player_id) ??
      new Set<string>();
    playedGames.add(entry.game);
    gamesPerPlayer.set(entry.player_id, playedGames);
  }
  console.log(
    [...gamesPerPlayer.entries()]
      .sort((a, b) => b[1].size - a[1].size)
      .slice(0, limit),
  );
};
// Run the statistics only when this file is the program's entry module.
if (import.meta.main) {
  main()
    .then(() => {
      console.log("[+] done");
    })
    .catch((error) => {
      console.error(error);
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment