Last active
June 26, 2025 02:07
-
-
Save NeKzor/ef166f9d7e48690dabcd712f54f9d1b1 to your computer and use it in GitHub Desktop.
Every Mario Kart World Record from 1996-2025. Datasets available at https://www.dolthub.com/repositories/nekz/mkwrs/data/main
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Usage:
//   deno run -E -W=import.sh,data/ -R=data/ -N=mkwrs.com mk.ts --refresh

/** Folder that holds the dumped .json file of each game. */
export const dataFolder = "./data";

/** Path of the generated database import script. */
const importScript = "./import.sh";

/** Value of the `User-Agent` header used in the shared fetch options. */
const userAgent = "ne^";
// Cached `JSDOM` export of `npm:jsdom`; stays undefined until first use.
// deno-lint-ignore no-explicit-any
let _JSDOM: any;

/**
 * Resolves to the `JSDOM` constructor exported by `npm:jsdom`.
 *
 * The package is imported dynamically on the first call and the constructor
 * is remembered in `_JSDOM`, so later calls return the cached value.
 */
const JSDOM = async () => {
  if (!_JSDOM) {
    const jsdomModule = await import("npm:jsdom");
    _JSDOM = jsdomModule.JSDOM;
  }
  return _JSDOM;
};
/**
 * Computes the SHA-256 digest of `value` (encoded with `TextEncoder`) and
 * returns the leading characters of its lowercase hexadecimal form.
 *
 * @param value Text to hash.
 * @param length Number of hex characters to keep (passed to `String.slice`).
 * @returns The first `length` hex characters of the digest.
 */
async function hash(value: string, length: number = 8): Promise<string> {
  const digest = await crypto.subtle.digest(
    "SHA-256",
    new TextEncoder().encode(value),
  );
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    // Two hex digits per byte, zero-padded.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex.slice(0, length);
}
/** A track page discovered on a game's overview page. */
export type Track = {
  /**
   * The part of the link after `display.php?track=`, or the whole link when
   * it does not start with that prefix.
   */
  id: string;
  /** Text of the track link / track cell. */
  name: string;
  /** Category label, when the overview page or game distinguishes any. */
  category?: string;
};
/**
 * One scraped world-record row.
 *
 * Which fields are filled depends on the game: the scraper only sets
 * `ntscTime`/`ntscVideo`/`palTime`/`palVideo` for `mk64`, `ghost` and
 * `player_mii` for `mkwii`, `mode` for `mkds`, and `device` for `mk8dx`/`mk8`;
 * the other games use `time`/`video`. Records additionally carry columns
 * derived from the page's table headers, which are not listed here, and
 * several link/title based fields are stored as `null` when the page has no
 * matching element.
 */
export type Entry = {
  /** Truncated hash built from track id, category, player id and time. */
  id: string;
  date: string;
  /** `title` attribute of the first element inside the date cell. */
  note: string;
  ntscTime: string;
  ntscVideo: string;
  palTime: string;
  palVideo: string;
  player_id: string;
  player_name: string;
  player_nation: string;
  time: string;
  video: string;
  ghost: string;
  player_mii: string;
  device: string;
  mode: string;
  track_id: string;
  track_name: string;
  // Was misspelled `track_categor`; the scraper reads and writes
  // `track_category`, so the old name never existed on real records.
  track_category: string;
};
/** Origin of the scraped site; game and track routes are appended to it. */
const baseApi = "https://mkwrs.com";

/** Request options shared by the scraper's page requests. */
const fetchOptions = {
  headers: { "User-Agent": userAgent },
};
/**
 * Game identifiers, used both as URL path segment on the site and as the
 * name of the generated data file / database table.
 */
export const games = [
  "mkworld",
  "mk8dx",
  "mk8",
  "mk7",
  "mkwii",
  "mkds",
  "mkdd",
  "mksc",
  "mk64",
  "smk",
] as const;

/** Union of the literal game identifiers listed in `games`. */
export type Game = typeof games[number];
/**
 * Returns the row that has the largest number of own keys.
 *
 * On a tie the earliest such row wins. `rows` must not be empty: the first
 * row seeds the search, so an empty array makes `Object.keys` throw.
 */
const findMostColumns = (rows: Entry[]) => {
  let widest = rows[0];
  let widestSize = Object.keys(widest).length;
  for (const candidate of rows) {
    const size = Object.keys(candidate).length;
    // Strictly greater, so earlier rows keep priority over equal ones.
    if (size > widestSize) {
      widest = candidate;
      widestSize = size;
    }
  }
  return widest;
};
/**
 * Entry point of the scraper.
 *
 * Command line flags:
 * - `--refresh`: scrape every game again and rewrite its `.json` dump;
 *   without it the existing dumps are read instead.
 * - `--game=<id>`: only process that game (must be one of `games`).
 *
 * Always rewrites the import script from scratch and appends, per processed
 * game, a `CREATE TABLE` statement (all columns `varchar(255)`, taken from
 * the row with the most keys) followed by a `dolt table import` command.
 */
const main = async () => {
  const refresh = Deno.args.includes("--refresh");
  const gameOnly = Deno.args
    .find((arg) => arg.startsWith("--game="))
    ?.split("=")?.[1] as Game | undefined;

  if (gameOnly !== undefined && !games.includes(gameOnly)) {
    console.log("Expected one of:", games);
    Deno.exit(1);
  }

  // Quotes a column name as a backtick identifier that is safe inside the
  // single-quoted shell argument below: backticks are doubled for SQL and a
  // single quote is written as '\'' for the shell. Ordinary names are
  // unaffected. Column names come from scraped table headers.
  const quoteColumn = (column: string) =>
    "`" + column.replaceAll("`", "``").replaceAll("'", "'\\''") + "`";

  // Start a fresh script; each processed game appends its statements.
  Deno.writeTextFileSync(importScript, "#!/bin/bash\n", { create: true });

  if (refresh) {
    // The dumps are written below; make sure their folder exists first.
    Deno.mkdirSync(dataFolder, { recursive: true });
  }

  for (const game of games) {
    if (gameOnly !== undefined && game !== gameOnly) continue;

    const dataFile = `${dataFolder}/${game}.json`;
    let rows: Entry[] = [];

    if (refresh) {
      const tracks = await scrapeTracks(game);
      rows = await scrapeGame(game, tracks);
      Deno.writeTextFileSync(dataFile, JSON.stringify({ rows }, null, 4));
    } else {
      console.log("importing", game);
      rows = JSON.parse(Deno.readTextFileSync(dataFile)).rows ?? [];
    }

    if (rows.length === 0) {
      // Without rows there are no columns to declare; skipping avoids the
      // crash in findMostColumns and an invalid empty CREATE TABLE.
      console.warn("no rows for", game, "- skipping table creation");
      continue;
    }

    const columnDefinitions = Object.keys(findMostColumns(rows))
      .map((column) => ` ${quoteColumn(column)} varchar(255)`)
      .join(",\n");

    const statements = [
      "",
      `dolt sql -q 'CREATE TABLE ${game} (`,
      columnDefinitions,
      ")'",
      `dolt table import -u ${game} ${dataFile}`,
      "",
    ].join("\n");

    Deno.writeTextFileSync(importScript, statements, { append: true });
  }
};
/**
 * Scrapes the overview page of `game` and returns the tracks linked from its
 * `.wr` tables.
 *
 * Two row layouts are handled:
 * - a cell whose first child is a link: one track per row (for `mkwii` a
 *   category is derived from the link);
 * - a cell (with `colspan="2"`) whose first child is a nested table: the
 *   first inner cell is the track name and every further cell links to one
 *   category of that track.
 *
 * Once a nested-table row has been seen, plain link rows are ignored for the
 * rest of the page. Links starting with `http` are skipped, as are cells
 * whose link element or `href` is missing.
 *
 * @param game Game whose overview page is fetched.
 * @returns The discovered tracks in page order.
 */
const scrapeTracks = async (game: Game): Promise<Track[]> => {
  // Track links look like `display.php?track=<id>`; anything else is kept
  // verbatim as the id.
  const trackLinkPrefix = "display.php?track=";
  const toTrackId = (link: string) =>
    link.startsWith(trackLinkPrefix)
      ? link.slice(trackLinkPrefix.length)
      : link;

  console.log("scraping", game, "...");
  const route = `${baseApi}/${game}`;
  const res = await fetch(route, fetchOptions);
  console.log("[GET]", route, ":", res.status);

  const text = await res.text();
  const dom = new (await JSDOM())(text);
  const document = dom.window.document;

  const tracks: Track[] = [];
  const tables = document.querySelectorAll(".wr");
  console.log("found", tables.length, "tables");

  const isMkwii = game === "mkwii";
  let tableMode = false;

  for (const table of tables) {
    // Skip the first row and keep only rows that belong to the `.wr` table
    // itself, not to a table nested inside one of its cells.
    const rows = [...table.querySelectorAll("tr")]
      .slice(1)
      .filter((tr) => tr.parentElement?.parentElement?.className === "wr");
    console.log("found", rows.length, "rows");

    for (const row of rows) {
      const columns = row.querySelectorAll("td");
      let track = columns[0];
      if (!track || !track.children[0]) {
        continue;
      }
      // A leading <center> cell pushes the track cell to the second column.
      if (track.children[0].tagName === "CENTER") {
        track = columns[1];
        if (!track || !track.children[0]) {
          continue;
        }
      }

      const firstChild = track.children[0];

      if (firstChild.tagName === "A" && !tableMode) {
        const name = firstChild.textContent;
        const link = firstChild.getAttribute("href");
        // getAttribute returns null for an anchor without href.
        if (link === null || link.startsWith("http")) {
          continue;
        }
        console.log(name, link);

        const hasId = link.startsWith(trackLinkPrefix);
        const id = toTrackId(link);

        let category: Track["category"];
        if (isMkwii) {
          if (id.endsWith("&nsc=1")) {
            category = "non-shortcut";
          } else if (!hasId) {
            category = link.replace(/\.php$/g, "");
          } else {
            category = "unrestricted";
          }
        }

        tracks.push({
          name,
          id,
          category,
        });
      } else if (firstChild.tagName === "TABLE") {
        tableMode = true;

        const innerRoot = firstChild.children[0];
        if (!innerRoot) {
          continue;
        }
        if (track.getAttribute("colspan") !== "2") {
          continue;
        }

        const [trackTd, ...tds] = [...innerRoot.querySelectorAll("td")];
        if (!trackTd) {
          // Nested table without any cell: nothing to read.
          continue;
        }
        const name = trackTd.textContent;
        console.log(name);

        for (const td of tds) {
          const anchor = td.firstElementChild;
          if (!anchor) {
            continue;
          }
          const link = anchor.getAttribute("href");
          const category = anchor.textContent;
          if (link === null || link.startsWith("http")) {
            continue;
          }
          console.log(link);
          tracks.push({
            name,
            id: toTrackId(link),
            category,
          });
        }
      }
    }
  }

  return tracks;
};
/**
 * Scrapes the record history of every given track of one game.
 *
 * For each track its page is fetched and the rows of one `.wr` table are
 * read (the table at index 1, or index 2 for `mkwii` pages whose id ends in
 * `.php`). The leading cells (date, time(s), player, nation, ...) are read
 * explicitly per game; the cells from `columnOffset` onwards are stored under
 * names derived from the header row (lowercased, spaces replaced by `_`).
 * When the last header is `combination`, a record spans two consecutive
 * table rows and the second row supplies the shrooms/kart/tires/glider
 * values.
 *
 * For `mkds` every track is requested three times with the suffixes `&m1`,
 * `&m2` and `&m3` (categories `non-prb`, `non-sc`, `overall`). Before the
 * first request of each category the page `/mkds/?&m=<n>` is requested and
 * the cookie value it sets is sent as `PHPSESSID` with the track requests
 * (presumably the site keeps the selected mode in the session; confirm).
 *
 * Each record gets an id hashed from track id, category, player id and time.
 * A record whose id was already produced is logged as "HASH COLLISION" and
 * dropped. The process exits with status 1 when a player id still starts or
 * ends with `+` after one trailing `+` has been removed.
 *
 * @param gameName Game to scrape.
 * @param tracks Tracks as returned by `scrapeTracks`; not modified.
 * @returns All kept records in scrape order.
 * @throws Error when a track page lacks the expected record table.
 */
const scrapeGame = async (
  gameName: Game,
  tracks: Track[],
): Promise<Entry[]> => {
  const isMkworld = gameName === "mkworld";
  const isMkwii = gameName === "mkwii";
  const isMkds = gameName === "mkds";
  const isMk64 = gameName === "mk64";
  const hasDeviceInfo = gameName === "mk8dx" || gameName === "mk8";
  // Number of leading header/data cells skipped before the remaining cells
  // are mapped onto header-derived column names.
  const columnOffset = isMk64 || isMkwii ? 5 : 4;

  console.log("scraping tracks for", gameName, "...");

  // Build the list of pages to visit without touching the caller's array or
  // its track objects. Order matches the category switches below: all
  // `&m1` pages first, then `&m2`, then `&m3`.
  const pages: Track[] = isMkds
    ? [
      ...tracks.map((track) => ({
        ...track,
        id: track.id + "&m1",
        category: "non-prb",
      })),
      ...tracks.map((track) => ({
        id: track.id + "&m2",
        name: track.name,
        category: "non-sc",
      })),
      ...tracks.map((track) => ({
        id: track.id + "&m3",
        name: track.name,
        category: "overall",
      })),
    ]
    : tracks;

  // deno-lint-ignore no-explicit-any
  const extractText = (element: any) => element.firstChild.textContent.trim();

  // Attribute of the first element inside a cell, or null without element.
  // deno-lint-ignore no-explicit-any
  const firstChildAttribute = (td: any, attribute: string) =>
    td.firstElementChild ? td.firstElementChild.getAttribute(attribute) : null;

  // Player id taken from the player link: the href with its first 19
  // characters removed (presumably the `profile.php?player=` prefix; verify
  // against the site). Null when the cell has no link element or no href.
  // deno-lint-ignore no-explicit-any
  const playerIdOf = (td: any) => {
    const href = firstChildAttribute(td, "href");
    return href === null ? null : href.slice(19);
  };

  // `title` of the element nested two levels inside the nation cell.
  // deno-lint-ignore no-explicit-any
  const nationOf = (td: any) =>
    td.firstElementChild && td.firstElementChild.firstElementChild
      ? td.firstElementChild.firstElementChild.getAttribute("title")
      : null;

  // Creates a prototype-less record with `id` as its first key and one key
  // per given column name, filled from the cell at the same index.
  // deno-lint-ignore no-explicit-any
  const recordFromColumns = (names: string[], cells: any[]) => {
    const fresh = Object.assign(Object.create(null), { id: null });
    names.forEach((column, idx) => {
      fresh[column] = cells[idx].textContent;
    });
    return fresh;
  };

  const entries = new Map<string, Entry>();
  const result: Entry[] = [];
  let lastCategory = "";
  let PHPSESSID = "";

  for (const track of pages) {
    if (isMkds && lastCategory !== track.category) {
      lastCategory = track.category!;
      // The last character of the id is the mode number (1, 2 or 3).
      const sessionRes = await fetch(
        `${baseApi}/mkds/?&m=` + track.id.slice(-1),
      );
      PHPSESSID =
        sessionRes.headers.get("set-cookie")?.split("=")[1]?.split(";")[0] ??
          "";
      // Only the cookie is needed; release the unread body.
      await sessionRes.body?.cancel();
    }

    const hasNoId = track.id.endsWith(".php");
    const id = hasNoId ? track.id : `display.php?track=${track.id}`;
    const hasSplitTable = isMkwii && hasNoId;
    const route = `${baseApi}/${gameName}/${id}`;
    const options = isMkds
      ? {
        ...fetchOptions,
        headers: {
          ...fetchOptions.headers,
          "Cookie": "PHPSESSID=" + PHPSESSID,
        },
      }
      : fetchOptions;

    const res = await fetch(route, options);
    console.log("[GET]", route, ":", res.status);
    const text = await res.text();
    const dom = new (await JSDOM())(text);
    const document = dom.window.document;

    const wrs = document.querySelectorAll(".wr");
    const historyTable = wrs[hasSplitTable ? 2 : 1];
    if (!historyTable) {
      throw new Error(
        `No record table found at ${route} (status ${res.status})`,
      );
    }
    const [columnRow, ...rows] = [...historyTable.querySelectorAll("tr")];
    if (!columnRow) {
      throw new Error(`Record table at ${route} has no header row`);
    }

    const columnNames = [...columnRow.querySelectorAll("th")]
      .map((th) => th.textContent)
      .slice(columnOffset)
      .map((column) => column.replace(/ /g, "_").toLowerCase());

    const hasCombination =
      columnNames[columnNames.length - 1] === "combination";
    // Index just past the last `lap_*` column; only used for combination
    // tables, where the cells after the laps are read by position.
    const afterLapsColumn = columnNames.findLastIndex((column) =>
      column.startsWith("lap_")
    ) + 1;

    // True while the next data row is the second row of the current record.
    let insertSchroomsAndCombination = false;
    // deno-lint-ignore no-explicit-any
    let record: any = null;

    for (const row of rows) {
      const allTds = [...row.querySelectorAll("td")];
      if (allTds.length <= 1 || allTds[1].getAttribute("colspan")) {
        continue;
      }

      if (hasCombination) {
        if (insertSchroomsAndCombination) {
          // Second row: complete the record created from the previous row.
          insertSchroomsAndCombination = false;
          record.shrooms = allTds[0].textContent;
          if (isMkworld) {
            record.kart = allTds[1].textContent;
          } else {
            record.tires = allTds[1].textContent;
            record.glider = allTds[2].textContent;
          }
          continue;
        }

        insertSchroomsAndCombination = true;
        const tds = allTds.slice(columnOffset);
        record = recordFromColumns(columnNames.slice(0, afterLapsColumn), tds);
        // Keys filled from the second row are created here as null so the
        // key order of the record is fixed by the first row.
        record.coins = tds[afterLapsColumn].textContent;
        record.shrooms = null;
        record.character = tds[afterLapsColumn + 1].textContent;
        if (isMkworld) {
          record.kart = null;
        } else {
          record.tires = null;
          record.kart = tds[afterLapsColumn + 2].textContent;
          record.glider = null;
        }
      } else {
        record = recordFromColumns(columnNames, allTds.slice(columnOffset));
      }

      if (isMk64) {
        const [dateTd, ntscTimeTd, palTimeTd, playerTd, nationTd] = allTds;
        record.date = extractText(dateTd);
        record.note = firstChildAttribute(dateTd, "title");
        record.ntscTime = extractText(ntscTimeTd);
        record.ntscVideo = firstChildAttribute(ntscTimeTd, "href");
        record.palTime = extractText(palTimeTd);
        record.palVideo = firstChildAttribute(palTimeTd, "href");
        record.player_id = playerIdOf(playerTd);
        record.player_name = extractText(playerTd);
        record.player_nation = nationOf(nationTd);
      } else if (isMkwii) {
        const [dateTd, timeTd, playerTd, miiNameTd, nationTd] = allTds;
        const ghostTd = allTds[allTds.length - 1];
        record.date = extractText(dateTd);
        record.note = firstChildAttribute(dateTd, "title");
        record.time = extractText(timeTd);
        record.video = firstChildAttribute(timeTd, "href");
        record.ghost = firstChildAttribute(ghostTd, "href");
        record.player_id = playerIdOf(playerTd);
        record.player_name = extractText(playerTd);
        record.player_nation = nationOf(nationTd);
        record.player_mii = miiNameTd.textContent;
      } else if (isMkds) {
        const [dateTd, timeTd, modeTd, playerTd, nationTd] = allTds;
        record.date = extractText(dateTd);
        record.note = firstChildAttribute(dateTd, "title");
        record.time = extractText(timeTd);
        record.video = firstChildAttribute(timeTd, "href");
        record.mode = extractText(modeTd);
        record.player_id = playerIdOf(playerTd);
        record.player_name = extractText(playerTd);
        record.player_nation = nationOf(nationTd);
      } else {
        const [dateTd, timeTd, playerTd, nationTd] = allTds;
        record.date = extractText(dateTd);
        record.note = firstChildAttribute(dateTd, "title");
        record.time = extractText(timeTd);
        record.video = firstChildAttribute(timeTd, "href");
        if (hasDeviceInfo) {
          record.device = timeTd.children[1]
            ? timeTd.children[1].getAttribute("title")
            : null;
        }
        record.player_id = playerIdOf(playerTd);
        record.player_name = extractText(playerTd);
        record.player_nation = nationOf(nationTd);
      }

      // mkds ids carry the three-character mode suffix added above.
      record.track_id = isMkds ? track.id.slice(0, -3) : track.id;
      record.track_name = track.name;
      if (track.category) {
        record.track_category = track.category;
      }

      // player_id is null for rows without a player link; only string ids
      // can be checked for stray "+" characters.
      if (typeof record.player_id === "string") {
        if (record.player_id.endsWith("+")) {
          console.warn("Fixed player id:", record.player_id);
          record.player_id = record.player_id.slice(0, -1);
        }
        if (
          record.player_id.startsWith("+") || record.player_id.endsWith("+")
        ) {
          console.error("INVALID PLAYER NAME", record.player_id);
          Deno.exit(1);
        }
      }

      const id = await hash(
        [
          // Drops the last six characters (the `&nsc=1` suffix that marks
          // the non-shortcut category) before hashing.
          isMkwii && record.track_category === "non-shortcut"
            ? record.track_id.slice(0, -6)
            : record.track_id,
          isMkds || isMkwii ? "" : record.track_category ?? "",
          record.player_id,
          isMk64 ? record.palTime : record.time,
        ].join(";"),
      );

      if (entries.has(id)) {
        console.log("HASH COLLISION");
        const collision = entries.get(id)!;
        console.log(JSON.stringify(collision));
        console.log(JSON.stringify({ id, ...record }));
        console.log("same date?", collision.date === record.date);
        console.log();
        continue;
      }

      record.id = id;
      entries.set(id, record);
      result.push(record);
    }
  }

  return result;
};
// Run the scraper only when this module is the program's entry point, not
// when it is imported for its exports.
if (import.meta.main) {
  main()
    .then(() => {
      console.log("[+] done");
    })
    .catch(console.error);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { dataFolder, Entry, Game, games } from "./mk.ts"; | |
/**
 * Prints statistics over the dumped record files.
 *
 * Command line flags:
 * - `--game=<id>`: only load that game (must be one of `games`).
 * - `--limit=<n>`: number of entries shown in the "top" lists; values below
 *   10, a missing flag or a non-numeric value all result in 10.
 *
 * Output, in order: note link domains, top note links, video link domains,
 * top video links, top players by record count, top players by number of
 * distinct games.
 */
const main = async () => {
  type Row = Entry & { game: string };

  // Value of a `--<name>=<value>` argument, or undefined when absent.
  const argValue = (name: string) =>
    Deno.args.find((arg) => arg.startsWith(`--${name}=`))?.split("=")?.[1];

  const gameOnly = argValue("game") as Game | undefined;
  if (gameOnly !== undefined && !games.includes(gameOnly)) {
    console.log("Expected one of:", games);
    Deno.exit(1);
  }

  // The argument is text: convert it explicitly and ignore values that are
  // not numbers, which would otherwise turn every top list empty.
  const requestedLimit = Number(argValue("limit") ?? 0);
  const limit = Math.max(10, Number.isNaN(requestedLimit) ? 0 : requestedLimit);

  const rows: Row[] = [];
  for (const game of games) {
    if (gameOnly !== undefined && game !== gameOnly) continue;
    const dataFile = `${dataFolder}/${game}.json`;
    console.log("importing", game);
    const loaded = JSON.parse(await Deno.readTextFile(dataFile))
      .rows as Entry[];
    // Push one by one: spreading a very large array into push() can exceed
    // the engine's argument limit.
    for (const entry of loaded) {
      rows.push(Object.assign(entry, { game }));
    }
  }
  console.log(rows.length, "records");

  // Parses a value as URL; yields undefined for missing values, unparsable
  // text and URLs whose origin serializes to "null".
  const parseLink = (value: string | null | undefined) => {
    const url = URL.parse(value?.trim() ?? "");
    return url && url.origin !== "null" ? url : undefined;
  };
  const noteLink = (entry: Row) => parseLink(entry.note);
  const videoLink = (entry: Row) =>
    parseLink(entry.ntscVideo ?? entry.palVideo ?? entry.video);

  // Counts rows per key (rows whose key is undefined are skipped) and
  // returns [key, count] pairs sorted by descending count.
  const countBy = (keyOf: (entry: Row) => string | undefined) => {
    const counts = new Map<string, number>();
    for (const entry of rows) {
      const key = keyOf(entry);
      if (key === undefined) continue;
      counts.set(key, (counts.get(key) ?? 0) + 1);
    }
    return [...counts.entries()].sort((a, b) => b[1] - a[1]);
  };

  console.log(countBy((entry) => noteLink(entry)?.origin));
  console.log(countBy((entry) => noteLink(entry)?.toString()).slice(0, limit));
  console.log(countBy((entry) => videoLink(entry)?.origin));
  console.log(countBy((entry) => videoLink(entry)?.toString()).slice(0, limit));
  console.log(countBy((entry) => entry.player_id).slice(0, limit));

  // Players ranked by the number of different games they hold records in.
  const gamesByPlayer = new Map<string, Set<string>>();
  for (const entry of rows) {
    const playedGames = gamesByPlayer.get(entry.player_id) ??
      new Set<string>();
    playedGames.add(entry.game);
    gamesByPlayer.set(entry.player_id, playedGames);
  }
  console.log(
    [...gamesByPlayer.entries()]
      .sort((a, b) => b[1].size - a[1].size)
      .slice(0, limit),
  );
};
// Print the statistics only when this module is the program's entry point.
if (import.meta.main) {
  main()
    .then(() => {
      console.log("[+] done");
    })
    .catch(console.error);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment