Skip to content

Instantly share code, notes, and snippets.

@castdrian
Created December 14, 2022 10:32
Show Gist options
  • Save castdrian/647430322e094cca8ced5d247070543f to your computer and use it in GitHub Desktop.
Save castdrian/647430322e094cca8ced5d247070543f to your computer and use it in GitHub Desktop.
deno run --allow-net --allow-write fetchBulba.mjs
import { DOMParser } from 'https://deno.land/x/deno_dom/deno-dom-wasm.ts';
/**
 * Collects the absolute URLs of all `/wiki/...` links listed inside the
 * `#mw-pages` element of a Bulbapedia page.
 *
 * NOTE(review): only the anchors present on this single page are read; if the
 * category is split over several pages, the remaining pages are not followed.
 *
 * @param {string} MON_URL - URL of the page to scan.
 * @returns {Promise<string[]>} Absolute `https://bulbapedia.bulbagarden.net/wiki/...` URLs, in document order.
 * @throws {Error} If the HTTP response is not OK or the page has no `#mw-pages` element.
 */
async function fetchMonURLs(MON_URL) {
  const BULBAPEDIA_ORIGIN = 'https://bulbapedia.bulbagarden.net';

  const res = await fetch(MON_URL);
  if (!res.ok) {
    throw new Error(`Request for ${MON_URL} failed: ${res.status} ${res.statusText}`);
  }

  const doc = new DOMParser().parseFromString(await res.text(), 'text/html');
  const pageList = doc?.getElementById('mw-pages');
  if (!pageList) {
    throw new Error(`No element with id "mw-pages" found at ${MON_URL}`);
  }

  const monURLs = [];
  for (const link of pageList.getElementsByTagName('a')) {
    // getAttribute returns null for anchors without an href (e.g. named anchors).
    const href = link.getAttribute('href');
    // Only site-relative article links are kept; anything else is ignored.
    if (typeof href === 'string' && href.startsWith('/wiki/')) {
      monURLs.push(`${BULBAPEDIA_ORIGIN}${href}`);
    }
  }
  return monURLs;
}
/**
 * Downloads a page and parses it as HTML.
 *
 * @param {string} url - URL of the page to download.
 * @returns {Promise<object>} The document produced by `DOMParser#parseFromString`.
 * @throws {Error} If the HTTP response status is not OK, so that an error page
 *   is never handed to the parser as if it were the requested page.
 */
async function fetchBulbaData(url) {
  const res = await fetch(url);
  if (!res.ok) {
    throw new Error(`Request for ${url} failed: ${res.status} ${res.statusText}`);
  }
  const html = await res.text();
  return new DOMParser().parseFromString(html, 'text/html');
}
/**
 * Extracts species data from a parsed Bulbapedia species page.
 *
 * Every value except the species name is read from the first `table.roundy`
 * on the page. Each value is located through a well-known link inside that
 * table; the text is taken from the node two siblings after the link's parent.
 *
 * @param {object} doc - Parsed HTML document of a species page.
 * @returns {Promise<{
 *   species: string,
 *   genderRatio: { male: string, female: string },
 *   evYields: { hp: number, atk: number, def: number, spa: number, spd: number, spe: number },
 *   isEggObtainable: boolean,
 *   catchRate: { base: number, percentageWithOrdinaryPokeballAtFullHealth: (string|null) },
 *   minimumHatchTime: (number|null),
 *   levelingRate: string
 * }>} The extracted record. `minimumHatchTime` is `null` when the breeding
 *   text contains no number (e.g. "Egg not obtainable").
 * @throws {Error} If the infobox table or one of the required entries is missing.
 */
async function parseBulbaData(doc) {
  const toInt = (digits) => Number.parseInt(digits, 10);

  // Title looks like "Fuecoco (Pokémon) - Bulbapedia, the community-driven Pokémon encyclopedia".
  const title = doc.querySelector('title').textContent;
  const species = title.split('(')[0].trim().toLowerCase();

  const infobox = doc.querySelectorAll('table.roundy')[0];
  if (!infobox) {
    throw new Error(`No table.roundy found on the page for "${species}"`);
  }

  // Returns the text of the node two siblings after the anchor's parent.
  const textAfter = (anchor, label) => {
    const text = anchor?.parentNode?.nextSibling?.nextSibling?.textContent;
    if (typeof text !== 'string') {
      throw new Error(`Could not locate the ${label} entry for "${species}"`);
    }
    return text;
  };
  const entryText = (href, label) => textAfter(infobox.querySelector(`a[href="${href}"]`), label);

  // Catch rate text looks like "45 (11.9%)": a base value plus a percentage in parentheses.
  const catchRateText = entryText('/wiki/Catch_rate', 'catch rate');
  const catchRateBase = catchRateText.match(/\d+/);
  if (!catchRateBase) {
    throw new Error(`Could not parse the catch rate for "${species}" from "${catchRateText.trim()}"`);
  }
  const catchRatePercent = catchRateText.match(/\(([^)]*)\)/)?.[1] ?? null;

  // Gender text looks like "87.5% male, 12.5% female"; no percentages at all yields 0%/0%.
  const genderText = entryText('/wiki/List_of_Pok%C3%A9mon_by_gender_ratio', 'gender ratio');
  const percentages = genderText.match(/\d+\.?\d*%/g) ?? ['0%', '0%'];
  if (percentages.length === 1) {
    // A single percentage belongs to whichever gender is named; the other one gets 0%.
    const namedGender = genderText.match(/male|female/)?.[0];
    if (namedGender === 'male') {
      percentages.push('0%');
    } else if (namedGender === 'female') {
      percentages.unshift('0%');
    }
  }

  const breedingText = entryText('/wiki/Pok%C3%A9mon_breeding', 'breeding');
  const hasNoEggGroup = Boolean(infobox.querySelector('a[href="/wiki/No_Eggs_Discovered_(Egg_Group)"]'));
  const isEggObtainable = !(hasNoEggGroup || breedingText.includes('Egg not obtainable'));
  // The first number in the breeding text is used; there is none when no egg can be obtained.
  const firstStepCount = breedingText.match(/\d+/);
  const minimumHatchTime = firstStepCount ? toInt(firstStepCount[0]) : null;

  // Of the first seven numbers in the EV text, the first is discarded and the
  // next six are read as HP, Atk, Def, Sp.Atk, Sp.Def, Speed.
  // NOTE(review): the discarded number is presumably a total — confirm.
  const evText = entryText('/wiki/List_of_Pok%C3%A9mon_by_effort_value_yield', 'EV yield');
  const evNumbers = evText.match(/\d+/g) ?? [];
  if (evNumbers.length < 7) {
    throw new Error(`Expected at least 7 numbers in the EV yield entry for "${species}", found ${evNumbers.length}`);
  }
  const [hp, atk, def, spa, spd, spe] = evNumbers.slice(1, 7).map(toInt);

  // The leveling rate follows the second link to /wiki/Experience in the table.
  const experienceAnchor = infobox.querySelectorAll('a[href="/wiki/Experience"]')[1];
  const levelingRate = textAfter(experienceAnchor, 'leveling rate').trim();

  return {
    species,
    genderRatio: { male: percentages[0], female: percentages[1] },
    evYields: { hp, atk, def, spa, spd, spe },
    isEggObtainable,
    catchRate: {
      base: toInt(catchRateBase[0]),
      percentageWithOrdinaryPokeballAtFullHealth: catchRatePercent,
    },
    minimumHatchTime,
    levelingRate,
  };
}
/**
 * Scrapes every species page linked from the Generation IX category page and
 * writes the collected records to `partialBulbaData.json` as pretty-printed JSON.
 *
 * Pages are requested one at a time. A page that cannot be fetched or parsed
 * is logged and skipped, so one bad page does not discard the records already
 * collected; the skipped URLs are listed once the file has been written.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const GEN_IX_URL = 'https://bulbapedia.bulbagarden.net/wiki/Category:Generation_IX_Pok%C3%A9mon';
  const monURLs = await fetchMonURLs(GEN_IX_URL);
  const monData = [];
  const failedURLs = [];

  for (const [index, url] of monURLs.entries()) {
    const progress = `${index + 1}/${monURLs.length}`;
    try {
      const doc = await fetchBulbaData(url);
      const data = await parseBulbaData(doc);
      monData.push(data);
      console.log(`Successfully processed ${data.species} (${progress})`);
    } catch (err) {
      failedURLs.push(url);
      console.error(`Failed to process ${url} (${progress}):`, err);
    }
  }

  // write to file
  const json = JSON.stringify(monData, null, 2);
  await Deno.writeTextFile('partialBulbaData.json', json);

  if (failedURLs.length > 0) {
    console.error(`${failedURLs.length} page(s) could not be processed:\n${failedURLs.join('\n')}`);
  }
  console.log('done');
}
// Script entry point: runs the scrape and waits for it to finish (top-level await).
await main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment