Created
December 14, 2022 10:32
-
-
Save castdrian/647430322e094cca8ced5d247070543f to your computer and use it in GitHub Desktop.
deno run --allow-net --allow-write fetchBulba.mjs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { DOMParser } from 'https://deno.land/x/deno_dom/deno-dom-wasm.ts';
/**
 * Download a category page and collect the absolute URLs of every wiki
 * article linked from its "mw-pages" element.
 *
 * @param {string} MON_URL - URL of the category page to scan.
 * @returns {Promise<string[]>} Absolute article URLs, in document order.
 * @throws {Error} If the HTTP response is not successful or the page has no
 *   element with id "mw-pages".
 */
async function fetchMonURLs(MON_URL) {
  const WIKI_ORIGIN = 'https://bulbapedia.bulbagarden.net';

  const res = await fetch(MON_URL);
  if (!res.ok) {
    throw new Error(`Failed to fetch ${MON_URL}: ${res.status} ${res.statusText}`);
  }
  const text = await res.text();

  // The listing lives in the div with the id "mw-pages".
  const doc = new DOMParser().parseFromString(text, 'text/html');
  const div = doc?.getElementById('mw-pages');
  if (!div) {
    throw new Error(`No element with id "mw-pages" found at ${MON_URL}`);
  }

  const monURLs = [];
  for (const item of div.getElementsByTagName('a')) {
    const href = item.getAttribute('href');
    // Anchors without an href yield null; only site-relative article links are kept.
    if (href?.startsWith('/wiki/')) {
      monURLs.push(`${WIKI_ORIGIN}${href}`);
    }
  }
  return monURLs;
}
/**
 * Download a page and parse it into a DOM document.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<object>} The document produced by `DOMParser` for the
 *   response body, parsed as 'text/html'.
 * @throws {Error} If the HTTP response is not successful.
 */
async function fetchBulbaData(url) {
  const res = await fetch(url);
  // Fail early so an error page is never parsed as if it were real content.
  if (!res.ok) {
    throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
  }
  const text = await res.text();
  return new DOMParser().parseFromString(text, 'text/html');
}
/**
 * Return the text of the value cell that belongs to a labelled link.
 *
 * The value is read from the node two siblings after the link's parent, which
 * is the traversal this scraper relies on for every field.
 *
 * @param {object} table - Element to search in.
 * @param {string} selector - CSS selector matching the label link.
 * @param {number} [occurrence=0] - Which match to use (0 = first).
 * @returns {string} The `textContent` of the value node.
 * @throws {Error} If the link or its value node cannot be found.
 */
function readValueCell(table, selector, occurrence = 0) {
  const anchor = table.querySelectorAll(selector)[occurrence];
  const valueNode = anchor?.parentNode?.nextSibling?.nextSibling;
  if (!valueNode) {
    throw new Error(`Could not locate a value for ${selector} (occurrence ${occurrence})`);
  }
  return valueNode.textContent;
}

/**
 * Split a catch rate such as "45 (11.9%)" into its number and percentage.
 *
 * @param {string} text - Raw cell text; surrounding whitespace is tolerated.
 * @returns {{ base: number, percent: (string|null) }} `percent` is null when
 *   the text carries no parenthesised percentage.
 * @throws {Error} If the text contains no number at all.
 */
function parseCatchRate(text) {
  const match = text.match(/(\d+)(?:\s*\((\d+(?:\.\d+)?%)\))?/);
  if (!match) {
    throw new Error(`Unrecognised catch rate: "${text.trim()}"`);
  }
  return { base: Number.parseInt(match[1], 10), percent: match[2] ?? null };
}

/**
 * Extract male/female percentages from text like "87.5% male, 12.5% female".
 *
 * @param {string} text - Raw cell text.
 * @returns {{ male: string, female: string }} Both are '0%' when the text has
 *   no percentage at all.
 * @throws {Error} If exactly one percentage is present but the text names
 *   neither "male" nor "female".
 */
function parseGenderRatio(text) {
  const percentages = text.match(/\d+\.?\d*%/g) ?? ['0%', '0%'];
  if (percentages.length === 1) {
    // A single percentage means a single-gender species: pad the other side
    // with 0%. "female" is listed first so it can never be read as "male".
    const gender = text.match(/female|male/)?.[0];
    if (gender === 'male') {
      percentages.push('0%');
    } else if (gender === 'female') {
      percentages.unshift('0%');
    } else {
      throw new Error(`Unrecognised gender ratio: "${text.trim()}"`);
    }
  }
  return { male: percentages[0], female: percentages[1] };
}

/**
 * Extract species data from a parsed Pokémon page.
 *
 * All fields are read from the first `table.roundy` element of the document.
 *
 * @param {object} doc - Parsed document of the page.
 * @returns {Promise<object>} Record with `species`, `genderRatio`, `evYields`,
 *   `isEggObtainable`, `catchRate`, `minimumHatchTime` (null when the breeding
 *   cell contains no number) and `levelingRate`.
 * @throws {Error} If the title, the table, or a required cell is missing.
 */
async function parseBulbaData(doc) {
  // Page title looks like:
  // <title>Fuecoco (Pokémon) - Bulbapedia, the community-driven Pokémon encyclopedia</title>
  const titleElement = doc.querySelector('title');
  if (!titleElement) {
    throw new Error('Document has no <title> element');
  }
  const species = titleElement.textContent.split('(')[0].trim().toLowerCase();

  const infobox = doc.querySelectorAll('table.roundy')[0];
  if (!infobox) {
    throw new Error(`No table.roundy found for "${species}"`);
  }

  const catchRate = parseCatchRate(readValueCell(infobox, 'a[href="/wiki/Catch_rate"]'));

  const genderRatio = parseGenderRatio(
    readValueCell(infobox, 'a[href="/wiki/List_of_Pok%C3%A9mon_by_gender_ratio"]'),
  );

  const mergedSteps = readValueCell(infobox, 'a[href="/wiki/Pok%C3%A9mon_breeding"]');
  const noGroup = infobox.querySelector('a[href="/wiki/No_Eggs_Discovered_(Egg_Group)"]');
  const isEggObtainable = !(noGroup || mergedSteps.includes('Egg not obtainable'));
  // The cell may hold no number at all (e.g. only "Egg not obtainable").
  const hatchMatch = mergedSteps.match(/\d+/);
  const minimumHatchTime = hatchMatch ? Number.parseInt(hatchMatch[0], 10) : null;

  const mergedYields = readValueCell(
    infobox,
    'a[href="/wiki/List_of_Pok%C3%A9mon_by_effort_value_yield"]',
  );
  const yieldNumbers = mergedYields.match(/\d+/g);
  if (!yieldNumbers) {
    throw new Error(`No EV yields found for "${species}"`);
  }
  // Of the first seven numbers the leading one is discarded; the remaining
  // six are taken as the per-stat yields.
  const [hp, atk, def, spa, spd, spe] = yieldNumbers.slice(1, 7);

  // The leveling rate belongs to the second link to /wiki/Experience.
  const levelingRate = readValueCell(infobox, 'a[href="/wiki/Experience"]', 1).trim();

  return {
    species,
    genderRatio,
    evYields: {
      hp: Number.parseInt(hp, 10),
      atk: Number.parseInt(atk, 10),
      def: Number.parseInt(def, 10),
      spa: Number.parseInt(spa, 10),
      spd: Number.parseInt(spd, 10),
      spe: Number.parseInt(spe, 10),
    },
    isEggObtainable,
    catchRate: {
      base: catchRate.base,
      percentageWithOrdinaryPokeballAtFullHealth: catchRate.percent,
    },
    minimumHatchTime,
    levelingRate,
  };
}
/**
 * Scrape every page listed in the Generation IX category and write the
 * collected records to 'partialBulbaData.json' as pretty-printed JSON.
 *
 * Pages are processed one after another. The run is fail-fast: the first page
 * that cannot be fetched or parsed aborts the run and nothing is written.
 *
 * @returns {Promise<void>}
 * @throws {Error} Naming the failing URL, with the underlying error attached
 *   as `cause`.
 */
async function main() {
  const GEN_IX_URL = 'https://bulbapedia.bulbagarden.net/wiki/Category:Generation_IX_Pok%C3%A9mon';
  const monURLs = await fetchMonURLs(GEN_IX_URL);
  const monData = [];

  for (const url of monURLs) {
    let data;
    try {
      const doc = await fetchBulbaData(url);
      data = await parseBulbaData(doc);
    } catch (err) {
      // Attach the URL so the failing page can be identified from the error alone.
      throw new Error(`Failed to process ${url}: ${err?.message ?? err}`, { cause: err });
    }
    monData.push(data);
    console.log(`Successfully processed ${data.species} (${monData.length}/${monURLs.length})`);
  }

  // write to file
  const json = JSON.stringify(monData, null, 2);
  await Deno.writeTextFile('partialBulbaData.json', json);
  console.log('done');
}
// Script entry point: run the scraper (top-level await, ES module).
await main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment