Created
May 21, 2017 16:19
-
-
Save esayler/c35a9144fd3b673a1f6bfdecbef836ab to your computer and use it in GitHub Desktop.
nightmare.js script to scrape Orcz.com "Zelda: Breath of the Wild" Recipe/Item Data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// nightmare.js script that scrapes the Orcz.com "Zelda: Breath of the Wild"
// recipes page and writes the extracted materials, effects, recipes and
// elixirs to data2.json.
const jsonfile = require('jsonfile')
const dest = 'data2.json'
jsonfile.spaces = 2
const Nightmare = require('nightmare')
const nightmare = Nightmare({ show: true })

nightmare
  .goto('http://orcz.com/Breath_of_the_Wild:_Recipes')
  .wait(2000)
  .evaluate(function () {
    // This callback reads `document`, i.e. it runs in the page rather than in
    // Node, so every helper it needs is declared locally instead of at module
    // scope.

    // Category label per table index; index 0 is the effects table and
    // indices 1-14 label the material and dish tables.
    const labels = [
      'effects',
      'food',
      'critters',
      'monster_parts',
      'fillers',
      'hearts',
      'stamina',
      'cold_resist',
      'heat_resist',
      'electric_resist',
      'movement_speed',
      'temp_maximum_hearts',
      'defense_boost',
      'attack_power',
      'stealth',
    ]
    // Tables 1-4 hold materials, 5-14 hold dishes, 15 holds elixirs.
    const FIRST_DISH_TABLE = 5
    const ELIXIR_TABLE = 15
    // Unicode vulgar fractions used in the "hearts restored" column.
    const FRACTIONS = { '\u00bc': 0.25, '\u00bd': 0.5, '\u00be': 0.75 }

    const tables = Array.from(document.querySelectorAll('table.wikitable'))
    if (tables.length === 0) {
      throw new Error('No table.wikitable elements found on the page')
    }

    // Lower-cased, underscore-joined header names of a table's first row.
    const headerKeys = (table, separator) => {
      const headerRow = table.querySelectorAll('tr')[0]
      if (!headerRow) {
        return []
      }
      return Array.from(headerRow.querySelectorAll('th')).map(th =>
        th.innerText.toLowerCase().split(separator).join('_')
      )
    }

    // Lower-cased material name -> id of the first material with that name.
    const materialIdsByName = new Map()

    // Turns a multi-line "Name xN" cell into [{ name, quantity, id }].
    // The quantity suffix is located as "x" immediately followed by digits so
    // names that themselves contain an "x" (e.g. "Hinox Toenail") are kept
    // intact and multi-digit quantities are read in full. Blank lines are
    // skipped.
    const parseIngredients = text =>
      text
        .split(/\s*\n/)
        .map(line => line.trim())
        .filter(line => line !== '')
        .map(line => {
          const quantityMatch = /\s*x(\d+)/i.exec(line)
          const name = quantityMatch ? line.slice(0, quantityMatch.index) : line
          const quantity = quantityMatch ? parseInt(quantityMatch[1], 10) : 1
          const key = name.toLowerCase()
          const id = materialIdsByName.has(key)
            ? materialIdsByName.get(key)
            : null
          return { name, quantity, id }
        })

    // Converts text such as "3", "1 ½" or "¾" to a number of hearts.
    // Text without a fraction glyph is handed to parseInt unchanged.
    const parseHearts = text => {
      const match = /(\d+)?\s*([\u00bc\u00bd\u00be])/.exec(text)
      if (!match) {
        return parseInt(text, 10)
      }
      const whole = match[1] ? parseInt(match[1], 10) : 0
      return whole + FRACTIONS[match[2]]
    }

    // Converts "M:SS" to a number of seconds.
    const parseDuration = text => {
      const parts = text.split(/:/)
      return parseInt(parts[0], 10) * 60 + parseInt(parts[1], 10)
    }

    // effects (table 0): one record per body row, keyed by header name;
    // a row with fewer cells than headers yields '' for the missing ones.
    const effectsKeys = headerKeys(tables[0], /\.?\s+/)
    const effects = Array.from(tables[0].querySelectorAll('tr'))
      .slice(1)
      .map((row, index) => {
        const cells = row.querySelectorAll('td')
        const record = { id: index + 1 }
        effectsKeys.forEach((key, j) => {
          record[key] = cells[j] ? cells[j].innerText : ''
        })
        return record
      })

    // materials, dishes and elixirs (tables 1..n)
    const materials = []
    const recipes = []
    const elixirs = []
    // Row id; advanced once per data row only, so ids are consecutive within
    // each of the three output lists.
    let id = 1
    for (let k = 1; k < tables.length; k++) {
      if (k === FIRST_DISH_TABLE || k === ELIXIR_TABLE) {
        // restart numbering where dishes and elixirs begin
        id = 1
      }
      const isMaterial = k < FIRST_DISH_TABLE
      const isDish = k >= FIRST_DISH_TABLE && k < ELIXIR_TABLE
      const isElixir = k === ELIXIR_TABLE

      const keys = headerKeys(tables[k], /\s+/)
      const rows = tables[k].querySelectorAll('tr')
      for (let r = 1; r < rows.length; r++, id++) {
        const cells = rows[r].querySelectorAll('td')
        const record = { id }
        if (k < ELIXIR_TABLE) {
          record.category = labels[k]
        }

        for (let j = 0; j < keys.length; j++) {
          const key = keys[j]
          const cell = cells[j]
          if (!cell) {
            record[key] = ''
            continue
          }
          const text = cell.innerText
          if (key === 'ingredients') {
            record[key] = parseIngredients(text)
          } else if (key === 'hearts_restored') {
            record.hearts = parseHearts(text)
          } else if (key === 'duration') {
            record.duration = parseDuration(text)
          } else if (isMaterial && key === 'resale') {
            // rename material 'resale' column to 'value'
            record.value = parseInt(text, 10)
          } else if (isMaterial && key === 'hearts') {
            // convert material 'hearts' column from string to float
            record.hearts = parseFloat(text)
          } else if (isDish && key === 'food') {
            // rename dish 'food' column to 'name'
            record.name = text
          } else if (isDish && key === 'sell_price') {
            // rename dish 'sell_price' column to 'value'; blank means 0
            record.value = text.trim() === '' ? 0 : parseInt(text, 10)
          } else if (isElixir && key === 'strength') {
            // rename elixir 'strength' column to 'effect'
            record.effect = text
          } else if (isElixir && key === 'elixir') {
            // rename elixir 'elixir' column to 'name'
            record.name = text
          } else {
            record[key] = text
          }
        }

        if (isMaterial) {
          materials.push(record)
          // Index the material so later ingredient lists can resolve its id
          // without rescanning the whole materials array.
          if (typeof record.name === 'string') {
            const nameKey = record.name.trim().toLowerCase()
            if (!materialIdsByName.has(nameKey)) {
              materialIdsByName.set(nameKey, record.id)
            }
          }
        } else if (isDish) {
          recipes.push(record)
        } else {
          elixirs.push(record)
        }
      }
    }

    return { materials, effects, recipes, elixirs }
  })
  .end()
  .then(function (result) {
    jsonfile.writeFile(dest, result, function (err) {
      // Only report when the write actually failed.
      if (err) {
        console.error(err)
      }
    })
    console.log(result)
  })
  .catch(function (error) {
    console.error('Search failed:', error)
  })
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment