Skip to content

Instantly share code, notes, and snippets.

@esayler
Created May 21, 2017 16:19
Show Gist options
  • Save esayler/c35a9144fd3b673a1f6bfdecbef836ab to your computer and use it in GitHub Desktop.
Save esayler/c35a9144fd3b673a1f6bfdecbef836ab to your computer and use it in GitHub Desktop.
nightmare.js script to scrape Orcz.com "Zelda: Breath of the Wild" Recipe/Item Data
var jsonfile = require('jsonfile')
var dest = 'data2.json'
jsonfile.spaces = 2
const Nightmare = require('nightmare')
const nightmare = Nightmare({ show: true })
// Scrape the Orcz.com "Breath of the Wild" recipes page and save the effects,
// materials, recipes and elixirs tables to `dest` as JSON.
nightmare
  .goto('http://orcz.com/Breath_of_the_Wild:_Recipes')
  .wait(2000)
  // The callback below reads `document`, i.e. it is evaluated in the page rather
  // than in this Node process, so every helper it needs is defined inside it.
  .evaluate(function () {
    // Position of the first dish table and of the elixir table among the page's
    // `table.wikitable` elements. Table 0 is the effects table, tables 1-4 are
    // materials, tables 5-14 are dishes.
    const FIRST_RECIPE_TABLE = 5
    const ELIXIR_TABLE = 15

    // Category label per table index; attached to material and dish rows.
    const labels = [
      'effects',
      'food',
      'critters',
      'monster_parts',
      'fillers',
      'hearts',
      'stamina',
      'cold_resist',
      'heat_resist',
      'electric_resist',
      'movement_speed',
      'temp_maximum_hearts',
      'defense_boost',
      'attack_power',
      'stealth',
    ]

    // Numeric value of the vulgar-fraction characters used in heart counts.
    const FRACTION_VALUES = { '\u00bc': 0.25, '\u00bd': 0.5, '\u00be': 0.75 }

    // Lower-cased material name -> material id, filled while materials are read
    // so that ingredient lookups do not rescan the whole materials list.
    const materialIdByName = new Map()

    // Turn header text into a snake_case key, splitting on `separator`.
    const toKey = (text, separator) =>
      text.trim().toLowerCase().split(separator).join('_')

    // Text of a table cell, or '' when the row has no cell in that column.
    const cellText = cell => (cell ? cell.innerText : '')

    // Parse a heart count such as "3", "½" or "12 ¼" into a number.
    // Yields NaN (written as null in the JSON) when the text has no number.
    const parseHearts = text => {
      const match = /(\d+)?\s*([\u00bc\u00bd\u00be])/.exec(text)
      if (!match) {
        return parseInt(text, 10)
      }
      // `(\d+)?` keeps every digit of the whole part; a repeated single-digit
      // group would only remember the last digit.
      const whole = match[1] ? parseInt(match[1], 10) : 0
      return whole + FRACTION_VALUES[match[2]]
    }

    // Convert a "minutes:seconds" duration into a number of seconds.
    const parseDuration = text => {
      const parts = text.split(':')
      return parseInt(parts[0], 10) * 60 + parseInt(parts[1], 10)
    }

    // Parse a newline-separated ingredient list ("Apple x2\nMonster Extract")
    // into { name, quantity, id } entries; id is the matching material id or null.
    const parseIngredients = text => {
      const ingredients = []
      text.split(/\s*\n/).forEach(line => {
        if (line.trim() === '') {
          return // blank line, not an ingredient
        }
        // Only an "x" directly followed by digits is a quantity marker, so an
        // "x" inside the name itself (e.g. "Monster Extract") is left alone.
        const quantityMatch = /\s*x(\d+)/i.exec(line)
        const quantity = quantityMatch ? parseInt(quantityMatch[1], 10) : 1
        const rawName = quantityMatch ? line.slice(0, quantityMatch.index) : line
        const name = rawName.trim()
        const key = name.toLowerCase()
        const materialId = materialIdByName.has(key)
          ? materialIdByName.get(key)
          : null
        ingredients.push({ name, quantity, id: materialId })
      })
      return ingredients
    }

    // Map one table cell to the [key, value] pair stored on the row object.
    // Some columns are renamed or converted depending on which table they
    // come from, so that materials, dishes and elixirs share field names.
    const readColumn = (tableIndex, header, cell) => {
      const isMaterialTable = tableIndex < FIRST_RECIPE_TABLE
      const isRecipeTable =
        tableIndex >= FIRST_RECIPE_TABLE && tableIndex < ELIXIR_TABLE
      const isElixirTable = tableIndex === ELIXIR_TABLE

      if (!cell) return [header, '']
      const text = cell.innerText

      if (header === 'ingredients') return [header, parseIngredients(text)]
      if (header === 'hearts_restored') return ['hearts', parseHearts(text)]
      if (header === 'duration') return ['duration', parseDuration(text)]
      // material 'resale' column -> 'value', as an integer
      if (isMaterialTable && header === 'resale') return ['value', parseInt(text, 10)]
      // material 'hearts' column: string -> float
      if (isMaterialTable && header === 'hearts') return ['hearts', parseFloat(text)]
      // dish 'food' column -> 'name'
      if (isRecipeTable && header === 'food') return ['name', text]
      // dish 'sell_price' column -> 'value'; an empty cell counts as 0
      if (isRecipeTable && header === 'sell_price') {
        return ['value', text.trim() === '' ? 0 : parseInt(text, 10)]
      }
      // elixir 'strength' column -> 'effect'
      if (isElixirTable && header === 'strength') return ['effect', text]
      // elixir 'elixir' column -> 'name'
      if (isElixirTable && header === 'elixir') return ['name', text]
      return [header, text]
    }

    const tables = document.querySelectorAll('table.wikitable')
    if (tables.length === 0) {
      throw new Error('No table.wikitable elements found on the page')
    }

    // effects (table 0): one object per row, keyed by the header names
    const effectsRows = Array.from(tables[0].querySelectorAll('tr'))
    const effectsHeaders = effectsRows.length > 0
      ? Array.from(effectsRows[0].querySelectorAll('th')).map(th =>
        toKey(th.innerText, /\.?\s+/))
      : []
    const effectsData = effectsRows.slice(1).map((row, index) => {
      const cells = row.querySelectorAll('td')
      const entry = { id: index + 1 }
      effectsHeaders.forEach((header, j) => {
        entry[header] = cellText(cells[j])
      })
      return entry
    })

    // materials, dishes and elixirs (tables 1 and up)
    const materials = []
    const recipes = []
    const elixirs = []
    // Ids run consecutively within each group; the counter advances once per
    // row only, so there are no gaps at table boundaries.
    let nextId = 1
    for (let k = 1; k < tables.length; k++) {
      if (k === FIRST_RECIPE_TABLE || k === ELIXIR_TABLE) {
        // restart numbering where dishes begin and where elixirs begin
        nextId = 1
      }
      const rows = tables[k].querySelectorAll('tr')
      if (rows.length === 0) {
        continue
      }
      const headers = Array.from(rows[0].querySelectorAll('th')).map(th =>
        toKey(th.innerText, /\s+/))

      for (let r = 1; r < rows.length; r++) {
        const rowData = { id: nextId }
        nextId += 1
        if (k < ELIXIR_TABLE) {
          rowData.category = labels[k]
        }
        const cells = rows[r].querySelectorAll('td')
        headers.forEach((header, j) => {
          const column = readColumn(k, header, cells[j])
          rowData[column[0]] = column[1]
        })

        if (k < FIRST_RECIPE_TABLE) {
          materials.push(rowData)
          if (typeof rowData.name === 'string') {
            const key = rowData.name.trim().toLowerCase()
            // keep the first material seen under a given name
            if (!materialIdByName.has(key)) {
              materialIdByName.set(key, rowData.id)
            }
          }
        } else if (k < ELIXIR_TABLE) {
          recipes.push(rowData)
        } else {
          elixirs.push(rowData)
        }
      }
    }
    return { materials, effects: effectsData, recipes, elixirs }
  })
  .end()
  .then(function (result) {
    jsonfile.writeFile(dest, result, function (err) {
      // The callback also runs on success; only report an actual failure.
      if (err) {
        console.error(err)
      }
    })
    console.log(result)
  })
  .catch(function (error) {
    console.error('Search failed:', error)
  })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment