Created
March 16, 2016 21:41
-
-
Save donnywals/44fbee6f2be4b9296f56 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| var request = require('request'); | |
| var cheerio = require('cheerio'); | |
| var Promise = require('promise'); | |
| var fs = require('fs'); | |
var url = 'http://eu.battle.net/sc2/en/game/unit/';

/*
 * Entry point: fetch the unit index page, collect the unit URLs listed in
 * every race table, scrape each unit and write the result, keyed by the
 * race title, to public/units.json.
 */
request(url, function(err, res, html) {
  if (err) {
    console.log('An error occurred');
    // Log the callback's own `err`; there is no `error` binding in scope.
    console.log(err);
    return;
  }

  var $ = cheerio.load(html);
  var promises = [];
  var races = [];

  $('.table-lotv .unit-datatable').each(function(i, dataTable) {
    var race = $(dataTable).find('.title-bar span').text();
    var $units = $(dataTable).find('.databox table .button-rollover');
    var urls = unitUrls($units, $);
    // races[i] and promises[i] stay index-aligned so results can be keyed below.
    races.push(race);
    promises.push(scrapeUnits(urls));
  });

  Promise.all(promises).then(function(unitsPerRace) {
    var data = {};
    unitsPerRace.forEach(function(units, i) {
      data[races[i]] = units;
    });

    fs.writeFile('public/units.json', JSON.stringify(data), function(writeErr) {
      if (writeErr) {
        console.log(writeErr);
        return;
      }
      // Only report success once the file has actually been written.
      console.log('scraping complete');
    });
  }).catch(function(scrapeErr) {
    // Without this handler a failed unit scrape would go unreported.
    console.log('An error occurred');
    console.log(scrapeErr);
  });
});
/**
 * Builds the absolute detail-page URL for every unit button in a selection.
 *
 * The unit path is taken from the first single-quoted segment of each
 * button's `onclick` attribute and appended to the unit index URL.
 * Buttons that expose no such segment are skipped.
 *
 * @param {Object} $units - cheerio selection of unit buttons.
 * @param {Function} $ - cheerio instance used to wrap each raw element.
 * @returns {string[]} One absolute URL per button that carries a target.
 */
function unitUrls($units, $) {
  var baseUrl = 'http://eu.battle.net/sc2/en/game/unit/';
  var urls = [];

  $units.each(function(i, unit) {
    var onclick = $(unit).attr('onclick');
    // `.attr` yields undefined when the attribute is absent; calling
    // `.split` on that would throw, so such buttons are skipped.
    if (typeof onclick !== 'string') {
      return;
    }

    var parts = onclick.split("'");
    // Fewer than two parts means the handler contains no quoted segment,
    // which would otherwise produce a ".../undefined" URL.
    if (parts.length < 2) {
      return;
    }

    urls.push(baseUrl + parts[1]);
  });

  return urls;
}
/**
 * Scrapes every unit page in the given list concurrently.
 *
 * The promise returned by `Promise.all` is handed back directly, so a
 * failure of any single unit scrape rejects the result instead of leaving
 * it pending forever.
 *
 * @param {string[]} unitUrls - Absolute URLs of the unit detail pages.
 * @returns {Promise} Resolves with an array of unit objects in the same
 *   order as `unitUrls`; rejects with the first scrape error.
 */
function scrapeUnits(unitUrls) {
  var pending = unitUrls.map(function(url) {
    return scrapeUnit(url);
  });

  return Promise.all(pending);
}
/**
 * Downloads a single unit page and extracts its data.
 *
 * The resulting object has the keys `name`, `description`, `stats`
 * (`basic_information`, `production`, `combat`), `manual` (`tips`,
 * `against`) and `countermeasures`.
 *
 * @param {string} url - Absolute URL of the unit detail page.
 * @returns {Promise} Resolves with the unit object; rejects when the
 *   request fails or when extracting data from the page throws.
 */
function scrapeUnit(url) {
  return new Promise(function(fulfill, reject) {
    request(url, function(err, res, html) {
      if (err) {
        console.log('An error occurred');
        console.log(err);
        reject(err);
        return;
      }

      // This callback runs outside the promise executor, so an exception
      // thrown here would not reject the promise by itself. Catch it and
      // reject explicitly so the promise always settles.
      try {
        var $ = cheerio.load(html);
        var unitData = {};

        unitData["name"] = stripWhiteSpace($('.page-title .header-2').text());
        unitData["description"] = textContents($('.outline'));

        var $stats = $('.unit-statistics .stat-data-lotv .statistics-content');
        unitData["stats"] = {};

        var $basics = $stats.find('.basic-stats table tr');
        unitData["stats"]["basic_information"] = getBasicStats($basics, $);

        var $production = $stats.find('.production-stats table tr');
        unitData["stats"]["production"] = getProductionStats($production, $);

        var $combat = $stats.find('.combat-stats .content table tr');
        unitData["stats"]["combat"] = getCombatStats($combat, $);

        var $manual = $('.unit-manual');
        unitData["manual"] = {};

        // Only the first list inside the manual is read for tips.
        var $tips = $manual.find('ul').first().find('li');
        unitData["manual"]["tips"] = getManualTips($tips, $);

        var $against = $manual.find('.against div');
        unitData["manual"]["against"] = getManualAgainst($against, $);

        var $counters = $('.countermeasures .measure-wrapper');
        unitData["countermeasures"] = getCounters($counters, $);

        fulfill(unitData);
      } catch (parseErr) {
        console.log('An error occurred');
        console.log(parseErr);
        reject(parseErr);
      }
    });
  });
}
/**
 * Collects the countermeasure tips of a unit, grouped by race.
 *
 * The wrapper at position 0, 1 and 2 is stored under "Terran", "Zerg" and
 * "Protoss" respectively; the key is looked up purely by position.
 *
 * @param {Object} $counters - cheerio selection of the measure wrappers.
 * @param {Function} $ - cheerio instance used to wrap raw elements.
 * @returns {Object} Map from race name to an array of tip texts.
 */
function getCounters($counters, $) {
  var raceNames = ["Terran", "Zerg", "Protoss"];
  var result = {};

  $counters.each(function(raceIndex, wrapper) {
    var tips = [];
    var $items = $(wrapper).find('ul li');

    $items.each(function(tipIndex, item) {
      tips.push($(item).text());
    });

    result[raceNames[raceIndex]] = tips;
  });

  return result;
}
/**
 * Reads the "strong against" / "weak against" unit lists of a unit manual.
 *
 * A group carrying the class `strong` is stored under "strong"; every other
 * group is stored under "weak". A later group of the same kind replaces the
 * list of an earlier one.
 *
 * @param {Object} $against - cheerio selection of the group elements.
 * @param {Function} $ - cheerio instance used to wrap raw elements.
 * @returns {Object} Map from "strong"/"weak" to an array of
 *   `{ name, identifier }` objects; `identifier` is the link's href with
 *   the first "./" removed, or "" when the item has no href.
 */
function getManualAgainst($against, $) {
  var result = {};

  $against.each(function(groupIndex, group) {
    var $group = $(group);
    var entries = [];
    var kind;

    if ($group.hasClass('strong')) {
      kind = 'strong';
    } else {
      kind = 'weak';
    }
    result[kind] = entries;

    $group.find('ul li').each(function(itemIndex, unitItem) {
      var $link = $(unitItem).find('a');
      var href = $link.attr('href');

      entries.push({
        "name": textContents($link),
        "identifier": href === undefined ? "" : href.replace('./', '')
      });
    });
  });

  return result;
}
/**
 * Extracts the text of every tip element.
 *
 * @param {Object} $tips - cheerio selection of the tip list items.
 * @param {Function} $ - cheerio instance used to wrap raw elements.
 * @returns {string[]} The text of each element, in selection order.
 */
function getManualTips($tips, $) {
  var collected = [];

  $tips.each(function(index, element) {
    var text = $(element).text();
    collected.push(text);
  });

  return collected;
}
/**
 * Extracts the combat section of a unit's statistics table.
 *
 * Each row's title is lowercased and stripped of its first colon. Only rows
 * titled "upgrades", "weapon" or "ability" are kept; all other rows are
 * ignored.
 *
 * @param {Object} $combat - cheerio selection of the combat table rows.
 * @param {Function} $ - cheerio instance used to wrap raw elements.
 * @returns {Object} Map from row title to the result of
 *   `extractCombatData` for that row's content cell.
 */
function getCombatStats($combat, $) {
  var wantedRows = ["upgrades", "weapon", "ability"];
  var result = {};

  $combat.each(function(rowIndex, row) {
    var $row = $(row);
    var label = $row.find('.title').text().toLowerCase().replace(':', '');
    var $content = $row.find('.content');

    if (wantedRows.indexOf(label) === -1) {
      return;
    }

    result[label] = extractCombatData($content, $);
  });

  return result;
}
/**
 * Reads the tooltip data attached to every small icon inside a combat cell.
 *
 * For each icon, the value of its `data-tooltip` attribute is used as a
 * selector to find the tooltip element inside `$stats`. The tooltip's title
 * and first paragraph become `title` and `description`. Every list item of
 * the tooltip adds one more property, keyed by the item's lowercased
 * `<strong>` label with its first colon removed.
 *
 * @param {Object} $stats - cheerio selection of the row's content cell.
 * @param {Function} $ - cheerio instance used to wrap raw elements.
 * @returns {Object[]} One data object per icon, in document order.
 */
function extractCombatData($stats, $) {
  var stats = [];

  $stats.find('.tooltip-icon-small').each(function(i, icon) {
    var $stat = $stats.find($(icon).attr('data-tooltip'));
    var data = {
      "title": $stat.find('.tooltip-title').text(),
      "description": $stat.find("p").first().text()
    };

    $stat.find('.unit-datatooltip-content ul li').each(function(j, item) {
      var $item = $(item);
      var key = $item.find('strong').text().toLowerCase().replace(':', '');
      var value = textContents($item);

      if (key === "cost") {
        // `split` always returns an array, so no "undefined" fallback is
        // needed. Index 0 is stored as minerals and index 2 as gas;
        // presumably the two amounts are separated by two spaces - TODO
        // confirm against the page markup. `gas` is undefined when the
        // text has fewer than three parts.
        var costs = value.split(" ");
        data[key] = {
          "minerals": costs[0],
          "gas": costs[2]
        };
      } else {
        data[key] = value;
      }
    });

    stats.push(data);
  });

  return stats;
}
/**
 * Extracts the production section of a unit's statistics table.
 *
 * Each row's title is lowercased and stripped of its first colon to form
 * the key. The "cost" row becomes `{ minerals, gas }`, read from the first
 * and second child of the content cell; `gas` is "0" when the cell has a
 * single child. Every other row stores the cell's own text.
 *
 * @param {Object} $production - cheerio selection of the production rows.
 * @param {Function} $ - cheerio instance used to wrap raw elements.
 * @returns {Object} Map from row title to its extracted value.
 */
function getProductionStats($production, $) {
  var result = {};

  $production.each(function(rowIndex, row) {
    var $row = $(row);
    var label = $row.find('.title').text().toLowerCase().replace(':', '');
    var $content = $row.find('.content');

    if (label !== "cost") {
      result[label] = textContents($content);
      return;
    }

    var parts = $content.children();
    var cost = {
      "minerals": textContents($(parts[0])),
      "gas": "0"
    };
    if (parts.length > 1) {
      cost["gas"] = textContents($(parts[1]));
    }
    result[label] = cost;
  });

  return result;
}
/**
 * Extracts the basic-information section of a unit's statistics table.
 *
 * Each row's title is lowercased and stripped of its first colon to form
 * the key. The rows "armor", "shield", "movement", "energy" and "life" have
 * dedicated readers; every other row stores the content cell's own text.
 *
 * @param {Object} $basics - cheerio selection of the basic-stats rows.
 * @param {Function} $ - cheerio instance used to wrap raw elements.
 * @returns {Object} Map from row title to its extracted value.
 */
function getBasicStats($basics, $) {
  // One reader per specially handled row; each receives the content cell.
  var readers = {
    "armor": function($cell) {
      return {
        "initial": textContents($cell),
        "after_upgrades": textContents($cell.find('.max-upgrade')).replace('(', '').replace(')', '')
      };
    },
    "shield": function($cell) {
      return {
        "value": $cell.find('.has-tooltip').text(),
        "regeneration": textContents($cell.find("#tooltip-shields"))
      };
    },
    "movement": function($cell) {
      return {
        "label": textContents($cell.find('.has-tooltip')),
        "acceleration": textContents($cell.find('#tooltip-movement')).split(" ")[1]
      };
    },
    "energy": function($cell) {
      var value = $cell.find('.has-tooltip').text();
      // The tooltip text is split on spaces: part 0 is stored as the
      // starting amount and part 1 as the regeneration.
      var parts = textContents($cell.find('#tooltip-energy')).split(" ");
      return {
        "value": value,
        "regeneration": parts[1],
        "starting": parts[0]
      };
    },
    "life": function($cell) {
      var tooltipText = $cell.find('.has-tooltip').text();
      // Fall back to the cell's own text when there is no tooltip value.
      if (tooltipText == "") {
        return textContents($cell);
      }
      return tooltipText;
    }
  };
  var result = {};

  $basics.each(function(rowIndex, row) {
    var $row = $(row);
    var label = $row.find('.title').text().toLowerCase().replace(':', '');
    var $cell = $row.find('.content');
    // Own-property check so labels such as "constructor" use the default.
    var read = Object.prototype.hasOwnProperty.call(readers, label)
      ? readers[label]
      : textContents;

    result[label] = read($cell);
  });

  return result;
}
/**
 * Removes every line break (CRLF, LF or CR) and tab from a string.
 * Spaces are left untouched.
 *
 * @param {string} string - Text to clean.
 * @returns {string} The text without line breaks and tabs.
 */
function stripWhiteSpace(string) {
  var lineBreaksAndTabs = /\r?\n|\r|\t/g;
  return string.replace(lineBreaksAndTabs, "");
}
/**
 * Returns the text of an element's direct text nodes only, ignoring the
 * text inside child elements. Line breaks and tabs are removed and the
 * result is trimmed.
 *
 * @param {Object} $element - cheerio selection to read.
 * @returns {string} The cleaned, trimmed text.
 */
function textContents($element) {
  var $textNodes = $element.contents().filter(function() {
    return this.type === 'text';
  });
  var joined = $textNodes.text();

  return stripWhiteSpace(joined).trim();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment