Skip to content

Instantly share code, notes, and snippets.

@donnywals
Created March 16, 2016 21:41
Show Gist options
  • Save donnywals/44fbee6f2be4b9296f56 to your computer and use it in GitHub Desktop.
Save donnywals/44fbee6f2be4b9296f56 to your computer and use it in GitHub Desktop.
var request = require('request');
var cheerio = require('cheerio');
var Promise = require('promise');
var fs = require('fs');
// Index page that lists the units of every race.
var url = 'http://eu.battle.net/sc2/en/game/unit/';

// Entry point: download the index page, collect the unit page URLs of every
// race table, scrape all of them and write the result, keyed by race name,
// to public/units.json.
request(url, function (err, res, html) {
  if (err) {
    console.log('An error occurred');
    // Log the callback's own `err` parameter; no `error` binding exists here.
    console.log(err);
    return;
  }
  var $ = cheerio.load(html);
  var promises = [];
  var races = [];
  $('.table-lotv .unit-datatable').each(function (i, dataTable) {
    var race = $(dataTable).find('.title-bar span').text();
    var $units = $(dataTable).find('.databox table .button-rollover');
    var urls = unitUrls($units, $);
    // races[i] and promises[i] describe the same table, so the results of
    // Promise.all can be matched back to their race by index.
    races.push(race);
    promises.push(scrapeUnits(urls));
  });
  Promise.all(promises).then(function (unitsPerRace) {
    var data = {};
    unitsPerRace.forEach(function (units, i) {
      data[races[i]] = units;
    });
    fs.writeFile('public/units.json', JSON.stringify(data), function (writeErr) {
      if (writeErr) {
        console.log(writeErr);
        return;
      }
      // Report success only once the file has actually been written.
      console.log('scraping complete');
    });
  }).then(null, function (scrapeErr) {
    // A failed unit scrape (or an error while assembling the data) ends up
    // here instead of becoming an unhandled rejection.
    console.log('An error occurred');
    console.log(scrapeErr);
  });
});
/**
 * Builds the absolute unit page URLs for a set of unit buttons.
 *
 * The unit identifier is taken from the text between the first pair of
 * single quotes in each element's `onclick` attribute.
 *
 * @param {Object} $units - cheerio selection of unit button elements.
 * @param {Function} $ - cheerio instance used to wrap each element.
 * @returns {string[]} one URL per element that carries a usable identifier.
 */
function unitUrls($units, $) {
  var baseUrl = 'http://eu.battle.net/sc2/en/game/unit/';
  var urls = [];
  $units.each(function (i, unit) {
    var onclick = $(unit).attr('onclick');
    // Elements without an onclick attribute cannot be resolved to a unit.
    if (typeof onclick !== 'string') {
      return;
    }
    var parts = onclick.split("'");
    // No quoted (or an empty) identifier would produce a bogus URL; skip it.
    if (parts.length < 2 || parts[1] === '') {
      return;
    }
    urls.push(baseUrl + parts[1]);
  });
  return urls;
}
/**
 * Scrapes every unit page in `unitUrls`.
 *
 * @param {string[]} unitUrls - absolute URLs of the unit pages to scrape.
 * @returns {Promise} fulfilled with an array of unit data objects in the same
 *   order as `unitUrls`; rejected as soon as any single scrape is rejected.
 */
function scrapeUnits(unitUrls) {
  var pending = unitUrls.map(function (url) {
    return scrapeUnit(url);
  });
  // Returning the combined promise directly lets rejections reach the caller;
  // wrapping it in a new Promise that never calls reject would hang instead.
  return Promise.all(pending);
}
/**
 * Downloads a single unit page and extracts its data.
 *
 * @param {string} url - absolute URL of the unit page.
 * @returns {Promise} fulfilled with an object holding `name`, `description`,
 *   `stats` (`basic_information`, `production`, `combat`), `manual` (`tips`,
 *   `against`) and `countermeasures`; rejected when the request fails or when
 *   extracting the data throws.
 */
function scrapeUnit(url) {
  return new Promise(function (fulfill, reject) {
    request(url, function (err, res, html) {
      if (err) {
        console.log('An error occurred');
        console.log(err);
        reject(err);
        return;
      }
      // This callback runs asynchronously, so an exception thrown while
      // parsing would not be converted into a rejection by the Promise
      // constructor; catch it and reject explicitly.
      try {
        var unitData = {};
        var $ = cheerio.load(html);
        unitData["name"] = stripWhiteSpace($('.page-title .header-2').text());
        unitData["description"] = textContents($('.outline'));

        var $stats = $('.unit-statistics .stat-data-lotv .statistics-content');
        unitData["stats"] = {};
        var $basics = $stats.find('.basic-stats table tr');
        unitData["stats"]["basic_information"] = getBasicStats($basics, $);
        var $production = $stats.find('.production-stats table tr');
        unitData["stats"]["production"] = getProductionStats($production, $);
        var $combat = $stats.find('.combat-stats .content table tr');
        unitData["stats"]["combat"] = getCombatStats($combat, $);

        var $manual = $('.unit-manual');
        unitData["manual"] = {};
        // Only the first list inside the manual is read as tips.
        var $tips = $manual.find('ul').first().find('li');
        unitData["manual"]["tips"] = getManualTips($tips, $);
        var $against = $manual.find('.against div');
        unitData["manual"]["against"] = getManualAgainst($against, $);

        var $counters = $('.countermeasures .measure-wrapper');
        unitData["countermeasures"] = getCounters($counters, $);

        fulfill(unitData);
      } catch (parseErr) {
        reject(parseErr);
      }
    });
  });
}
/**
 * Collects the countermeasure tips of each measure wrapper.
 *
 * The n-th wrapper is stored under the n-th entry of
 * ["Terran", "Zerg", "Protoss"].
 *
 * @param {Object} $counters - cheerio selection of the measure wrappers.
 * @param {Function} $ - cheerio instance used to wrap each element.
 * @returns {Object} map from race name to an array of tip texts.
 */
function getCounters($counters, $) {
  var raceOrder = ["Terran", "Zerg", "Protoss"];
  var result = {};
  $counters.each(function (wrapperIndex, wrapper) {
    var tipTexts = [];
    var $items = $(wrapper).find('ul li');
    $items.each(function (itemIndex, item) {
      tipTexts.push($(item).text());
    });
    result[raceOrder[wrapperIndex]] = tipTexts;
  });
  return result;
}
/**
 * Reads the "strong against" / "weak against" unit lists of the manual.
 *
 * A group carrying the class `strong` is stored under "strong", every other
 * group under "weak". Each listed unit becomes `{name, identifier}`, where
 * the identifier is the link's href with a "./" occurrence removed, or ""
 * when the item has no href.
 *
 * @param {Object} $against - cheerio selection of the group elements.
 * @param {Function} $ - cheerio instance used to wrap each element.
 * @returns {Object} map from "strong"/"weak" to arrays of unit descriptors.
 */
function getManualAgainst($against, $) {
  var matchups = {};
  $against.each(function (groupIndex, group) {
    var $group = $(group);
    var bucket = 'weak';
    if ($group.hasClass('strong')) {
      bucket = 'strong';
    }
    var entries = [];
    matchups[bucket] = entries;
    $group.find('ul li').each(function (itemIndex, unitItem) {
      var href = $(unitItem).find('a').attr('href');
      var identifier = typeof href === 'undefined' ? "" : href.replace('./', '');
      entries.push({
        "name": textContents($(unitItem).find('a')),
        "identifier": identifier
      });
    });
  });
  return matchups;
}
/**
 * Returns the text of every tip element, in document order.
 *
 * @param {Object} $tips - cheerio selection of the tip list items.
 * @param {Function} $ - cheerio instance used to wrap each element.
 * @returns {string[]} the tip texts.
 */
function getManualTips($tips, $) {
  var collected = [];
  $tips.each(function (position, element) {
    var $tip = $(element);
    collected.push($tip.text());
  });
  return collected;
}
/**
 * Extracts the combat rows labelled "upgrades", "weapon" or "ability".
 *
 * The row label is the lower-cased `.title` text with its first ":" removed;
 * rows with any other label are ignored.
 *
 * @param {Object} $combat - cheerio selection of the combat table rows.
 * @param {Function} $ - cheerio instance used to wrap each element.
 * @returns {Object} map from row label to the result of extractCombatData.
 */
function getCombatStats($combat, $) {
  var handledLabels = ["upgrades", "weapon", "ability"];
  var result = {};
  $combat.each(function (rowIndex, row) {
    var $row = $(row);
    var label = $row.find('.title').text().toLowerCase().replace(':', '');
    if (handledLabels.indexOf(label) === -1) {
      return;
    }
    result[label] = extractCombatData($row.find('.content'), $);
  });
  return result;
}
/**
 * Extracts the tooltip data behind every small tooltip icon of a combat row.
 *
 * Each icon's `data-tooltip` attribute is used as a selector (looked up
 * inside `$stats`) for the tooltip element. From that element the title, the
 * first paragraph and every list item of the tooltip content are read. List
 * items are stored under their lower-cased `<strong>` label with the first
 * ":" removed.
 *
 * The "cost" item is split into `{minerals, gas}`. A cost with a single
 * value gets gas "0", matching the default getProductionStats uses.
 *
 * @param {Object} $stats - cheerio selection of the row's content cell.
 * @param {Function} $ - cheerio instance used to wrap each element.
 * @returns {Object[]} one data object per tooltip icon.
 */
function extractCombatData($stats, $) {
  var stats = [];
  $stats.find('.tooltip-icon-small').each(function (i, icon) {
    var $stat = $stats.find($(icon).attr('data-tooltip'));
    var data = {
      "title": $stat.find('.tooltip-title').text(),
      "description": $stat.find("p").first().text()
    };
    $stat.find('.unit-datatooltip-content ul li').each(function (j, item) {
      var key = $(item).find('strong').text().toLowerCase().replace(':', '');
      var value = textContents($(item));
      if (key === "cost") {
        // Split on whitespace runs so the result does not depend on how many
        // spaces separate the two amounts.
        var costs = value.split(/\s+/);
        data[key] = {
          "minerals": costs[0],
          "gas": costs.length > 1 ? costs[1] : "0"
        };
      } else {
        data[key] = value;
      }
    });
    stats.push(data);
  });
  return stats;
}
/**
 * Reads the production table into an object keyed by row label.
 *
 * The row label is the lower-cased `.title` text with its first ":" removed.
 * The "cost" row becomes `{minerals, gas}`, taken from the first and second
 * child of the content cell; gas stays "0" when there is no second child.
 * Every other row stores the text contents of its content cell.
 *
 * @param {Object} $production - cheerio selection of the production rows.
 * @param {Function} $ - cheerio instance used to wrap each element.
 * @returns {Object} map from row label to its value.
 */
function getProductionStats($production, $) {
  var result = {};
  $production.each(function (rowIndex, row) {
    var $row = $(row);
    var label = $row.find('.title').text().toLowerCase().replace(':', '');
    var $content = $row.find('.content');
    if (label !== "cost") {
      result[label] = textContents($content);
      return;
    }
    var $parts = $content.children();
    var cost = {
      "minerals": textContents($($parts[0])),
      "gas": "0"
    };
    if ($parts.length > 1) {
      cost["gas"] = textContents($($parts[1]));
    }
    result[label] = cost;
  });
  return result;
}
/**
 * Reads the basic-information table into an object keyed by row label.
 *
 * The row label is the lower-cased `.title` text with its first ":" removed.
 * The rows "armor", "shield", "movement", "energy" and "life" have dedicated
 * parsers (see `parsers` below); every other row stores the text contents of
 * its content cell.
 *
 * @param {Object} $basics - cheerio selection of the basic-stats rows.
 * @param {Function} $ - cheerio instance used to wrap each element.
 * @returns {Object} map from row label to its parsed value.
 */
function getBasicStats($basics, $) {
  var parsers = {
    // Initial armor plus the upgraded value with its parentheses removed.
    "armor": function ($cell) {
      var upgraded = textContents($cell.find('.max-upgrade'));
      return {
        "initial": textContents($cell),
        "after_upgrades": upgraded.replace('(', '').replace(')', '')
      };
    },
    "shield": function ($cell) {
      return {
        "value": $cell.find('.has-tooltip').text(),
        "regeneration": textContents($cell.find("#tooltip-shields"))
      };
    },
    // The acceleration is the second space-separated token of the tooltip.
    "movement": function ($cell) {
      var tooltipTokens = textContents($cell.find('#tooltip-movement')).split(" ");
      return {
        "label": textContents($cell.find('.has-tooltip')),
        "acceleration": tooltipTokens[1]
      };
    },
    // The tooltip text is read as "<starting> <regeneration>".
    "energy": function ($cell) {
      var tooltipTokens = textContents($cell.find('#tooltip-energy')).split(" ");
      return {
        "value": $cell.find('.has-tooltip').text(),
        "regeneration": tooltipTokens[1],
        "starting": tooltipTokens[0]
      };
    },
    // Prefer the tooltip anchor's text; fall back to the cell's own text.
    "life": function ($cell) {
      var tooltipText = $cell.find('.has-tooltip').text();
      if (tooltipText !== "") {
        return tooltipText;
      }
      return textContents($cell);
    }
  };
  var result = {};
  $basics.each(function (rowIndex, row) {
    var $row = $(row);
    var label = $row.find('.title').text().toLowerCase().replace(':', '');
    var $content = $row.find('.content');
    // Own-property check so labels such as "constructor" use the default.
    if (Object.prototype.hasOwnProperty.call(parsers, label)) {
      result[label] = parsers[label]($content);
    } else {
      result[label] = textContents($content);
    }
  });
  return result;
}
/**
 * Removes every line break (\r\n, \n, \r) and tab from a string.
 * Spaces are left untouched.
 *
 * @param {string} string - the text to clean.
 * @returns {string} the text without line breaks and tabs.
 */
function stripWhiteSpace(string) {
  var pieces = string.split(/\r?\n|\r|\t/);
  return pieces.join("");
}
/**
 * Returns the text held directly in an element's own text nodes: only
 * contents whose `type` is 'text' are kept. Line breaks and tabs are
 * stripped and the result is trimmed.
 *
 * @param {Object} $element - cheerio selection to read from.
 * @returns {string} the cleaned text of the direct text nodes.
 */
function textContents($element) {
  var $textNodes = $element.contents().filter(function () {
    return this.type === 'text';
  });
  var rawText = $textNodes.text();
  return stripWhiteSpace(rawText).trim();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment