Last active
August 29, 2015 14:20
-
-
Save withinboredom/8b5e67869486dd202e6b to your computer and use it in GitHub Desktop.
a dumb js scraper using iframes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function() { | |
var items = []; | |
window.inventory = []; | |
window.errors = []; | |
/**
 * Removes unwanted entries from this array in place and trims the strings
 * that remain.
 *
 * An entry is dropped when it strictly equals `deleteValue`, either as-is or
 * after trimming, so `clean("")` also removes whitespace-only strings.
 * Non-string entries are never trimmed; they are kept untouched unless they
 * strictly equal `deleteValue`.
 *
 * Installed through Object.defineProperty so the method is non-enumerable and
 * does not appear in `for...in` loops over arrays.
 *
 * @param {*} deleteValue - value whose occurrences are removed.
 * @returns {Array} this same array, compacted.
 */
Object.defineProperty(Array.prototype, 'clean', {
  configurable: true,
  writable: true,
  enumerable: false,
  value: function clean(deleteValue) {
    // Single-pass compaction: `kept` is the next write position, which avoids
    // the repeated element shifting that splice() inside the loop would cause.
    var kept = 0;
    for (var i = 0; i < this.length; i++) {
      var raw = this[i];
      var value = typeof raw === 'string' ? raw.trim() : raw;
      if (raw === deleteValue || value === deleteValue) {
        continue;
      }
      this[kept] = value;
      kept++;
    }
    this.length = kept;
    return this;
  }
});
// CSS class names whose matching elements are searched for a product link.
var categories = ['subcat_964', 'subcat_73'];
// Queue the first link found under every element of each category, in
// category order. Elements without a usable href are skipped so no iframe is
// later pointed at "undefined", and a URL already queued is not queued again.
categories.forEach(function (category) {
  $('.' + category).each(function (index, tag) {
    var href = $(tag).find('a').attr('href');
    if (href && items.indexOf(href) === -1) {
      items.push(href);
    }
  });
});
/**
 * Reads the text after the "$" sign on the second "<br>"-separated line of a
 * cell's HTML.
 *
 * @param {Object} cell - jQuery collection wrapping the cell.
 * @returns {string} trimmed text following the "$".
 * @throws {TypeError} when the cell is empty or lacks the "<br>"/"$" parts.
 */
var readPrice = function(cell) {
  return cell.html().split("<br>")[1].split("$")[1].trim();
};

/**
 * Builds one inventory record from a loaded product page.
 *
 * @param {Window} frameWindow - window of the iframe holding the page; it must
 *   expose its own jQuery as `$`.
 * @returns {Object} record with name, science, description, sku, wholesale,
 *   retail, size, benefits and picture.
 * @throws {Error} when the window is inaccessible or an expected element is
 *   missing; the caller records the failure.
 */
var scrapeItem = function(frameWindow) {
  var page$ = frameWindow.$;
  // Kept from the original script; presumably so arrays created inside the
  // frame can be cleaned too — TODO confirm it is still needed.
  frameWindow.Array.prototype.clean = Array.prototype.clean;

  var obj = {};
  obj.name = page$('.detailtitle').text();
  obj.science = page$('span i').text();
  obj.description = page$('span p').text().trim();

  var deets = page$('.category-description');
  obj.sku = deets.eq(0).text().trim().split("#")[1];
  obj.wholesale = readPrice(deets.eq(1));
  obj.retail = readPrice(deets.eq(2));
  obj.size = deets.eq(4).text().trim().split(" ");

  // Each tab link's href is used as the selector of its content panel.
  var benefitsPanel = page$("#tabs li :contains(Benefits)").attr("href");
  // NOTE(review): the first pattern strips every space, so multi-word benefits
  // lose their word breaks — confirm this is intended (it may originally have
  // targeted "&nbsp;").
  obj.benefits = page$(benefitsPanel)
    .html()
    .replace(/ /g, "")
    .replace(/\n/g, "")
    .replace(/\t/g, "")
    .replace(/•/g, "")
    .trim()
    .split("<br>")
    .clean("");

  var resourcesPanel = page$("#tabs li :contains(Resources)").attr("href");
  // The picture is taken from the second link inside the Resources panel.
  var resources = page$(resourcesPanel + " a")[1];
  obj.picture = typeof resources === "undefined" ? "none" : page$(resources).attr("href");
  return obj;
};

/**
 * Scrapes `items[index]` in a temporary iframe, then schedules the next item.
 *
 * Successful records are pushed onto `window.inventory`; failures are pushed
 * onto `window.errors` as `{ item, exception }`. Items are processed one at a
 * time, with a random pause of 100-1100 ms before the next one starts.
 *
 * @param {number} [index] - position in `items` to scrape; defaults to 0.
 */
var getAll = function(index) {
  var position = index == null ? 0 : index;
  if (position >= items.length) {
    // Nothing (left) to scrape; avoids loading an iframe with src "undefined".
    return;
  }
  var url = items[position];

  // Pass the URL as an attribute rather than concatenating it into markup, so
  // quotes in the URL cannot break the element.
  var frame = $('<iframe>', { 'class': 'crapper', src: url });

  // Bind before inserting so the load event cannot be missed.
  frame.on('load', function() {
    try {
      // Use this iframe's own window instead of frames[0], which is only
      // correct when the host page has no other frames.
      window.inventory.push(scrapeItem(frame.get(0).contentWindow));
    }
    catch (ex) {
      try {
        window.errors.push({
          item: url,
          exception: ex
        });
      }
      catch (pushError) {
        console.log("failed to retrieve item for inventory");
      }
    }
    finally {
      // Remove only the frame created here, not every iframe on the page.
      frame.remove();
    }

    // Math.random() takes no argument; scale its [0, 1) result explicitly.
    var wait = 100 + Math.random() * 1000;
    setTimeout(function() {
      if (position + 1 < items.length) {
        getAll(position + 1);
      }
    }, wait);
  });

  frame.appendTo('body');
};
getAll(); | |
})() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment