Usage: run the scraper to write a tab-separated list of files, then let awk invoke curl on each entry to download it:
node scraper.js > results.txt
cat results.txt | awk -F '\t' '{system("curl --create-dirs --retry 5 -z " $2 " -o " $2 " " $3 )}'
| var request = require("request"), | |
| cheerio = require("cheerio"), | |
| root = "http://gamemusic.siroro.co.uk/"; | |
/**
 * Recursively crawls one directory-listing page and reports every file on it.
 *
 * Fetches `url` and walks each `span.file-name` entry of the returned page:
 *   - an entry named ".." is ignored;
 *   - an entry without a `.fa-folder` child is reported on stdout as
 *     `FOUND:<TAB>"<dir><name>"<TAB><link>`;
 *   - an entry with a `.fa-folder` child is crawled in turn, with
 *     `<dir><name>/` as the new path prefix.
 * A page containing a `div.alert` element is skipped entirely.
 *
 * Relies on the module-level `request`, `cheerio` and `root` bindings.
 *
 * @param {string} url - Address of the listing page to fetch.
 * @param {string} dir - Path prefix accumulated so far: "" for the start page,
 *     otherwise a path ending in "/".
 * @returns {undefined} Nothing; results are reported through console output.
 */
function process(url, dir) {
  request(url, function (error, response, body) {
    if (error) {
      // Failures go to stderr so they never end up in the tab-separated
      // stdout stream that downstream tooling parses line by line.
      console.error("RESPONSE ERROR: " + error);
      return;
    }

    var $ = cheerio.load(body);
    if ($("div.alert")[0]) {
      return;
    }

    $("span.file-name").each(function (index, element) {
      var entry = $(element);
      var name = entry.text().trim();
      if (name === "..") {
        return; // parent-directory entry: following it would walk back up
      }

      // The href lives on the element two levels above the name span.
      var link = root + entry.parent().parent().attr("href");
      var isFolder = Boolean(entry.children(".fa-folder")[0]);

      if (!isFolder) {
        console.log("FOUND:\t\"" + dir + name + "\"\t" + link);
        return;
      }

      // Hand setTimeout a function. Calling process() inline would recurse
      // immediately and pass its undefined result as the timer callback,
      // which Node rejects with a TypeError.
      setTimeout(function () {
        process(link, dir + name + "/");
      }, 0);
    });
  });
}
// Entry point: start the crawl at the site root with an empty path prefix,
// so every reported path is relative to the root listing.
process(root, "");