// Page text captured with this gist (not part of the program):
// Save wlepinski/8783215 to your computer and use it in GitHub Desktop.
// This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
// Learn more about bidirectional Unicode characters
var Browser = require('zombie'),
    url = require('url'),
    fs = require('fs'),
    nodePath = require('path'),
    $q = require('Q'),
    saveDir = __dirname + '/_snapshots';
// Matches a complete <script ...>...</script> element, case-insensitively and
// globally. The negative lookahead lets the element body contain other "<"
// characters while still stopping at the first closing </script> tag.
var scriptTagRegex = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi;

// Returns a copy of `html` with every <script> element (tags and contents)
// removed. `html` must be a string; it is not modified.
var stripScriptTags = function(html) {
  return html.replace(scriptTagRegex, '');
};
// Creates `dirPath`, creating any missing parent directories first
// (like `mkdir -p`).
//
// dirPath  - directory to create.
// mode     - permission mode handed to fs.mkdir (may be undefined).
// callback - optional; invoked exactly once with an error, or with a falsy
//            value once the directory exists.
//
// A path that already exists as a directory counts as success. If something
// other than a directory is in the way, the original EEXIST error is reported.
var mkdirParent = function(dirPath, mode, callback) {
  var done = typeof callback === 'function' ? callback : function() {};

  fs.mkdir(dirPath, mode, function(error) {
    if (!error) {
      return done(error);
    }

    if (error.code === 'ENOENT') {
      // A parent is missing: create it first, then retry this directory.
      var parentDir = nodePath.dirname(dirPath);
      if (parentDir === dirPath) {
        // Reached the filesystem root; there is nothing left to create.
        return done(error);
      }
      return mkdirParent(parentDir, mode, function(parentError) {
        if (parentError) {
          return done(parentError);
        }
        mkdirParent(dirPath, mode, done);
      });
    }

    if (error.code === 'EEXIST') {
      // Only an existing *directory* counts as success.
      return fs.stat(dirPath, function(statError, stats) {
        done(!statError && stats.isDirectory() ? null : error);
      });
    }

    done(error);
  });
};
// Writes `body` (the rendered HTML of a page) to a file under `saveDir`
// whose location is derived from `uri`.
//
// uri  - URL of the page that was rendered.
// body - HTML text to store.
//
// The relative file path comes from the part after "#!" for hashbang URLs,
// otherwise from the URL pathname. "/" maps to "/index.html", and ".html" is
// appended when the path does not already contain it. The write is
// asynchronous; failures are logged rather than thrown.
var saveSnapshot = function(uri, body) {
  var lastIdx = uri.lastIndexOf('#!/');
  var snapshotPath;
  if (lastIdx < 0) {
    // If we're using html5mode; fall back to "/" when the URL has no pathname.
    snapshotPath = url.parse(uri).pathname || '/';
  } else {
    // If we're using hashbang mode: keep everything from the "/" after "#!".
    snapshotPath = uri.substring(lastIdx + 2);
  }

  if (snapshotPath === '/') {
    snapshotPath = "/index.html";
  }
  if (snapshotPath.indexOf('.html') === -1) {
    snapshotPath += ".html";
  }

  var filename = saveDir + snapshotPath;
  console.log("Saving ", uri, " to ", filename);

  var dirname = nodePath.dirname(filename);
  // Only write once the directory is known to exist; opening the file before
  // the asynchronous mkdir finishes would race with it.
  mkdirParent(dirname, undefined, function(mkdirError) {
    // An already-existing directory is fine.
    if (mkdirError && mkdirError.code !== 'EEXIST') {
      console.error("Could not create ", dirname, ": ", mkdirError);
      return;
    }
    // writeFile opens, writes and closes in one step, so no descriptor leaks.
    fs.writeFile(filename, body, function(writeError) {
      if (writeError) {
        console.error("Could not save ", filename, ": ", writeError);
      }
    });
  });
};
// Options handed to the Zombie Browser constructor.
// NOTE(review): presumably waitFor/waitDuration bound how long Zombie waits
// for page activity and loadCSS: false skips stylesheets — confirm against
// the installed Zombie version.
var browserOpts = {
  waitFor: "100ms",
  loadCSS: false,
  waitDuration: "100ms"
};
// Single shared headless browser instance, reused for every page visited.
var browser = new Browser(browserOpts);
// Visits arr[idx] in the shared browser, waits until the page marks itself
// ready, saves a snapshot, then moves on to arr[idx + 1].
//
// idx - index into `arr` of the URL to visit.
// arr - work queue of absolute URLs. It is mutated: every link found on a
//       page is appended if it is not already present.
//
// Does nothing when idx is past the end of the queue. Returns undefined; all
// work happens asynchronously.
var crawlPage = function(idx, arr) {
  if (idx >= arr.length) {
    return;
  }

  var uri = arr[idx];
  console.time("voy");

  browser.visit(uri).then(function() {
    console.timeEnd("voy");

    // Poll until the page sets data-status="ready" on <body>.
    var intervalId = setInterval(function() {
      console.log("checking status");
      var status = browser.body.getAttribute('data-status');
      console.log(status);
      if (status !== "ready") {
        return;
      }
      clearInterval(intervalId);

      // Turn links into absolute links and queue the ones
      // we haven't already seen.
      browser.queryAll('a').forEach(function(link) {
        var href = link.getAttribute('href');
        if (!href) {
          // Anchors without an href have nothing to resolve or crawl.
          return;
        }
        var absUrl = url.resolve(uri, href);
        link.setAttribute('href', absUrl);
        if (arr.indexOf(absUrl) < 0) {
          arr.push(absUrl);
        }
      });

      // Save
      saveSnapshot(uri, browser.html());
      // Call again on the next iteration
      crawlPage(idx + 1, arr);
    }, 500);
  }, function(error) {
    // A page that fails to load must not stall the whole crawl:
    // report it and continue with the next queued URL.
    console.timeEnd("voy");
    console.error("Failed to load ", uri, ": ", error);
    crawlPage(idx + 1, arr);
  });
};
// Start crawling from the application's hashbang root.
crawlPage(0, ["http://localhost:4000/#!/"]);
// Page text captured with this gist (not part of the program):
// Sign up for free to join this conversation on GitHub.
// Already have an account? Sign in to comment