-
-
Save grim-reapper/55c92fb9ac7bc7326444 to your computer and use it in GitHub Desktop.
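PhantomJS crawler that detects mixed content: it follows a site's root-relative links and lists every http(s) resource URL the visited pages request.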
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Crawl state shared by open() and crawl().

// Every distinct resource URL starting with "http" that a visited page requested.
var uniqUrls = [];
// Queue of page URLs that still have to be opened.
var urlsToBrowse = [];
// Page URLs that have already been handed to page.open(), whether or not they loaded.
var browsedUrls = [];
function open(url, callback) { | |
var page = require('webpage').create(); | |
page.settings.loadImages = true; | |
page.onResourceReceived = function (response) { | |
if (response.stage == "start" && response.url.substr(0, 4) === "http" && uniqUrls.indexOf(response.url) === -1) { | |
uniqUrls.push(response.url); | |
} | |
} | |
page.open(url, function(status) { | |
browsedUrls.push(url); | |
if (status !== "success") { | |
if (url === args[1]) { | |
console.log("Couldn't open " + url); | |
phantom.exit(1); | |
} | |
else { | |
console.log("fail " + url); | |
} | |
} | |
else { | |
var uniqHrefs = page.evaluate(function() { | |
var uniqHrefs = []; | |
var l = document.links; | |
for(var i=0; i<l.length; i++) { | |
var href = l[i].getAttribute("href"); | |
if (href && href.length > 1 && href.charAt(0) == '/' && href.charAt(1) != '/') { | |
href = href.replace(/\/$/, ''); | |
if (uniqHrefs.indexOf(href) === -1) { | |
uniqHrefs.push(href); | |
} | |
} | |
} | |
return uniqHrefs; | |
}); | |
if (uniqHrefs) { | |
uniqHrefs.forEach(function(href) { | |
var url_without_path = url.split("/").slice(0,3).join("/"); | |
var urlFromHref = url_without_path + href; | |
if (browsedUrls.indexOf(urlFromHref) === -1 && urlsToBrowse.indexOf(urlFromHref) === -1) { | |
urlsToBrowse.push(urlFromHref); | |
} | |
}); | |
} | |
} | |
page.close(); | |
callback.apply(); | |
}); | |
} | |
/**
 * Drives the crawl one step at a time.
 *
 * While URLs are queued, takes the next one off the front of urlsToBrowse and
 * opens it, passing itself as the continuation. Once the queue is empty, prints
 * every collected resource URL in sorted order (prefixed with "uniq ") and exits
 * with status 0.
 */
function crawl() {
    if (urlsToBrowse.length === 0) {
        // Nothing left to visit: emit the report and finish.
        uniqUrls.sort();
        uniqUrls.forEach(function (resourceUrl) {
            console.log("uniq " + resourceUrl);
        });
        phantom.exit(0);
        return;
    }

    var url = urlsToBrowse.shift();
    console.log("open " + url);
    open(url, crawl);
}
// Entry point: args[0] is the script name, args[1] the URL to start crawling from.
var args = require('system').args;
if (args.length === 1) {
    console.log('Please specify a URL.');
    phantom.exit(1);
} else {
    // Only start crawling when a URL was given. phantom.exit() does not stop the
    // script immediately, so without this branch an undefined URL would be
    // queued and opened after the usage error.
    urlsToBrowse.push(args[1]);
    crawl();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment