Created July 22, 2014 17:21
Capture screenshots (png) from multiple sites with phantomjs
// How to: save as capture.js and run with "phantomjs capture.js"
// Set up by modifying the URLS, PAGE_WIDTH and PAGE_HEIGHT constants!
// Hint: set PAGE_WIDTH or PAGE_HEIGHT to 0 to capture the full page!
// Modified version of the script at http://www.cameronjtinker.com/post/2011/09/26/Take-Screenshot-of-all-HTML-documents-in-a-folder-using-PhantomJS.aspx
var PAGE_WIDTH = 960;
var PAGE_HEIGHT = 640;
var URLS = [
"http://github.com", | |
"http://twitter.com" | |
]; | |
// phantomjs page object and helper flag | |
var page = require('webpage').create(), | |
loadInProgress = false, | |
pageIndex = 0; | |
// set clip and viewport based on PAGE_WIDTH and PAGE_HEIGHT constants | |
if (PAGE_WIDTH > 0 && PAGE_HEIGHT > 0) { | |
page.viewportSize = { | |
width: PAGE_WIDTH, | |
height: PAGE_HEIGHT | |
}; | |
page.clipRect = { | |
top: 0, | |
left: 0, | |
width: PAGE_WIDTH, | |
height: PAGE_HEIGHT | |
}; | |
} | |
// page handlers | |
page.onLoadStarted = function() { | |
loadInProgress = true; | |
console.log('page ' + (pageIndex + 1) + ' load started'); | |
}; | |
page.onLoadFinished = function() { | |
loadInProgress = false; | |
page.render("images/output" + (pageIndex + 1) + "_" + PAGE_WIDTH + "x" + PAGE_HEIGHT + ".png"); | |
console.log('page ' + (pageIndex + 1) + ' load finished'); | |
pageIndex++; | |
}; | |
// try to load or process a new page every 250ms | |
setInterval(function() { | |
if (!loadInProgress && pageIndex < URLS.length) { | |
console.log("image " + (pageIndex + 1)); | |
page.open(URLS[pageIndex]); | |
} | |
if (pageIndex == URLS.length) { | |
console.log("image render complete!"); | |
phantom.exit(); | |
} | |
}, 250); | |
console.log('Number of URLS: ' + URLS.length); |
Is it possible to give URLS a sitemap.xml with all the pages you want to crawl, instead of having to generate the list yourself?
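One way this could work, as a minimal sketch: PhantomJS ships an fs module, so if the sitemap.xml is first downloaded to disk next to the script, its <loc> entries can be pulled out with a regex. urlsFromSitemap is a hypothetical helper, not part of the gist:

var fs = require('fs');

// Hypothetical helper: extract every <loc> entry from a local sitemap.xml
// (assumes the sitemap has already been saved next to the script).
function urlsFromSitemap(path) {
    var xml = fs.read(path);
    var urls = [];
    var re = /<loc>\s*([^<\s]+)\s*<\/loc>/g;
    var match;
    while ((match = re.exec(xml)) !== null) {
        urls.push(match[1]);
    }
    return urls;
}

// Then replace the hard-coded list above with, e.g.:
// var URLS = urlsFromSitemap('sitemap.xml');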
Great code. Is there a way to have customized output file names with the domain included in the name? When making screenshots of thousands of websites, it is impossible to associate each screenshot with the proper website/domain otherwise.
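One possible approach, sketched under the assumption that the rest of the script stays as-is: derive the file name from the URL currently being rendered. fileNameFromURL is a hypothetical helper, not part of the original gist:

// Hypothetical helper: turn a URL into a filesystem-safe name,
// e.g. "http://twitter.com" -> "http_twitter_com".
function fileNameFromURL(url) {
    return url.replace(/[^a-z0-9]+/gi, '_').replace(/^_+|_+$/g, '');
}

// Then, inside onLoadFinished, render using the current URL instead of a counter:
// page.render("images/" + fileNameFromURL(URLS[pageIndex]) + "_" + PAGE_WIDTH + "x" + PAGE_HEIGHT + ".png");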
Sorry, I feel like using setInterval isn't the best way to implement this. You should be using callbacks.
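For reference, a minimal sketch of what a callback-driven version could look like (not the author's code); it reuses page, URLS, PAGE_WIDTH and PAGE_HEIGHT from the gist and would replace the setInterval loop and the render call in onLoadFinished:

// Sketch: render URLS sequentially by recursing in page.open's callback
// instead of polling on a timer.
function captureNext(index) {
    if (index >= URLS.length) {
        console.log("image render complete!");
        phantom.exit();
        return;
    }
    page.open(URLS[index], function(status) {
        if (status === 'success') {
            page.render("images/output" + (index + 1) + "_" + PAGE_WIDTH + "x" + PAGE_HEIGHT + ".png");
        } else {
            console.log("failed to load " + URLS[index]);
        }
        captureNext(index + 1);
    });
}

captureNext(0);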