Created
July 28, 2011 22:23
-
-
Save khrona/1112719 to your computer and use it in GitHub Desktop.
Captures a web page to a JPEG (or to a series of JPEGs if the page is taller than the maximum image height). Uses Awesomium 1.6.2 (r159+) and its C API.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Awesomium/awesomium_capi.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#if defined(__WIN32__) || defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <unistd.h>
#endif
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// Address of the page that gets captured.
#define URL "http://en.wikipedia.org/wiki/World_Wide_Web"

// Starting width of the WebView. The view is later resized to the width the
// page reports, but pages without a minimum width need this base value.
#define WIDTH 1000

// Tallest image we will write; taller pages are split across several images.
#define MAX_IMAGE_HEIGHT 1000

// How long each update pass sleeps, in milliseconds.
#define SLEEP_MS 50

// Set to true to enable Flash plugins.
#define ENABLE_PLUGINS false

// Extra update passes forced once loading ends (gives Flash content and
// similar a chance to render).
#define FORCE_NUM_UPDATES 10

// Custom CSS that disables scrollbars.
#define SCROLLBAR_CSS "::-webkit-scrollbar { width: 0px; height: 0px; } "

// ---------------------------------------------------------------------------
// Program-wide state, written by onGetScrollData()
// ---------------------------------------------------------------------------

// Becomes true once a scroll-data callback has delivered values.
bool gotPageDimensions = false;
// Most recently reported content width.
int cWidth = 0;
// Most recently reported content height.
int cHeight = 0;
// Most recently reported vertical scroll offset.
int cScrollY = 0;
// Forward declarations of the callback and helper routines used by main().

// Scroll-data callback: stores the reported page dimensions and scroll
// offset in the global state.
void onGetScrollData(awe_webview* caller, int contentWidth, int contentHeight,
                     int preferredWidth, int scrollX, int scrollY);

// Sleeps for sleepTime milliseconds.
void sleepMs(int sleepTime);

// Sleeps briefly, then updates the WebCore.
void updateCore();

// Resizes the WebView and waits until the resize has finished.
void resizeNow(awe_webview* webView, int width, int height);

// Scrolls the page to vertical offset y and refreshes the scroll data.
void scrollToNow(awe_webview* webView, int y);

// Renders the WebView and saves the result as a JPEG named filename.
void renderTo(awe_webview* webView, const char* filename);
/** | |
* Main Program: Automatically renders an entire web-page to | |
* a JPEG (or series of JPEGs if the web-page is | |
* greater than MAX_IMAGE_HEIGHT). | |
*/ | |
int main() | |
{ | |
// Disable scrollbar rendering | |
awe_string* custom_css_str = awe_string_create_from_ascii( | |
SCROLLBAR_CSS, | |
strlen(SCROLLBAR_CSS)); | |
// Create our WebCore singleton with plugins enabled and our custom CSS | |
awe_webcore_initialize(ENABLE_PLUGINS, true, false, awe_string_empty(), | |
awe_string_empty(), awe_string_empty(), | |
AWE_LL_NORMAL, false, awe_string_empty(), true, | |
awe_string_empty(), awe_string_empty(), | |
awe_string_empty(), awe_string_empty(), | |
awe_string_empty(), awe_string_empty(), false, 0, | |
false, false, custom_css_str); | |
awe_string_destroy(custom_css_str); | |
/** | |
* Create a new WebView instance with a certain width and height, using | |
* the WebCore we just created. | |
*/ | |
awe_webview* webView = awe_webcore_create_webview(WIDTH, MAX_IMAGE_HEIGHT, | |
false); | |
// Bind our scroll data callback | |
awe_webview_set_callback_get_scroll_data(webView, onGetScrollData); | |
// Create our URL string | |
awe_string* url_str = awe_string_create_from_ascii(URL, strlen(URL)); | |
// Load the URL into our WebView instance | |
awe_webview_load_url(webView, url_str, awe_string_empty(), | |
awe_string_empty(), awe_string_empty()); | |
// Destroy our URL string | |
awe_string_destroy(url_str); | |
printf("Page is now loading...\n"); | |
// Wait for our WebView to finish loading | |
while(awe_webview_is_loading_page(webView)) | |
updateCore(); | |
// Force a couple updates (for Flash loading, etc.) | |
for(int i = 0; i < FORCE_NUM_UPDATES; i++) | |
updateCore(); | |
printf("Page has finished loading.\n"); | |
// Get the page dimensions now. | |
awe_webview_request_scroll_data(webView, awe_string_empty()); | |
while(!gotPageDimensions) | |
updateCore(); | |
// If our content height is larger than the max image height, | |
// we will split the render up into multiple images | |
if(cHeight > MAX_IMAGE_HEIGHT) | |
{ | |
resizeNow(webView, cWidth, MAX_IMAGE_HEIGHT); | |
int imgCount = 0; | |
for(int i = 0; i < cHeight; i += MAX_IMAGE_HEIGHT, imgCount++) | |
{ | |
if(cHeight - i < MAX_IMAGE_HEIGHT) | |
resizeNow(webView, cWidth, cHeight - i); | |
char filename[50]; | |
int len = sprintf(filename, "./result_%d.jpg", imgCount); | |
scrollToNow(webView, i); | |
renderTo(webView, filename); | |
} | |
} | |
else // Otherwise, just render it all to a single image | |
{ | |
resizeNow(webView, cWidth, cHeight); | |
renderTo(webView, "./result.jpg"); | |
} | |
// Destroy our WebView instance | |
awe_webview_destroy(webView); | |
updateCore(); | |
// Destroy our WebCore instance | |
awe_webcore_shutdown(); | |
return 0; | |
} | |
// Scroll-data callback bound to the WebView in main().
//
// Copies the reported content size and vertical scroll offset into the
// program's globals, then sets gotPageDimensions so that loops waiting on
// that flag can stop. caller, preferredWidth and scrollX are not used.
void onGetScrollData(awe_webview* caller,
                     int contentWidth,
                     int contentHeight,
                     int preferredWidth,
                     int scrollX,
                     int scrollY)
{
    (void)caller;
    (void)preferredWidth;
    (void)scrollX;

    cScrollY = scrollY;
    cHeight = contentHeight;
    cWidth = contentWidth;

    // Set the flag last, after all values are in place.
    gotPageDimensions = true;
}
// Sleep for sleepTime milliseconds.
//
// A negative sleepTime returns immediately instead of being converted to a
// very large unsigned delay. Windows uses Sleep(), Apple platforms use
// usleep(), and other platforms use nanosleep() when the POSIX feature level
// in effect declares it. Where none of these is available the call is a
// no-op.
void sleepMs(int sleepTime)
{
    if(sleepTime < 0)
        return;

#if defined(__WIN32__) || defined(_WIN32)
    Sleep((DWORD)sleepTime);
#elif defined(__APPLE__)
    // Convert before multiplying so the arithmetic is unsigned.
    usleep((useconds_t)sleepTime * 1000);
#elif defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L
    struct timespec delay;
    delay.tv_sec = sleepTime / 1000;
    delay.tv_nsec = (long)(sleepTime % 1000) * 1000000L;
    nanosleep(&delay, NULL);
#else
    (void)sleepTime;
#endif
}
// Update the WebCore | |
void updateCore() | |
{ | |
// Sleep a little bit to let background threads work | |
sleepMs(SLEEP_MS); | |
awe_webcore_update(); | |
} | |
// Resize the WebView to width x height and block until it is done.
//
// Issues the resize request, then keeps running update passes for as long as
// awe_webview_is_resizing() reports that the resize is still in progress.
void resizeNow(awe_webview* webView, int width, int height)
{
    // NOTE(review): the trailing `true, 1000` presumably mean "wait for
    // repaint" and a timeout in milliseconds -- confirm against the C API.
    awe_webview_resize(webView, width, height, true, 1000);

    for(;;)
    {
        if(!awe_webview_is_resizing(webView))
            break;

        updateCore();
    }
}
// Scroll the page immediately to vertical offset y.
//
// Does nothing when the last reported scroll offset (cScrollY) already
// equals y. Otherwise it runs window.scrollTo(0, y) in the page, then
// requests fresh scroll data and waits until onGetScrollData() has
// delivered it.
void scrollToNow(awe_webview* webView, int y)
{
    if(cScrollY == y)
        return;

    // Build the script with a bounded write; refuse to run a truncated one.
    char buffer[50];
    int len = snprintf(buffer, sizeof buffer, "window.scrollTo(0, %d);", y);
    if(len < 0 || (size_t)len >= sizeof buffer)
        return;

    awe_string* js_str = awe_string_create_from_ascii(buffer, len);

    // We use execute_javascript_with_result to force it to run
    // synchronously
    awe_jsvalue* result = awe_webview_execute_javascript_with_result(webView,
        js_str, awe_string_empty(), 1000);

    awe_jsvalue_destroy(result);
    awe_string_destroy(js_str);

    // For extra measure, we'll update the scroll data now to make
    // sure we've got the most recent copy of the page
    gotPageDimensions = false;
    awe_webview_request_scroll_data(webView, awe_string_empty());
    while(!gotPageDimensions)
        updateCore();
}
// Render the WebView and save the result as a JPEG (quality 90) at filename.
//
// Prints a confirmation to stdout when the image was saved. If the render
// buffer is unavailable or the save is reported as failed, a message is
// written to stderr instead, so a failed capture is never reported as saved.
void renderTo(awe_webview* webView, const char* filename)
{
    const awe_renderbuffer* renderBuffer = awe_webview_render(webView);

    if(renderBuffer == NULL)
    {
        fprintf(stderr, "Could not render the page for %s.\n", filename);
        return;
    }

    awe_string* filename_str = awe_string_create_from_ascii(filename,
                                                            strlen(filename));

    // Save our RenderBuffer directly to a JPEG image and report the outcome
    if(awe_renderbuffer_save_to_jpeg(renderBuffer, filename_str, 90))
        printf("Saved a render of the page to %s.\n", filename);
    else
        fprintf(stderr, "Failed to save a render of the page to %s.\n",
                filename);

    awe_string_destroy(filename_str);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment