Last active
December 10, 2015 19:29
-
-
Save jlewin/4482124 to your computer and use it in GitHub Desktop.
Using the FileSystem/File API to dump page contents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Customized version of the FileSystem API examples described at http://www.html5rocks.com/en/tutorials/file/filesystem/
// - Primarily used to dump the microdata example content portrayed in http://www.w3.org/html/wg/drafts/microdata/master/
// - Runs on any page containing one or more pre elements, but assumes that jQuery has been loaded as either part of the
//   page or after the fact via jQuerify.
// - On Windows 8, the resulting files appear to be created in "%LOCALAPPDATA%\Google\Chrome\User Data\Default\File System"
//   Example: C:\Users\<userName>\AppData\Local\Google\Chrome\User Data\Default\File System\001\p\00
/**
 * Success callback for webkitRequestFileSystem(): writes the text of every
 * <pre> element on the page to its own file in the sandboxed file system.
 *
 * Files are named preContent<N>.txt, where N is the zero-based position of the
 * <pre> element in document order.
 *
 * @param {FileSystem} fs - The file system handed back by the browser.
 */
function onInitFs(fs) {
    // Grab each pre element and write its contents to disk. jQuery passes the
    // element's index to the callback, which gives us unique file names.
    $('pre').each(function (index) {
        var content = $(this).text(),
            fileName = 'preContent' + index + '.txt';

        fs.root.getFile(fileName, { create: true }, function (fileEntry) {
            // Create a FileWriter object for our FileEntry
            fileEntry.createWriter(function (fileWriter) {
                var truncated = false,
                    failed = false;

                // writeend fires after truncate(), after write(), and also after a
                // failure, so track which phase just finished.
                fileWriter.onwriteend = function () {
                    if (failed) {
                        return;
                    }
                    if (!truncated) {
                        truncated = true;
                        // Create a new Blob and write to the output stream/FileWriter
                        fileWriter.write(new Blob([content], { type: 'text/plain' }));
                        return;
                    }
                    console.log('Write completed.');
                };

                fileWriter.onerror = function (e) {
                    // The event itself only stringifies to "[object ProgressEvent]";
                    // the actual failure reason lives on fileWriter.error.
                    var err = fileWriter.error;
                    failed = true;
                    console.log('Write failed: ' + (err ? (err.name || err.code) : e));
                };

                // The file may already exist from a previous run. Writing starts at
                // offset 0 without shrinking the file, so empty it first to avoid
                // leaving stale trailing bytes when the new content is shorter.
                fileWriter.truncate(0);
            }, errorHandler);
        }, errorHandler);
    });
}
// Request persistent storage and, once quota is granted, call into
// webkitRequestFileSystem and thus onInitFs.
// navigator.webkitPersistentStorage is the current entry point for quota
// requests; window.webkitStorageInfo is its deprecated predecessor and is only
// used as a fallback when the newer object is not available.
(function () {
    var REQUESTED_BYTES = 1024 * 1024;

    // Opens the persistent file system with however many bytes were granted.
    function openFileSystem(grantedBytes) {
        window.webkitRequestFileSystem(window.PERSISTENT, grantedBytes, onInitFs, errorHandler);
    }

    if (navigator.webkitPersistentStorage) {
        // The storage type is implied by the object, so no PERSISTENT argument here.
        navigator.webkitPersistentStorage.requestQuota(REQUESTED_BYTES, openFileSystem, errorHandler);
    } else {
        window.webkitStorageInfo.requestQuota(window.PERSISTENT, REQUESTED_BYTES, openFileSystem, errorHandler);
    }
}());
/**
 * Shared error callback for the quota, file system, file entry and writer
 * requests: logs a short label describing the failure.
 *
 * Errors are classified by their `name` first. The legacy numeric
 * `FileError.*_ERR` constants are only consulted when the `FileError` global
 * actually exists, so the handler itself cannot throw a ReferenceError in
 * browsers that no longer define it.
 *
 * @param {Object} e - The error passed by the failing API call; may carry
 *     `name` and/or a numeric `code`. A missing value is reported as unknown.
 */
function errorHandler(e) {
    // Error names mapped to the labels this handler has always printed.
    var labelsByName = {
        QuotaExceededError: 'QUOTA_EXCEEDED_ERR',
        NotFoundError: 'NOT_FOUND_ERR',
        SecurityError: 'SECURITY_ERR',
        InvalidModificationError: 'INVALID_MODIFICATION_ERR',
        InvalidStateError: 'INVALID_STATE_ERR'
    };
    var legacyLabels = [
        'QUOTA_EXCEEDED_ERR',
        'NOT_FOUND_ERR',
        'SECURITY_ERR',
        'INVALID_MODIFICATION_ERR',
        'INVALID_STATE_ERR'
    ];
    var msg = 'Unknown Error';
    var k;

    if (e && Object.prototype.hasOwnProperty.call(labelsByName, e.name)) {
        msg = labelsByName[e.name];
    } else if (e && typeof e.code === 'number' && typeof FileError !== 'undefined') {
        // Legacy path: compare against the FileError constants. The numeric
        // check keeps a missing code from matching a missing constant.
        for (k = 0; k < legacyLabels.length; k++) {
            if (e.code === FileError[legacyLabels[k]]) {
                msg = legacyLabels[k];
                break;
            }
        }
    }

    console.log('Error: ' + msg);
}
Whoops. It looks like the reason the content was HtmlEncoded is due to the fact that I was calling .html() on pre elements rather than .text(). It's not entirely obvious that .text() on pre is what is needed to extract the unadulterated html but at least I've finally made sense of the confusing behavior and resolved the mix-up.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Unexpectedly, the content written to disk is HTML-encoded. I haven't figured out where in the specs this behavior is documented, and switching to text/html doesn't have any impact on the writer.