Created
November 1, 2010 13:22
-
-
Save teramako/658148 to your computer and use it in GitHub Desktop.
iframeにテキストを読み込ませてHTMLパースを行う
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Sample: parsing HTML text
 * @see http://mxr.mozilla.org/mozilla/source/browser/components/microsummaries/src/nsMicrosummaryService.js
 *
 * This is expected to only parse the markup, without loading scripts or images.
 * Naturally, it requires the privilege to use XPCOM (chrome privileges).
 */
/*
// Sample 1
httpGet("http://example.com",
  function success(doc){ ....; },
  function error(xhr) { ....; }
);
// Sample 2
htmlParse (
  <><![CDATA[
    <html>
      <head>
        <meta charset="utf-8">
        <title>hoge</title>
      </head>
      <body>
        <p>hoge<br>foo<br>bar</p>
      </body>
    </html>
  ]]></>.toString(),
  "http://example.com/",
  function(doc){
    alert((new XMLSerializer).serializeToString(doc));
  });
*/
/**
 * Request `url` asynchronously with GET and deliver the resulting document.
 *
 * If the response was already parsed into `xhr.responseXML`, that document is
 * passed straight to `loadCallback`; otherwise the response text is handed to
 * htmlParse() together with `url` and `loadCallback`.
 *
 * @param {string} url URL to fetch.
 * @param {function} loadCallback Called with the parsed document on success.
 * @param {function} [errorCallback] Called with the XMLHttpRequest when the
 *   request finishes unsuccessfully. Optional; failures are ignored without it.
 */
function httpGet (url, loadCallback, errorCallback) {
  var xhr = new XMLHttpRequest();
  xhr.mozBackgroundRequest = true;
  xhr.open("GET", url, true);
  xhr.onreadystatechange = function () {
    // Only act once the request has completed.
    if (xhr.readyState !== 4) {
      return;
    }
    // file: responses carry no HTTP status, so the request reports 0 even when
    // the load worked. Accept that case when a body was actually received, so
    // the file: scheme is usable here as well.
    var isLocalFile = /^file:/i.test(url);
    var succeeded = xhr.status === 200 ||
                    (isLocalFile && xhr.status === 0 && !!xhr.responseText);
    if (!succeeded) {
      if (errorCallback) {
        errorCallback(xhr);
      }
      return;
    }
    if (xhr.responseXML) {
      loadCallback(xhr.responseXML);
    } else {
      htmlParse(xhr.responseText, url, loadCallback);
    }
  };
  xhr.send(null);
}
/**
 * Normalize `uri` to a URI object and verify that its scheme is allowed.
 *
 * Strings are converted through the IO service's newURI(); any other value is
 * used as given and must expose schemeIs().
 *
 * @param {string|nsIURI} uri URI spec or URI object.
 * @returns {nsIURI} The URI object (the input itself when it was not a string).
 * @throws Cr.NS_ERROR_DOM_BAD_URI when `uri` is missing or its scheme is not
 *   http, https or file.
 */
function ensureURI (uri) {
  var resolved = uri;
  if (typeof resolved === "string") {
    resolved = Cc["@mozilla.org/network/io-service;1"]
      .getService(Ci.nsIIOService)
      .newURI(resolved, null, null);
  }
  // A missing URI is reported with the same error as a disallowed one rather
  // than failing with a TypeError on the schemeIs() call below.
  if (!resolved) {
    throw Cr.NS_ERROR_DOM_BAD_URI;
  }
  var isAllowed = resolved.schemeIs("http") ||
                  resolved.schemeIs("https") ||
                  resolved.schemeIs("file");
  if (!isAllowed) {
    throw Cr.NS_ERROR_DOM_BAD_URI;
  }
  return resolved;
}
/**
 * Parse `htmlText` as an HTML document inside a temporary, collapsed
 * content iframe and pass the resulting document to `callback`.
 *
 * The text is wrapped in a UTF-8 input-stream channel that reports `uri` as
 * its URI and "text/html" as its content type, then loaded into the iframe's
 * docShell. JavaScript, DNS prefetch, auth, plugins, meta redirects, images
 * and subframes are switched off on that docShell before loading.
 *
 * @param {string} htmlText HTML markup to parse.
 * @param {string|nsIURI} uri URI for the document; validated by ensureURI().
 * @param {function} callback Called with `contentDocument.wrappedJSObject`
 *   once DOMContentLoaded fires. The iframe is removed afterwards, even when
 *   the callback throws.
 * @throws Whatever ensureURI() or the loading setup throws; in the latter
 *   case the temporary iframe is removed before the error propagates.
 */
function htmlParse (htmlText, uri, callback) {
  uri = ensureURI(uri);

  var iframe = document.createElement("iframe");
  iframe.setAttribute("collapsed", true);
  iframe.setAttribute("type", "content");
  document.documentElement.appendChild(iframe);

  var released = false;
  // Detach the listener and the iframe exactly once, whichever path gets here.
  function releaseIframe () {
    if (released) {
      return;
    }
    released = true;
    iframe.removeEventListener("DOMContentLoaded", parseHandler, true);
    document.documentElement.removeChild(iframe);
  }

  function parseHandler () {
    // Unregister from the iframe, the node the listener was added to, and
    // refer to the handler by name instead of arguments.callee (which is
    // unavailable in strict mode).
    iframe.removeEventListener("DOMContentLoaded", parseHandler, true);
    try {
      callback(iframe.contentDocument.wrappedJSObject);
    } finally {
      releaseIframe();
    }
  }

  try {
    // Stop any network activity of the fresh docShell, then lock it down.
    var webNav = iframe.docShell.QueryInterface(Ci.nsIWebNavigation);
    webNav.stop(Ci.nsIWebNavigation.STOP_NETWORK);
    iframe.docShell.allowJavascript = false;
    iframe.docShell.allowDNSPrefetch = false;
    iframe.docShell.allowAuth = false;
    iframe.docShell.allowPlugins = false;
    iframe.docShell.allowMetaRedirects = false;
    iframe.docShell.allowImages = false;
    iframe.docShell.allowSubframes = false;

    // Turn the text into a UTF-8 input stream.
    var converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
      .createInstance(Ci.nsIScriptableUnicodeConverter);
    converter.charset = "UTF-8";
    var stream = converter.convertToInputStream(htmlText);

    // Wrap the stream in a channel that presents itself as text/html at `uri`.
    var channel = Cc["@mozilla.org/network/input-stream-channel;1"]
      .createInstance(Ci.nsIInputStreamChannel);
    channel.setURI(uri);
    channel.contentStream = stream;
    var request = channel.QueryInterface(Ci.nsIRequest);
    request.loadFlags |= Ci.nsIRequest.LOAD_BACKGROUND;
    var baseChannel = channel.QueryInterface(Ci.nsIChannel);
    baseChannel.contentType = "text/html";
    baseChannel.contentCharset = "UTF-8";

    iframe.addEventListener("DOMContentLoaded", parseHandler, true);
    var uriLoader = Cc["@mozilla.org/uriloader;1"].getService(Ci.nsIURILoader);
    uriLoader.openURI(channel, true, iframe.docShell);
  } catch (e) {
    // Do not leave the helper iframe behind when the load could not start.
    releaseIframe();
    throw e;
  }
}
// vim: sw=2 ts=2 et: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Sample: parsing HTML text
 * Variant that creates an nsIChannel and opens it directly
 * @see http://mxr.mozilla.org/mozilla/source/browser/components/microsummaries/src/nsMicrosummaryService.js
 *
 * This is expected to only parse the markup, without loading scripts or images.
 * Naturally, it requires the privilege to use XPCOM (chrome privileges).
 */
// XPCOM services looked up once and shared by the functions in this file:
// IOService creates channels, URILoader opens them into a docShell.
const IOService = Cc["@mozilla.org/network/io-service;1"]
  .getService(Ci.nsIIOService);
const URILoader = Cc["@mozilla.org/uriloader;1"]
  .getService(Ci.nsIURILoader);
/**
 * Load `url` through a channel and hand the parsed document to `callback`.
 *
 * Builds the channel with getHttpChannel() and passes it to htmlParse();
 * anything those functions throw propagates to the caller.
 *
 * @param {string|nsIURI} url Location to load.
 * @param {function} callback Receives the parsed document.
 */
function httpGet (url, callback) {
  htmlParse(getHttpChannel(url), callback);
}
/**
 * Create a channel for `url` and return it queried to the interface that
 * matches its scheme.
 *
 * @param {string|nsIURI} url URI object (used via newChannelFromURI) or a
 *   spec string (used via newChannel).
 * @returns The channel as nsIFileChannel for file:, or as nsIHttpChannel for
 *   http: and https:.
 * @throws Cr.NS_ERROR_DOM_BAD_URI for any other scheme.
 */
function getHttpChannel (url) {
  var channel;
  if (url instanceof Ci.nsIURI) {
    channel = IOService.newChannelFromURI(url);
  } else {
    channel = IOService.newChannel(url, null, null);
  }

  // Decide on the scheme of the URI the channel itself reports.
  var target = channel.URI;
  if (target.schemeIs("file")) {
    return channel.QueryInterface(Ci.nsIFileChannel);
  }
  if (target.schemeIs("http") || target.schemeIs("https")) {
    return channel.QueryInterface(Ci.nsIHttpChannel);
  }
  throw Cr.NS_ERROR_DOM_BAD_URI;
}
/**
 * Load `channel` into a temporary, collapsed content iframe and pass the
 * parsed document to `callback`.
 *
 * JavaScript, DNS prefetch, auth, plugins, meta redirects, images and
 * subframes are switched off on the iframe's docShell before the channel is
 * opened.
 *
 * @param {nsIChannel} channel Channel to open, e.g. from getHttpChannel().
 * @param {function} callback Called with `contentDocument.wrappedJSObject`
 *   once DOMContentLoaded fires. The iframe is removed afterwards, even when
 *   the callback throws.
 * @throws Whatever the loading setup throws; the temporary iframe is removed
 *   before the error propagates.
 */
function htmlParse (channel, callback) {
  var iframe = document.createElement("iframe");
  iframe.setAttribute("collapsed", true);
  iframe.setAttribute("type", "content");
  document.documentElement.appendChild(iframe);

  var released = false;
  // Detach the listener and the iframe exactly once, whichever path gets here.
  function releaseIframe () {
    if (released) {
      return;
    }
    released = true;
    iframe.removeEventListener("DOMContentLoaded", parseHandler, true);
    document.documentElement.removeChild(iframe);
  }

  function parseHandler () {
    // Unregister from the iframe, the node the listener was added to, and
    // refer to the handler by name instead of arguments.callee (which is
    // unavailable in strict mode).
    iframe.removeEventListener("DOMContentLoaded", parseHandler, true);
    try {
      callback(iframe.contentDocument.wrappedJSObject);
    } finally {
      releaseIframe();
    }
  }

  try {
    // Stop any network activity of the fresh docShell, then lock it down.
    var webNav = iframe.docShell.QueryInterface(Ci.nsIWebNavigation);
    webNav.stop(Ci.nsIWebNavigation.STOP_NETWORK);
    iframe.docShell.allowJavascript = false;
    iframe.docShell.allowDNSPrefetch = false;
    iframe.docShell.allowAuth = false;
    iframe.docShell.allowPlugins = false;
    iframe.docShell.allowMetaRedirects = false;
    iframe.docShell.allowImages = false;
    iframe.docShell.allowSubframes = false;

    iframe.addEventListener("DOMContentLoaded", parseHandler, true);
    URILoader.openURI(channel, true, iframe.docShell);
  } catch (e) {
    // Do not leave the helper iframe behind when the load could not start.
    releaseIframe();
    throw e;
  }
}
// vim: sw=2 ts=2 et: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment