Skip to content

Instantly share code, notes, and snippets.

@teramako
Created November 1, 2010 13:22
Show Gist options
  • Save teramako/658148 to your computer and use it in GitHub Desktop.
Save teramako/658148 to your computer and use it in GitHub Desktop.
iframeにテキストを読み込ませてHTMLパースを行う
/**
* HTML テキストをパースするサンプル
* @see http://mxr.mozilla.org/mozilla/source/browser/components/microsummaries/src/nsMicrosummaryService.js
*
* スクリプトや画像を読み込まず、パースのみをするはず。
* 当然ながら XPCOM の使用特権(Chrome特権)が必要
*/
/*
// Sample 1
httpGet("http://example.com",
function success(doc){ ....; },
function error(xhr) { ....; }
);
// Sample 2
htmlParse (
<><![CDATA[
<html>
<head>
<meta charset="utf-8">
<title>hoge</title>
</head>
<body>
<p>hoge<br>foo<br>bar</p>
</body>
</html>
]]></>.toString(),
"http://example.com/",
function(doc){
alert((new XMLSerializer).serializeToString(doc));
});
*/
/**
 * Fetch `url` with an asynchronous GET request and deliver a document.
 *
 * When the request completes with status 200, `loadCallback` receives
 * `responseXML` if the response produced one; otherwise the response text is
 * handed to `htmlParse` together with `url` and `loadCallback`.
 * For any other status, `errorCallback` (if given) receives the request object.
 *
 * @param {string} url - Address to request.
 * @param {Function} loadCallback - Called with the resulting document.
 * @param {Function} [errorCallback] - Called with the XMLHttpRequest on a non-200 status.
 */
function httpGet (url, loadCallback, errorCallback) {
  var request = new XMLHttpRequest();
  request.mozBackgroundRequest = true;
  request.open("GET", url, true);
  request.onreadystatechange = function handleStateChange () {
    // Only act once the request has finished (readyState 4).
    if (request.readyState != 4)
      return;

    if (request.status != 200) {
      if (errorCallback)
        errorCallback(request);
      return;
    }

    if (request.responseXML)
      loadCallback(request.responseXML);
    else
      htmlParse(request.responseText, url, loadCallback);
  };
  request.send(null);
}
/**
 * Normalize `uri` to a URI object and verify that its scheme is allowed.
 *
 * A string is converted with nsIIOService.newURI; any other value is used
 * as-is and must provide `schemeIs`. Only the "http", "https" and "file"
 * schemes are accepted.
 *
 * @param {string|Object} uri - URI spec string or URI object.
 * @returns {Object} The URI object (the argument itself when it was not a string).
 * @throws Cr.NS_ERROR_DOM_BAD_URI when the scheme is none of the accepted ones.
 */
function ensureURI (uri) {
  var target = uri;
  if (typeof target == "string") {
    var ioService = Cc["@mozilla.org/network/io-service;1"].getService(Ci.nsIIOService);
    target = ioService.newURI(target, null, null);
  }

  // Checked in this order; stops at the first scheme that matches.
  var isAllowed = ["http", "https", "file"].some(function (scheme) {
    return target.schemeIs(scheme);
  });
  if (isAllowed)
    return target;

  throw Cr.NS_ERROR_DOM_BAD_URI;
}
/**
 * Parse an HTML string into a document by loading it into a temporary
 * content iframe.
 *
 * The iframe is created collapsed with type="content", appended to
 * document.documentElement, and its docShell has JavaScript, DNS prefetch,
 * auth, plugins, meta redirects, images and subframes disabled before the
 * text is loaded. The text is wrapped in a UTF-8 input stream channel with
 * content type "text/html" and opened via nsIURILoader. On DOMContentLoaded
 * the callback receives `iframe.contentDocument.wrappedJSObject`, after which
 * the iframe is removed again (even if the callback throws).
 *
 * NOTE(review): wrappedJSObject hands the callback the content document
 * without the usual wrapper -- confirm callers treat it as untrusted content.
 *
 * @param {string} htmlText - HTML source to parse.
 * @param {string|Object} uri - URI for the channel; validated by ensureURI.
 * @param {Function} callback - Called with the parsed document.
 * @throws Whatever ensureURI or the XPCOM setup throws; the iframe is removed
 *         before a setup error is rethrown.
 */
function htmlParse (htmlText, uri, callback) {
  var baseURI = ensureURI(uri);
  var iframe = document.createElement("iframe");
  iframe.setAttribute("collapsed", true);
  iframe.setAttribute("type", "content");
  document.documentElement.appendChild(iframe);

  // Detach the iframe if it is still attached; safe to call more than once.
  function discardIframe () {
    if (iframe.parentNode)
      iframe.parentNode.removeChild(iframe);
  }

  function parseHandler () {
    // Remove the listener from the node it was registered on (the iframe),
    // referring to the handler by name instead of arguments.callee.
    iframe.removeEventListener("DOMContentLoaded", parseHandler, true);
    try {
      callback(iframe.contentDocument.wrappedJSObject);
    } finally {
      discardIframe();
    }
  }

  try {
    var docShell = iframe.docShell;
    var webNav = docShell.QueryInterface(Ci.nsIWebNavigation);
    webNav.stop(Ci.nsIWebNavigation.STOP_NETWORK);
    docShell.allowJavascript = false;
    docShell.allowDNSPrefetch = false;
    docShell.allowAuth = false;
    docShell.allowPlugins = false;
    docShell.allowMetaRedirects = false;
    docShell.allowImages = false;
    docShell.allowSubframes = false;

    // Wrap the text in an input stream, declared as UTF-8.
    var converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
      .createInstance(Ci.nsIScriptableUnicodeConverter);
    converter.charset = "UTF-8";
    var stream = converter.convertToInputStream(htmlText);

    // Build a channel that serves the stream as text/html under baseURI.
    var channel = Cc["@mozilla.org/network/input-stream-channel;1"]
      .createInstance(Ci.nsIInputStreamChannel);
    channel.setURI(baseURI);
    channel.contentStream = stream;
    var request = channel.QueryInterface(Ci.nsIRequest);
    request.loadFlags |= Ci.nsIRequest.LOAD_BACKGROUND;
    var baseChannel = channel.QueryInterface(Ci.nsIChannel);
    baseChannel.contentType = "text/html";
    baseChannel.contentCharset = "UTF-8";

    iframe.addEventListener("DOMContentLoaded", parseHandler, true);
    var uriLoader = Cc["@mozilla.org/uriloader;1"].getService(Ci.nsIURILoader);
    uriLoader.openURI(channel, true, docShell);
  } catch (e) {
    // Setup failed: do not leave the helper iframe behind in the document.
    iframe.removeEventListener("DOMContentLoaded", parseHandler, true);
    discardIframe();
    throw e;
  }
}
// vim: sw=2 ts=2 et:
/**
* HTML テキストをパースするサンプル
* nsIChannelを生成して直接開くバージョン
* @see http://mxr.mozilla.org/mozilla/source/browser/components/microsummaries/src/nsMicrosummaryService.js
*
* スクリプトや画像を読み込まず、パースのみをするはず。
* 当然ながら XPCOM の使用特権(Chrome特権)が必要
*/
// Shared XPCOM service handles, looked up once when this script is evaluated.
// IOService: nsIIOService instance; getHttpChannel uses it to create channels.
const IOService = Cc["@mozilla.org/network/io-service;1"].getService(Ci.nsIIOService);
// URILoader: nsIURILoader instance; htmlParse uses it to open a channel into the iframe's docShell.
const URILoader = Cc["@mozilla.org/uriloader;1"].getService(Ci.nsIURILoader);
/**
 * Open a channel for `url` and parse what it loads into a document.
 *
 * Creates the channel with getHttpChannel and passes it, together with
 * `callback`, to htmlParse.
 *
 * @param {string|Object} url - URL string or nsIURI to load.
 * @param {Function} callback - Passed through to htmlParse.
 * @throws Whatever getHttpChannel or htmlParse throws.
 */
function httpGet (url, callback) {
  htmlParse(getHttpChannel(url), callback);
}
/**
 * Create a channel for `url` and narrow it to the interface for its scheme.
 *
 * An nsIURI instance is opened with IOService.newChannelFromURI; anything
 * else is passed to IOService.newChannel as a spec. The scheme of the
 * channel's URI then selects the interface: nsIFileChannel for "file",
 * nsIHttpChannel for "http" and "https".
 *
 * @param {string|Object} url - URL spec or nsIURI.
 * @returns {Object} The channel after QueryInterface to the matching interface.
 * @throws Cr.NS_ERROR_DOM_BAD_URI for any other scheme.
 */
function getHttpChannel (url) {
  var channel;
  if (url instanceof Ci.nsIURI)
    channel = IOService.newChannelFromURI(url);
  else
    channel = IOService.newChannel(url, null, null);

  var target = channel.URI;
  if (target.schemeIs("file"))
    return channel.QueryInterface(Ci.nsIFileChannel);

  if (!(target.schemeIs("http") || target.schemeIs("https")))
    throw Cr.NS_ERROR_DOM_BAD_URI;

  return channel.QueryInterface(Ci.nsIHttpChannel);
}
/**
 * Load `channel` into a temporary content iframe and pass the resulting
 * document to `callback`.
 *
 * The iframe is created collapsed with type="content", appended to
 * document.documentElement, and its docShell has JavaScript, DNS prefetch,
 * auth, plugins, meta redirects, images and subframes disabled before the
 * channel is opened through URILoader. On DOMContentLoaded the callback
 * receives `iframe.contentDocument.wrappedJSObject`, after which the iframe
 * is removed again (even if the callback throws).
 *
 * NOTE(review): wrappedJSObject hands the callback the content document
 * without the usual wrapper -- confirm callers treat it as untrusted content.
 *
 * @param {Object} channel - Channel to open (e.g. the result of getHttpChannel).
 * @param {Function} callback - Called with the parsed document.
 * @throws Whatever the XPCOM setup throws; the iframe is removed before a
 *         setup error is rethrown.
 */
function htmlParse (channel, callback) {
  var iframe = document.createElement("iframe");
  iframe.setAttribute("collapsed", true);
  iframe.setAttribute("type", "content");
  document.documentElement.appendChild(iframe);

  // Detach the iframe if it is still attached; safe to call more than once.
  function discardIframe () {
    if (iframe.parentNode)
      iframe.parentNode.removeChild(iframe);
  }

  function parseHandler () {
    // Remove the listener from the node it was registered on (the iframe),
    // referring to the handler by name instead of arguments.callee.
    iframe.removeEventListener("DOMContentLoaded", parseHandler, true);
    try {
      callback(iframe.contentDocument.wrappedJSObject);
    } finally {
      discardIframe();
    }
  }

  try {
    var docShell = iframe.docShell;
    var webNav = docShell.QueryInterface(Ci.nsIWebNavigation);
    webNav.stop(Ci.nsIWebNavigation.STOP_NETWORK);
    docShell.allowJavascript = false;
    docShell.allowDNSPrefetch = false;
    docShell.allowAuth = false;
    docShell.allowPlugins = false;
    docShell.allowMetaRedirects = false;
    docShell.allowImages = false;
    docShell.allowSubframes = false;

    iframe.addEventListener("DOMContentLoaded", parseHandler, true);
    URILoader.openURI(channel, true, docShell);
  } catch (e) {
    // Setup failed: do not leave the helper iframe behind in the document.
    iframe.removeEventListener("DOMContentLoaded", parseHandler, true);
    discardIframe();
    throw e;
  }
}
// vim: sw=2 ts=2 et:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment