Skip to content

Instantly share code, notes, and snippets.

@cowboy
Created December 16, 2010 02:44

Revisions

  1. cowboy revised this gist Aug 8, 2011. No changes.
  2. cowboy revised this gist Aug 8, 2011. 1 changed file with 47 additions and 63 deletions.
    110 changes: 47 additions & 63 deletions jquery.ba-htmldoc.js
    Original file line number Diff line number Diff line change
    @@ -1,89 +1,73 @@
    /*!
    * jQuery htmlDoc "fixer" - v0.2pre - 12/15/2010
    * jQuery htmlDoc "fixer" - v0.2pre - 8/8/2011
    * http://benalman.com/projects/jquery-misc-plugins/
    *
    *
    * Copyright (c) 2010 "Cowboy" Ben Alman
    * Dual licensed under the MIT and GPL licenses.
    * http://benalman.com/about/license/
    */

    (function($){

    var // RegExp that matches opening and closing HTML, HEAD, BODY tags.
    // $1 = slash, $2 = tag name, $3 = attributes
    rtag = /<(\/?)(html|head|body)(\s+[^>]*)?>/ig,

    // Unique id prefix for selecting placeholder elements.
    prefix = 'hd' + +new Date();

    $.htmlDoc = function( str ) {
    var // A collection of "intended" elements that can't be rendered
    // cross-browser with .innerHTML, for which placeholders must be
    // swapped.
    elems = $([]),

    // Input HTML string, parsed to include placeholder DIVs.
    parsed,

    // A node under which a temporary DOM tree can be constructed.
    root;

    // Replace HTML, HEAD, BODY tags with DIV placeholders.
    parsed = str.replace( rtag, function( tag, slash, name, attrs ) {

    var // Current intended / placeholder element index.
    len = elems.length,

    // Temporary object in which to hold attributes.
    obj = {};

    (function($) {
    // RegExp that matches opening and closing browser-stripped tags.
    // $1 = slash, $2 = tag name, $3 = attributes
    var matchTag = /<(\/?)(html|head|body|title|base|meta)(\s+[^>]*)?>/ig;
    // Unique id prefix for selecting placeholder elements.
    var prefix = 'hd' + +new Date;
    // A node under which a temporary DOM tree can be constructed.
    var parent;

    $.htmlDoc = function(html) {
    // A collection of "intended" elements that can't be rendered cross-browser
    // with .innerHTML, for which placeholders must be swapped.
    var elems = $();
    // Input HTML string, parsed to include placeholder DIVs. Replace HTML,
    // HEAD, BODY tags with DIV placeholders.
    var htmlParsed = html.replace(matchTag, function(tag, slash, name, attrs) {
    // Temporary object in which to hold attributes.
    var obj = {};
    // If this is an opening tag...
    if ( !slash ) {

    // Add an element of this name into the collection of elements. Note
    // that if a string of attributes is added at this point, it fails.
    elems = elems.add( '<' + name + '/>' );

    elems = elems.add('<' + name + '/>');
    // If the original tag had attributes, create a temporary div with
    // those attributes. Then, copy each attribute from the temporary div
    // over to the temporary object.
    if ( attrs ) {
    $.each( $( '<div' + attrs + '/>' )[0].attributes, function(i,v){
    obj[ v.name ] = v.value;
    $.each($('<div' + attrs + '/>')[0].attributes, function(i, attr) {
    obj[attr.name] = attr.value;
    });
    }

    // Set the attributes of the intended object based on the attributes
    // copied in the previous step.
    elems.eq( len ).attr( obj );
    elems.eq(-1).attr(obj);
    }

    // A placeholder div with a unique id replaces the intended element's
    // tag in the parsed HTML string.
    return '<' + slash + 'div'
    + ( slash ? '' : ' id="' + prefix + len + '"' ) + '>';
    + (slash ? '' : ' id="' + prefix + (elems.length - 1) + '"') + '>';
    });

    // If placeholder elements were necessary...
    if ( elems.length ) {

    // Create the root node and append the parsed, place-held HTML.
    root = $('<div/>').html( parsed );

    // Replace each placeholder element with its intended element.
    $.each( elems, function(i,v){
    var elem = root.find( '#' + prefix + i ).before( elems[i] );
    elems.eq(i).html( elem.contents() );
    elem.remove();
    });

    // Return the topmost intended element(s), sans text nodes.
    return root.children();

    // If no placeholder elements were necessary, just return normal
    // jQuery-parsed HTML.
    if ( !elems.length ) {
    return $(html);
    }
    // Create parent node if it hasn't been created yet.
    if ( !parent ) {
    parent = $('<div/>');
    }

    // No placeholder elements were necessary, so just return a normal
    // jQuery-parsed HTML string.
    return $(str);
    // Create the parent node and append the parsed, place-held HTML.
    parent.html(htmlParsed);
    // Replace each placeholder element with its intended element.
    $.each(elems, function(i) {
    var elem = parent.find('#' + prefix + i).before(elems[i]);
    elems.eq(i).html(elem.contents());
    elem.remove();
    });
    // Return the topmost intended element(s), sans text nodes, while removing
    // them from the parent element with unwrap.
    return parent.children().unwrap();
    };
    })(jQuery);

    }(jQuery));
  3. cowboy revised this gist Dec 16, 2010. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions jquery.ba-htmldoc.js
    Original file line number Diff line number Diff line change
    @@ -13,7 +13,7 @@
    // $1 = slash, $2 = tag name, $3 = attributes
    rtag = /<(\/?)(html|head|body)(\s+[^>]*)?>/ig,

    // Unique id/class prefix for selecting placeholder elements.
    // Unique id prefix for selecting placeholder elements.
    prefix = 'hd' + +new Date();

    $.htmlDoc = function( str ) {
    @@ -58,8 +58,8 @@
    elems.eq( len ).attr( obj );
    }

    // A placeholder div with a unique id and class replaces the intended
    // element's tag in the parsed HTML string.
    // A placeholder div with a unique id replaces the intended element's
    // tag in the parsed HTML string.
    return '<' + slash + 'div'
    + ( slash ? '' : ' id="' + prefix + len + '"' ) + '>';
    });
  4. cowboy revised this gist Dec 16, 2010. 1 changed file with 53 additions and 18 deletions.
    71 changes: 53 additions & 18 deletions jquery.ba-htmldoc.js
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,5 @@
    /*!
    * jQuery htmlDoc "fixer" - v0.1pre - 12/15/2010
    * jQuery htmlDoc "fixer" - v0.2pre - 12/15/2010
    * http://benalman.com/projects/jquery-misc-plugins/
    *
    * Copyright (c) 2010 "Cowboy" Ben Alman
    @@ -9,45 +9,80 @@

    (function($){

    var rtag = /<(\/?)(html|head|body)(\s+[^>]*)?>/ig,
    base = 'hd' + +new Date();
    var // RegExp that matches opening and closing HTML, HEAD, BODY tags.
    // $1 = slash, $2 = tag name, $3 = attributes
    rtag = /<(\/?)(html|head|body)(\s+[^>]*)?>/ig,

    // Unique id/class prefix for selecting placeholder elements.
    prefix = 'hd' + +new Date();

    $.htmlDoc = function( str ) {
    var elems = $([]),
    parsed,
    html;
    var // A collection of "intended" elements that can't be rendered
    // cross-browser with .innerHTML, for which placeholders must be
    // swapped.
    elems = $([]),

    // Input HTML string, parsed to include placeholder DIVs.
    parsed,

    // A node under which a temporary DOM tree can be constructed.
    root;

    // Replace HTML, HEAD, BODY tags with DIV placeholders.
    parsed = str.replace( rtag, function( tag, slash, name, attrs ) {
    var obj = {};

    var // Current intended / placeholder element index.
    len = elems.length,

    // Temporary object in which to hold attributes.
    obj = {};

    // If this is an opening tag...
    if ( !slash ) {

    // Add an element of this name into the collection of elements. Note
    // that if a string of attributes is added at this point, it fails.
    elems = elems.add( '<' + name + '/>' );

    attrs && $.each( $( '<div' + attrs + '/>' )[0].attributes, function(i,v){
    obj[ v.name ] = v.value;
    });
    // If the original tag had attributes, create a temporary div with
    // those attributes. Then, copy each attribute from the temporary div
    // over to the temporary object.
    if ( attrs ) {
    $.each( $( '<div' + attrs + '/>' )[0].attributes, function(i,v){
    obj[ v.name ] = v.value;
    });
    }

    elems.eq( elems.length - 1 ).attr( obj );
    // Set the attributes of the intended object based on the attributes
    // copied in the previous step.
    elems.eq( len ).attr( obj );
    }

    return '<' + slash + 'div' + ( slash ? ''
    : ' class="' + base + '" id="' + base + ( elems.length - 1 ) + '"' ) + '>';
    // A placeholder div with a unique id and class replaces the intended
    // element's tag in the parsed HTML string.
    return '<' + slash + 'div'
    + ( slash ? '' : ' id="' + prefix + len + '"' ) + '>';
    });

    // If placeholder elements were necessary...
    if ( elems.length ) {
    html = $('<div/>');

    $( parsed ).filter( '.' + base ).appendTo( html );
    // Create the root node and append the parsed, place-held HTML.
    root = $('<div/>').html( parsed );

    // Replace each placeholder element with its intended element.
    $.each( elems, function(i,v){
    var elem = html.find( '#' + base + i ).before( elems[i] );
    elems.eq(i).append( elem.contents() );
    var elem = root.find( '#' + prefix + i ).before( elems[i] );
    elems.eq(i).html( elem.contents() );
    elem.remove();
    });

    return html.children();
    // Return the topmost intended element(s), sans text nodes.
    return root.children();
    }

    // No placeholder elements were necessary, so just return a normal
    // jQuery-parsed HTML string.
    return $(str);
    };

  5. cowboy created this gist Dec 16, 2010.
    54 changes: 54 additions & 0 deletions jquery.ba-htmldoc.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,54 @@
    /*!
    * jQuery htmlDoc "fixer" - v0.1pre - 12/15/2010
    * http://benalman.com/projects/jquery-misc-plugins/
    *
    * Copyright (c) 2010 "Cowboy" Ben Alman
    * Dual licensed under the MIT and GPL licenses.
    * http://benalman.com/about/license/
    */

    // jQuery htmlDoc plugin: defines $.htmlDoc, which parses an HTML document
    // string into a jQuery collection while keeping its HTML, HEAD and BODY
    // elements. Those tags are swapped for placeholder DIVs before parsing and
    // the intended elements are swapped back in afterwards.
    (function($){

    // rtag: matches opening and closing HTML, HEAD and BODY tags
    // (case-insensitive, global). $1 = slash, $2 = tag name, $3 = attributes.
    // base: 'hd' followed by the current timestamp; used as both the class name
    // and the id prefix of the placeholder DIVs.
    var rtag = /<(\/?)(html|head|body)(\s+[^>]*)?>/ig,
    base = 'hd' + +new Date();

    /**
     * Parse an HTML string, preserving any HTML, HEAD and BODY elements.
     *
     * @param {string} str - HTML source to parse.
     * @returns {jQuery} The children of a temporary wrapper DIV once the
     *   placeholders have been replaced, or plain `$(str)` when `str` contains
     *   no HTML, HEAD or BODY tags.
     */
    $.htmlDoc = function( str ) {
    // elems: the "intended" html/head/body elements, in the order their
    // opening tags appear in str.
    // parsed: str with every matched tag replaced by a DIV tag.
    // html: temporary wrapper DIV under which the DOM tree is rebuilt.
    var elems = $([]),
    parsed,
    html;

    // Replace each matched tag with a DIV tag, collecting the intended
    // elements along the way.
    parsed = str.replace( rtag, function( tag, slash, name, attrs ) {
    // Temporary object in which to hold the tag's attributes.
    var obj = {};

    // Opening tags only: closing tags add nothing to elems.
    if ( !slash ) {
    // Create the intended element from its bare tag name (no attributes).
    elems = elems.add( '<' + name + '/>' );

    // If the tag had attributes, build a throwaway DIV from the attribute
    // string and copy each resulting attribute name/value into obj.
    attrs && $.each( $( '<div' + attrs + '/>' )[0].attributes, function(i,v){
    obj[ v.name ] = v.value;
    });

    // Apply the copied attributes to the element that was just added.
    elems.eq( elems.length - 1 ).attr( obj );
    }

    // A closing tag becomes '</div>'. An opening tag becomes a DIV carrying
    // the shared class `base` and the id `base + index`, where index is the
    // position of the matching element in elems.
    return '<' + slash + 'div' + ( slash ? ''
    : ' class="' + base + '" id="' + base + ( elems.length - 1 ) + '"' ) + '>';
    });

    // If placeholder elements were necessary...
    if ( elems.length ) {
    html = $('<div/>');

    // Parse the place-held string, keep only the top-level nodes that carry
    // the placeholder class, and move them under the wrapper DIV.
    $( parsed ).filter( '.' + base ).appendTo( html );

    // Swap each placeholder for its intended element: insert the intended
    // element before the placeholder, move the placeholder's contents into
    // it, then remove the placeholder.
    $.each( elems, function(i,v){
    var elem = html.find( '#' + base + i ).before( elems[i] );
    elems.eq(i).append( elem.contents() );
    elem.remove();
    });

    // Return the wrapper's element children (the topmost intended elements).
    return html.children();
    }

    // No HTML, HEAD or BODY tags were found, so return the normally parsed
    // string.
    return $(str);
    };

    })(jQuery);
    51 changes: 51 additions & 0 deletions readme.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,51 @@
    From the jQuery API docs for .load():

    jQuery uses the browser's .innerHTML property to parse the retrieved
    document and insert it into the current document. During this process,
    browsers often filter elements from the document such as <html>,
    <title>, or <head> elements. As a result, the elements retrieved by
    .load() may not be exactly the same as if the document were retrieved
    directly by the browser.

    Using jQuery, and given this test.html:

    <!DOCTYPE HTML>
    <html lang="en-US">
    <head>
    <title>Test page</title>
    </head>
    <body>
    <div id="content">
    <p>stuff</p>
    <p>more stuff</p>
    </div>
    </body>
    </html>

    This behavior can be seen:

    $.get( 'test.html', function( html ) {
    // Not great: [, <title>Test page</title>, , <div id="content">…</div>, ]
    console.log( $(html) );

    // This fails: []
    console.log( $(html).find( '#content') );

    // This selects the content div, but.. ugly.
    console.log( $(html).filter( '#content') );

    // This also selects the content div, but.. also ugly.
    console.log( $('<div/>').html( html ).find( '#content' ) );
    });

    This, on the other hand, works as you'd expect, and attributes should
    be properly preserved:

    $.get( 'test.html', function( html ) {
    var hd = $.htmlDoc( html );

    console.log( hd.filter( 'html' ).length ); // 1
    console.log( hd.filter( 'html' ).attr( 'lang' ) ); // "en-US"
    console.log( hd.find( 'head' ).length ); // 1
    console.log( hd.find( 'body' ).length ); // 1
    });