Skip to content

Instantly share code, notes, and snippets.

Last active August 13, 2017 13:04
Show Gist options
  • Save ganeshv/8349757 to your computer and use it in GitHub Desktop.
Save ganeshv/8349757 to your computer and use it in GitHub Desktop.
Browserified cheerio.js.

Browserified Cheerio

Manually browserified cheerio.js. Three files were edited to replace dynamic "require" lines with static equivalents: one in cheerio itself (lib/cheerio.js) and two in its dependencies (node_modules/domutils/index.js and node_modules/entities/index.js). The result holds up to simple hand testing. cheerio is exposed in the browser as a global (window.cheerio).

!function(e){if("object"==typeof exports)module.exports=e();else if("function"==typeof define&&define.amd)define(e);else{var f;"undefined"!=typeof window?f=window:"undefined"!=typeof global?f=global:"undefined"!=typeof self&&(f=self),f.cheerio=e()}}(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);throw new Error("Cannot find module '"+o+"'")}var f=n[o]={exports:{}};t[o][0].call(f.exports,function(e){var n=t[o][1][e];return s(n?n:e)},f,f.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
var _ = require('underscore'),
    utils = require('../utils'),
    isTag = utils.isTag,
    decode = utils.decode,
    encode = utils.encode,
    hasOwn = Object.prototype.hasOwnProperty,
    rspace = /\s+/,

    // Lookup table for coercing string data-* attributes to their corresponding
    // JavaScript primitives
    primitives = {
      null: null,
      true: true,
      false: false
    },

    // Attributes that are booleans
    rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i,
    // Matches strings that look like JSON objects or arrays
    rbrace = /^(?:\{[\w\W]*\}|\[[\w\W]*\])$/;
/**
 * Set one attribute (or merge a map of attributes) on a single node.
 *
 * @param {Object} el - node whose `attribs` hash is modified
 * @param {String|Object} name - attribute name, or a map merged into `el.attribs`
 * @param {*} value - new value; `null` removes the attribute
 * @return {Object} the node's `attribs` hash
 */
var setAttr = function(el, name, value) {
  if (typeof name === 'object') return _.extend(el.attribs, name);

  if (value === null) {
    removeAttribute(el, name);
  } else {
    el.attribs[name] = encode(value);
  }

  return el.attribs;
};
/**
 * Get an attribute of the first element, or set attributes on every element.
 *
 * @param {String|Object} name - attribute name or a map of attributes
 * @param {*|Function} [value] - value to set, or a function called with
 *   `(index, currentValue)` whose result is set
 * @return {*} the selection when setting; the decoded value (or the whole
 *   decoded `attribs` hash when `name` is falsy) when getting
 */
var attr = exports.attr = function(name, value) {
  // Set the value (with attr map support)
  if (typeof name === 'object' || value !== undefined) {
    if (_.isFunction(value)) {
      return this.each(function(i, el) {
        setAttr(el, name, value.call(this, i, el.attribs[name]));
      });
    }
    return this.each(function(i, el) {
      el.attribs = setAttr(el, name, value);
    });
  }

  var elem = this[0];

  if (!elem || !isTag(elem)) return;

  if (!elem.attribs) {
    elem.attribs = {};
  }

  // Return the entire attribs object if no attribute specified
  if (!name) {
    for (var a in elem.attribs) {
      elem.attribs[a] = decode(elem.attribs[a]);
    }
    return elem.attribs;
  }

  if (hasOwn.call(elem.attribs, name)) {
    // Get the (decoded) attribute
    return decode(elem.attribs[name]);
  }
};
/**
 * Store one value (or merge a map of values) in a node's `data` hash.
 *
 * @param {Object} el - node whose `data` hash is modified
 * @param {String|Object} name - key, or a map merged into `el.data`
 * @param {*} value - value stored (encoded) under `name`
 * @return {Object} the node's `data` hash
 */
var setData = function(el, name, value) {
  if (typeof name === 'object') return _.extend(el.data, name);
  if (typeof name === 'string' && value !== undefined) {
    el.data[name] = encode(value);
  } else if (typeof name === 'object') {
    // If its an object, loop through it
    _.each(name, function(value, key) {
      el.data[key] = encode(value);
    });
  }

  // NOTE(review): the return statement was missing from the damaged text; the
  // caller assigns this function's result back to `el.data`, so the hash is returned.
  return el.data;
};
/**
 * Get a data value from the first element, or set data on every element.
 * Read values are decoded and coerced: `null`/`true`/`false` strings become
 * primitives, numeric strings become numbers, JSON-looking strings are parsed.
 *
 * @param {String|Object} [name] - key or a map of values
 * @param {*} [value] - value to set
 * @return {*} the selection when setting, otherwise the stored value
 */
var data = exports.data = function(name, value) {
  var elem = this[0];

  if (!elem || !isTag(elem)) return;

  if (!elem.data) {
    elem.data = {};
  }

  // Return the entire data object if no data specified
  if (!name) {
    _.each(elem.data, function(value, key) {
      elem.data[key] = decode(value);
    });

    return elem.data;
  }

  // Set the value (with attr map support)
  if (typeof name === 'object' || value !== undefined) {
    this.each(function(i, el) {
      el.data = setData(el, name, value);
    });
    return this;
  } else if (hasOwn.call(elem.data, name)) {
    // Get the (decoded) data
    var val = decode(elem.data[name]);

    if (hasOwn.call(primitives, val)) {
      val = primitives[val];
    } else if (val === String(Number(val))) {
      val = Number(val);
    } else if (rbrace.test(val)) {
      val = JSON.parse(val);
    }

    return val;
  } else if (typeof name === 'string' && value === undefined) {
    return undefined;
  }

  return this;
};
/**
 * Get the value of an element, or set it when called with an argument.
 * Handles `textarea`, `input` (including radio groups), `select` and `option`.
 *
 * NOTE(review): several statements in this function were missing from the
 * damaged text (the `default:` label, the lines that clear previously
 * checked/selected entries, and the `returnValue.push` call). They are
 * restored here from the surrounding logic — confirm against upstream.
 *
 * @param {*} [value] - value to set; omit to read
 * @return {*} current value when reading, the selection when writing
 */
var val = exports.val = function(value) {
  var querying = arguments.length === 0,
      element = this[0];

  if(!element) return;

  switch (element.name) {
    case 'textarea':
      return querying ? this.text() : this.each(function() {
        this.text(value);
      });
    case 'input':
      switch (this.attr('type')) {
        case 'radio':
          var queryString = 'input[type=radio][name=' + this.attr('name') + ']:checked';
          var parentEl, root;

          // Go up until we hit a form or root
          parentEl = this.closest('form');
          if (parentEl.length === 0) {
            root = (this.parents().last()[0] || this[0]).root;
            parentEl = this._make(root);
          }

          if (querying) {
            return parentEl.find(queryString).attr('value');
          } else {
            parentEl.find(':checked').removeAttr('checked');
            parentEl.find('input[type=radio][value="' + value + '"]').attr('checked', '');
            return this;
          }
          break;
        default:
          return querying ? this.attr('value') : this.each(function() {
            this.attr('value', value);
          });
      }
      return;
    case 'select':
      var option = this.find('option:selected'),
          returnValue;
      if (option === undefined) return undefined;
      if (!querying) {
        if (!this.attr().hasOwnProperty('multiple') && typeof value == 'object') {
          return this;
        }
        if (typeof value != 'object') {
          value = [value];
        }
        this.find('option').removeAttr('selected');
        for (var i = 0; i < value.length; i++) {
          this.find('option[value="' + value[i] + '"]').attr('selected', '');
        }
        return this;
      }
      returnValue = option.attr('value');
      if (this.attr().hasOwnProperty('multiple')) {
        returnValue = [];
        option.each(function() {
          returnValue.push(this.attr('value'));
        });
      }
      return returnValue;
    case 'option':
      if (!querying) {
        this.attr('value', value);
        return this;
      }
      return this.attr('value');
  }
};
/**
 * Remove an attribute from a single node.
 *
 * Does nothing when the node is not a tag, has no `attribs`, or does not own
 * the attribute. When `rboolean` matches the current attribute value the
 * attribute is set to `false`; otherwise it is deleted.
 *
 * @param {Object} elem - node to modify
 * @param {String} name - attribute name
 */
var removeAttribute = function(elem, name) {
  if (!isTag(elem.type) || !elem.attribs || !hasOwn.call(elem.attribs, name))
    return;

  if (rboolean.test(elem.attribs[name]))
    elem.attribs[name] = false;
  else
    delete elem.attribs[name];
};
/**
 * Remove the named attribute from every element in the selection.
 *
 * @param {String} name - attribute name
 * @return {self}
 */
var removeAttr = exports.removeAttr = function(name) {
  this.each(function(i, elem) {
    removeAttribute(elem, name);
  });

  return this;
};
/**
 * Check whether any element in the selection has the given class.
 *
 * @param {String} className
 * @return {Boolean}
 */
var hasClass = exports.hasClass = function(className) {
  return _.any(this, function(elem) {
    var attrs = elem.attribs;
    return attrs && _.contains((attrs['class'] || '').split(rspace), className);
  });
};
/**
 * Add one or more space-separated classes to every tag in the selection.
 *
 * @param {String|Function} value - class names, or a function called with
 *   `(index, currentClass)` that returns them
 * @return {self}
 */
var addClass = exports.addClass = function(value) {
  // Support functions
  if (_.isFunction(value)) {
    this.each(function(i) {
      var className = this.attr('class') || '';
      this.addClass(value.call(this[0], i, className));
    });
  }

  // Return if no value or not a string or function
  if (!value || !_.isString(value)) return this;

  var classNames = value.split(rspace),
      numElements = this.length,
      numClasses,
      setClass,
      $elem;

  for (var i = 0; i < numElements; i++) {
    $elem = this._make(this[i]);
    // If selected element isnt a tag, move on
    if (!isTag(this[i])) continue;

    // If we don't already have classes
    if (!$elem.attr('class')) {
      $elem.attr('class', classNames.join(' ').trim());
    } else {
      // Pad with spaces so whole class names can be matched with indexOf
      setClass = ' ' + $elem.attr('class') + ' ';
      numClasses = classNames.length;

      // Check if class already exists
      for (var j = 0; j < numClasses; j++) {
        if (!~setClass.indexOf(' ' + classNames[j] + ' '))
          setClass += classNames[j] + ' ';
      }

      $elem.attr('class', setClass.trim());
    }
  }

  return this;
};
/**
 * Remove classes from every tag in the selection. With no argument, all
 * classes are removed.
 *
 * @param {String|Function} [value] - class names, or a function called with
 *   `(index, currentClass)` that returns them
 * @return {self}
 */
var removeClass = exports.removeClass = function(value) {
  var split = function(className) {
    return className ? className.trim().split(rspace) : [];
  };

  var classes, removeAll;

  // Handle if value is a function
  if (_.isFunction(value)) {
    return this.each(function(i, el) {
      this.removeClass(value.call(this[0], i, el.attribs['class'] || ''));
    });
  }

  classes = split(value);
  removeAll = arguments.length === 0;

  return this.each(function(i, el) {
    if (!isTag(el)) return;
    el.attribs.class = removeAll ?
      '' :
      _.difference(split(el.attribs.class), classes).join(' ');
  });
};
/**
 * Toggle classes on every tag in the selection.
 *
 * @param {String|Function} value - class names, or a function returning them
 * @param {Boolean} [stateVal] - when boolean, forces add (`true`) or remove (`false`)
 * @return {self}
 */
var toggleClass = exports.toggleClass = function(value, stateVal) {
  // Support functions
  if (_.isFunction(value)) {
    return this.each(function(i, el) {
      this.toggleClass(value.call(this, i, el.attribs['class'] || '', stateVal), stateVal);
    });
  }

  // Return if no value or not a string or function
  if (!value || !_.isString(value)) return this;

  var classNames = value.split(rspace),
      numClasses = classNames.length,
      isBool = typeof stateVal === 'boolean',
      numElements = this.length,
      $elem,
      state;

  for (var i = 0; i < numElements; i++) {
    $elem = this._make(this[i]);
    // If selected element isnt a tag, move on
    if (!isTag(this[i])) continue;

    // Check if class already exists
    for (var j = 0; j < numClasses; j++) {
      // check each className given, space separated list
      state = isBool ? stateVal : !$elem.hasClass(classNames[j]);
      $elem[state ? 'addClass' : 'removeClass'](classNames[j]);
    }
  }

  return this;
};
/**
 * Check whether any element in the selection matches `selector`.
 *
 * @param {*} selector - anything accepted by `filter`
 * @return {Boolean} `false` when `selector` is falsy
 */
var is = exports.is = function (selector) {
  if (selector) {
    return this.filter(selector).length > 0;
  }
  return false;
};
var _ = require('underscore');
var toString = Object.prototype.toString;
* Set / Get css.
* @param {String|Object} prop
* @param {String} val
* @return {self}
* @api public
exports.css = function(prop, val) {
if (arguments.length === 2 ||
// When `prop` is a "plain" object
( === '[object Object]')) {
return this.each(function(idx) {
this._setCss(prop, val, idx);
} else {
return this._getCss(prop);
* Set styles of all elements.
* @param {String|Object} prop
* @param {String} val
* @param {Number} idx - optional index within the selection
* @return {self}
* @api private
exports._setCss = function(prop, val, idx) {
if ('string' == typeof prop) {
var styles = this._getCss();
if (_.isFunction(val)) {
val =[0], idx, this[0]);
if (val === '') {
delete styles[prop];
} else if (val != null) {
styles[prop] = val;
return this.attr('style', stringify(styles));
} else if ('object' == typeof prop) {
this._setCss(k, prop[k]);
}, this);
return this;
* Get parsed styles of the first element.
* @param {String} prop
* @return {Object}
* @api private
exports._getCss = function(prop) {
var styles = parse(this.attr('style'));
if (typeof prop === 'string') {
return styles[prop];
} else if (_.isArray(prop)) {
return _.pick(styles, prop);
} else {
return styles;
/**
 * Stringify `obj` to styles.
 *
 * Each own enumerable key becomes `key: value;`, joined by single spaces.
 *
 * @param {Object} obj
 * @return {String} style attribute text; empty string for a falsy `obj`
 * @api private
 */
function stringify(obj) {
  return Object.keys(obj || {})
    .reduce(function(str, prop){
      return str += ''
        + (str ? ' ' : '')
        + prop
        + ': '
        + obj[prop]
        + ';';
    }, '');
}
/**
 * Parse `styles`.
 *
 * Splits a style attribute on `;` and each declaration on its first `:`.
 *
 * @param {String} styles
 * @return {Object} map of property name to value
 * @api private
 */
function parse(styles) {
  styles = (styles || '').trim();

  if (!styles) return {};

  return styles
    .split(';')
    .reduce(function(obj, str){
      var n = str.indexOf(':');
      // skip if there is no :, or if it is the first/last character
      if (n < 1 || n === str.length-1) return obj;
      obj[str.slice(0,n).trim()] = str.slice(n+1).trim();
      return obj;
    }, {});
}
var _ = require('underscore'),
parse = require('../parse'),
$ = require('../static'),
updateDOM = parse.update,
evaluate = parse.evaluate,
encode = require('../utils').encode,
slice = Array.prototype.slice;
/**
 * Create an array of nodes, recursing into arrays and parsing strings if
 * necessary.
 *
 * @param {*} elem - null/undefined, a cheerio object, an array, an HTML
 *   string, or a single node
 * @return {Array} flat list of nodes
 */
var makeDomArray = function(elem) {
  if (elem == null) {
    return [];
  } else if (elem.cheerio) {
    return elem.toArray();
  } else if (_.isArray(elem)) {
    return _.flatten(elem.map(makeDomArray));
  } else if (_.isString(elem)) {
    return evaluate(elem);
  } else {
    return [elem];
  }
};
/**
 * Build an insertion method (used for `append` / `prepend`).
 *
 * @param {Function} concatenator - receives `(dom, children)` and returns the
 *   new child list for the element
 * @return {Function} method that accepts content arguments and returns the
 *   selection
 */
var _insert = function(concatenator) {
  return function() {
    var elems = slice.call(arguments),
        dom = makeDomArray(elems);

    return this.each(function(i, el) {
      // A function argument is evaluated per element to produce the content
      if (_.isFunction(elems[0])) {
        dom = makeDomArray(elems[0].call(el, i, this.html()));
      }
      updateDOM(concatenator(dom, el.children || (el.children = [])), el);
    });
  };
};
// Insert content after the existing children of each element.
var append = exports.append = _insert(function(dom, children) {
  return children.concat(dom);
});
// Insert content before the existing children of each element.
var prepend = exports.prepend = _insert(function(dom, children) {
  return dom.concat(children);
});
/**
 * Insert content after each element in the selection.
 * Accepts any number of content arguments; a function as first argument is
 * called per element with the element index.
 *
 * @return {self}
 */
var after = exports.after = function() {
  var elems = slice.call(arguments),
      dom = makeDomArray(elems);

  this.each(function(i, el) {
    var parent = el.parent || el.root,
        siblings = parent.children,
        index = siblings.indexOf(el);

    // If not found, move on
    if (!~index) return;

    if (_.isFunction(elems[0])) {
      dom = makeDomArray(elems[0].call(el, i));
    }

    // Add element after `this` element
    siblings.splice.apply(siblings, [++index, 0].concat(dom));

    // Update next, prev, and parent pointers
    updateDOM(siblings, parent);
  });

  return this;
};
/**
 * Insert content before each element in the selection.
 * Accepts any number of content arguments; a function as first argument is
 * called per element with the element index.
 *
 * @return {self}
 */
var before = exports.before = function() {
  var elems = slice.call(arguments),
      dom = makeDomArray(elems);

  this.each(function(i, el) {
    var parent = el.parent || el.root,
        siblings = parent.children,
        index = siblings.indexOf(el);

    // If not found, move on
    if (!~index) return;

    if (_.isFunction(elems[0])) {
      dom = makeDomArray(elems[0].call(el, i));
    }

    // Add element before `el` element
    siblings.splice.apply(siblings, [index, 0].concat(dom));

    // Update next, prev, and parent pointers
    updateDOM(siblings, parent);
  });

  return this;
};
/**
 * Remove the selected elements (optionally only those matching `selector`)
 * from their parents' child lists.
 *
 * @param {String} [selector]
 * @return {self}
 */
var remove = exports.remove = function(selector) {
  var elems = this;

  // Filter if we have selector
  if (selector)
    elems = elems.filter(selector);

  elems.each(function(i, el) {
    var parent = el.parent || el.root,
        siblings = parent.children,
        index = siblings.indexOf(el);

    if (!~index) return;

    siblings.splice(index, 1);

    // Update next, prev, and parent pointers
    updateDOM(siblings, parent);
  });

  return this;
};
/**
 * Replace each selected element with `content`.
 *
 * @param {*|Function} content - replacement content, or a function called
 *   per element with the element index
 * @return {self}
 */
var replaceWith = exports.replaceWith = function(content) {
  var dom = makeDomArray(content);

  this.each(function(i, el) {
    var parent = el.parent || el.root,
        siblings = parent.children,
        index;

    if (_.isFunction(content)) {
      dom = makeDomArray(content.call(el, i));
    }

    // In the case that `dom` contains nodes that already exist in other
    // structures, ensure those nodes are properly removed.
    updateDOM(dom, null);

    // Looked up only after the detach above, which may have shifted siblings
    index = siblings.indexOf(el);

    // Completely remove old element
    siblings.splice.apply(siblings, [index, 1].concat(dom));
    el.parent = el.prev = el.next = null;

    updateDOM(siblings, parent);
  });

  return this;
};
/**
 * Remove all children from every element in the selection.
 *
 * @return {self}
 */
var empty = exports.empty = function() {
  this.each(function(i, el) {
    el.children = [];
  });
  return this;
};
/**
 * Set/Get the HTML
 *
 * @param {String|Object} [str] - markup or a cheerio object to use as the new
 *   children; omit to read
 * @return {String|null|self} rendered children of the first element (or
 *   `null` if there is none) when reading, the selection when writing
 */
var html = exports.html = function(str) {
  if (str === undefined) {
    if (!this[0] || !this[0].children) return null;
    return $.html(this[0].children);
  }

  str = str.cheerio ? str.toArray() : evaluate(str);

  this.each(function(i, el) {
    el.children = str;
    updateDOM(el.children, el);
  });

  return this;
};
// Render the whole selection as an HTML string.
var toString = exports.toString = function() {
  return $.html(this);
};
/**
 * Get the combined text of the selection, or replace each element's children
 * with a single text node.
 *
 * @param {String|Function} [str] - new text, or a function called with
 *   `(index, currentText)` that returns it
 * @return {String|self}
 */
var text = exports.text = function(str) {
  // If `str` is undefined, act as a "getter"
  if (str === undefined) {
    return $.text(this);
  } else if (_.isFunction(str)) {
    // Function support
    return this.each(function(i, el) {
      return this.text(str.call(el, i, this.text()));
    });
  }

  var elem = {
    data: encode(str),
    type: 'text',
    parent: null,
    prev: null,
    next: null,
    children: []
  };

  // Append text node to each selected elements
  this.each(function(i, el) {
    el.children = elem;
    updateDOM(el.children, el);
  });

  return this;
};
/**
 * Copy the selection by rendering it and re-parsing the markup.
 *
 * @return {Object} new cheerio object
 */
var clone = exports.clone = function() {
  // Turn it into HTML, then recreate it,
  // Seems to be the easiest way to reconnect everything correctly
  return this._make($.html(this));
};
var _ = require('underscore'),
select = require('CSSselect'),
utils = require('../utils'),
isTag = utils.isTag;
/**
 * Select the elements matching `selector`, searching from the children of
 * the current selection.
 *
 * @param {String} selector
 * @return {Object} new cheerio object
 */
var find = exports.find = function(selector) {
  return this._make(select(selector, [].slice.call(this.children())));
};
// Get the parent of each element in the current set of matched elements,
// optionally filtered by a selector.
var parent = exports.parent = function(selector) {
  var set = [];
  var $set;

  this.each(function(idx, elem) {
    var parentElem = elem.parent;
    // Collect each distinct parent once
    if (parentElem && set.indexOf(parentElem) < 0) {
      set.push(parentElem);
    }
  });

  $set = this._make(set);

  if (arguments.length) {
    $set = $set.filter(selector);
  }

  return $set;
};
/**
 * Get the ancestors of each element, optionally filtered by a selector.
 *
 * @param {String} [selector]
 * @return {Object} new cheerio object
 */
var parents = exports.parents = function(selector) {
  var parentNodes = [];

  // When multiple DOM elements are in the original set, the resulting set will
  // be in *reverse* order of the original elements as well, with duplicates
  // removed.
  this.toArray().reverse().forEach(function(elem) {
    traverseParents(this, elem.parent, selector, Infinity)
      .forEach(function(node) {
        if (parentNodes.indexOf(node) === -1) {
          parentNodes.push(node);
        }
      });
  }, this);

  return this._make(parentNodes);
};
// For each element in the set, get the first element that matches the selector
// by testing the element itself and traversing up through its ancestors in the
// DOM tree.
var closest = exports.closest = function(selector) {
  var set = [];

  if (!selector) {
    return this._make(set);
  }

  this.each(function(idx, elem) {
    var closestElem = traverseParents(this, elem, selector, 1)[0];

    // Do not add duplicate elements to the set
    if (closestElem && set.indexOf(closestElem) < 0) {
      set.push(closestElem);
    }
  });

  return this._make(set);
};
/**
 * Get the first following tag sibling of each element.
 *
 * @return {Object} new cheerio object (the selection itself when empty)
 */
var next = exports.next = function() {
  if (!this[0]) { return this; }
  var elems = [];

  _.forEach(this, function(elem) {
    while ((elem = elem.next)) {
      if (isTag(elem)) {
        elems.push(elem);
        // Only the nearest tag sibling is wanted
        return;
      }
    }
  });

  return this._make(elems);
};
/**
 * Get all following tag siblings of each element, without duplicates,
 * optionally filtered by a selector.
 *
 * @param {String} [selector]
 * @return {Object} new cheerio object (the selection itself when empty)
 */
var nextAll = exports.nextAll = function(selector) {
  if (!this[0]) { return this; }
  var elems = [];

  _.forEach(this, function(elem) {
    while ((elem = elem.next)) {
      if (isTag(elem) && elems.indexOf(elem) === -1) {
        elems.push(elem);
      }
    }
  });

  return this._make(selector ? select(selector, elems) : elems);
};
/**
 * Get following tag siblings of each element up to (not including) the
 * element identified by `selector`.
 *
 * @param {String|Object} [selector] - selector string, cheerio object or node
 *   marking where to stop
 * @param {String} [filter] - selector applied to the collected siblings
 * @return {Object} new cheerio object (the selection itself when empty)
 */
var nextUntil = exports.nextUntil = function(selector, filter) {
  if (!this[0]) { return this; }
  var elems = [], untilNode, untilNodes;

  if (typeof selector === 'string') {
    untilNode = select(selector, this.nextAll().toArray())[0];
  } else if (selector && selector.cheerio) {
    untilNodes = selector.toArray();
  } else if (selector) {
    untilNode = selector;
  }

  _.forEach(this, function(elem) {
    while ((elem = elem.next)) {
      if ((untilNode && elem !== untilNode) ||
        (untilNodes && untilNodes.indexOf(elem) === -1) ||
        (!untilNode && !untilNodes)) {
        if (isTag(elem) && elems.indexOf(elem) === -1) {
          elems.push(elem);
        }
      } else {
        // Reached the stop node
        break;
      }
    }
  });

  return this._make(filter ? select(filter, elems) : elems);
};
/**
 * Get the first preceding tag sibling of each element.
 *
 * @return {Object} new cheerio object (the selection itself when empty)
 */
var prev = exports.prev = function() {
  if (!this[0]) { return this; }
  var elems = [];

  _.forEach(this, function(elem) {
    while ((elem = elem.prev)) {
      if (isTag(elem)) {
        elems.push(elem);
        // Only the nearest tag sibling is wanted
        return;
      }
    }
  });

  return this._make(elems);
};
/**
 * Get all preceding tag siblings of each element, without duplicates,
 * optionally filtered by a selector.
 *
 * @param {String} [selector]
 * @return {Object} new cheerio object (the selection itself when empty)
 */
var prevAll = exports.prevAll = function(selector) {
  if (!this[0]) { return this; }
  var elems = [];

  _.forEach(this, function(elem) {
    while ((elem = elem.prev)) {
      if (isTag(elem) && elems.indexOf(elem) === -1) {
        elems.push(elem);
      }
    }
  });

  return this._make(selector ? select(selector, elems) : elems);
};
/**
 * Get preceding tag siblings of each element back to (not including) the
 * element identified by `selector`.
 *
 * @param {String|Object} [selector] - selector string, cheerio object or node
 *   marking where to stop
 * @param {String} [filter] - selector applied to the collected siblings
 * @return {Object} new cheerio object (the selection itself when empty)
 */
var prevUntil = exports.prevUntil = function(selector, filter) {
  if (!this[0]) { return this; }
  var elems = [], untilNode, untilNodes;

  if (typeof selector === 'string') {
    untilNode = select(selector, this.prevAll().toArray())[0];
  } else if (selector && selector.cheerio) {
    untilNodes = selector.toArray();
  } else if (selector) {
    untilNode = selector;
  }

  _.forEach(this, function(elem) {
    while ((elem = elem.prev)) {
      if ((untilNode && elem !== untilNode) ||
        (untilNodes && untilNodes.indexOf(elem) === -1) ||
        (!untilNode && !untilNodes)) {
        if (isTag(elem) && elems.indexOf(elem) === -1) {
          elems.push(elem);
        }
      } else {
        // Reached the stop node
        break;
      }
    }
  });

  return this._make(filter ? select(filter, elems) : elems);
};
/**
 * Get the tag siblings of the selection (excluding the selected elements
 * themselves), optionally filtered by a selector.
 *
 * @param {String} [selector]
 * @return {Object} new cheerio object
 */
var siblings = exports.siblings = function(selector) {
  var elems = _.filter(
    this.parent() ? this.parent().children() : this.siblingsAndMe(),
    function(elem) { return isTag(elem) && !this.is(elem); },
    this
  );
  if (selector !== undefined) {
    elems = this._make(select(selector, elems));
  }
  return this._make(elems);
};
/**
 * Get the tag children of every element in the selection.
 *
 * @param {String|Number} [selector] - a selector to filter by, or an index
 *   into the combined child list
 * @return {Object} new cheerio object
 */
var children = exports.children = function(selector) {
  var elems = _.reduce(this, function(memo, elem) {
    return memo.concat(_.filter(elem.children, isTag));
  }, []);

  if (selector === undefined) return this._make(elems);
  else if (_.isNumber(selector)) return this._make(elems[selector]);

  return this._make(elems).filter(selector);
};
/**
 * Get all child nodes (not only tags) of every element in the selection.
 *
 * @return {Object} new cheerio object
 */
var contents = exports.contents = function() {
  return this._make(_.reduce(this, function(all, elem) {
    all.push.apply(all, elem.children);
    return all;
  }, []));
};
/**
 * Call `fn` for every element. `fn` runs with `this` bound to a cheerio
 * wrapper of the element and receives `(index, element)`; returning `false`
 * stops the iteration.
 *
 * @param {Function} fn
 * @return {self}
 */
var each = exports.each = function(fn) {
  var i = 0, len = this.length;
  while (i < len && fn.call(this._make(this[i]), i, this[i]) !== false) ++i;
  return this;
};
/**
 * Build a new selection from the results of calling `fn` on each element.
 * `null`/`undefined` results are skipped; array results are concatenated.
 *
 * @param {Function} fn - called with `(index, element)`, `this` bound to a
 *   cheerio wrapper of the element
 * @return {Object} new cheerio object
 */
var map = exports.map = function(fn) {
  return this._make(_.reduce(this, function(memo, el, i) {
    var val = fn.call(this._make(el), i, el);
    return val == null ? memo : memo.concat(val);
  }, [], this));
};
/**
 * Reduce the selection to the elements that match.
 *
 * @param {String|Function|Object} match - a selector, a predicate called with
 *   `(index, element)`, a cheerio object, or a single node
 * @return {Object} new cheerio object
 */
var filter = exports.filter = function(match) {
  var make = _.bind(this._make, this);
  var filterFn;

  if (_.isString(match)) {
    filterFn = function(el) {
      return select(match, [el])[0] === el;
    };
  } else if (_.isFunction(match)) {
    filterFn = function(el, i) {
      return match.call(make(el), i, el);
    };
  } else if (match.cheerio) {
    filterFn = match.is.bind(match);
  } else {
    filterFn = function(el) {
      return match === el;
    };
  }

  return make(_.filter(this, filterFn));
};
// Reduce the set to its first element (the selection itself when empty).
var first = exports.first = function() {
  return this[0] ? this._make(this[0]) : this;
};
// Reduce the set to its last element (the selection itself when empty).
var last = exports.last = function() {
  return this[0] ? this._make(this[this.length - 1]) : this;
};
// Reduce the set of matched elements to the one at the specified index.
// Negative indices count back from the end of the set.
var eq = exports.eq = function(i) {
  i = +i;
  if (i < 0) i = this.length + i;
  return this[i] ? this._make(this[i]) : this._make([]);
};
// Take a sub-range of the selection; arguments are those of Array#slice.
var slice = exports.slice = function() {
  return this._make([].slice.apply(this, arguments));
};
/**
 * Walk from `elem` up through its `parent` links, collecting matching nodes.
 *
 * @param {Object} self - cheerio object used to wrap nodes for filtering
 * @param {Object} elem - node to start from (it is tested as well)
 * @param {String} [selector] - when falsy every visited node is collected
 * @param {Number} limit - stop once this many nodes have been collected
 * @return {Array} collected nodes, nearest first
 */
function traverseParents(self, elem, selector, limit) {
  var elems = [];
  while (elem && elems.length < limit) {
    if (!selector || self._make(elem).filter(selector).length) {
      elems.push(elem);
    }
    elem = elem.parent;
  }
  return elems;
}
// End the most recent filtering operation in the current chain and return the
// set of matched elements to its previous state.
var end = exports.end = function() {
  return this.prevObject || this._make([]);
};
Module dependencies
var path = require('path'),
parse = require('./parse'),
evaluate = parse.evaluate,
_ = require('underscore');
* The API
var api = ['attributes', 'traversing', 'manipulation', 'css'];
* A simple way to check for HTML strings or ID strings
var quickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/;
* Static Methods
var $ = require('./static');
/**
 * Instance of cheerio
 *
 * @param {*} selector - selector string, HTML string, node, array of nodes,
 *   or cheerio object
 * @param {String|Object} [context] - selector/HTML string or cheerio object
 *   to search within
 * @param {String|Object} [root] - document root (markup is parsed first)
 */
var Cheerio = module.exports = function(selector, context, root) {
  if (!(this instanceof Cheerio)) return new Cheerio(selector, context, root);

  // $(), $(null), $(undefined), $(false)
  if (!selector) return this;

  if (root) {
    if (typeof root === 'string') root = parse(root);
    this._root = Cheerio.call(this, root);
  }

  // $($)
  if (selector.cheerio) return selector;

  // $(dom)
  if (selector.name || selector.type === 'text' || selector.type === 'comment')
    selector = [selector];

  // $([dom])
  if (Array.isArray(selector)) {
    _.forEach(selector, function(elem, idx) {
      this[idx] = elem;
    }, this);
    this.length = selector.length;
    return this;
  }

  // $(<html>)
  if (typeof selector === 'string' && isHtml(selector)) {
    return Cheerio.call(this, parse(selector).children);
  }

  // If we don't have a context, maybe we have a root, from loading
  if (!context) {
    context = this._root;
  } else if (typeof context === 'string') {
    if (isHtml(context)) {
      // $('li', '<ul>...</ul>')
      context = parse(context);
      context = Cheerio.call(this, context);
    } else {
      // $('li', 'ul')
      selector = [context, selector].join(' ');
      context = this._root;
    }
  }

  // If we still don't have a context, return
  if (!context) return this;

  // #id, .class, tag
  return context.find(selector);
};
* Mix in `static`
_.extend(Cheerio, require('./static'));
* Set a signature of the object
Cheerio.prototype.cheerio = '[cheerio object]';
/**
 * Cheerio default options
 */
Cheerio.prototype.options = {
  normalizeWhitespace: false,
  xmlMode: false,
  lowerCaseTags: false
};
* Make cheerio an array-like object
Cheerio.prototype.length = 0;
Cheerio.prototype.splice = Array.prototype.splice;
/**
 * Check if string is HTML
 *
 * @param {String} str
 * @return {Boolean}
 */
var isHtml = function(str) {
  // Faster than running regex, if str starts with `<` and ends with `>`, assume it's HTML
  if (str.charAt(0) === '<' && str.charAt(str.length - 1) === '>' && str.length >= 3) return true;

  // Run the regex
  var match = quickExpr.exec(str);
  return !!(match && match[1]);
};
/**
 * Make a cheerio object
 *
 * The new object remembers the current one as `prevObject`.
 *
 * @param {*} dom - anything the constructor accepts as a selector
 * @return {Object} new cheerio object
 * @api private
 */
Cheerio.prototype._make = function(dom) {
  var cheerio = new Cheerio(dom);
  cheerio.prevObject = this;
  return cheerio;
};
/**
 * Turn a cheerio object into an array
 *
 * @return {Array} the selected nodes
 */
Cheerio.prototype.toArray = function() {
  return [].slice.call(this, 0);
};
/**
 * Plug in the API
 *
 * The original loop used a dynamic `require('./api/' + mod)`, which a
 * browserify bundle cannot resolve statically. It is kept here only as a
 * comment for reference; the static list below replaces it.
 *
 *   api.forEach(function(mod) {
 *     _.extend(Cheerio.prototype, require('./api/' + mod));
 *   });
 */
// Declared with `var` so the list does not leak out as an implicit global.
var _apimods = [require('./api/attributes'), require('./api/traversing'),
                require('./api/manipulation'), require('./api/css')];
_apimods.forEach(function(mod) {
  _.extend(Cheerio.prototype, mod);
});
Module Dependencies
var htmlparser = require('htmlparser2'),
_ = require('underscore'),
isTag = require('./utils').isTag,
camelCase = require('./utils').camelCase;
/**
 * Parse markup and attach the resulting nodes to a fresh root node.
 *
 * @param {String} content - markup to parse
 * @param {Object} [options] - passed through to the parser
 * @return {Object} root node whose `children` are the parsed nodes
 */
exports = module.exports = function(content, options) {
  var dom = evaluate(content, options);

  // Generic root element
  var root = {
    type: 'root',
    name: 'root',
    parent: null,
    prev: null,
    next: null,
    children: []
  };

  // Update the dom using the root
  update(dom, root);

  return root;
};
/**
 * Parse markup into a list of nodes and collect their `data-*` attributes.
 *
 * @param {String} content - markup to parse
 * @param {Object} [options] - passed to the DOM handler and the parser
 * @return {Array} parsed top-level nodes
 */
var evaluate = exports.evaluate = function(content, options) {
  // options = options || $.fn.options;

  var handler = new htmlparser.DomHandler(options),
      parser = new htmlparser.Parser(handler, options);

  // NOTE(review): the statements feeding `content` to the parser were missing
  // from the damaged text; without them `handler.dom` stays empty. Restored
  // with the htmlparser2 write/end calls — confirm against upstream.
  parser.write(content);
  parser.end();

  _.forEach(handler.dom, parseData);

  return handler.dom;
};
/**
 * Update the dom structure, for one changed layer
 *
 * Makes `arr` the child list of `parent` and rewrites each node's `prev`,
 * `next`, `parent` and `root` pointers accordingly.
 *
 * @param {Array|Object} arr - node or list of nodes forming the layer
 * @param {Object} [parent] - new parent; falsy detaches the nodes
 * @return {Object|null} the parent
 */
var update = exports.update = function(arr, parent) {
  // normalize
  if (!Array.isArray(arr)) arr = [arr];

  // Update parent
  if (parent) {
    parent.children = arr;
  } else {
    parent = null;
  }

  // Update neighbors
  for (var i = 0; i < arr.length; i++) {
    var node = arr[i];

    // Cleanly remove existing nodes from their previous structures.
    var oldSiblings = node.parent && node.parent.children;
    if (oldSiblings && oldSiblings !== arr) {
      oldSiblings.splice(oldSiblings.indexOf(node), 1);
      if (node.prev) {
        node.prev.next = node.next;
      }
      if (node.next) {
        node.next.prev = node.prev;
      }
    }

    node.prev = arr[i - 1] || null;
    node.next = arr[i + 1] || null;

    // Children of the root keep a `root` reference instead of a `parent`
    if (parent && parent.type === 'root') {
      node.root = parent;
      node.parent = null;
    } else {
      delete node.root;
      node.parent = parent;
    }
  }

  return parent;
};
/**
 * Extract element data according to `data-*` element attributes and store in
 * a key-value hash on the element's `data` attribute. Repeat for any and all
 * descendant elements.
 *
 * @param {Object} elem Element
 */
var parseData = exports.parseData = function(elem) {
  if (elem.data === undefined) elem.data = {};

  var value;
  for (var key in elem.attribs) {
    if (key.substr(0, 5) === 'data-') {
      value = elem.attribs[key];
      // Drop the `data-` prefix and camel-case the remainder
      key = key.slice(5);
      key = camelCase(key);
      elem.data[key] = value;
    }
  }

  _.forEach(elem.children, parseData);
};
// module.exports = $.extend(exports);
Module dependencies
var _ = require('underscore');
var utils = require('./utils');
var decode = utils.decode;
var encode = utils.encode;
Boolean Attributes
var rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i;
/**
 * Format attributes
 *
 * Falsy values of boolean attributes (and of the `/` key) are written as a
 * bare name; everything else as `name="value"` with the value re-encoded.
 *
 * @param {Object} attributes - attribute hash
 * @return {String} space-separated attribute text, `''` when there is no hash
 */
var formatAttrs = function(attributes) {
  if (!attributes) return '';

  var output = [],
      value;

  // Loop through the attributes
  for (var key in attributes) {
    value = attributes[key];
    if (!value && (rboolean.test(key) || key === '/')) {
      output.push(key);
    } else {
      output.push(key + '="' + encode(decode(value)) + '"');
    }
  }

  return output.join(' ');
};
/*
  Self-enclosing tags (stolen from node-htmlparser)
*/
var singleTag = {
  area: 1,
  base: 1,
  basefont: 1,
  br: 1,
  col: 1,
  frame: 1,
  hr: 1,
  img: 1,
  input: 1,
  isindex: 1,
  link: 1,
  meta: 1,
  param: 1,
  embed: 1,
  include: 1,
  'yield': 1
};
/*
  Tag types from htmlparser
*/
var tagType = {
  tag: 1,
  script: 1,
  link: 1,
  style: 1,
  template: 1
};
/**
 * Render a node, a list of nodes or a cheerio object to markup.
 *
 * @param {Object|Array} dom
 * @param {Object} [opts] - `xmlMode` switches to XML-style output
 * @return {String}
 */
var render = module.exports = function(dom, opts) {
  if (!Array.isArray(dom) && !dom.cheerio) dom = [dom];
  opts = opts || {};

  var output = [],
      xmlMode = opts.xmlMode || false;

  _.each(dom, function(elem) {
    var pushVal;

    if (tagType[elem.type])
      pushVal = renderTag(elem, xmlMode);
    else if (elem.type === 'directive')
      pushVal = renderDirective(elem);
    else if (elem.type === 'comment')
      pushVal = renderComment(elem);
    else
      pushVal = renderText(elem);

    // Push rendered DOM node
    output.push(pushVal);

    if (elem.children)
      output.push(render(elem.children, opts));

    // Emit a closing tag unless the element is a void tag (HTML mode) or was
    // already self-closed
    if ((!singleTag[elem.name] || xmlMode) && tagType[elem.type]) {
      if (!isClosedTag(elem, xmlMode)) {
        output.push('</' + elem.name + '>');
      }
    }
  });

  return output.join('');
};
/**
 * Tell whether an element is written as a self-closed tag: only in XML mode,
 * and only when it has no children.
 *
 * @param {Object} elem
 * @param {Boolean} xmlMode
 * @return {Boolean}
 */
var isClosedTag = function(elem, xmlMode){
  return (xmlMode && (!elem.children || elem.children.length === 0));
};
/**
 * Render the opening tag of an element, including its attributes.
 *
 * @param {Object} elem
 * @param {Boolean} xmlMode
 * @return {String}
 */
var renderTag = function(elem, xmlMode) {
  var tag = '<' + elem.name;

  if (elem.attribs && _.size(elem.attribs)) {
    tag += ' ' + formatAttrs(elem.attribs);
  }

  if (isClosedTag(elem, xmlMode)) {
    tag += '/';
  }

  return tag + '>';
};
// Render a directive node by wrapping its `data` in angle brackets.
var renderDirective = function(elem) {
  return '<' + elem.data + '>';
};
// Render a text node.
// NOTE(review): the body was missing from the damaged text; returning the
// node's `data` mirrors the other render helpers — confirm against upstream.
var renderText = function(elem) {
  return elem.data;
};
// Render a comment node by wrapping its `data` in comment delimiters.
var renderComment = function(elem) {
  return '<!--' + elem.data + '-->';
};
// module.exports = $.extend(exports);
* Module dependencies
var select = require('CSSselect'),
parse = require('./parse'),
render = require('./render'),
decode = require('./utils').decode;
/**
 * $.load(str)
 *
 * @param {String} str - markup to parse
 * @param {Object} [options] - passed to the parser
 * @return {Function} selector function bound to the parsed document, with
 *   the static methods on its prototype chain and the root on `_root`
 */
var load = exports.load = function(str, options) {
  var Cheerio = require('./cheerio'),
      root = parse(str, options);

  var initialize = function(selector, context, r) {
    return new Cheerio(selector, context, r || root);
  };

  // Add in the static methods
  initialize.__proto__ = exports;

  // Add in the root
  initialize._root = root;

  return initialize;
};
/**
 * $.html([selector | dom])
 *
 * @param {String|Object} [dom] - selector or nodes; omit to render the root
 * @return {String} rendered markup, `''` when there is nothing to render
 */
var html = exports.html = function(dom) {
  if (dom) {
    dom = (typeof dom === 'string') ? select(dom, this._root) : dom;
    return render(dom);
  } else if (this._root && this._root.children) {
    return render(this._root.children);
  } else {
    return '';
  }
};
/**
 * $.xml([selector | dom])
 *
 * @param {String|Object} [dom] - selector or nodes; omit to render the root
 * @return {String} markup rendered in XML mode, `''` when there is nothing
 *   to render
 */
var xml = exports.xml = function(dom) {
  if (dom) {
    dom = (typeof dom === 'string') ? select(dom, this._root) : dom;
    return render(dom, { xmlMode: true });
  } else if (this._root && this._root.children) {
    return render(this._root.children, { xmlMode: true });
  } else {
    return '';
  }
};
/**
 * $.text(dom)
 *
 * Concatenates the decoded text of all text nodes, descending into the
 * children of every non-comment node.
 *
 * @param {Array|Object} elems - array-like list of nodes
 * @return {String}
 */
var text = exports.text = function(elems) {
  if (!elems) return '';

  var ret = '',
      len = elems.length,
      elem;

  for (var i = 0; i < len; i ++) {
    elem = elems[i];
    if (elem.type === 'text') ret += decode(elem.data);
    else if (elem.children && elem.type !== 'comment') {
      ret += text(elem.children);
    }
  }

  return ret;
};
/**
 * $.parseHTML(data [, context ] [, keepScripts ])
 * Parses a string into an array of DOM nodes. The `context` argument has no
 * meaning for Cheerio, but it is maintained for API compatibility with jQuery.
 *
 * @param {String} data - markup to parse
 * @param {*} [context] - ignored, unless it is a boolean standing in for `keepScripts`
 * @param {Boolean} [keepScripts]
 * @return {Array|null} parsed nodes, or `null` for empty or non-string input
 */
var parseHTML = exports.parseHTML = function(data, context, keepScripts) {
  var parsed;

  if (!data || typeof data !== 'string') {
    return null;
  }

  if (typeof context === 'boolean') {
    keepScripts = context;
  }

  parsed = this.load(data);
  if (!keepScripts) {
    // NOTE(review): the body of this branch was missing from the damaged
    // text; dropping script elements is what the flag name implies — confirm
    // against upstream.
    parsed('script').remove();
  }

  return parsed.root()[0].children;
};
/**
 * $.root()
 *
 * @return {Object} the document root wrapped by calling the loaded selector
 *   function on `_root`
 */
var root = exports.root = function() {
  return this(this._root);
};
/**
 * $.contains()
 *
 * @param {Object} container - possible ancestor node
 * @param {Object} contained - possible descendant node
 * @return {Boolean} whether `contained` has `container` among its ancestors
 */
var contains = exports.contains = function(container, contained) {

  // According to the jQuery API, an element does not "contain" itself
  if (contained === container) {
    return false;
  }

  // Step up the descendants, stopping when the root element is reached
  // (signaled by `.parent` returning a reference to the same object)
  while (contained && contained !== contained.parent) {
    contained = contained.parent;
    if (contained === container) {
      return true;
    }
  }

  return false;
};
* Module Dependencies
var entities = require('entities');
* HTML Tags
var tags = { tag: true, script: true, style: true };
* Check if the DOM element is a tag
* isTag(type) includes <script> and <style> tags
exports.isTag = function(type) {
if (type.type) type = type.type;
return tags[type] || false;
* Convert a string to camel case notation.
* @param {String} str String to be converted.
* @return {String} String in camel case notation.
exports.camelCase = function(str) {
return str.replace(/[_.-](\w|$)/g, function(_, x) {
return x.toUpperCase();
/**
 * Expose encode and decode methods from FB55's node-entities library
 *
 * 0 = XML, 1 = HTML4 and 2 = HTML5
 */
exports.encode = function(str) { return entities.encode(String(str), 0); };
exports.decode = function(str) { return entities.decode(str, 2); };
"use strict";
module.exports = CSSselect;
var Pseudos = require("./lib/pseudos.js"),
DomUtils = require("domutils"),
findOne = DomUtils.findOne,
findAll = DomUtils.findAll,
getChildren = DomUtils.getChildren,
removeSubsets = DomUtils.removeSubsets,
falseFunc = require("./lib/basefunctions.js").falseFunc,
compile = require("./lib/compile.js");
/**
 * Wrap a search function so that it accepts a raw query and raw elements.
 *
 * The returned `select(query, elems, options)` compiles `query` unless it is
 * already a function, normalises `elems` (children of a single node, or the
 * list with subsets removed), then delegates to `searchFunc`.
 *
 * @param {Function} searchFunc - called with `(compiledQuery, elems)`
 * @return {Function}
 */
function getSelectorFunc(searchFunc){
	return function select(query, elems, options){
		if(typeof query !== "function") query = compile(query, options);
		if(!Array.isArray(elems)) elems = getChildren(elems);
		else elems = removeSubsets(elems);
		return searchFunc(query, elems);
	};
}
// Find every element matching the query; an empty list when the query can
// never match or there is nothing to search.
var selectAll = getSelectorFunc(function selectAll(query, elems){
	return (query === falseFunc || !elems || elems.length === 0) ? [] : findAll(query, elems);
});
// Find the first element matching the query; `null` when the query can never
// match or there is nothing to search.
var selectOne = getSelectorFunc(function selectOne(query, elems){
	return (query === falseFunc || !elems || elems.length === 0) ? null : findOne(query, elems);
});
/**
 * Test a single element against a query.
 *
 * @param {Object} elem
 * @param {String|Function} query - selector, or an already compiled test function
 * @param {Object} [options] - passed to `compile` when `query` is not a function
 * @return {*} result of the compiled test for `elem`
 */
function is(elem, query, options){
	return (typeof query === "function" ? query : compile(query, options))(elem);
}
/*
	the exported interface
*/
function CSSselect(query, elems, options){
	return selectAll(query, elems, options);
}
// Attach the public helpers to the exported function.
CSSselect.compile = compile;
CSSselect.filters = Pseudos.filters;
CSSselect.pseudos = Pseudos.pseudos;

CSSselect.selectAll = selectAll;
CSSselect.selectOne = selectOne;

CSSselect.is = is;

//legacy methods (might be removed)
CSSselect.parse = compile;
CSSselect.iterate = selectAll;
var DomUtils = require("domutils"),
hasAttrib = DomUtils.hasAttrib,
getAttributeValue = DomUtils.getAttributeValue,
falseFunc = require("./basefunctions.js").falseFunc;
// Characters that have a special meaning inside a regular expression.
var reChars = /[-[\]{}()*+?.,\\^$|#\s]/g;

/*
  attribute selectors

  Every rule has the signature `rule(next, data)` and returns a test function
  `function(elem)`. `data.name` is the attribute name, `data.value` the value
  to compare against and `data.ignoreCase` switches to case-insensitive
  comparison. A returned test only passes when `next(elem)` passes as well.
  Rules that can never match return `falseFunc` itself so callers can detect it.
*/
var attributeRules = {
  __proto__: null,

  // Attribute value is exactly `value`.
  equals: function(next, data){
    var name = data.name,
        value = data.value;

    if(data.ignoreCase){
      value = value.toLowerCase();

      return function(elem){
        var attr = getAttributeValue(elem, name);
        return attr != null && attr.toLowerCase() === value && next(elem);
      };
    }

    return function(elem){
      return getAttributeValue(elem, name) === value && next(elem);
    };
  },

  // Attribute value is `value`, or starts with `value` followed by "-".
  hyphen: function(next, data){
    var name = data.name,
        value = data.value,
        len = value.length;

    if(data.ignoreCase){
      value = value.toLowerCase();

      return function(elem){
        var attr = getAttributeValue(elem, name);
        return attr != null &&
          (attr.length === len || attr.charAt(len) === "-") &&
          attr.substr(0, len).toLowerCase() === value &&
          next(elem);
      };
    }

    return function(elem){
      var attr = getAttributeValue(elem, name);
      return attr != null &&
        attr.substr(0, len) === value &&
        (attr.length === len || attr.charAt(len) === "-") &&
        next(elem);
    };
  },

  // `value` is one of the whitespace-separated words of the attribute value.
  element: function(next, data){
    var name = data.name,
        value = data.value;

    // A word cannot contain whitespace, so such a value can never match.
    if(/\s/.test(value)){
      return falseFunc;
    }

    value = value.replace(reChars, "\\$&");

    var pattern = "(?:^|\\s)" + value + "(?:$|\\s)",
        flags = data.ignoreCase ? "i" : "",
        regex = new RegExp(pattern, flags);

    return function(elem){
      var attr = getAttributeValue(elem, name);
      return attr != null && regex.test(attr) && next(elem);
    };
  },

  // Attribute is present, whatever its value.
  exists: function(next, data){
    var name = data.name;

    return function(elem){
      return hasAttrib(elem, name) && next(elem);
    };
  },

  // Attribute value starts with `value`; an empty `value` never matches.
  start: function(next, data){
    var name = data.name,
        value = data.value,
        len = value.length;

    if(len === 0){
      return falseFunc;
    }

    if(data.ignoreCase){
      value = value.toLowerCase();

      return function(elem){
        var attr = getAttributeValue(elem, name);
        return attr != null && attr.substr(0, len).toLowerCase() === value && next(elem);
      };
    }

    return function(elem){
      var attr = getAttributeValue(elem, name);
      return attr != null && attr.substr(0, len) === value && next(elem);
    };
  },

  // Attribute value ends with `value`; an empty `value` never matches.
  end: function(next, data){
    var name = data.name,
        value = data.value,
        len = -value.length; // negative offset: substr(len) yields the tail

    if(len === 0){
      return falseFunc;
    }

    if(data.ignoreCase){
      value = value.toLowerCase();

      return function(elem){
        var attr = getAttributeValue(elem, name);
        return attr != null && attr.substr(len).toLowerCase() === value && next(elem);
      };
    }

    return function(elem){
      var attr = getAttributeValue(elem, name);
      return attr != null && attr.substr(len) === value && next(elem);
    };
  },

  // Attribute value contains `value`; an empty `value` never matches.
  any: function(next, data){
    var name = data.name,
        value = data.value;

    if(value === ""){
      return falseFunc;
    }

    if(data.ignoreCase){
      var regex = new RegExp(value.replace(reChars, "\\$&"), "i");

      return function(elem){
        var attr = getAttributeValue(elem, name);
        return attr != null && regex.test(attr) && next(elem);
      };
    }

    return function(elem){
      var attr = getAttributeValue(elem, name);
      return attr != null && attr.indexOf(value) >= 0 && next(elem);
    };
  },

  // Attribute value differs from `value`; with an empty `value` the attribute
  // must have a truthy (non-empty) value.
  not: function(next, data){
    var name = data.name,
        value = data.value;

    if(value === ""){
      return function(elem){
        return !!getAttributeValue(elem, name) && next(elem);
      };
    } else if(data.ignoreCase){
      value = value.toLowerCase();

      return function(elem){
        var attr = getAttributeValue(elem, name);
        return attr != null && attr.toLowerCase() !== value && next(elem);
      };
    }

    return function(elem){
      return getAttributeValue(elem, name) !== value && next(elem);
    };
  }
};
module.exports = {
compile: function(next, data){
return attributeRules[data.action](next, data);
rules: attributeRules
module.exports = {
trueFunc: function trueFunc(){
return true;
falseFunc: function falseFunc(){
return false;
/*
  compiles a selector to an executable function
*/
module.exports = compile;
var parse = require("CSSwhat"),
DomUtils = require("domutils"),
isTag = DomUtils.isTag,
Rules = require("./general.js"),
sortRules = require("./sort.js"),
BaseFuncs = require("./basefunctions.js"),
trueFunc = BaseFuncs.trueFunc,
falseFunc = BaseFuncs.falseFunc;
/**
 * Compile a selector into a test function.
 *
 * `parse(selector, options)` yields one rule list per alternative; each list
 * is compiled with `compileRules` and the results are OR-combined with
 * `reduceRules`, starting from `falseFunc`.
 *
 * @param {String} selector Selector passed on to `parse`.
 * @param {Object} [options] Passed on to `parse`.
 * @return {Function} `function(elem)` that only passes for nodes accepted by `isTag`.
 */
function compile(selector, options){
  var next = parse(selector, options)
    .map(compileRules)
    .reduce(reduceRules, falseFunc);

  return function(elem){
    return isTag(elem) && next(elem);
  };
}
/**
 * Compile one list of parsed rules into a single test function.
 *
 * The rules are ordered by `sortRules` and folded starting from `trueFunc`;
 * each step wraps the function built so far using `Rules[rule.type]`.
 *
 * @param {Array} arr Parsed rules, each carrying a `type`.
 * @return {Function} The combined test, or `falseFunc` for an empty list or as
 *         soon as one rule compiles to `falseFunc`.
 */
function compileRules(arr){
  if(arr.length === 0) return falseFunc;

  return sortRules(arr).reduce(function(func, rule){
    // Once the chain can never match there is no point in wrapping it further.
    if(func === falseFunc) return func;
    return Rules[rule.type](func, rule);
  }, trueFunc);
}
/**
 * OR-combine two test functions, simplifying when one of them is a constant.
 *
 * @param {Function} a Test built so far.
 * @param {Function} b Test to add.
 * @return {Function} `a` when `b` is `falseFunc` or `a` is `trueFunc`; `b` when
 *         `a` is `falseFunc` or `b` is `trueFunc`; otherwise a function that
 *         passes when either test passes.
 */
function reduceRules(a, b){
  if(b === falseFunc || a === trueFunc){
    return a;
  }
  if(a === falseFunc || b === trueFunc){
    return b;
  }

  return function combine(elem){
    return a(elem) || b(elem);
  };
}
//:not and :has have to compile selectors
//doing this in lib/pseudos.js would lead to circular dependencies,
//so we add them here
var Pseudos = require("./pseudos.js"),
filters = Pseudos.filters,
isParent = Pseudos.pseudos.parent,
findOne = DomUtils.findOne,
getChildren = DomUtils.getChildren;
// Passes when `next` passes and the compiled `select` test fails for the element.
filters.not = function(next, select){
  var func = compile(select);

  // Constant inner tests need no wrapper.
  if(func === falseFunc) return next;
  if(func === trueFunc) return falseFunc;

  return function(elem){
    return !func(elem) && next(elem);
  };
};
// Passes when `next` passes and `findOne` locates a node matching the compiled
// `selector` among `getChildren(elem)`.
filters.has = function(next, selector){
  var func = compile(selector);

  if(func === falseFunc) return falseFunc;
  // An always-true inner test only requires the element to be a `parent`.
  if(func === trueFunc) return function(elem){
    return isParent(elem) && next(elem);
  };

  return function has(elem){
    return next(elem) && findOne(func, getChildren(elem)) !== null;
  };
};
var DomUtils = require("domutils"),
isTag = DomUtils.isTag,
getParent = DomUtils.getParent,
getChildren = DomUtils.getChildren,
getSiblings = DomUtils.getSiblings,
getName = DomUtils.getName;
all available rules
module.exports = {
__proto__: null,
attribute: require("./attributes.js").compile,
pseudo: require("./pseudos.js").compile,
tag: function(next, data){
var name =;
return function tag(elem){
return getName(elem) === name && next(elem);
descendant: function(next){
return function descendant(elem){
var found = false;
while(!found && (elem = getParent(elem))){
found = next(elem);
return found;
parent: function(next){
return function parent(elem){
return getChildren(elem).some(next);
child: function(next){
return function child(elem){
var parent = getParent(elem);
return !!parent && next(parent);
sibling: function(next){
return function sibling(elem){
var siblings = getSiblings(elem);
for(var i = 0; i < siblings.length; i++){
if(siblings[i] === elem) break;
if(next(siblings[i])) return true;
return false;
adjacent: function(next){
return function adjacent(elem){
var siblings = getSiblings(elem),
for(var i = 0; i < siblings.length; i++){
if(siblings[i] === elem) break;
lastElement = siblings[i];
return !!lastElement && next(lastElement);
universal: function(next){
return next;
var BaseFuncs = require("./basefunctions.js"),
trueFunc = BaseFuncs.trueFunc,
falseFunc = BaseFuncs.falseFunc;
module.exports = function nthCheck(formula){
return compile(parse(formula));
module.exports.parse = parse;
module.exports.compile = compile;
//[ ['-'|'+']? INTEGER? {N} [ S* ['-'|'+'] S* INTEGER ]?
var re_nthElement = /^([+\-]?\d*n)?\s*(?:([+\-]?)\s*(\d+))?$/;

/**
 * Parse an nth-check formula ("even", "odd", "an+b", "an", "b").
 *
 * @param {String} formula The formula; surrounding whitespace and case are ignored.
 * @return {Array} Two numbers `[a, b]` describing `a*n + b`.
 * @throws {SyntaxError} When the formula does not match the grammar.
 */
function parse(formula){
  formula = formula.trim().toLowerCase();

  if(formula === "even"){
    return [2, 0];
  } else if(formula === "odd"){
    return [2, 1];
  } else {
    var parsed = formula.match(re_nthElement);

    if(!parsed){
      throw new SyntaxError("n-th rule couldn't be parsed ('" + formula + "')");
    }

    var a;

    if(parsed[1]){
      a = parseInt(parsed[1], 10);
      // "n", "+n" and "-n" carry no digits, so parseInt yields NaN and the
      // coefficient is an implicit +1 / -1. An explicit "0n" keeps a === 0.
      if(isNaN(a)){
        if(parsed[1].charAt(0) === "-") a = -1;
        else a = 1;
      }
    } else a = 0;

    return [
      a,
      parsed[3] ? parseInt((parsed[2] || "") + parsed[3], 10) : 0
    ];
  }
}
/**
 * returns a function that checks if an elements index matches the given rule
 * highly optimized to return the fastest solution
 *
 * @param {Array} parsed `[a, b]` as produced by `parse`.
 * @return {Function} `function(pos)` taking a zero-based position, or the
 *         shared `trueFunc` / `falseFunc` when the result is constant.
 */
function compile(parsed){
  var a = parsed[0],
      b = parsed[1] - 1; // shift b so it can be compared with zero-based positions

  //when b <= 0, a*n won't be possible for any matches when a < 0
  //besides, the specification says that no element is matched when a and b are 0
  if(b < 0 && a <= 0) return falseFunc;

  //when a is in the range -1..1, it matches any element (so only b is checked)
  if(a ===-1) return function(pos){ return pos <= b; };
  if(a === 0) return function(pos){ return pos === b; };
  //when b <= 0 and a === 1, they match any element
  if(a === 1) return b < 0 ? trueFunc : function(pos){ return pos >= b; };

  //when a > 0, modulo can be used to check if there is a match
  var bMod = b % a;
  if(bMod < 0) bMod += a;

  if(a > 1){
    return function(pos){
      return pos >= b && pos % a === bMod;
    };
  }

  a *= -1; //make `a` positive

  return function(pos){
    return pos <= b && pos % a === bMod;
  };
}
/*
  pseudo selectors

  ---

  they are available in two forms:
  * filters called when the selector
    is compiled and return a function
    that needs to return next()
  * pseudos get called on execution
    they need to return a boolean
*/
var DomUtils = require("domutils"),
isTag = DomUtils.isTag,
getText = DomUtils.getText,
getParent = DomUtils.getParent,
getChildren = DomUtils.getChildren,
getSiblings = DomUtils.getSiblings,
hasAttrib = DomUtils.hasAttrib,
getName = DomUtils.getName,
getAttribute= DomUtils.getAttributeValue,
getNCheck = require("./nth-check.js"),
checkAttrib = require("./attributes.js").rules.equals,
BaseFuncs = require("./basefunctions.js"),
trueFunc = BaseFuncs.trueFunc,
falseFunc = BaseFuncs.falseFunc;
//helper methods
/**
 * Return the first entry of `elems` that `isTag` accepts.
 *
 * @param {Array} [elems] List of nodes; may be missing.
 * @return {Object|undefined} The first tag node, or `undefined` when there is
 *         none or `elems` is falsy.
 */
function getFirstElement(elems){
  for(var i = 0; elems && i < elems.length; i++){
    if(isTag(elems[i])) return elems[i];
  }
}
// Build a filter that requires attribute `name` to equal `value`
// (delegates to `checkAttrib`).
function getAttribFunc(name, value){
  var data = {name: name, value: value};

  return function attribFunc(next){
    return checkAttrib(next, data);
  };
}

// Build a test that passes for any element that has a parent and passes `next`.
function getChildFunc(next){
  return function(elem){
    return !!getParent(elem) && next(elem);
  };
}

/*
  Filters are called as `filter(next, argument)` while a selector is compiled
  and return the test function for an element; that test has to pass `next`
  as well. In the sibling loops below only tag nodes are taken into account,
  other node types are skipped.
*/
var filters = {
  // Element text contains `text`; one pair of matching surrounding quotes is removed.
  contains: function(next, text){
    if(
      (text.charAt(0) === "\"" || text.charAt(0) === "'") &&
      text.charAt(0) === text.substr(-1)
    ){
      text = text.slice(1, -1);
    }

    return function contains(elem){
      return getText(elem).indexOf(text) >= 0 && next(elem);
    };
  },

  //location specific methods

  //first- and last-child methods return as soon as they find another element
  "first-child": function(next){
    return function firstChild(elem){
      return getFirstElement(getSiblings(elem)) === elem && next(elem);
    };
  },
  "last-child": function(next){
    return function lastChild(elem){
      var siblings = getSiblings(elem);

      for(var i = siblings.length - 1; i >= 0; i--){
        if(siblings[i] === elem) return next(elem);
        if(isTag(siblings[i])) break;
      }

      return false;
    };
  },
  "first-of-type": function(next){
    return function firstOfType(elem){
      var siblings = getSiblings(elem);

      for(var i = 0; i < siblings.length; i++){
        if(isTag(siblings[i])){
          if(siblings[i] === elem) return next(elem);
          if(getName(siblings[i]) === getName(elem)) break;
        }
      }

      return false;
    };
  },
  "last-of-type": function(next){
    return function lastOfType(elem){
      var siblings = getSiblings(elem);

      for(var i = siblings.length-1; i >= 0; i--){
        if(isTag(siblings[i])){
          if(siblings[i] === elem) return next(elem);
          if(getName(siblings[i]) === getName(elem)) break;
        }
      }

      return false;
    };
  },
  "only-of-type": function(next){
    return function onlyOfType(elem){
      var siblings = getSiblings(elem);

      for(var i = 0, j = siblings.length; i < j; i++){
        if(isTag(siblings[i])){
          if(siblings[i] === elem) continue;
          if(getName(siblings[i]) === getName(elem)) return false;
        }
      }

      return next(elem);
    };
  },
  "only-child": function(next){
    return function onlyChild(elem){
      var siblings = getSiblings(elem);

      for(var i = 0; i < siblings.length; i++){
        if(isTag(siblings[i]) && siblings[i] !== elem) return false;
      }

      return next(elem);
    };
  },

  // The nth-* filters count the qualifying tag siblings before (or after) the
  // element and hand that zero-based position to the compiled nth check.
  "nth-child": function(next, rule){
    var func = getNCheck(rule);

    if(func === falseFunc) return func;
    if(func === trueFunc) return getChildFunc(next);

    return function nthChild(elem){
      var siblings = getSiblings(elem);

      for(var i = 0, pos = 0; i < siblings.length; i++){
        if(isTag(siblings[i])){
          if(siblings[i] === elem) break;
          else pos++;
        }
      }

      return func(pos) && next(elem);
    };
  },
  "nth-last-child": function(next, rule){
    var func = getNCheck(rule);

    if(func === falseFunc) return func;
    if(func === trueFunc) return getChildFunc(next);

    return function nthLastChild(elem){
      var siblings = getSiblings(elem);

      for(var pos = 0, i = siblings.length - 1; i >= 0; i--){
        if(isTag(siblings[i])){
          if(siblings[i] === elem) break;
          else pos++;
        }
      }

      return func(pos) && next(elem);
    };
  },
  "nth-of-type": function(next, rule){
    var func = getNCheck(rule);

    if(func === falseFunc) return func;
    if(func === trueFunc) return getChildFunc(next);

    return function nthOfType(elem){
      var siblings = getSiblings(elem);

      for(var pos = 0, i = 0; i < siblings.length; i++){
        if(isTag(siblings[i])){
          if(siblings[i] === elem) break;
          if(getName(siblings[i]) === getName(elem)) pos++;
        }
      }

      return func(pos) && next(elem);
    };
  },
  "nth-last-of-type": function(next, rule){
    var func = getNCheck(rule);

    if(func === falseFunc) return func;
    if(func === trueFunc) return getChildFunc(next);

    return function nthLastOfType(elem){
      var siblings = getSiblings(elem);

      for(var pos = 0, i = siblings.length - 1; i >= 0; i--){
        if(isTag(siblings[i])){
          if(siblings[i] === elem) break;
          if(getName(siblings[i]) === getName(elem)) pos++;
        }
      }

      return func(pos) && next(elem);
    };
  },

  //jQuery extensions (others follow as pseudos)
  checkbox: getAttribFunc("type", "checkbox"),
  file: getAttribFunc("type", "file"),
  password: getAttribFunc("type", "password"),
  radio: getAttribFunc("type", "radio"),
  reset: getAttribFunc("type", "reset"),
  image: getAttribFunc("type", "image"),
  submit: getAttribFunc("type", "submit")
};
//while filters are precompiled, pseudos get called when they are needed
var pseudos = {
root: function(elem){
return !getParent(elem);
empty: function(elem){
return !getChildren(elem).some(function(elem){
return isTag(elem) || elem.type === "text";
//to consider: :target, :enabled
selected: function(elem){
if(hasAttrib(elem, "selected")) return true;
else if(getName(elem) !== "option") return false;
//the first <option> in a <select> is also selected
var parent = getParent(elem);
if(!parent || getName(parent) !== "select") return false;
var siblings = getChildren(parent),
sawElem = false;
for(var i = 0; i < siblings.length; i++){
if(siblings[i] === elem){
sawElem = true;
} else if(!sawElem){
return false;
} else if(hasAttrib(siblings[i], "selected")){
return false;
return sawElem;
disabled: function(elem){
return hasAttrib(elem, "disabled");
enabled: function(elem){
return !hasAttrib(elem, "disabled");
checked: function(elem){
return hasAttrib(elem, "checked") || pseudos.selected(elem);
//jQuery extensions
//:parent is the inverse of :empty
parent: function(elem){
return !pseudos.empty(elem);
header: function(elem){
var name = getName(elem);
return name === "h1" ||
name === "h2" ||
name === "h3" ||
name === "h4" ||
name === "h5" ||
name === "h6";
button: function(elem){
var name = getName(elem);
return name === "button" ||
name === "input"
Copy link

iplus26 commented Nov 24, 2016


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment