Skip to content

Instantly share code, notes, and snippets.

@iegik
Created March 1, 2016 15:35
Show Gist options
  • Select an option

  • Save iegik/f249884b35cf31a6e7f9 to your computer and use it in GitHub Desktop.

Select an option

Save iegik/f249884b35cf31a6e7f9 to your computer and use it in GitHub Desktop.
crawler
'use strict';

/**
 * Returns the `content` of the `<meta name="...">` tag in the current
 * document, or an empty string when the page has no such tag.
 *
 * @param {string} name - value of the meta tag's `name` attribute
 * @returns {string} the tag's content, or '' if the tag is absent
 */
function readMetaContent(name) {
  var tag = document.querySelector('meta[name=' + name + ']');
  // querySelector yields null when nothing matches; reading `.content`
  // off null would throw and abort the whole script.
  return tag ? tag.content : '';
}

// Site origin to crawl, and the sitemap accumulator (`sm`):
//   sm.data - sitemap entries keyed by root-relative path
//   sm.meta - keywords/description of the current page plus a running
//             counter (`length`) of recorded paths
// Both are declared at the top level because `scan` reads `sm` as a free
// variable.
var domain = 'http://example.com',
  sm = {
    data: {},
    meta: {
      keywords: readMetaContent('keywords'),
      description: readMetaContent('description'),
      length: 0
    }
  };
/**
 * Loads `page` in a new iframe appended to `document.body` and calls `fn`
 * every time the frame fires its `load` event.
 *
 * @param {string} page - URL assigned to the iframe's `src`
 * @param {function(Window, function)} fn - callback invoked as
 *   `fn(frame.contentWindow, inject)`; the second argument lets the callback
 *   schedule further pages
 * @throws {TypeError} if `fn` is not a function
 */
function inject(page, fn) {
  if (typeof fn !== 'function') {
    throw new TypeError('inject: fn must be a function');
  }
  var frame = document.createElement('iframe');
  // Call `fn` directly rather than serialising it into an inline `onload`
  // attribute: string-built handlers are rejected by a strict
  // Content-Security-Policy, lose the callback's closure, and produce
  // invalid code for native or bound functions.
  frame.addEventListener('load', function () {
    fn(frame.contentWindow, inject);
  });
  frame.src = page;
  document.body.appendChild(frame);
}
/**
 * Records the same-site links of the current document in the global
 * sitemap accumulator `sm`.
 *
 * The `href` of every element matching `[href]` is collected, URLs
 * containing `/promo/` are dropped, the rest are sorted, the `domain`
 * prefix is stripped, and only root-relative paths without a `#` are kept.
 * Each path not yet present in `sm.data` is stored there as a sitemap entry
 * (`loc`, `lastmod`, `changefreq`, `priority`, `scanned`), logged to the
 * console, and counted in `sm.meta.length`.
 *
 * NOTE(review): links are read from the global `document`; `parent` and
 * `cb` are accepted but never used - confirm whether `parent.document` was
 * intended.
 *
 * @param {*} parent - first argument supplied by the caller; not read here
 * @param {*} cb - second argument supplied by the caller; not read here
 * @returns {undefined}
 */
function scan(parent, cb) {
  var domain = 'http://example.com';
  var lastmod = new Date().toJSON();

  [].map.call(document.querySelectorAll('[href]'), function (el) {
    // Some elements expose `href` as an object rather than a string;
    // coerce so the string operations below are safe.
    return String(el.href);
  })
    .filter(function (url) {
      return url.indexOf('/promo/') === -1;
    })
    // Default sort orders strings by UTF-16 code units. A comparator that
    // returns a boolean never yields a negative number, so it cannot sort.
    .sort()
    .map(function (url) {
      // Literal prefix check: building a RegExp from `domain` would treat
      // its dots as wildcards.
      return url.indexOf(domain) === 0 ? url.slice(domain.length) : url;
    })
    .filter(function (path) {
      return path.charAt(0) === '/' && path.indexOf('#') === -1;
    })
    .forEach(function (path) {
      if (!sm || sm.data[path]) {
        return;
      }
      // TODO: recursive crawling (`cb(path, scan)`) is not enabled; each
      // path is only recorded and logged.
      console.log('scanning #' + (sm.meta.length++) + ' ' + path);
      sm.data[path] = {
        loc: path,
        lastmod: lastmod,
        changefreq: 'monthly',
        priority: 0.64,
        scanned: true
      };
    });
}
// Start the crawl: load `domain` in an iframe and hand `scan` to `inject`
// as the callback to run for that frame.
inject(domain, scan);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment