Skip to content

Instantly share code, notes, and snippets.

@DigiTec
Last active March 14, 2016 08:11
Show Gist options
  • Save DigiTec/c8be7949eb0e06884ad9 to your computer and use it in GitHub Desktop.
Save DigiTec/c8be7949eb0e06884ad9 to your computer and use it in GitHub Desktop.
Personal sample of using Node.js for processing web page contents using request and cheerio
var request = require('request');
var cheerio = require('cheerio');
// Android reference pages to scrape. Each entry is fetched on its own and
// summarized into a separate JSON document.
var urls = [
    'https://developer.android.com/reference/com/google/android/gms/location/Geofence.html',
    'http://developer.android.com/reference/com/google/android/gms/maps/model/LatLng.html'
];
/**
 * Flattens scraped text onto one line: trims it, strips newline characters,
 * then squeezes every remaining whitespace run down to a single space.
 *
 * The newline strip deliberately runs before the whitespace collapse so the
 * output stays identical to what this script has always produced.
 *
 * @param {string} text - Raw text taken from a page node.
 * @returns {string} The single-line form of `text`.
 */
function collapseWhitespace(text) {
    return text.trim().replace(/\n/g, '').replace(/\s+/g, ' ');
}

/**
 * Builds one record per element matched by `selection`.
 *
 * @param {Function} $ - The cheerio instance loaded for the current page.
 * @param {Object} selection - A cheerio selection to iterate.
 * @param {Function} buildRecord - Receives each element wrapped in `$()` and
 *     returns the record to store for it.
 * @returns {Array} The records, in document order.
 */
function collectRecords($, selection, buildRecord) {
    var records = [];
    selection.each(function (i, node) {
        records.push(buildRecord($(node)));
    });
    return records;
}

/**
 * Extracts a summary of the class described by one reference page.
 *
 * Properties are assigned in a fixed order (name, classModifiers, the
 * optional extends/implements, nestedClasses, constants, fields,
 * constructors, methods) because that order is what JSON.stringify emits.
 *
 * @param {string} body - HTML source of the reference page.
 * @returns {Object} The class summary.
 */
function parseClassPage(body) {
    // Declared locally: the original assigned `$` without a declaration, which
    // created an implicit global and would throw under strict mode.
    var $ = cheerio.load(body);
    var classObj = {};

    // Process header
    var header = $('#jd-header');
    classObj.name = header.find("[itemprop='name']").text().trim();
    classObj.classModifiers = collapseWhitespace(header.contents().first().text());

    // Robustly process looking for the optional properties of the class.
    // A node mentioning "extends" is never also treated as "implements".
    header.contents().each(function (i, node) {
        var text = $(node).text();
        if (text.match(/extends/)) {
            classObj.extends = text.trim();
        } else if (text.match(/implements/)) {
            classObj.implements = collapseWhitespace(text);
        }
    });

    // Nested Classes
    classObj.nestedClasses = collectRecords($, $('#nestedclasses').children('tr.api'), function (row) {
        return {
            classModifiers: row.find('.jd-typecol').text().trim(),
            name: row.find('.jd-linkcol').text().trim()
        };
    });

    // Constants (the type column is only trimmed here, not collapsed)
    classObj.constants = collectRecords($, $('#constants').children('tr.api'), function (row) {
        return {
            typeName: row.find('.jd-typecol').text().trim(),
            name: row.find('.jd-linkcol').text().trim(),
            desc: row.find('.jd-descrcol').text().trim()
        };
    });

    // Public Fields
    classObj.fields = collectRecords($, $('#lfields').children('tr.api'), function (row) {
        return {
            typeName: collapseWhitespace(row.find('.jd-typecol').text()),
            name: row.find('.jd-linkcol').text().trim(),
            desc: row.find('.jd-descrcol').text().trim()
        };
    });

    // Constructors
    classObj.constructors = collectRecords($, $('#pubctors').find('.jd-linkcol'), function (cell) {
        return {
            name: cell.find('a').text().trim(),
            desc: cell.find('.jd-descrdiv').text().trim()
        };
    });

    // Methods
    classObj.methods = collectRecords($, $('#pubmethods').children('tr.api'), function (row) {
        return {
            returnTypeName: collapseWhitespace(row.find('.jd-typecol').text()),
            name: row.find('.jd-linkcol .sympad a').text().trim(),
            desc: row.find('.jd-linkcol .jd-descrdiv').text().trim(),
            // The parameter list is read from the last node inside the first <nobr>.
            params: row.find('.jd-linkcol nobr').first().contents().last().text().trim()
        };
    });

    return classObj;
}

// Fetch every page and print its class summary as pretty-printed JSON on
// stdout. Failures are reported on stderr so they never mix into the JSON.
urls.forEach(function (pageUrl) {
    request({
        method: 'GET',
        url: pageUrl
    }, function (error, response, body) {
        if (error) {
            console.error('Request for ' + pageUrl + ' failed:', error);
            return;
        }
        if (response.statusCode !== 200) {
            console.error('Request for ' + pageUrl + ' returned HTTP ' + response.statusCode);
            return;
        }
        console.log(JSON.stringify(parseClassPage(body), null, 4));
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment