Skip to content

Instantly share code, notes, and snippets.

@corydolphin
Created May 2, 2013 01:17
Show Gist options
  • Select an option

  • Save corydolphin/5499578 to your computer and use it in GitHub Desktop.

Select an option

Save corydolphin/5499578 to your computer and use it in GitHub Desktop.
Parse a Craigslist page for description, title, images, location, and postTime using cheerio and request (Node.js)
var cheerio = require('cheerio')
, request = require('request');
// Sample Craigslist posting URL passed to parsePage in the demo call below.
var example_url = 'http://boston.craigslist.org/gbs/sub/3779053559.html';
/**
 * Download a Craigslist posting page and extract its main fields.
 *
 * @param {string} url - Address of the posting page to fetch.
 * @param {function} callback - Called as callback(err) when the request
 *   fails; otherwise called as callback(false, listing), where listing is
 *   an object with these keys:
 *     title       - trimmed text of `.postingtitle`
 *     description - text of `#postingbody`
 *     images      - plain array with the `src` of every `img` on the page
 *     latitude    - `data-latitude` attribute of `#leaflet`
 *     longitude   - `data-longitude` attribute of `#leaflet`
 *     postTime    - `title` attribute of the first posting-info `date`
 *     id          - first run of digits in the first posting-info
 *                   paragraph, or null when there is none
 */
var parsePage = function(url, callback){
  request(url, function(err, resp, body) {
    if (err){
      callback(err);
      return;
    }

    // Keep the parser handle local: assigning to a bare `$` would create an
    // implicit global (and is a ReferenceError in strict mode).
    var $ = cheerio.load(body);

    var listing_title = $('.postingtitle').text().trim();

    // Copy the mapped result into a real array so callers receive only the
    // src strings, whether map() hands back a wrapper object or an array.
    var image_links = Array.prototype.slice.call(
      $('img').map(function(i, el){
        return $(el).attr('src');
      })
    );

    var listing_description = $('#postingbody').text();

    // Both coordinates live on the same element, so look it up once.
    var map_element = $('#leaflet');
    var listing_latitude = map_element.attr('data-latitude');
    var listing_longitude = map_element.attr('data-longitude');

    // match() returns null when the text holds no digits (e.g. the
    // posting-info paragraph is absent); guard so that case yields a null
    // id instead of an uncaught TypeError inside this async callback.
    var id_match = $('.postinginfos p.postinginfo').first().text().match(/\d+/);
    var listing_id = id_match ? id_match[0] : null;

    var listing_datestamp = $('.postinginfos p.postinginfo > date').first().attr('title');

    callback(false, {
      title : listing_title,
      description: listing_description,
      images: image_links,
      latitude: listing_latitude,
      longitude: listing_longitude,
      postTime: listing_datestamp,
      id : listing_id
    });
  });
};
// Demo: fetch the sample posting and print the parsed listing, or report the
// request error instead of silently logging `undefined`.
parsePage(example_url, function(err, res){
  if (err){
    console.error(err);
    return;
  }
  console.log(res);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment