Created
May 2, 2013 01:17
-
-
Save corydolphin/5499578 to your computer and use it in GitHub Desktop.
Parse a Craigslist posting page for its description, title, images, location, and postTime, using cheerio and request in Node.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| var cheerio = require('cheerio') | |
| , request = require('request'); | |
// Sample Craigslist posting URL used by the demo call at the bottom of this file.
var example_url = 'http://boston.craigslist.org/gbs/sub/3779053559.html';
/**
 * Fetch a Craigslist posting page and extract its main fields.
 *
 * @param {string} url - Address of the posting page to download.
 * @param {function} callback - Called as `callback(err)` when the request
 *   fails, otherwise as `callback(false, listing)` where `listing` has:
 *     - title:       trimmed text of `.postingtitle`
 *     - description: text of `#postingbody`
 *     - images:      plain array of the `src` attribute of every `<img>`
 *     - latitude / longitude: `data-latitude` / `data-longitude` of `#leaflet`
 *     - postTime:    `title` attribute of the first posting-info `<date>`
 *     - id:          first run of digits in the first posting-info paragraph,
 *                    or `undefined` when that text contains no digits
 *   Fields whose element or attribute is missing are `undefined`
 *   (or an empty string / empty array for text and images).
 */
var parsePage = function (url, callback) {
  request(url, function (err, resp, body) {
    if (err) {
      callback(err);
      return;
    }

    // Keep `$` local: assigning to an undeclared `$` leaks a global that
    // concurrent calls would share, and throws in strict mode / ES modules.
    var $ = cheerio.load(body);

    // Build a real array rather than relying on the return type of cheerio's
    // own `.map()`; images without a `src` attribute are skipped.
    var image_links = $('img')
      .toArray()
      .map(function (el) {
        return $(el).attr('src');
      })
      .filter(function (src) {
        return src != null;
      });

    // `match` returns null when there are no digits; guard so a page without
    // a posting id yields `undefined` instead of throwing inside the callback.
    var id_match = $('.postinginfos p.postinginfo').first().text().match(/\d+/);
    var listing_id = id_match ? id_match[0] : undefined;

    // `false` (not `null`) is kept as the success marker for existing callers.
    callback(false, {
      title: $('.postingtitle').text().trim(),
      description: $('#postingbody').text(),
      images: image_links,
      latitude: $('#leaflet').attr('data-latitude'),
      longitude: $('#leaflet').attr('data-longitude'),
      postTime: $('.postinginfos p.postinginfo > date').first().attr('title'),
      id: listing_id
    });
  });
};
// Demo run: parse the example posting and print the extracted fields.
parsePage(example_url, function (err, res) {
  if (err) {
    // Report the failure instead of printing `undefined`.
    console.error(err);
    return;
  }
  console.log(res);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment