Created
December 23, 2011 22:36
-
-
Save karmadude/1515567 to your computer and use it in GitHub Desktop.
Web Scraping with Node
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://github.com/tmpvar/jsdom | |
// npm install jsdom | |
var jsdom = require('jsdom'); | |
/**
 * Turn an `href`/`src` attribute value into a full URL on the Dribbble site.
 *
 * @param {string|null|undefined} path - attribute value read from the page.
 * @returns {string|null} `null` when the attribute is missing or empty, the
 *   value unchanged when it already has a scheme or is protocol-relative,
 *   otherwise `dribbbleURL + path`.
 */
function absoluteDribbbleURL (path) {
  if (!path) {
    return null;
  }
  // Already absolute ("http://…") or protocol-relative ("//…"): prefixing it
  // with the site root would produce a broken URL.
  if (/^(?:[a-z][a-z0-9+.\-]*:)?\/\//i.test(path)) {
    return path;
  }
  return dribbbleURL + path;
}

/**
 * Build one dribbble record from a jQuery-wrapped `.dribbbles > li` element.
 * Missing elements/attributes yield `null` (or `0` for stats) instead of
 * throwing, so one malformed list item cannot abort the whole page.
 *
 * @param {Object} $item - jQuery wrapper around a single list item.
 * @returns {Object} record with id, title, comment, date, url, img and stats.
 */
function parseDribbble ($item) {
  var rawId = $item.attr('id');
  var viewsHTML = $item.find('.views').html();

  // Property order is kept stable because the result is JSON-serialised.
  return {
    id: rawId ? rawId.replace('screenshot-', '') : null,
    title: $item.find('.dribbble-over strong').html(),
    comment: $item.find('.dribbble-over .comment').html(),
    date: $item.find('.dribbble-over em').html(),
    url: absoluteDribbbleURL($item.find('.dribbble-link').attr('href')),
    img: absoluteDribbbleURL($item.find('.dribbble-link img').attr('src')),
    stats: {
      views: (viewsHTML || '').trim() || 0,
      comments: $item.find('.cmnt a').html() || 0,
      favs: $item.find('.fav a').html() || 0
    }
  };
}

/**
 * Scrape one page of shots and store the records in `dribbbles[page - 1]`.
 *
 * Page 1 is loaded from `url` as given; later pages are loaded from
 * `dribbbleURL + '/shots?page=' + page`. Every call bumps `pagesProcessed`
 * exactly once when its jsdom callback fires - also when loading failed - so
 * the final summary is printed once `pagesProcessed` reaches `maxPages`.
 *
 * Relies on the file-level `jsdom`, `jqueryURL`, `dribbbleURL`, `dribbbles`,
 * `pagesProcessed` and `maxPages`.
 *
 * @param {string} url - address used for the first page.
 * @param {number} page - 1-based page number.
 */
function scrapeDribbble (url, page) {
  // Computed into a local so the caller-visible parameter is not reassigned.
  var pageURL = page > 1 ? dribbbleURL + '/shots?page=' + page : url;
  var shots = [];
  dribbbles[page - 1] = shots;

  jsdom.env(pageURL, [
    jqueryURL
  ],
  function (errors, window) {
    var usable = Boolean(window) && typeof window.$ === 'function';

    if (errors || !usable) {
      console.error('Problem loading ' + pageURL + ':', errors || 'window.$ is unavailable');
    }

    // NOTE(review): errors are reported but not treated as fatal on their own;
    // presumably jsdom can report errors and still hand over a usable window -
    // confirm against the jsdom version in use.
    if (usable) {
      var $ = window.$;
      $('.dribbbles > li').each(function () {
        shots.push(parseDribbble($(this)));
      });
    }

    pagesProcessed++;
    if (pagesProcessed === maxPages) {
      // `dribbbles` holds one array per page, so its own length is the page
      // count; sum the per-page lengths to report the number of shots.
      var total = dribbbles.reduce(function (sum, pageShots) {
        return sum + (pageShots ? pageShots.length : 0);
      }, 0);
      console.log("Dribbbles Scraped: ", total);
      console.log("Dribbbles JSON: ", JSON.stringify(dribbbles));
    }
  });
}
// Driver: print a banner, set up the configuration and shared state that
// scrapeDribbble reads and updates, then request every page in turn.
console.log('Dribbble Scraper');

// Configuration.
var dribbbleURL = 'http://dribbble.com';
var jqueryURL = 'http://code.jquery.com/jquery-1.7.1.min.js';
var maxPages = 10;

// Shared state filled in by the scrape callbacks.
var dribbbles = [];
var pagesProcessed = 0;

// Pages are numbered from 1 up to and including maxPages.
var page = 1;
while (page <= maxPages) {
  scrapeDribbble(dribbbleURL, page);
  page += 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment