Last active
November 11, 2016 18:16
-
-
Save leandro/99651677bd4a80f32c70d4d3523ecaed to your computer and use it in GitHub Desktop.
Lists the free tracks available on Last.fm (intended to run inside webtask.io).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Third-party dependencies.
var jsdom = require("jsdom"); // loads remote pages into a queryable DOM
var _ = require("lodash"); // used for string trimming

// Namespace object that the crawler IIFE below attaches `fetchTracks` to.
var WebCrawler = {};
// Not referenced by the visible code in this file.
var Storage = {};
// Attaches `fetchTracks` to the object passed in (WebCrawler). It walks the
// Last.fm free-download listing page by page and collects every row that has
// an artist, a track title and a download link into `crawler.cachedTracks`
// as [artist, track, url] triples.
!function (crawler) {
  /**
   * Loads one listing page through jsdom (with jQuery injected) and hands
   * the resulting window to the callback produced by crawlPage.
   *
   * @param {number} page - Page number to request.
   * @param {function} finishCallback - Called as finishCallback(crawler, err)
   *   once crawling stops; `err` is only set when a page could not be used.
   */
  var getFreeDownloadableTracks = function (page, finishCallback) {
    jsdom.env({
      url: "http://www.last.fm/music/+free-music-downloads?page=" + page,
      scripts: ["http://code.jquery.com/jquery.js"],
      done: crawlPage(page, finishCallback)
    });
  };

  /**
   * Reads the "of N" page count from the pagination widget.
   *
   * @param {function} $ - jQuery bound to the loaded page.
   * @param {number} fallback - Value returned when no count can be found.
   * @returns {number} Total number of pages, or `fallback`.
   */
  var readTotalPages = function ($, fallback) {
    var found = _.trim($('ul.pagination .pages').text()).match(/of (\d+)/);
    return found ? +found[1] : fallback;
  };

  /**
   * Builds the jsdom `done` handler for a given page: it stores the page's
   * tracks, then either requests the next page or finishes.
   *
   * @param {number} page - Page number being crawled.
   * @param {function} finishCallback - See getFreeDownloadableTracks.
   * @returns {function} Handler taking (err, window).
   */
  var crawlPage = function (page, finishCallback) {
    return function (err, window) {
      // Without a window carrying jQuery nothing can be scraped; report the
      // failure instead of throwing inside the jsdom callback.
      if (!window || typeof window.$ !== 'function') {
        finishCallback(crawler, err || new Error("Unable to load page " + page));
        return;
      }

      var $ = window.$;
      // When the pagination text is missing, treat the current page as the
      // last one so the crawl ends cleanly.
      var totalPages = readTotalPages($, page);
      var TRs = $("table.chartlist tbody tr");
      var tracksCount = TRs.length;
      console.log("Fetching %d free musics list in page %d/%d", tracksCount, page, totalPages);

      TRs.each(function () {
        var $el = $(this);
        var artist = _.trim($('span.chartlist-ellipsis-wrap a:eq(0)', $el).text());
        var track = _.trim($('span.chartlist-ellipsis-wrap a:eq(1)', $el).text());
        var url = $('.chartlist-download-button', $el).attr('href');
        // Rows missing any of the three fields are skipped.
        if (artist && track && url) { storeTrack(artist, track, url); }
      });

      // Everything needed has been copied out; release the page's resources.
      if (typeof window.close === 'function') { window.close(); }

      if (page < totalPages) {
        getFreeDownloadableTracks(page + 1, finishCallback);
      } else {
        finishCallback(crawler);
      }
    };
  };

  /**
   * Appends one [artist, track, url] triple to crawler.cachedTracks,
   * creating the array if it does not exist yet.
   */
  var storeTrack = function (artist, track, url) {
    if (!Array.isArray(crawler.cachedTracks)) { crawler.cachedTracks = []; }
    crawler.cachedTracks.push([artist, track, url]);
  };

  /**
   * Public entry point: crawls every listing page starting from page 1.
   *
   * @param {function} callback - Called as callback(crawler, err) when done.
   */
  var fetchTracksData = function (callback) {
    // Start from an empty list so repeated calls in the same process do not
    // accumulate duplicates, and so the property exists even when no track
    // is found.
    crawler.cachedTracks = [];
    getFreeDownloadableTracks(1, callback);
  };

  crawler.fetchTracks = fetchTracksData;
}(WebCrawler);
module.exports = function(ctx, done) { | |
WebCrawler.fetchTracks(function(crawler) { | |
crawler.cachedTracks.sort(function (a, b) { | |
var artistA = a[0].toLowerCase(); | |
var artistB = b[0].toLowerCase(); | |
var trackA = a[1].toLowerCase(); | |
var trackB = b[1].toLowerCase(); | |
if (artistA < artistB) { return -1; } | |
if (artistA > artistB) { return 1; } | |
if (trackA < trackB) { return -1; } | |
if (trackA > trackB) { return 1; } | |
return 0; | |
}); | |
done(null, crawler.cachedTracks); | |
}); | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment