Skip to content

Instantly share code, notes, and snippets.

@leandro
Last active November 11, 2016 18:16
Show Gist options
  • Save leandro/99651677bd4a80f32c70d4d3523ecaed to your computer and use it in GitHub Desktop.
Save leandro/99651677bd4a80f32c70d4d3523ecaed to your computer and use it in GitHub Desktop.
Lists the free tracks available on Last.fm (expected to run inside webtask.io).
var jsdom = require("jsdom");
var _ = require("lodash");
// Namespace object; the IIFE below attaches the crawler's public API to it.
var WebCrawler = {};
// Not referenced anywhere in the visible code; kept so nothing that may rely on it breaks.
var Storage = {};

/**
 * Crawler module. Walks the paginated Last.fm "free music downloads" listing
 * and collects every row that has an artist, a track name and a download URL.
 *
 * Public API attached to `crawler`:
 *   - crawler.fetchTracks(callback): starts a crawl at page 1. When the crawl
 *     ends, `callback(crawler, err)` is invoked; `err` is only passed when a
 *     page could not be loaded.
 *   - crawler.cachedTracks: array of `[artist, track, url]` triples collected
 *     by the most recent crawl (always an array once a crawl has started).
 */
!function (crawler) {
  var LISTING_URL = "http://www.last.fm/music/+free-music-downloads?page=";
  var JQUERY_URL = "http://code.jquery.com/jquery.js";

  /**
   * Loads one listing page through jsdom (with jQuery injected) and hands the
   * resulting window to the page crawler.
   *
   * @param {number} page - 1-based page number to load.
   * @param {function} finishCallback - invoked once the whole crawl is over.
   */
  var getFreeDownloadableTracks = function (page, finishCallback) {
    jsdom.env({
      url: LISTING_URL + page,
      scripts: [JQUERY_URL],
      done: crawlPage(page, finishCallback)
    });
  };

  /**
   * Reads the total page count from the pagination label ("... of N").
   *
   * @param {function} $ - jQuery function bound to the loaded page.
   * @param {number} fallback - value returned when the label is missing or
   *   does not match, so a listing without pagination is treated as ending here.
   * @returns {number} total number of pages.
   */
  var readTotalPages = function ($, fallback) {
    var label = _.trim($('ul.pagination .pages').text());
    var found = label.match(/of (\d+)/);
    return found ? Number(found[1]) : fallback;
  };

  /**
   * Builds the jsdom `done` handler for a given page. The handler stores the
   * tracks found on the page and then either requests the next page or ends
   * the crawl.
   *
   * @param {number} page - page number the handler is responsible for.
   * @param {function} finishCallback - invoked as `finishCallback(crawler)` on
   *   success or `finishCallback(crawler, err)` when the page failed to load.
   * @returns {function} handler with the `(err, window)` signature.
   */
  var crawlPage = function (page, finishCallback) {
    return function (err, window) {
      // Without a window (or without jQuery on it) nothing can be scraped.
      // End the crawl and report the failure instead of throwing inside the
      // callback, which would leave the caller waiting forever.
      if (!window || typeof window.$ !== "function") {
        var failure = err || new Error("Could not load free downloads page " + page);
        console.log("Failed to load free musics list in page %d: %s", page, failure);
        finishCallback(crawler, failure);
        return;
      }

      var $ = window.$;
      var totalPages = readTotalPages($, page);
      var TRs = $("table.chartlist tbody tr");
      var tracksCount = TRs.length;
      console.log("Fetching %d free musics list in page %d/%d", tracksCount, page, totalPages);

      TRs.each(function () {
        var $el = $(this);
        var artist = _.trim($('span.chartlist-ellipsis-wrap a:eq(0)', $el).text());
        var track = _.trim($('span.chartlist-ellipsis-wrap a:eq(1)', $el).text());
        var url = $('.chartlist-download-button', $el).attr('href');
        // Rows lacking any of the three fields are skipped.
        if (artist && track && url) { storeTrack(artist, track, url); }
      });

      if (page < totalPages) {
        getFreeDownloadableTracks(page + 1, finishCallback);
      } else {
        finishCallback(crawler);
      }
    };
  };

  /**
   * Appends one `[artist, track, url]` triple to `crawler.cachedTracks`.
   */
  var storeTrack = function (artist, track, url) {
    if (!Array.isArray(crawler.cachedTracks)) { crawler.cachedTracks = []; }
    crawler.cachedTracks.push([artist, track, url]);
  };

  /**
   * Starts a fresh crawl from page 1.
   *
   * @param {function} callback - see `crawler.fetchTracks` in the module header.
   */
  var fetchTracksData = function (callback) {
    // `crawler` outlives a single call, so start every crawl with an empty
    // list: otherwise repeated calls would keep appending duplicates, and a
    // crawl that finds nothing would leave the list undefined.
    crawler.cachedTracks = [];
    getFreeDownloadableTracks(1, callback);
  };

  crawler.fetchTracks = fetchTracksData;
}(WebCrawler);
module.exports = function(ctx, done) {
WebCrawler.fetchTracks(function(crawler) {
crawler.cachedTracks.sort(function (a, b) {
var artistA = a[0].toLowerCase();
var artistB = b[0].toLowerCase();
var trackA = a[1].toLowerCase();
var trackB = b[1].toLowerCase();
if (artistA < artistB) { return -1; }
if (artistA > artistB) { return 1; }
if (trackA < trackB) { return -1; }
if (trackA > trackB) { return 1; }
return 0;
});
done(null, crawler.cachedTracks);
});
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment