Skip to content

Instantly share code, notes, and snippets.

@yusuke024
Created June 6, 2015 20:47
Show Gist options
  • Select an option

  • Save yusuke024/9d7f8f4a5de1d1a075dc to your computer and use it in GitHub Desktop.

Select an option

Save yusuke024/9d7f8f4a5de1d1a075dc to your computer and use it in GitHub Desktop.
var url = require('url');
var Crawler = require('crawler');
var moment = require('moment');
var _ = require('underscore');
// Crawler instance shared by every handler in this script (maxConnections is set to 20).
var crawlerOptions = { maxConnections: 20 };
var c = new Crawler(crawlerOptions);

// Seed the crawl with the cinema listing page; catelogHandler queues the follow-up requests.
var catalogRequest = {
  uri: 'http://www.majorcineplex.com/en/cinema',
  callback: catelogHandler
};
c.queue(catalogRequest);
/**
 * Crawler callback for the cinema listing page.
 *
 * Reads the `data-cinema-id` attribute of every `.cinema_name` element and
 * queues one search-results request per id, handled by `cinemaHandler`.
 *
 * @param {?Error} error - Set when the request failed.
 * @param {Object} result - Raw crawler result (unused).
 * @param {Function} $ - Selector function for the fetched page; absent when the request failed.
 */
function catelogHandler(error, result, $) {
  // Without `$` there is nothing to select from; report on stderr (keeps stdout
  // clean for the scraped data) and stop instead of throwing a TypeError.
  if (error || typeof $ !== 'function') {
    console.error('Failed to load cinema catalog:', error);
    return;
  }

  $('.cinema_name').toArray()
    .map(function (el) {
      return $(el).attr('data-cinema-id');
    })
    .filter(function (id) {
      // Elements lacking the attribute would otherwise be queued as "cinemaId=undefined".
      return id !== undefined && id !== null && id !== '';
    })
    .forEach(function (id) {
      c.queue({
        uri: "http://www.majorcineplex.com/booking/search-results.php?cinemaId=" + id,
        callback: cinemaHandler
      });
    });
}
/**
 * Crawler callback for one cinema's search-results page.
 *
 * Builds one record per listed showtime ({ venue, cinema, movie, showtime }),
 * logs each record to stdout and returns the collected list.
 *
 * @param {?Error} error - Set when the request failed.
 * @param {Object} result - Raw crawler result (unused).
 * @param {Function} $ - Selector function for the fetched page; absent when the request failed.
 * @returns {Array<Object>} The showtime records found (empty when the request failed).
 */
function cinemaHandler(error, result, $) {
  // Without `$` there is nothing to select from; report on stderr (keeps stdout
  // clean for the scraped data) and stop instead of throwing a TypeError.
  if (error || typeof $ !== 'function') {
    console.error('Failed to load cinema showtimes:', error);
    return [];
  }

  var showtimes = [];
  var venue = $('#result-today .cinema-name').text().trim();

  $('#result-today .result-item').toArray().forEach(function (resultEl) {
    var cinema = $(resultEl).find('.theater-name span').text().trim();

    // NOTE(review): this selector searches the whole document, not resultEl, so
    // every result item is paired with every movie entry on the page. Kept as-is
    // because the page structure is not visible here — confirm whether it should
    // be $(resultEl).find('.ss-item.group').
    $('.ss-item.group').toArray().forEach(function (movieEl) {
      var movieName = $(movieEl).find('.movie-name').text().trim();

      $(movieEl).find('.showtimes.group ul li').toArray().forEach(function (timeEl) {
        // The page shows a bare "HH:mm"; the appended offset pins it to UTC+7.
        var datetimeString = $(timeEl).text().trim() + " +07:00";
        var showtimeDatetime = moment(datetimeString, 'HH:mm Z');

        // Skip cells that do not contain a parseable time rather than emitting a bogus record.
        if (!showtimeDatetime.isValid()) {
          return;
        }

        var showtime = {
          venue: venue,
          cinema: cinema,
          movie: movieName,
          showtime: showtimeDatetime.utcOffset(7).toISOString()
        };
        showtimes.push(showtime);
        console.log(showtime);
      });
    });
  });

  return showtimes;
}
{
"name": "crawler",
"version": "1.0.0",
"description": "",
"main": "crawler.js",
"dependencies": {
"crawler": "^0.4.1",
"moment": "^2.9.0",
"underscore": "^1.7.0"
},
"devDependencies": {},
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC"
}
#!/bin/sh
# Runs both cinema crawlers, writing each crawler's stdout to its own result file.

echo "Cleaning stuffs..."
# -f keeps the cleanup quiet when no earlier *_result files exist (e.g. on the first run);
# -- stops option parsing in case a matching file name starts with a dash.
rm -f -- *_result

echo "Running MC crawler..."
/usr/bin/env node mc_crawler.js > mc_result
echo "Running SF crawler..."
/usr/bin/env node sf_crawler.js > sf_result
var url = require('url');
var Crawler = require('crawler');
var moment = require('moment');
// Crawler instance shared by every request in this script (maxConnections is set to 20).
var crawlerSettings = { maxConnections: 20 };
var c = new Crawler(crawlerSettings);
/**
 * Reduces a listing of the form "<title> (<...>) [<...>]" to just "<title>".
 *
 * Only the first match is replaced; text that does not fit the pattern is
 * returned unchanged, and any text outside the matched span is kept.
 *
 * @param {string} movieString - Raw movie text from the page.
 * @returns {string} The text with the matched span collapsed to its title part.
 */
function parseMovie(movieString) {
  var taggedTitle = /(.+)\s(\(.+\))\s(\[.+\])/;
  return movieString.replace(taggedTitle, function (wholeMatch, title) {
    return title;
  });
}
/**
 * Joins the date and time texts, drops the first four characters of the
 * result, parses the remainder as "D MMM YYYY HH:mm" and formats it as 'LLL'.
 *
 * NOTE(review): the four dropped characters are presumably a weekday prefix
 * such as "Sat " — confirm against the page.
 *
 * @param {string} dateString - Date text from the page.
 * @param {string} timeString - Time text from the page.
 * @returns {string} The parsed moment rendered with the 'LLL' format.
 */
function parseDatetime(dateString, timeString) {
  var withoutPrefix = (dateString + ' ' + timeString).substring(4);
  return moment(withoutPrefix, 'D MMM YYYY HH:mm').format('LLL');
}
/**
 * Prints one showtime to stdout: cinema, cleaned movie title, formatted
 * date/time, then a separator line.
 *
 * @param {string} cinema - Cinema text, printed as-is.
 * @param {string} movie - Raw movie text, passed through parseMovie.
 * @param {string} dateString - Date text, passed to parseDatetime.
 * @param {string} timeString - Time text, passed to parseDatetime.
 */
function processData(cinema, movie, dateString, timeString) {
  var separator = "==========================";

  // Each value is computed right before it is printed, so the output order
  // (and what has already been printed if a step throws) stays the same.
  console.log(cinema);
  var title = parseMovie(movie);
  console.log(title);
  var when = parseDatetime(dateString, timeString);
  console.log(when);
  console.log(separator);
}
/**
 * Crawler callback for a page listing `.CinemaLinkSml` links: queues every
 * linked page and, once it loads, reads the order-tracker fields from it and
 * passes them to processData.
 *
 * @param {?Error} error - Set when the request failed.
 * @param {Object} result - Raw crawler result (unused).
 * @param {Function} $ - Selector function for the fetched page; absent when the request failed.
 */
function screenHandler(error, result, $) {
  // Without `$` there is nothing to select from; report on stderr (keeps stdout
  // clean for the scraped data) and stop instead of throwing a TypeError.
  if (error || typeof $ !== 'function') {
    console.error('Failed to load showtime list:', error);
    return;
  }

  $('.CinemaLinkSml').each(function () {
    var href = $(this).attr('href');
    // A link without href would otherwise be queued as ".../undefined".
    if (!href) {
      return;
    }

    c.queue({
      uri: 'http://booking.sfcinemacity.com/' + href,
      callback: function (error, result, $) {
        if (error || typeof $ !== 'function') {
          console.error('Failed to load session details:', error);
          return;
        }
        var cinema = $('#visOrderTracker_txtCinemaDetails').text();
        var movie = $('#visOrderTracker_txtMovieDetails').text();
        var date = $('#visOrderTracker_txtSessionDateDetails').text();
        var time = $('#visOrderTracker_txtSessionTimeDetails').text();
        processData(cinema, movie, date, time);
      }
    });
  });
}
/**
 * Crawler callback for a cinema's movie-selection page: queues every
 * `.CinemaLinkSml` link except the "show all movies" one, handled by screenHandler.
 *
 * @param {?Error} error - Set when the request failed.
 * @param {Object} result - Raw crawler result (unused).
 * @param {Function} $ - Selector function for the fetched page; absent when the request failed.
 */
function movieListHandler(error, result, $) {
  // Report on stderr (keeps stdout clean for the scraped data) instead of
  // throwing a TypeError on the missing `$`.
  if (error || typeof $ !== 'function') {
    console.error('Failed to load movie list:', error);
    return;
  }

  $('.CinemaLinkSml').each(function () {
    var link = $(this);
    if (link.text() === 'or show ALL MOVIES showtimes') {
      return;
    }
    var href = link.attr('href');
    // A link without href would otherwise be queued as ".../undefined".
    if (!href) {
      return;
    }
    c.queue({
      uri: 'http://booking.sfcinemacity.com/' + href,
      callback: screenHandler
    });
  });
}

/**
 * Crawler callback for the cinema-selection page: queues the movie-selection
 * page of every `.CinemaLinkSml` element, using the element's id as visCinID.
 *
 * @param {?Error} error - Set when the request failed.
 * @param {Object} result - Raw crawler result (unused).
 * @param {Function} $ - Selector function for the fetched page; absent when the request failed.
 */
function cinemaListHandler(error, result, $) {
  if (error || typeof $ !== 'function') {
    console.error('Failed to load cinema list:', error);
    return;
  }

  $('.CinemaLinkSml').each(function () {
    var id = $(this).attr('id');
    // An element without id would otherwise be queued as "visCinID=undefined".
    if (!id) {
      return;
    }
    c.queue({
      uri: 'http://booking.sfcinemacity.com/visSelectMovie.aspx?visSearchBy=cin&visCinID=' + id,
      callback: movieListHandler
    });
  });
}

// Seed the crawl with the cinema-selection page.
c.queue({
  uri: 'http://booking.sfcinemacity.com/visSelectCinema.aspx',
  callback: cinemaListHandler
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment