Created
June 6, 2015 20:47
-
-
Save yusuke024/9d7f8f4a5de1d1a075dc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| var url = require('url'); | |
| var Crawler = require('crawler'); | |
| var moment = require('moment'); | |
| var _ = require('underscore'); | |
// Shared crawler instance used by every handler in this file.
var crawlerOptions = { maxConnections: 20 };
var c = new Crawler(crawlerOptions);

// Seed the crawl with the cinema listing page; catelogHandler queues the
// per-cinema pages from it.
c.queue({
  callback: catelogHandler,
  uri: 'http://www.majorcineplex.com/en/cinema'
});
/**
 * Crawler callback for the majorcineplex.com cinema listing page.
 *
 * Reads the `data-cinema-id` attribute of every `.cinema_name` element and
 * queues one search-results page per cinema, each handled by `cinemaHandler`.
 *
 * @param {?Error} error  Error reported by the crawler, if any.
 * @param {Object} result Response object supplied by the crawler (unused).
 * @param {Function} $    jQuery-style selector function for the fetched page.
 */
function catelogHandler(error, result, $) {
  // Without a selector function there is nothing to scrape; report the
  // failure on stderr instead of crashing on an undefined `$`.
  if (error || typeof $ !== 'function') {
    console.error('Failed to load cinema listing:', error);
    return;
  }

  $('.cinema_name').toArray().forEach(function (el) {
    var id = $(el).attr('data-cinema-id');
    // Elements without an id would otherwise queue "cinemaId=undefined".
    if (!id) {
      return;
    }
    c.queue({
      uri: 'http://www.majorcineplex.com/booking/search-results.php?cinemaId=' + encodeURIComponent(id),
      callback: cinemaHandler
    });
  });
}
/**
 * Crawler callback for a single cinema's search-results page.
 *
 * Walks `#result-today`: for every `.result-item` (one theatre), every
 * `.ss-item.group` inside it (one movie) and every showtime `<li>` inside
 * that, builds a `{ venue, cinema, movie, showtime }` record, logs it to
 * stdout and collects it.
 *
 * @param {?Error} error  Error reported by the crawler, if any.
 * @param {Object} result Response object supplied by the crawler (unused).
 * @param {Function} $    jQuery-style selector function for the fetched page.
 * @returns {Array<Object>} The showtime records found (empty on failure).
 */
function cinemaHandler(error, result, $) {
  var showtimes = [];

  // Without a selector function there is nothing to scrape; report the
  // failure on stderr so it does not mix with the records on stdout.
  if (error || typeof $ !== 'function') {
    console.error('Failed to load cinema showtimes:', error);
    return showtimes;
  }

  var venue = $('#result-today .cinema-name').text().trim();

  $('#result-today .result-item').toArray().forEach(function (itemEl) {
    var $item = $(itemEl);
    var cinema = $item.find('.theater-name span').text().trim();

    // Scope the movie lookup to this result item. Querying the whole document
    // here would pair every theatre with every movie on the page.
    $item.find('.ss-item.group').toArray().forEach(function (movieEl) {
      var $movie = $(movieEl);
      var movieName = $movie.find('.movie-name').text().trim();

      $movie.find('.showtimes.group ul li').toArray().forEach(function (timeEl) {
        // The page only shows "HH:mm"; the +07:00 offset is appended so the
        // time is parsed with an explicit zone.
        var datetimeString = $(timeEl).text().trim() + ' +07:00';
        var showtimeDatetime = moment(datetimeString, 'HH:mm Z');
        // NOTE(review): toISOString() presumably emits UTC regardless of the
        // utcOffset(7) call - confirm against the moment documentation.
        var showtime = {
          venue: venue,
          cinema: cinema,
          movie: movieName,
          showtime: showtimeDatetime.utcOffset(7).toISOString()
        };
        showtimes.push(showtime);
        console.log(showtime);
      });
    });
  });

  return showtimes;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "name": "crawler", | |
| "version": "1.0.0", | |
| "description": "", | |
| "main": "crawler.js", | |
| "dependencies": { | |
| "crawler": "^0.4.1", | |
| "moment": "^2.9.0", | |
| "underscore": "^1.7.0" | |
| }, | |
| "devDependencies": {}, | |
| "scripts": { | |
| "test": "echo \"Error: no test specified\" && exit 1" | |
| }, | |
| "author": "", | |
| "license": "ISC" | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Removes the previous crawl output, then runs both crawlers, redirecting the
# stdout of each into its own *_result file.

echo "Cleaning stuffs..."
# -f keeps rm quiet when no *_result file exists yet (e.g. on the first run);
# -- guards against a result file whose name starts with a dash.
rm -f -- *_result

echo "Running MC crawler..."
/usr/bin/env node mc_crawler.js > mc_result

echo "Running SF crawler..."
/usr/bin/env node sf_crawler.js > sf_result
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| var url = require('url'); | |
| var Crawler = require('crawler'); | |
| var moment = require('moment'); | |
// Shared crawler instance used by every handler in this file.
var c = new Crawler({ maxConnections: 20 });
/**
 * Extracts the movie title from a scraped movie label.
 *
 * A label shaped like "<title> (<...>) [<...>]" is reduced to "<title>";
 * a label that does not match that shape is returned as is (trimmed).
 *
 * @param {?string} movieString Raw label text scraped from the page.
 * @returns {string} The title, or '' when no label was given.
 */
function parseMovie(movieString) {
  // A missing element yields no text; treat that as an empty title instead
  // of throwing on `.replace`.
  if (movieString == null) {
    return '';
  }
  var re = /(.+)\s(\(.+\))\s(\[.+\])/;
  // Trim first: scraped text can carry surrounding whitespace/newlines, and
  // `.` in the pattern does not match across line breaks.
  return String(movieString).trim().replace(re, '$1');
}
/**
 * Combines a scraped date and time into one formatted date-time string.
 *
 * The date is expected to look like "<weekday> D MMM YYYY" and the time like
 * "HH:mm". Everything before the first digit (the weekday and any leading
 * whitespace) is dropped before parsing.
 *
 * @param {string} dateString Scraped session date, e.g. "Sat 6 Jun 2015".
 * @param {string} timeString Scraped session time, e.g. "20:47".
 * @returns {string} The date-time rendered with moment's 'LLL' format.
 */
function parseDatetime(dateString, timeString) {
  // Strip the leading non-digit run rather than a fixed 4 characters, so a
  // weekday of any length (or none at all) leaves "D MMM YYYY HH:mm" intact.
  var datetimeString = (dateString + ' ' + timeString).replace(/^\D+/, '');
  var datetime = moment(datetimeString, 'D MMM YYYY HH:mm');
  return datetime.format('LLL');
}
/**
 * Prints one scraped session to stdout: the cinema, the parsed movie title,
 * the parsed date-time, and a separator line.
 *
 * @param {string} cinema     Cinema text, printed unchanged.
 * @param {string} movie      Raw movie label, passed through parseMovie.
 * @param {string} dateString Raw session date, passed to parseDatetime.
 * @param {string} timeString Raw session time, passed to parseDatetime.
 */
function processData(cinema, movie, dateString, timeString) {
  console.log(cinema);

  var title = parseMovie(movie);
  console.log(title);

  console.log(parseDatetime(dateString, timeString));

  console.log("==========================");
}
/**
 * Crawler callback for a page listing `.CinemaLinkSml` links.
 *
 * Queues every linked page; when one of those loads, its
 * `#visOrderTracker_txt*Details` fields (cinema, movie, session date and
 * session time) are read and passed to `processData`.
 *
 * @param {?Error} error  Error reported by the crawler, if any.
 * @param {Object} result Response object supplied by the crawler (unused).
 * @param {Function} $    jQuery-style selector function for the fetched page.
 */
function screenHandler(error, result, $) {
  // Without a selector function there is nothing to scrape; report the
  // failure on stderr instead of crashing on an undefined `$`.
  if (error || typeof $ !== 'function') {
    console.error('Failed to load session list:', error);
    return;
  }

  $('.CinemaLinkSml').each(function () {
    var href = $(this).attr('href');
    // A link without an href would otherwise queue ".../undefined".
    if (!href) {
      return;
    }
    c.queue({
      // url.resolve copes with absolute and root-relative hrefs, which plain
      // string concatenation would turn into malformed URLs.
      uri: url.resolve('http://booking.sfcinemacity.com/', href),
      callback: function (error, result, $) {
        if (error || typeof $ !== 'function') {
          console.error('Failed to load session details:', error);
          return;
        }
        var cinema = $('#visOrderTracker_txtCinemaDetails').text();
        var movie = $('#visOrderTracker_txtMovieDetails').text();
        var date = $('#visOrderTracker_txtSessionDateDetails').text();
        var time = $('#visOrderTracker_txtSessionTimeDetails').text();
        processData(cinema, movie, date, time);
      }
    });
  });
}
/**
 * Crawler callback for the cinema selection page (visSelectCinema.aspx).
 *
 * Queues the movie selection page of every `.CinemaLinkSml` element, using
 * the element's `id` attribute as the `visCinID` query parameter.
 *
 * @param {?Error} error  Error reported by the crawler, if any.
 * @param {Object} result Response object supplied by the crawler (unused).
 * @param {Function} $    jQuery-style selector function for the fetched page.
 */
function cinemaListHandler(error, result, $) {
  if (error || typeof $ !== 'function') {
    console.error('Failed to load cinema selection page:', error);
    return;
  }

  $('.CinemaLinkSml').each(function () {
    var id = $(this).attr('id');
    // An element without an id would otherwise queue "visCinID=undefined".
    if (!id) {
      return;
    }
    c.queue({
      uri: 'http://booking.sfcinemacity.com/visSelectMovie.aspx?visSearchBy=cin&visCinID=' + encodeURIComponent(id),
      callback: movieListHandler
    });
  });
}

/**
 * Crawler callback for a cinema's movie selection page (visSelectMovie.aspx).
 *
 * Queues every `.CinemaLinkSml` link for `screenHandler`, except the
 * "or show ALL MOVIES showtimes" link.
 *
 * @param {?Error} error  Error reported by the crawler, if any.
 * @param {Object} result Response object supplied by the crawler (unused).
 * @param {Function} $    jQuery-style selector function for the fetched page.
 */
function movieListHandler(error, result, $) {
  if (error || typeof $ !== 'function') {
    console.error('Failed to load movie selection page:', error);
    return;
  }

  $('.CinemaLinkSml').each(function () {
    var $link = $(this);
    if ($link.text() === 'or show ALL MOVIES showtimes') {
      return;
    }
    var href = $link.attr('href');
    // A link without an href would otherwise queue ".../undefined".
    if (!href) {
      return;
    }
    c.queue({
      // url.resolve copes with absolute and root-relative hrefs, which plain
      // string concatenation would turn into malformed URLs.
      uri: url.resolve('http://booking.sfcinemacity.com/', href),
      callback: screenHandler
    });
  });
}

// Seed the crawl with the cinema selection page.
c.queue({
  uri: 'http://booking.sfcinemacity.com/visSelectCinema.aspx',
  callback: cinemaListHandler
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment