Created
September 10, 2012 00:05
-
-
Save QQism/3688026 to your computer and use it in GitHub Desktop.
vietjet air flights crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html> | |
<head> | |
<title>Vietjet Air Crawler</title>
<script type='text/javascript' src='https://ajax.googleapis.com/ajax/libs/jquery/1.8.1/jquery.min.js'></script> | |
<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.1.1/css/bootstrap-combined.min.css" rel="stylesheet"> | |
</head> | |
<body> | |
<script> | |
// Page bootstrap: wires the "Start" and "Get flights" buttons to the two-step
// scraping workflow (GET the booking form, then POST it back and parse the result).
window.onload = function() {
  // The same endpoint is fetched with GET (Start) and submitted with POST (Get flights).
  var url = 'https://ameliaweb5.intelisys.ca/VietJet/ViewFlights.aspx';

  // Writes a message into the status footer.
  var set_status = function(message) {
    $('#dummy-status').text(message);
  };

  // Shared $.ajax error callback, so a failed request does not leave the
  // footer stuck on 'scraping...'.
  var report_failure = function(jqXHR, textStatus, errorThrown) {
    set_status('Request failed: ' + (errorThrown || textStatus));
  };

  // Returns the first <form> element found inside the response markup,
  // or undefined when the response contains none.
  var extract_form = function(html) {
    return $(html).find('form')[0];
  };

  // Step 1: download the booking page and stash its form in the hidden #dummy1 container.
  $('#scrape-btn').click(function() {
    set_status('scraping...');
    $.ajax({
      url: url,
      crossDomain: true,
      success: function(data) {
        console.log('Get flight page');
        var form = extract_form(data);
        if (!form) {
          set_status('No form found in response');
          return;
        }
        set_status('OK');
        $('#dummy1').html(form);
        show_input();
        fill_data();
      },
      error: report_failure
    });
  });

  // Step 2: post every field of the stashed form back and parse the flight grid.
  $('#post-btn').click(function() {
    // NOTE(review): every <input> is sent regardless of type or checked state;
    // kept as-is because the remote page's expectations are not visible here.
    var form_data = {};
    $('#dummy1 input').each(function(index, e) { form_data[e.name] = e.value; });
    $('#dummy1 select').each(function(index, e) { form_data[e.name] = e.value; });
    set_status('scraping...');
    $.ajax({
      type: 'POST',
      url: url,
      data: form_data,
      crossDomain: true,
      success: function(data) {
        console.log('Get flight page');
        var form = extract_form(data);
        if (!form) {
          set_status('No form found in response');
          return;
        }
        set_status('OK');
        $('#dummy2').html(form);
        export_data();
      },
      error: report_failure
    });
  });

  // Pre-fills the stashed form with a default SGN -> HAN search on 10/09/2012 in VND.
  var fill_data = function() {
    console.log('Filling info');
    $('#dummy1 #lstOrigAP').val('SGN');
    $('#dummy1 #lstDestAP').val('HAN');
    $('#dummy1 #departure1').val('10/09/2012');
    $('#dummy1 #departure2').val('10/09/2012');
    $('#dummy1 #dlstDepDate_Month').val('2012/09');
    $('#dummy1 #dlstDepDate_Day').val('10');
    $('#dummy1 #dlstRetDate_Month').val('2012/09');
    $('#dummy1 #dlstRetDate_Day').val('10');
    $('#dummy1 #lstCurrency').val('VND');
    // NOTE(review): 'vfto' is presumably the action code the server expects - confirm.
    $('#dummy1 #button').val('vfto');
  };

  // Splits a cell's inner HTML on '<br>'. A missing cell yields [''] instead of
  // throwing, because .html() returns no string for an empty selection.
  var split_cell = function(cell) {
    return ($(cell).html() || '').split('<br>');
  };

  // Walks every flight row stored in #dummy2, builds one record per row and
  // renders the records as XML text into #dummy-result.
  var export_data = function() {
    var flights = [];
    $('#dummy2 .FlightsGrid').each(function(index, table) {
      // NOTE(review): the space in 'tr [class^=gridFlight]' makes this a descendant
      // selector; left untouched because the remote markup cannot be checked here.
      $(table).children('tbody').children('tr [class^=gridFlight]').each(function(index, row) {
        var flight = {};
        var tds = $(row).children('td');

        // Flight info: first cell holds departure, arrival and detail sub-cells.
        var info = $(tds[0]).find('table > tbody > tr > td');
        var depart = split_cell(info[0]);
        flight['startTime'] = depart[0];
        flight['startPlace'] = depart[1];
        var arrive = split_cell(info[1]);
        flight['endTime'] = arrive[0];
        flight['endPlace'] = arrive[1];
        flight['detail'] = $(info[2]).text().trim();

        // Availability and pricing: exactly three sub-cells means promo/eco/flexi fares.
        var pricing = $(tds[1]).find('table > tbody > tr > td');
        if (pricing.length === 3) {
          flight['promo'] = $(pricing[0]).text().trim();
          flight['eco'] = $(pricing[1]).text().trim();
          flight['flexi'] = $(pricing[2]).text().trim();
        } else {
          // Sold out: keep whatever message the cell shows.
          flight['error'] = pricing.text().trim();
        }
        flights.push(flight);
      });
    });
    var xml_result = flights_to_xml(flights);
    $('#dummy-result').text(xml_result);
  };
};
/**
 * Serializes a list of flight records into one XML string with no whitespace
 * between elements.
 *
 * Each record becomes a <flight> element and each of its own enumerable
 * properties becomes a child element named after the property.
 *
 * NOTE(review): values are passed through unescape() (decodes %XX sequences)
 * and are NOT XML-escaped, so a value containing '<' or '&' yields malformed
 * XML. Kept as-is to preserve the existing output format.
 *
 * @param {Array<Object>} data - flight records keyed by field name.
 * @returns {string} the XML document, starting with the XML declaration.
 */
var flights_to_xml = function(data) {
  var xml = [];
  xml.push('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>');
  xml.push('<flights>');
  data.forEach(function(flight) {
    xml.push('<flight>');
    for (var prop in flight) {
      // Skip anything inherited through the prototype chain.
      if (!Object.prototype.hasOwnProperty.call(flight, prop)) {
        continue;
      }
      xml.push('<' + prop + '>');
      xml.push(unescape(flight[prop]));
      xml.push('</' + prop + '>');
    }
    xml.push('</flight>');
  });
  xml.push('</flights>');
  return xml.join('');
};
/**
 * Exposes visible date / origin / destination controls in #prepare.
 *
 * Each control is a clone of a field from the hidden scraped form in #dummy1,
 * stripped of its inline event attributes; changing a clone writes the new
 * value back into the hidden form.
 */
var show_input = function() {
  // Clones the element matched by `selector` and removes the space-separated
  // attributes listed in `attrs` from the copy.
  var clone_control = function(selector, attrs) {
    var copy = $(selector).clone();
    copy.removeAttr(attrs);
    return copy;
  };

  var startTime = clone_control('#dummy1 #departure1', 'onclick readonly onblur onkeydown onfocus onchange');
  startTime.change(function() {
    var date = $(this).val().split(/[\/|-]/); // DD/MM/YYYY
    if (date.length !== 3) {
      // Without all three parts the month/day fields would receive "undefined".
      $('#dummy-status').text('Invalid date, expected DD/MM/YYYY');
      return;
    }
    var full_date = date.join('/');
    var year_month = date[2] + '/' + date[1];
    // One-way and return fields are both set to the same day.
    $('#dummy1 #departure1').val(full_date);
    $('#dummy1 #departure2').val(full_date);
    $('#dummy1 #dlstDepDate_Month').val(year_month);
    $('#dummy1 #dlstDepDate_Day').val(date[0]);
    $('#dummy1 #dlstRetDate_Month').val(year_month);
    $('#dummy1 #dlstRetDate_Day').val(date[0]);
  });

  var startPlace = clone_control('#dummy1 #lstOrigAP', 'onclick onchange onblur onfocus');
  startPlace.change(function() {
    $('#dummy1 #lstOrigAP').val($(this).val());
  });

  var endPlace = clone_control('#dummy1 #lstDestAP', 'onclick onchange onblur onfocus');
  endPlace.change(function() {
    $('#dummy1 #lstDestAP').val($(this).val());
  });

  $('#prepare').append('<br/><span>Start Time </span>');
  $('#prepare').append(startTime);
  $('#prepare').append('<br/><span>Start </span>');
  $('#prepare').append(startPlace);
  $('#prepare').append('<br/><span>End </span>');
  $('#prepare').append(endPlace);
};
</script> | |
<h2>This is the micro crawler that scrapes Vietjet air flights</h2><br> | |
<b>On Mac, it works only with Safari (Chrome and Firefox both throw an error due to their security policy). It has not been tested on Linux or Windows yet.</b>
<h3>Steps</h3> | |
<ul> | |
<li>Click <b>Start</b> to prepare</li> | |
<li>Input the time (e.g. 12/09/2012) and select the departure and destination</li>
<li>Click <b>Get flights</b> to retrieve xml flight data</li> | |
</ul> | |
<div id='prepare'> | |
<button class='btn' id='scrape-btn'>Start</button> | |
</div><br/> | |
<button class='btn' id='post-btn'>Get flights</button><br/> | |
<!-- status footer --> | |
<div style="position: fixed; bottom: 0px; right: 0; left: 0; background: grey; color: white"> | |
<span id='dummy-status'></span> | |
<span class='pull-right'>Quang Quach</span> | |
</div> | |
<!-- this part is for storing scraping content--> | |
<div id='dummy1' style='display: none'></div> | |
<div id='dummy2' style='display: none'></div> | |
<br/> | |
<div id='dummy3'> | |
<h4>Result</h4> | |
<code id='dummy-result'></code> | |
</div> | |
<br/><br/><br/> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Mr. Quang Quach, I'd like to see more demos of your crawlers; please share more. I am really interested in and appreciate your work. Thank you very much.