Skip to content

Instantly share code, notes, and snippets.

@avermeulen
Created April 22, 2014 23:10
Show Gist options
  • Save avermeulen/11197548 to your computer and use it in GitHub Desktop.
Save avermeulen/11197548 to your computer and use it in GitHub Desktop.
HTML scraping using PhantomJS and jQuery
// Page object created through PhantomJS's 'webpage' module; the rest of the
// script loads and scrapes the site through it.
var page = require('webpage').create();

// Echo alert() messages coming from the page to the console, prefixed so they
// can be told apart from the script's own output.
page.onAlert = function (alertText) {
    console.log('alert!!> ' + alertText);
};

// Echo console messages coming from the page. The line number and source
// arguments are accepted but not printed.
page.onConsoleMessage = function (text, lineNumber, sourceId) {
    console.log('console> ' + text);
};
//http://www.f1calendar.com
page.open('http://www.f1calendar.com', function(status) {
console.log("status : " + status);
page.injectJs('./jquery-1.11.0.js');
if (!phantom.state){
page.evaluate(function(){
/**
 * Reads the "table#formula_one_events" table and groups every ".vevent"
 * entry by race name.
 *
 * The race name is the text of the first ".event-column .extra" element of
 * the entry, with its first comma and its first "Formula 1 " occurrence
 * removed. Each entry contributes its ".event-column .category" text as
 * eventType and the "title" attribute of ".date-column .dtstart" as eventTime.
 *
 * Relies on jQuery being available as `$` in the page.
 *
 * @returns {Object} map of race name -> { events: [{ eventType, eventTime }] }
 */
var getRaces = function () {
    var sessionsByRace = {};

    // String patterns only replace the first match, so later commas survive.
    var tidyName = function (rawTitle) {
        return rawTitle.replace(",", "").replace("Formula 1 ", "");
    };

    $("table#formula_one_events tbody").each(function (bodyIndex, tableBody) {
        $(tableBody).find(".vevent").each(function (rowIndex, row) {
            var $row = $(row);
            var name = tidyName($row.find(".event-column .extra:first").text());

            // First entry seen for this race: start its event list.
            if (!sessionsByRace[name]) {
                sessionsByRace[name] = { events: [] };
            }

            sessionsByRace[name].events.push({
                eventType: $row.find(".event-column .category").text(),
                eventTime: $row.find(".date-column .dtstart").attr("title")
            });
        });
    });

    return sessionsByRace;
};
/**
 * Turns the race map built by getRaces() into a list with one entry per race.
 *
 * Each entry has the trimmed race name and an `events` object holding the
 * eventTime of every recognised session under a short key (practice1,
 * practice2, practice3, qualifying, race). Sessions with any other eventType
 * are ignored, and races whose name is blank after trimming are skipped.
 *
 * @param {Object} raceList map of race name -> { events: [{ eventType, eventTime }] }
 * @returns {Array} list of { raceName: string, events: Object }
 */
var formatRaceEventDetails = function (raceList) {
    // Session label as scraped from the page -> property name in the output.
    var fieldByEventType = {
        "First Practice Session": "practice1",
        "Second Practice Session": "practice2",
        "Third Practice Session": "practice3",
        "Qualifying Session": "qualifying",
        "Grand Prix": "race"
    };
    var hasOwn = Object.prototype.hasOwnProperty;
    var raceDetailsList = [];

    // Object.keys() visits own properties only. This code runs inside a
    // third-party page, where scripts may have added enumerable properties to
    // Object.prototype; a bare for...in would treat those as races and crash.
    // `|| {}` keeps the "nothing to iterate" result for null/undefined input.
    Object.keys(raceList || {}).forEach(function (race) {
        var raceName = race.trim();
        if (raceName === "") {
            return; // nameless rows carry no usable race
        }

        var sessions = {};
        raceList[race].events.forEach(function (ev) {
            // Own-property check so labels such as "toString" cannot match
            // something inherited by the lookup table.
            if (hasOwn.call(fieldByEventType, ev.eventType)) {
                sessions[fieldByEventType[ev.eventType]] = ev.eventTime;
            }
        });

        raceDetailsList.push({
            raceName: raceName,
            events: sessions
        });
    });

    return raceDetailsList;
};
var raceDetailsList = formatRaceEventDetails(getRaces());
console.log(JSON.stringify(raceDetailsList));
});
}
else{
console.log('state');
}
phantom.exit();
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment