Skip to content

Instantly share code, notes, and snippets.

@kehanlu
Last active January 7, 2025 09:45
Show Gist options
  • Save kehanlu/cc8f4aa015f0dcaf19262c6ef9ba0e83 to your computer and use it in GitHub Desktop.
Save kehanlu/cc8f4aa015f0dcaf19262c6ef9ba0e83 to your computer and use it in GitHub Desktop.
get tickets with google flights (python webcrawler)

找機票

用爬蟲的方式透過 google flights 列出所有機票

方法

parse.py

用 Selenium 寫爬蟲,但 Google 頁面的 class 名稱都被打亂(混淆)過,所以 trace 了很久。

而且中途會遇到 class 改變的狀況。觀察了一陣子之後,發現要先 click 搜尋框才能抓到我要的 element,然後再輸入資料。

終於前進到要的頁面之後,就把 HTML 丟到 BeautifulSoup 裡面解析,之後再用 AJAX 把結果丟到前端就搞定了!

:D

<!DOCTYPE html>
<!-- Flight search page: two text inputs (departure / destination), a search
     button, and an #output container that the page script fills with results. -->
<html lang="zh-Hant">
<head>
    <meta charset="UTF-8">
    <title>找機票</title>
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta.2/css/bootstrap.min.css" integrity="sha384-PsH8R72JQ3SOdhVi3uxftmaW6Vc51MKb0q5P2rRUpPvrszuE4W1povHYgTpBfshb" crossorigin="anonymous">
    <!-- jQuery (same 1.11.1 release), loaded over HTTPS so browsers do not
         block it as mixed content when this page is served over HTTPS. -->
    <script src="https://code.jquery.com/jquery-1.11.1.min.js"></script>
    <style>
        /* Search form wrapper */
        .search{
            padding: 30px;
            text-align: center;
        }
        /* One result card */
        .box{
            margin-top: 30px;
        }
        /* Ticket price */
        h2{
            font-size: 24px;
            color: red;
            padding: 0 10px 0 10px;
        }
        /* Result-count heading above the cards */
        h2.title{
            color:black;
            margin-top:20px;
        }
        /* Airline name */
        h3{
            font-size: 18px;
            padding: 10px;
        }
        /* Time and duration */
        h4{
            font-size: 16px;
            padding: 10px;
            color: green;
        }
    </style>
</head>
<body>
    <div class="row">
        <div class="col-3"></div>
        <div class="col-6">
            <div class="search">
                從 <input type="text" id="departure">
                到 <input type="text" id="destination">
                <br>
                <button id="submit" type="button" class="btn btn-info" style="float: right;">搜尋</button>
            </div>
            <!-- Filled by the page script with the search results -->
            <div id="output"></div>
        </div>
        <div class="col-3"></div>
    </div>
</body>
<script>
// Search button handler: sends the departure/destination to the "ajax"
// endpoint and renders the returned tickets into #output.
$('#submit').click(function () {
    // Escape a value for insertion as HTML text. Ticket fields come from a
    // scraped third-party page and dep/des come from user input, so none of
    // them may be interpreted as markup.
    function escapeHtml(value) {
        return String(value)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    // Read the inputs once, so the result title reports the query that was
    // actually sent even if the user edits the fields while waiting.
    var des = $("#destination").val();
    var dep = $("#departure").val();

    $.ajax({
        type: "GET",
        url: "ajax",
        data: {
            des: des,
            dep: dep
        },
        // result is expected to be {tickets: [{price, company, time,
        // duration, ticket_type, flight_type}, ...]}.
        success: function (result) {
            var tickets = result['tickets'];
            var text = "";
            for (var i = 0; i < tickets.length; i++) {
                var ticket = tickets[i];
                // flight_type is returned by the server but not displayed.
                text +=
                    "<div class='alert alert-secondary box'>" +
                    "<h2>" + escapeHtml(ticket['price']) + "</h2>" +
                    "<h3>" + escapeHtml(ticket['company']) + "</h3>" +
                    "<h4>" + escapeHtml(ticket['time']) + " (" + escapeHtml(ticket['duration']) + ")</h4>" +
                    "<span class='badge badge-success'>" + escapeHtml(ticket['ticket_type']) + "</span>" +
                    "</div>";
            }
            if (tickets.length > 0) {
                var title =
                    "<h2 class='title'>" +
                    "找到 " + tickets.length + " 筆從 " + escapeHtml(dep) + " 到 " + escapeHtml(des) + " 的機票" + "</h2>";
                $("#output").html(title + text);
            } else {
                $("#output").html("<h2 class='title'>找到 0 筆資料,換個關鍵字吧!</h2>");
            }
        }
    });
});
</script>
</html>
# main parse code
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
from bs4 import BeautifulSoup
def from_to(browser, departure, destination):
    """
    Load the Google Flights search page, submit a route, and return the HTML.

    Args:
        browser: Selenium WebDriver instance used to drive the page.
        departure: Text typed into the first search box.
        destination: Text typed into the second search box.

    Returns:
        ``browser.page_source`` as it stands after both places were entered.
    """
    # Fixed pause (seconds) used after loading the page and after typing.
    pause_seconds = 0.3

    browser.get('https://www.google.com/flights/#search')
    sleep(pause_seconds)

    keyIn(browser, departure, destination)
    sleep(pause_seconds)

    return browser.page_source
def keyIn(browser, place1, place2):
    """
    Type the two places into the Google Flights search form.

    Per the original author's note, the class of the input ``div`` changes
    while typing, so each search box is clicked first and the keyword is
    typed only afterwards.

    Args:
        browser: Selenium WebDriver instance with the search page loaded.
        place1: Text for the first search box (index 0).
        place2: Text for the second search box (index 1).

    Raises:
        IndexError: If the expected search box or input element is not found.
    """
    # XPath locators; the class names are the obfuscated ones the page used
    # when this scraper was written.
    search_box_xpath = "//div[@class='LJV2HGB-Ab-a']"
    input_xpath = "//input[@class='LJV2HGB-Mb-f']"

    # find_elements("xpath", ...) replaces find_elements_by_xpath(), which
    # was removed in Selenium 4.3; the generic form exists in older
    # releases too.
    for box_index, place in enumerate((place1, place2)):
        # Click the box first so the text input becomes available.
        browser.find_elements("xpath", search_box_xpath)[box_index].click()
        browser.find_elements("xpath", input_xpath)[0].send_keys(place)
        sleep(0.3)
        # Look the input up again after the pause rather than reusing the
        # earlier element, then confirm the entry with RETURN.
        browser.find_elements("xpath", input_xpath)[0].send_keys(Keys.RETURN)
def get_tickets(departure, destination):
    """
    Scrape tickets for a route from 'google.com/flights' through Selenium.

    Args:
        departure: Departure place typed into the search form.
        destination: Destination place typed into the search form.

    Returns:
        A list of dicts, one per result, with the string fields 'price',
        'ticket_type', 'time', 'company', 'duration' and 'flight_type'.
        Results missing any of these fields are skipped.
    """
    browser = webdriver.Chrome()
    try:
        html = from_to(browser, departure, destination)
    finally:
        # Always shut the browser down, even when the page interaction
        # fails, so no Chrome process is left running.
        browser.quit()

    # (output key, CSS class of the <div> that holds its text)
    field_classes = (
        ('price', 'LJV2HGB-d-Ab'),        # ticket price
        ('ticket_type', 'LJV2HGB-d-Cb'),  # ticket type
        ('time', 'LJV2HGB-d-Zb'),         # time
        ('company', 'LJV2HGB-d-j'),       # airline
        ('duration', 'LJV2HGB-d-E'),      # flight duration
        ('flight_type', 'LJV2HGB-d-Qb'),  # flight info (direct / transfer)
    )

    bs = BeautifulSoup(html, 'html.parser')
    tickets = []
    for ele in bs.find_all('div', 'LJV2HGB-d-W'):
        ticket = {}
        try:
            for key, css_class in field_classes:
                ticket[key] = ele.find('div', css_class).text
        except AttributeError:
            # find() returned None: this result lacks one of the fields.
            continue
        tickets.append(ticket)
    return tickets
# djangoapp views
from django.shortcuts import render
from django.http import JsonResponse
from .parse import get_tickets
def index(request):
    """Render the 'plane/index.html' template with an empty context."""
    template_name = 'plane/index.html'
    context = {}
    return render(request, template_name, context)
def ajax(request):
    """
    Return the tickets for the requested route as JSON.

    Reads the query parameters 'dep' (departure) and 'des' (destination).

    Returns:
        JsonResponse with ``{'tickets': [...]}`` on success. If either
        parameter is missing, an HTTP 400 JsonResponse with an empty
        'tickets' list and an 'error' message.
    """
    departure = request.GET.get('dep')
    destination = request.GET.get('des')

    # A missing parameter is a client error; report it as 400 rather than
    # letting the key lookup raise and produce a 500.
    if departure is None or destination is None:
        return JsonResponse(
            {
                'tickets': [],
                'error': "Both 'dep' and 'des' query parameters are required.",
            },
            status=400,
        )

    data = {
        'tickets': get_tickets(departure, destination),
    }
    return JsonResponse(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment