Skip to content

Instantly share code, notes, and snippets.

# Load more results to maximize the scraping
def load_more():
    """Click the "more results" button on the current page, then pause.

    Looks up the anchor whose class is ``moreButton`` through the
    module-level ``driver`` and clicks it. After a successful click the
    function prints a progress note and sleeps for a random 45-60 seconds.

    This is best effort: if the button cannot be found or clicked, a short
    note is printed and the function returns without sleeping.
    """
    more_results = '//a[@class = "moreButton"]'
    try:
        driver.find_element_by_xpath(more_results).click()
    except Exception as exc:
        # Only ordinary errors are swallowed. A bare ``except`` around the
        # long sleep would also eat KeyboardInterrupt/SystemExit, making the
        # script impossible to stop with Ctrl+C while it waits.
        print('could not load more results ({})'.format(type(exc).__name__))
        return
    # Printing these notes during the program helps me quickly check what it is doing
    print('sleeping.....')
    sleep(randint(45, 60))
def start_kayak(city_from, city_to, date_start, date_end):
    """Open the Kayak flexible-date search for a round trip.

    City codes are IATA codes; dates use the YYYY-MM-DD format.
    The results page is requested sorted by "best flight", and the function
    then waits a random 8-10 seconds.
    """
    search_url = (
        f'https://www.kayak.com/flights/{city_from}-{city_to}'
        f'/{date_start}-flexible/{date_end}-flexible?sort=bestflight_a'
    )
    driver.get(search_url)
    wait_seconds = randint(8, 10)
    sleep(wait_seconds)
    # NOTE: a popup sometimes shows up after loading; a try statement can be
    # used to detect and close it (not done in this snippet).
def page_scrape():
    """Collect the text of every "section duration" element on the page.

    The texts are gathered in page order and then split into two lists by
    alternating position, which separates the two flights of each result.
    Nothing is returned by the portion of the function shown here.
    """
    duration_elements = driver.find_elements_by_xpath('//*[@class="section duration"]')
    sections_list = []
    for element in duration_elements:
        sections_list.append(element.text)
    # Alternate entries belong to different flights: even positions go to
    # list A, odd positions to list B.
    section_a_list = sections_list[0::2]
    section_b_list = sections_list[1::2]
    # if you run into a reCaptcha, you might want to do something about it
# Ask the user on the console for the four search parameters.
# Nothing typed here is validated; the values are stored exactly as entered.
# NOTE(review): these names match the parameters of start_kayak(), whose
# docstring expects IATA city codes and YYYY-MM-DD dates - confirm they are
# passed to it unchanged.
city_from = input('From which city? ')
city_to = input('Where to? ')
date_start = input('Search around which departure date? Please use YYYY-MM-DD format only ')
date_end = input('Return when? Please use YYYY-MM-DD format only ')
# Example values for a non-interactive run (use instead of the prompts above):
# city_from = 'LIS'
# city_to = 'SIN'
# date_start = '2019-08-21'
# date_end = '2019-09-07'
import tkinter as tk
import easygui
import pandas as pd
from time import strftime
# Create the top-level Tk window for the GUI (often named "root" in other sources)
window = tk.Tk()
# Text shown in the window's title bar
window.title("FLIGHT SCRAPER")
# Optional sizing, currently disabled:
# window.geometry("600x600")  # size of the window when it opens
# window.minsize(width=600, height=600)  # minimum size the window may be resized to
@fnneves
fnneves / flight_scraper_complete.py
Created June 27, 2019 09:34
full code for the flightscraper project
# -*- coding: utf-8 -*-
from time import sleep, strftime
from random import randint
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import smtplib
from email.mime.multipart import MIMEMultipart
# Three frames stacked on top of each other in the window, one grid row each
frame_header = tk.Frame(window, borderwidth=2, pady=2)
frame_header.grid(row=0, column=0)

center_frame = tk.Frame(window, borderwidth=2, pady=5)
center_frame.grid(row=1, column=0)

bottom_frame = tk.Frame(window, borderwidth=2, pady=5)
bottom_frame.grid(row=2, column=0)

# Title label that lives inside frame_header
header = tk.Label(
    frame_header,
    text="FLIGHT SCRAPER TOOL",
    bg='grey',
    fg='black',
    height='3',
    width='50',
    font="Helvetica 16 bold",
)

# Two sunken sub-frames inside center_frame
frame_main_1 = tk.Frame(center_frame, relief='sunken', borderwidth=2)
frame_main_2 = tk.Frame(center_frame, relief='sunken', borderwidth=2)

# Labels naming the inputs we want from the user, grouped by parent frame:
# frame_main_1 holds the outbound labels (FROM / DEPARTURE DATE)
from_city = tk.Label(frame_main_1, text="FROM: ")
departure_date = tk.Label(frame_main_1, text=" DEPARTURE DATE:")
# frame_main_2 holds the return labels (TO / RETURN DATE)
to_city = tk.Label(frame_main_2, text="TO: ")
return_date = tk.Label(frame_main_2, text=" RETURN DATE:")
def caps_from(event):
    """Keep the FROM input upper case and at most 3 characters long.

    ``event`` is accepted so the function can be used as an event callback;
    it is not read. The value is read from and written back to the
    module-level ``from_city1`` variable.
    """
    upper_code = from_city1.get().upper()
    from_city1.set(upper_code)
    current = from_city1.get()
    if len(current) > 3:
        # Drop everything after the third character
        from_city1.set(current[:3])
def caps_to(event):
    """Keep the TO input upper case and at most 3 characters long.

    ``event`` is accepted so the function can be used as an event callback;
    it is not read. The value is read from and written back to the
    module-level ``to_city1`` variable.
    """
    upper_code = to_city1.get().upper()
    to_city1.set(upper_code)
    current = to_city1.get()
    if len(current) > 3:
        # Drop everything after the third character
        to_city1.set(current[:3])
# Entry widgets for the user input: FROM, TO and the two dates.
# City entries are 4 characters wide, date entries 12.
from_city_entry = tk.Entry(frame_main_1, width=4, textvariable=from_city1)
to_city_entry = tk.Entry(frame_main_2, width=4, textvariable=to_city1)
departure_date_entry = tk.Entry(frame_main_1, width=12, textvariable=departure_date1)
return_date_entry = tk.Entry(frame_main_2, width=12, textvariable=return_date1)

# Every time a key is released inside a city entry, run the matching
# caps_* callback on that field.
from_city_entry.bind("<KeyRelease>", caps_from)
to_city_entry.bind("<KeyRelease>", caps_to)