Skip to content

Instantly share code, notes, and snippets.

# Entry widgets for the user input: the FROM / TO city codes and the two dates.
# FROM and the departure date live in frame_main_1, TO and the return date in
# frame_main_2.
from_city_entry = tk.Entry(
    frame_main_1,
    textvariable=from_city1,
    width=4,
)
# Every time a key is released inside the field, caps_from runs on it.
from_city_entry.bind("<KeyRelease>", caps_from)

to_city_entry = tk.Entry(
    frame_main_2,
    textvariable=to_city1,
    width=4,
)
# Every time a key is released inside the field, caps_to runs on it.
to_city_entry.bind("<KeyRelease>", caps_to)

departure_date_entry = tk.Entry(
    frame_main_1,
    textvariable=departure_date1,
    width=12,
)
return_date_entry = tk.Entry(
    frame_main_2,
    textvariable=return_date1,
    width=12,
)
def caps_from(event):
    """Upper-case the FROM field and keep at most its first three characters.

    Written as a Tk event callback: ``event`` is accepted but not used.
    The value is read from and written back to the ``from_city1`` variable.
    """
    upper_code = from_city1.get().upper()
    from_city1.set(upper_code)
    # Anything typed beyond the third character is cut off again.
    if len(upper_code) > 3:
        from_city1.set(upper_code[:3])
def caps_to(event):
    """Upper-case the TO field and keep at most its first three characters.

    Written as a Tk event callback: ``event`` is accepted but not used.
    The value is read from and written back to the ``to_city1`` variable.
    """
    upper_code = to_city1.get().upper()
    to_city1.set(upper_code)
    # Anything typed beyond the third character is cut off again.
    if len(upper_code) > 3:
        to_city1.set(upper_code[:3])
# Two sunken sub-frames placed inside center_frame.
frame_main_1 = tk.Frame(
    center_frame,
    borderwidth=2,
    relief="sunken",
)
frame_main_2 = tk.Frame(
    center_frame,
    borderwidth=2,
    relief="sunken",
)

# Labels naming the inputs we want from the user: FROM and the departure date
# go in frame_main_1, TO and the return date in frame_main_2.
from_city = tk.Label(frame_main_1, text="FROM: ")
to_city = tk.Label(frame_main_2, text="TO: ")
departure_date = tk.Label(frame_main_1, text=" DEPARTURE DATE:")
return_date = tk.Label(frame_main_2, text=" RETURN DATE:")
# Three frames stacked vertically in the window: header (row 0),
# center (row 1) and bottom (row 2), all in column 0.
frame_header = tk.Frame(window, borderwidth=2, pady=2)
frame_header.grid(row=0, column=0)

center_frame = tk.Frame(window, borderwidth=2, pady=5)
center_frame.grid(row=1, column=0)

bottom_frame = tk.Frame(window, borderwidth=2, pady=5)
bottom_frame.grid(row=2, column=0)

# Title label that belongs to frame_header.
header = tk.Label(
    frame_header,
    text="FLIGHT SCRAPER TOOL",
    bg="grey",
    fg="black",
    height="3",
    width="50",
    font="Helvetica 16 bold",
)
@fnneves
fnneves / flight_scraper_complete.py
Created June 27, 2019 09:34
full code for the flightscraper project
# -*- coding: utf-8 -*-
from time import sleep, strftime
from random import randint
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import smtplib
from email.mime.multipart import MIMEMultipart
import tkinter as tk
import easygui
import pandas as pd
from time import strftime
# Root Tk window (other sources often name this object "root").
window = tk.Tk()
window.title("FLIGHT SCRAPER")
# Optional sizing, currently disabled:
# window.geometry("600x600")  # size of the window when it opens
# window.minsize(width=600, height=600)  # minimum size of the window

# Ask for the search parameters on the console, one prompt at a time.
city_from = input("From which city? ")
city_to = input("Where to? ")
date_start = input(
    "Search around which departure date? Please use YYYY-MM-DD format only "
)
date_end = input("Return when? Please use YYYY-MM-DD format only ")
# Hard-coded values that can stand in for the prompts while testing:
# city_from = 'LIS'
# city_to = 'SIN'
# date_start = '2019-08-21'
# date_end = '2019-09-07'
def page_scrape():
    """Collect the text of every "section duration" element on the current page.

    Uses the module-level ``driver`` to find all elements whose class
    attribute is exactly ``"section duration"``, reads their text, and splits
    the resulting list into even-indexed and odd-indexed entries.

    NOTE(review): the two lists are neither returned nor stored in the code
    visible here, so the function currently returns ``None``; presumably the
    full script continues from this point - confirm before relying on it.
    """
    # Imported locally so this block does not depend on a file-level import.
    from selenium.webdriver.common.by import By

    xp_sections = '//*[@class="section duration"]'
    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3; this form is also accepted by Selenium 3.
    sections = driver.find_elements(By.XPATH, xp_sections)
    sections_list = [value.text for value in sections]
    # Even / odd positions are taken apart to separate the two flights.
    section_a_list = sections_list[::2]
    section_b_list = sections_list[1::2]
# if you run into a reCaptcha, you might want to do something about it
def start_kayak(city_from, city_to, date_start, date_end):
    """Open the Kayak flexible-date flight search for the given trip.

    city_from, city_to -- city codes (IATA codes).
    date_start, date_end -- departure and return dates, YYYY-MM-DD.

    Builds the search URL (sorted by "bestflight_a"), loads it with the
    module-level ``driver`` and then pauses for a random 8 to 10 seconds.
    """
    url_parts = [
        'https://www.kayak.com/flights/',
        city_from,
        '-',
        city_to,
        '/',
        date_start,
        '-flexible/',
        date_end,
        '-flexible?sort=bestflight_a',
    ]
    kayak = ''.join(url_parts)
    driver.get(kayak)
    pause_seconds = randint(8, 10)
    sleep(pause_seconds)
# sometimes a popup shows up, so we can use a try statement to check for it and close it
# Load more results to maximize the scraping
def load_more():
    """Click the "more results" link on the page, then wait for it to load.

    Best-effort: if the link cannot be found or clicked, the error is ignored
    and the function returns immediately without sleeping. On success it
    prints a progress note and sleeps for a random 45 to 60 seconds.

    Uses the module-level ``driver``. Returns ``None``.
    """
    # Imported locally so this block does not depend on a file-level import.
    from selenium.webdriver.common.by import By

    more_results = '//a[@class = "moreButton"]'
    try:
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # was removed in Selenium 4.3; this form also works on Selenium 3.
        driver.find_element(By.XPATH, more_results).click()
    except Exception:
        # Deliberately ignored: a missing button just means there is nothing
        # more to load. Catching Exception instead of a bare except lets
        # KeyboardInterrupt / SystemExit through.
        return
    # Printing these notes during the program helps to quickly check what it
    # is doing.
    print('sleeping.....')
    # Kept outside the try so that Ctrl+C during the long wait is not
    # swallowed.
    sleep(randint(45, 60))