-
-
Save SivaArwin/1309276033837b479a61a7986d42dc17 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _parse_matrix_prices(cell_texts):
    """Convert flex-matrix cell texts such as ``'$1,234'`` into integers.

    The dollar sign, thousands separators and surrounding whitespace are
    removed. Cells that are empty or not a whole number afterwards are
    skipped instead of raising, so one odd cell cannot abort the whole run.
    """
    prices = []
    for text in cell_texts:
        cleaned = text.replace('$', '').replace(',', '').strip()
        if cleaned.isdecimal():
            prices.append(int(cleaned))
    return prices


def _scrape_current_results(ordinal, sort_label, wait_range):
    """Wait for the result list, scrape it and tag the rows with *sort_label*.

    ordinal     -- word used in the progress message ('first', 'second', ...)
    sort_label  -- value stored in the ``sort`` column of the returned frame
    wait_range  -- ``(low, high)`` bounds in seconds for the random wait
                   before scraping
    """
    sleep(randint(*wait_range))
    print('loading more.....')
    # load_more()
    print('starting {} scrape.....'.format(ordinal))
    df_flights = page_scrape()
    df_flights['sort'] = sort_label
    sleep(randint(60, 80))
    return df_flights


def _send_summary_email(matrix_min, matrix_avg, recommendation):
    """Send the plain-text price summary through the Outlook SMTP server."""
    # Placeholder credentials: replace with the real account details.
    username = '[email protected]'
    password = 'YOUR PASSWORD'

    msg = ('Subject: Flight Scraper\n\n'
           'Cheapest Flight: {}\nAverage Price: {}\n\n'
           'Recommendation: {}\n\nEnd of message'
           .format(matrix_min, matrix_avg, recommendation))

    # The context manager issues QUIT and closes the socket even when
    # login or sendmail raises, so the connection is never leaked.
    with smtplib.SMTP('smtp.outlook.com', 587) as server:
        server.ehlo()
        server.starttls()
        server.login(username, password)
        server.sendmail('[email protected]', '[email protected]', msg)


def start_kayak(city_from, city_to, date_start, date_end):
    """Scrape Kayak flexible-date results for one route and email a summary.

    City codes - it's the IATA codes!
    Date format - YYYY-MM-DD

    The results page is scraped three times (best, cheapest and quickest
    sort order). The combined rows are written to an Excel file under
    ``search_backups`` and a short summary (lowest and average price from
    the flex matrix plus Kayak's advice text) is sent by email.

    Relies on the module-level ``driver`` and ``page_scrape``. Returns None.
    If the flex matrix yields no usable price, the cheapest and average
    values in the email are reported as ``None``.
    """
    import os

    import pandas as pd

    kayak = ('https://www.kayak.com/flights/' + city_from + '-' + city_to +
             '/' + date_start + '-flexible/' + date_end +
             '-flexible?sort=bestflight_a')
    driver.get(kayak)
    sleep(randint(8, 10))

    # sometimes a popup shows up, so try to close it (best effort)
    xp_popup_close = '//button[contains(@id,"dialog-close") and contains(@class,"Button-No-Standard-Style close ")]'
    try:
        # find_elements('xpath', ...) is available on Selenium 3 and 4,
        # unlike the find_elements_by_xpath shortcut removed in 4.3.
        driver.find_elements('xpath', xp_popup_close)[5].click()
    except IndexError:
        # fewer than six matching buttons: no popup to close
        pass
    except Exception as e:
        # Still best effort, but no longer silent.
        print('could not close popup: {}'.format(e))

    df_flights_best = _scrape_current_results('first', 'best', (60, 95))

    # Let's also get the lowest prices from the matrix on top
    matrix = driver.find_elements('xpath', '//*[contains(@id,"FlexMatrixCell")]')
    matrix_prices = _parse_matrix_prices(cell.text for cell in matrix)
    if matrix_prices:
        matrix_min = min(matrix_prices)
        matrix_avg = sum(matrix_prices) / len(matrix_prices)
    else:
        # No usable price cell: report None rather than crash in min()
        # or divide by zero.
        matrix_min = None
        matrix_avg = None

    print('switching to cheapest results.....')
    cheap_results = '//a[@data-code = "price"]'
    driver.find_element('xpath', cheap_results).click()
    df_flights_cheap = _scrape_current_results('second', 'cheap', (60, 90))

    print('switching to quickest results.....')
    quick_results = '//a[@data-code = "duration"]'
    driver.find_element('xpath', quick_results).click()
    df_flights_fast = _scrape_current_results('third', 'fast', (60, 90))

    # saving a new dataframe as an excel file. the name is custom made to
    # your cities and dates
    # pd.concat keeps the same row order and index as the chained
    # DataFrame.append calls it replaces (append was removed in pandas 2.0).
    final_df = pd.concat([df_flights_cheap, df_flights_best, df_flights_fast])
    # to_excel does not create missing directories.
    os.makedirs('search_backups', exist_ok=True)
    final_df.to_excel(
        'search_backups//{}_flights_{}-{}_from_{}_to_{}.xlsx'.format(
            strftime("%Y%m%d-%H%M"), city_from, city_to,
            date_start, date_end),
        index=False)
    print('saved df.....')

    # We can keep track of what they predict and how it actually turns out!
    xp_loading = '//div[contains(@id,"advice")]'
    loading = driver.find_element('xpath', xp_loading).text
    xp_prediction = '//span[@class="info-text"]'
    prediction = driver.find_element('xpath', xp_prediction).text
    print(loading + '\n' + prediction)

    # sometimes we get this string in the loading variable, which will
    # conflict with the email we send later
    # just change it to "Not sure" if it happens
    weird = '¯\\_(ツ)_/¯'
    if loading == weird:
        loading = 'Not sure'

    _send_summary_email(matrix_min, matrix_avg, loading + '\n' + prediction)
    print('sent email.....')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment