Sample input files generator: https://scrapers.skypicker.com:5000/generate_sample_data?airline=5J helen/pajicek
Please use sGrab (sGrab.py) and Error codes (airlines_exceptions.py)
Sample input files generator: https://scrapers.skypicker.com:5000/generate_sample_data?airline=5J helen/pajicek
Please use sGrab (sGrab.py) and Error codes (airlines_exceptions.py)
| # -*- coding: utf-8 -*- | |
| import sys | |
| #sys.path.append('/srv/scrapers') | |
| #sys.path.append('/srv/scrapers/simple') | |
| import re | |
| import argparse | |
| import logging | |
| import csv | |
| import requests | |
| import json | |
| import lxml.html | |
| import pycurl | |
| import pytz | |
| import socket | |
| import traceback | |
| import ujson | |
| from pytz import country_timezones | |
| import random | |
| from random import choice | |
| from decimal import Decimal | |
| from itertools import islice | |
| from datetime import datetime, timedelta | |
| from dateutil.relativedelta import relativedelta | |
| from requests import Session | |
| from random import choice | |
| from collections import OrderedDict | |
| from grab import Grab | |
| from pprint import pprint as pp | |
| import os | |
| from time import sleep | |
| import time | |
| sys.path.append('/root/Scrapers') | |
| sys.path.append('/srv/Scrapers') | |
| from scraperlib.s_grab import * | |
# HTTP proxies (host:port); prepare_input() picks one at random whenever the
# local machine's address is not listed in dev_ips.
booking_proxies = list(
    "%s:%d" % (proxy_host, 8888)
    for proxy_host in (
        "192.81.214.211",
        "37.139.23.93",
        "128.199.221.61",
        "178.62.50.177",
        "188.226.169.149",
        "192.81.212.107",
        "107.170.165.55",
    )
)

# Addresses of machines that connect directly, without a booking proxy.
dev_ips = list((
    "146.185.172.28",
    "188.166.6.171",
))
# Timezone applied to every timestamp this module produces: the first zone
# pytz lists for country code "CZ".
tz = pytz.timezone(country_timezones("CZ")[0])


def save_file(filename="test.html", path="/srv/Scrapers/booking/airlines/html/", body=""):
    """Write *body* to ``<path><filename>_<HH-MM_dd-mm-YYYY>.html``.

    Any ".html" already present in the composed name is stripped before the
    single ".html" suffix is appended. The target path is printed first.

    :param filename: base name of the dump file.
    :param path: directory prefix, expected to end with a slash.
    :param body: content written to the file (opened in binary mode).
    """
    stamp = datetime.now(tz).strftime("%H-%M_%d-%m-%Y")
    target = ("%s%s_%s" % (path, filename, stamp)).replace(".html", "") + ".html"
    print("saving page as %s ..." % target)
    with open(target, "wb+") as handle:
        handle.write(body)
class error_payment_failed(Exception):
    """Exception carrying the fixed message "payment_failed".

    Attributes:
        message: always the string "payment_failed".
        info: optional free-form detail supplied by the raiser.
    """

    def __init__(self, info=""):
        # Hand the message to Exception so str(exc) and tracebacks are not
        # blank; the original only set attributes and left args empty.
        super(error_payment_failed, self).__init__("payment_failed")
        self.message = "payment_failed"
        self.info = info
class BaseAirline(object):
    """Base class for all airline scrapers.

    Provides the interface subclasses implement (``book_flight``) and shared
    tools: input parsing, price parsing and checking, page dumps, and the
    JSON reports written to stderr.

    Subclasses must set ``code`` (asserted in ``__init__``) and
    ``childs_max_age`` (asserted in ``prepare_input``).
    """

    #! IATA airline code.
    code = None
    # Passengers born less than this many years ago count as children.
    childs_max_age = None
    folder = '/srv/results/'
    output = ''
    # Path of the last page written by save_file(); echoed in the reports.
    html_url = ""
    # Converted price stored by the last successful check_price() call.
    price = 0.0

    def __init__(self):
        """Initializes airline scraper."""
        assert self.code, "IATA airline code must be defined."
        self._session = Session()

    def _to_html(self, response):
        """Parses given HTTP response into HTML DOM object.

        :param response: HTTP response.
        :type response: :class:`requests.Response`
        """
        return lxml.html.fromstring(response.text)

    def _to_price(self, value, thousand_sep=',', dec_sep='.'):
        """Parses given string into Decimal object holding
        amount of currency. Separator defaults are set according to
        English customs.

        Thousand separators are dropped, the decimal separator is normalised
        to ".", and every remaining character that is neither a digit nor a
        dot is removed before conversion.
        """
        value = value.replace(thousand_sep, '').replace(dec_sep, '.')
        value = re.sub(r'[^\d\.]', '', value)
        return Decimal(value)

    def parse_args(self):
        """Load the booking request named by ``--json_name`` into ``self.json_data``.

        Prints the help text and exits with status 1 when the script is run
        without arguments. Flight ``departure``/``arrival`` values, passenger
        ``birthday`` values and the card ``exp`` value are converted from
        strings to :class:`datetime.datetime` in place.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--json_name", type=str,
                            help="path to json file")
        if len(sys.argv) == 1:
            parser.print_help()
            sys.exit(1)
        args = parser.parse_args()
        with open(args.json_name, "r") as f:
            self.json_data = json.loads(f.read())

        for flight in self.json_data["flights"]:
            flight['departure'] = datetime.strptime(flight['departure'], "%Y-%m-%d %H:%M")
            flight['arrival'] = datetime.strptime(flight['arrival'], "%Y-%m-%d %H:%M")
        for passenger in self.json_data["passengers"]:
            passenger['birthday'] = datetime.strptime(passenger['birthday'], "%Y-%m-%d")
        self.json_data["exp"] = datetime.strptime(self.json_data["exp"], "%m/%y")

    @staticmethod
    def _local_ip():
        """Return the local address of a UDP socket connected to 8.8.8.8:80."""
        probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            probe.connect(('8.8.8.8', 80))
            return probe.getsockname()[0]
        finally:
            # Close even when connect() raises; the original leaked the socket.
            probe.close()

    def prepare_input(self):
        """Prepare input data for a booking scraper (children, return flight,
        user-friendly names).

        :returns: the tuple ``(passengers, childs, baggage, card, cvc,
            expiration, max_price, email, phone, holder, departure_flight,
            return_flight, g, childs_b_days)``. ``passengers`` is sorted by
            birthday, ``return_flight`` is ``None`` for a single flight and
            ``g`` is a configured ``sGrab`` instance.
        """
        assert self.childs_max_age, "childs_max_age is not set."
        passengers = sorted(self.json_data["passengers"], key=lambda x: x["birthday"])
        child_cutoff = datetime.now() - timedelta(days=365.25 * self.childs_max_age)
        childs = [p for p in passengers if p["birthday"] > child_cutoff]
        childs_b_days = [p["birthday"] for p in childs]
        #TODO new
        #infants = [p for p in childs if p["birthday"] > datetime.now() - timedelta(days = 365.25*self.infants_max_age)]
        #infants_b_days = [p["birthday"] for p in childs if p["birthday"] > datetime.now() - timedelta(days = 365.25*self.infants_max_age)]
        #TODO new
        flights = self.json_data["flights"]
        baggage = sum(int(x["bags"]) for x in self.json_data["passengers"])
        card = self.json_data["cardnumber"]
        cvc = self.json_data["CVV"]
        expiration = self.json_data["exp"]
        max_price = self.json_data["maxprice"]
        email = self.json_data["email"]
        phone = self.json_data["phone"]
        holder = self.json_data["nameoncard"]

        # With two flights, the one departing later is the return leg.
        return_flight = None
        if len(flights) > 1:
            if flights[1]["departure"] > flights[0]["departure"]:
                departure_flight, return_flight = flights[0], flights[1]
            else:
                departure_flight, return_flight = flights[1], flights[0]
        else:
            departure_flight = flights[0]

        g = sGrab()
        # NOTE(review): TLS certificate and host verification are switched
        # off for every request made through this client.
        g.transport.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        g.transport.curl.setopt(pycurl.SSL_VERIFYHOST, 0)
        #g.transport.curl.setopt(pycurl.SSLVERSION, 3)
        g.transport.curl.setopt(pycurl.SSL_CIPHER_LIST, 'SSLv3')
        if self._local_ip() not in dev_ips:
            proxy = choice(booking_proxies)
            print("I will use this proxy %s" % proxy)
            self.proxy = proxy
            g.setup(proxy=proxy, proxy_type='http', connect_timeout=5, timeout=5)
        # NOTE(review): the source formatting lost its indentation here;
        # confirm hammer mode is meant for direct (dev) connections too.
        g.setup(hammer_mode=True, hammer_timeouts=((40, 40),))

        # set default adult if only child is in input and it is only check!
        # (compared with ==; the original used identity checks on an int
        # and a str)
        if len(passengers) == len(childs) and int(max_price) == 1:
            childs = []
            passengers = [{
                "bags": baggage,
                "birthday": datetime.now() - timedelta(days=365.25 * 25),
                "cardno": "A9449115",
                "doctype": "P",
                "expiration": "2064-11-19",
                "familyname": "Novak",
                "firstname": "Jan",
                "nationality": "CZ",
                "title": "Mr",
                "visa": None
            }]
        print("PASS %s" % ", ".join(["%s %s" % (p["firstname"], p["familyname"]) for p in passengers]))
        return (passengers, childs, baggage, card, cvc, expiration,
                max_price, email, phone, holder, departure_flight, return_flight, g, childs_b_days)

    def _to_datetime(self, value, format='%Y-%m-%dT%H:%M:%S'):
        """Parses :class:`datetime.datetime`. If both *value* and *format*
        given as iterables of two elements, they're considered
        separately as date and time.

        :raises TypeError: when exactly one of *value* / *format* is a string.
        """
        try:
            text_types = basestring  # Python 2: covers str and unicode
        except NameError:
            text_types = str
        value_is_str = isinstance(value, text_types)
        format_is_str = isinstance(format, text_types)
        if not value_is_str and not format_is_str:
            # parse date and time separately, then combine
            return datetime.combine(
                datetime.strptime(value[0], format[0]).date(),
                datetime.strptime(value[1], format[1]).timetz(),
            )
        if value_is_str and format_is_str:
            # parse date and time together
            return datetime.strptime(value, format)
        raise TypeError("Unrecognized combination of arguments.")

    def save_file(self, filename="page.html", path="/srv/Scrapers/booking/airlines/html/", body=""):
        """Save html page to file, with timestamp in filename.

        The file goes into ``<path><code>/`` (created when missing). Its name
        combines *filename*, the booking id (``json_data["bid"]``, 0 when
        absent), a timestamp and a random number. The resulting path is kept
        in ``self.html_url``.
        """
        path += "%s/" % self.code
        if not os.path.exists(path):
            os.makedirs(path)
        tz = pytz.timezone(country_timezones("CZ")[0])
        bid = 0
        try:
            bid = self.json_data.get("bid", 0)
        except AttributeError as exc:
            # json_data is not loaded (or is not a dict): keep bid = 0.
            print(exc)
        final_name = "%s%s_%s_%s_%s" % (path, filename, bid, datetime.now(tz).strftime("%H-%M_%d-%m-%Y"), random.random())
        final_name = final_name.replace(".html", "")
        final_name += ".html"
        print("saving page as %s ..." % final_name)
        with open(final_name, "wb+") as f:
            f.write(body)
        self.html_url = final_name

    def output_error(self, msg='not_found', data="", html_url=""):
        """Write a JSON error report to stderr and exit with status 1.

        :param msg: error code stored under ``"error"``.
        :param data: extra detail stored under ``"data"``.
        :param html_url: page path to report; defaults to the last page
            written by :meth:`save_file` (the original ignored this argument).
        """
        html_url = html_url or self.html_url
        sys.stderr.write(ujson.dumps({'error': msg, 'data': data, 'html_url': html_url, 'datetime': datetime.now(tz).strftime("%d-%m-%Y_%H:%M")}))
        sys.exit(1)

    # deprecated
    def output_pricechange(self, new_price=1):
        """Write a price-change report to stderr and exit with status 1."""
        sys.stderr.write(json.dumps({"max_price": self.json_data["maxprice"], "fresh_price": float(new_price), "timestamp": datetime.now(tz).strftime("%d-%m-%Y_%H:%M")}))
        sys.exit(1)

    # use this
    def check_price(self, amount, currency):
        """Convert *amount* and compare it with the request's ``maxprice``.

        The amount is multiplied by the ``value`` field returned by the
        skypicker rates endpoint for *currency*. When the result exceeds
        ``json_data["maxprice"]`` a ``price_change`` report is written to
        stderr and the process exits with status 1. Otherwise the converted
        amount is stored in ``self.price`` and ``self.start_time`` is set.
        """
        amount = self._to_price(str(amount))
        print("%s %s" % (amount, currency))
        skypicker_currency_url = "https://cz.skypicker.com/rates/"
        rates = self._session.get(skypicker_currency_url + currency.lower()).json()
        amount = float(rates['value']) * float(amount)
        if float(amount) > float(self.json_data["maxprice"]):
            error_msg = {
                "ids": [f['id'] for f in self.json_data["flights"]],
                "max_price": self.json_data["maxprice"],
                "fresh_price": float(amount),
                "status": "price_change",
                "datetime": datetime.now(tz).strftime("%H-%M_%d-%m-%Y")
            }
            sys.stderr.write(json.dumps(error_msg))
            # sys.exit rather than the site-provided exit() builtin.
            sys.exit(1)
        self.start_time = time.time()
        #self.waiting_on_semaphore()
        self.price = amount
        print("%s %s" % (amount, "EUR"))

    def waiting_on_semaphore(self):
        """Poll the booking-status endpoint until it stops reporting "pending".

        * ``canceled`` raises ``Exception("payment canceled")``.
        * ``pending`` sleeps 5 seconds and polls again; once more than five
          minutes have passed since ``self.start_time`` it calls
          ``self.book_flight()`` and returns.
        * ``ok`` merges the returned data into ``self.json_data``.

        Statuses are compared with ``==``; the original identity checks
        never matched a JSON-decoded string.
        """
        semaphore_url = "https://cz.skypicker.com/api/v0.1/automatic_booking_process_status"
        while True:
            status_data = self._session.get(semaphore_url + "?bid=%s&iata=%s" % (self.json_data["bid"], self.code)).json()
            status = status_data["status"]
            if status == "canceled":
                raise Exception("payment canceled")
            if status == "pending":
                if time.time() - self.start_time > (60 * 5):
                    self.book_flight()
                    return
                sleep(5)
                continue
            if status == "ok":
                self.json_data.update(status_data)  # update card data
            return

    def output_res_number(self, reservation_number, additional_info=None):
        """Write the booking result as JSON to stderr and return True.

        :param reservation_number: reservation code to report.
        :param additional_info: optional dict merged into the report.
        """
        response = {"reservation_number": reservation_number, "price": str(self.price), 'html_url': self.html_url, 'datetime': datetime.now(tz).strftime("%d-%m-%Y_%H:%M")}
        response.update(additional_info or {})
        sys.stderr.write(json.dumps(response))
        return True
        #sys.exit(1)

    ## helpers
    # deprecated
    def to_eur(self, curr, amount):
        """Multiply *amount* by the skypicker rate for *curr*, rounded to 2 places."""
        rates = self._session.get("https://cz.skypicker.com/rates/" + curr.lower()).json()
        return round(rates['value'] * amount, 2)

    def parse_price(self, string):
        """Extract a float price from text with arbitrary separators.

        Digit groups are collected in order. A single group is the price.
        With several groups, a last group that is not exactly three digits
        long is the fractional part and the rest form the integer part; a
        three-digit last group is treated as another thousands group.
        Returns 0.0 when the text has no digits.

        Groups are matched with ``\\d+``; the original class ``[\\d+]`` also
        accepted a literal "+" inside a group.
        """
        groups = re.findall(r"\d+", string)
        if not groups:
            return 0.0
        if len(groups) == 1:
            return float(groups[0])
        if len(groups[-1]) != 3:
            return float("0." + groups[-1]) + float(int("".join(groups[:-1])))
        return float(int("".join(groups)))

    #helper for develop
    def compare_dicts(self, original, used):
        """Print each key of *original* that is missing from *used* or whose
        string form differs there."""
        for k, v in original.items():
            if k in used:
                if str(v) != str(used[k]):
                    print("KEY: %s ... >%s<(orig) VS >%s<(script)" % (k, v, used[k]))
            else:
                print("KEY %s:%s is not in params" % (k, v))
# Error codes shared by the booking scripts and the core that runs them.
SEARCH_FAILED = "search_failed"
PAYMENT_FAILED = "payment_failed"
PRICE_CHANGED = "price_changed"
GETTING_RES_CODE_FAILED = "getting_res_code_failed"
UNKNOW_CURRENCY = "unknow_currency"
FLIGHT_NOT_FOUND = "flight_not_found"
BOOKING_ON_MAIL = "booking_on_mail"
CANT_BOOK_BAGS = "cant_book_bags"
CANT_BOOK_BABY = "cant_book_baby"
DUPLICATE_NAMES = "duplicate_names"
UNEXPECTED_ERROR = "unexpected_error"
AIRLINE_WEB_DOWN = "airline_web_down"
LOGIN_FAILED = "login_failed"
USE_POLICY = "use_policy"

# Message template per error code; each takes one "%s" detail argument.
ERR_CODES = dict([
    (SEARCH_FAILED, "Search failed. %s"),
    (PAYMENT_FAILED, "Payment failed. %s"),
    (PRICE_CHANGED, "Price changed. %s"),
    (GETTING_RES_CODE_FAILED, "Problem with get reservation code. %s"),
    (UNKNOW_CURRENCY, "Unknow currency. %s"),
    (FLIGHT_NOT_FOUND, "Flight not found. %s"),
    (BOOKING_ON_MAIL, "Booking on email. %s"),
    (CANT_BOOK_BAGS, "Can't book bags. %s"),
    (CANT_BOOK_BABY, "Can't book baby or child. %s"),
    (DUPLICATE_NAMES, "Passengers with same name did not pass validation. %s"),
    (UNEXPECTED_ERROR, "Unexpected error %s"),
    (AIRLINE_WEB_DOWN, "Arline website seems down %s"),
    (LOGIN_FAILED, "Can't log in on airline website %s"),
    (USE_POLICY, "Airline blocking our booking automatas %s"),
])

# URL prefix under which saved booking pages are reachable.
html_url_path = "http://www3.skypicker.com:12555/last_page_of_booking/"
class BookingError(Exception):
    """Booking failure identified by one of the ``ERR_CODES`` keys.

    :param error_code: key of ``ERR_CODES``; an unknown code is reported on
        stdout and falls back to the ``UNEXPECTED_ERROR`` template.
    :param info: detail interpolated into the message template.
    :param html_url: path of the saved page; when non-empty only its last
        path component is kept and prefixed with ``html_url_path``.

    Attributes: ``error_code``, ``html_url``, ``message``.
    """

    def __init__(self, error_code, info="", html_url=""):
        self.error_code = error_code
        if html_url != "":
            html_url = html_url_path + html_url.split("/")[-1]
        self.html_url = html_url
        try:
            template = ERR_CODES[error_code]
        except (KeyError, TypeError):
            print("%s not found!" % (error_code,))
            template = ERR_CODES[UNEXPECTED_ERROR]
        # Wrap info in a tuple so tuple-valued details cannot break the
        # formatting, and format the fallback template too (the original
        # left a raw "%s" in the fallback message).
        self.message = template % (info,)
        super(BookingError, self).__init__(self.message)

    def __str__(self):
        return "%s: %s" % (self.error_code, self.message)
""" README
Feel free to add new error codes ;)

Usage in booking scripts:
    self.output_error(SEARCH_FAILED)
    self.output_error(LOGIN_FAILED)

In core.py it is called like this:
    if jsn.get("error") in ERR_CODES:
        raise BookingError(jsn.get("error"), jsn.get("data"))
"""
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*-\n | |
| import ujson | |
| import urllib | |
| import pycurl | |
| import re | |
| import sys | |
| import json | |
| import ast | |
| import lxml | |
| import lxml.html | |
| import traceback | |
| from time import sleep | |
| from pprint import pprint as pp | |
| from datetime import datetime, timedelta | |
| from airlines import * | |
| from airlines_exceptions import * | |
| reload(sys) | |
| sys.setdefaultencoding('utf-8') | |
| __author__ = "your name" | |
| __editor__ = "" #TODO | |
class Airline(BaseAirline):
    """Template for a new airline booking script.

    Fill in ``code`` and ``childs_max_age`` and replace every TODO in
    :meth:`book_flight` with the airline-specific steps.
    """

    code = ""  #TODO
    childs_max_age = 0  #TODO

    def book_flight(self):
        """Skeleton of the booking flow: prepare input, check price, pay, report."""
        (passengers, childs, baggage, card, cvc, expiration, max_price, email,
         phone, holder, departure_flight, return_flight, g, childs_b_days) = self.prepare_input()

        #TODO magic

        contact_detail = {
            "title": "MR",
            "firstName": "Oliver",
            "lastName": "Dlouhy",
            "street": "Bakalovo nabrezi 2",
            "zipCode": "63900",
            "city": "Brno",
            "country": "CZ",
            "email": email,
            "repeatemail": email,
            "phoneNumber": "+380" + phone,
        }

        #TODO: `price` and `currency` must be read from the airline's page
        # before this call; the template does not define them.
        self.check_price(price, currency)

        g.setup(hammer_mode=True, hammer_timeouts=((300, 300),))
        try:
            #TODO payment
            self.save_file(filename="airline.html", body=g.response.body)
            self.output_res_number("not parsed yet")  #TODO ask me
        except Exception:
            # Show why the payment step failed instead of discarding it.
            print(traceback.format_exc())
            self.output_error(msg=PAYMENT_FAILED)


if __name__ == "__main__":
    airline = Airline()
    airline.parse_args()
    airline.book_flight()
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*-\n | |
| import ujson | |
| import urllib | |
| import pycurl | |
| import re | |
| import sys | |
| import json | |
| from lxml import etree | |
| from time import sleep | |
| from pprint import pprint as pp | |
| from datetime import datetime, timedelta | |
| from grab import Grab | |
| from airlines_exceptions import * | |
| from airlines import * | |
| from airlines_exceptions import BookingError | |
| __author__ = 'Ladislav Radoň, [email protected]' | |
| reload(sys) | |
| sys.setdefaultencoding('utf-8') | |
class EastarJet(BaseAirline):
    """Booking script for airline code ``ZE`` driving www.eastarjet.com."""

    code = "ZE"
    childs_max_age = 12

    @staticmethod
    def _journey_matches(journey_key, flight):
        """Tell whether *journey_key* names *flight*: it must contain both
        airport codes and both timestamps formatted as ``%m/%d/%Y %H:%M``."""
        if not journey_key:
            return False
        stamp = "%m/%d/%Y %H:%M"
        return (flight["from"] in journey_key and
                flight["to"] in journey_key and
                flight["departure"].strftime(stamp) in journey_key and
                flight["arrival"].strftime(stamp) in journey_key)

    def book_flight(self):
        """Search, select, fill passenger data, pay and report the reservation.

        A missing flight is reported with ``FLIGHT_NOT_FOUND``. A failure
        before the site confirms payment is reported with ``PAYMENT_FAILED``;
        a failure after the confirmation with ``GETTING_RES_CODE_FAILED``.
        Every report goes through ``output_error``, which exits the process.
        """
        (passengers, childs, baggage, card, cvc, expiration, max_price, email,
         phone, holder, departure_flight, return_flight, g, childs_b_days) = self.prepare_input()

        domestic = ["GMP", "KUV", "CJU", "CJJ", "ICN"]  # domestic airports (SOUTH KOREA)

        # get session
        g.setup(hammer_mode=True, hammer_timeouts=((200, 200),))
        g.go("http://www.eastarjet.com/book/index.htm")

        post_data = {}
        if departure_flight["from"] in domestic and departure_flight["to"] in domestic:
            cd_station = "DOM"
            post_data.update({
                "cd_fromcountry": "KR",
                "cd_tocountry": "KR",
            })
        else:
            cd_station = "INT"
        post_data.update({
            "method": "quickStep",
            "cd_station": cd_station,
            "cd_return": 0,
            "cd_fromline": departure_flight["from"],
            "nm_fromline": '',
            "cd_toline": departure_flight["to"],
            "nm_toline": '',
            "dt_from": departure_flight["departure"].strftime("%Y-%m-%d"),
            "no_person_m": len(passengers) - len(childs),
            "no_person_p": len(childs),
            "no_person_b": 0,
        })
        if return_flight:
            post_data.update({
                "cd_return": 1,
                "dt_to": return_flight["departure"].strftime("%Y-%m-%d"),
            })
        pp(post_data)
        g.setup(post=post_data)
        g.go("http://www.eastarjet.com/book/book.htm")

        ajax_data = {
            "method": "availability",
            "dt_date": post_data["dt_from"],
            "is_departure": "true",
            "fromline": post_data["cd_fromline"],
            "toline": post_data["cd_toline"],
            "nmfromline": "",
            "nmtoline": "",
            "dt_from": post_data["dt_from"],
            "cd_station": post_data["cd_station"],
        }
        if return_flight:
            ajax_data.update({
                "dt_to": return_flight["departure"].strftime("%Y-%m-%d"),
            })
        g.setup(post=ajax_data)
        g.go("http://www.eastarjet.com/book/bookAjax.ajax")

        root = etree.XML(g.response.body)
        fl_found = False
        for item in root.findall('.//list'):
            # findtext() yields None for a missing element instead of raising.
            jkey = item.findtext("journeyKey")
            if self._journey_matches(jkey, departure_flight):
                post_data = {
                    "method": "bookingStep2",
                    "dt_fromstd": item.find('./std').text,
                    "dt_fromsta": item.find('./sta').text,
                    "nm_fromfn": item.find('./flightNumber').text,
                    "nm_fromjkey": jkey,
                    "nm_fromfkey": item.find("./secondaryLowFare/fareSellKey").text,
                    "nm_fromFareName": "secondaryLow",
                    "dt_from": departure_flight["departure"].strftime("%Y-%m-%d"),
                    "nm_fromfare": item.find("./secondaryLowFare/amountView").text,
                    "dt_tostd": '',
                    "dt_tosta": '',
                    "nm_tofn": '',
                    "nm_tojkey": '',
                    "nm_tofkey": '',
                    "nm_toFareName": 'secondaryLow',
                }
                currency = item.find("./secondaryLowFare/currencyCode").text
                fl_found = True
        if not fl_found:
            self.output_error(msg=FLIGHT_NOT_FOUND, data="Departure flight not found")

        if return_flight:
            fl_found = False
            ajax_data.update({
                "dt_date": return_flight["departure"].strftime("%Y-%m-%d"),
                "is_departure": "false",
            })
            pp(ajax_data)
            g.setup(post=ajax_data)
            g.go("http://www.eastarjet.com/book/bookAjax.ajax")
            root = etree.XML(g.response.body)
            for item in root.findall('.//list'):
                jkey = item.findtext("journeyKey")
                print(jkey)
                if self._journey_matches(jkey, return_flight):
                    post_data.update({
                        "dt_tostd": item.find('./std').text,
                        "dt_tosta": item.find('./sta').text,
                        "nm_tofn": item.find('./flightNumber').text,
                        "dt_to": return_flight["departure"].strftime("%Y-%m-%d"),
                        "nm_tofare": item.find("./secondaryLowFare/amountView").text,
                        "nm_tojkey": jkey,
                        "nm_tofkey": item.find("./secondaryLowFare/fareSellKey").text,
                        "nm_toFareName": "secondaryLow",
                    })
                    fl_found = True
            if not fl_found:
                self.output_error(msg=FLIGHT_NOT_FOUND, data="Return flight not found")

        pp(post_data)
        g.setup(post=post_data)
        g.go("http://www.eastarjet.com/book/bookAjax.ajax")
        g.go("http://www.eastarjet.com/book/book.htm?method=bookingStep3")

        # A list of pairs, because the passenger_* fields repeat per passenger.
        post_data = [
            ("method", "bookingStep3"),
            ("nm_lastname", "Dlouhy"),
            ("nm_firstname", "Oliver"),
            ("cd_gender", 0),
            ("nm_customernumber", ""),
            ("nm_jumin", ""),
            ("nm_phone1", "+420" + phone[0]),  # todo check this
            ("nm_phone2", phone[1:5]),
            ("nm_phone3", phone[5:9]),
            ("nm_mailid", email.split("@")[0]),
            ("nm_maildomain", email.split("@")[1]),
            ("se_maildomain", ""),
            ("ck_phone", "on"),
            ("emergency_nm_phone1", "+420" + phone[0]),
            ("emergency_nm_phone2", phone[1:5]),
            ("emergency_nm_phone3", phone[5:9]),
        ]
        for p in passengers:
            is_child = p in childs
            post_data.extend([
                ("passenger_nm_paxtype", "CHD" if is_child else "ADT"),
                ("passenger_nm_customernumber", ""),
                ("passenger_nm_lastname", p["familyname"]),
                ("passenger_nm_firstname", p["firstname"]),
                ("passenger_cd_gender", 1 if p["title"] == "Ms" else 0),
                ("passenger_nm_jumin", ""),
                ("passenger_nm_birthday", p["birthday"].strftime("%Y%m%d")),
                ("passenger_cd_paytype", "CHD" if is_child else "ADT"),
                ("passenger_nm_paytype", "소아" if is_child else "성인"),
            ])
            if cd_station == "INT":
                post_data.extend([
                    ("passport_country", ""),
                    ("passport_nationality", ""),
                    ("passport_docNo", p["cardno"]),
                    ("passport_expDate", p["expiration"].replace("-", "")),
                    ("passport_issued", p["nationality"]),
                ])
        pp(post_data)
        g.setup(post=post_data)
        g.go("https://www.eastarjet.com/book/bookAjax.ajax")
        g.go("https://www.eastarjet.com/book/book.htm?method=bookingStep4")

        form_data = g.form_fields()

        #check price
        self.check_price(form_data['no_amount'], currency)

        if "cardbrand" in form_data:
            form_data.update({"cardbrand": "MC"})
        form_data.update({
            "method": "validatePayment",
            "departureStation": "",
            "ArrivalStation": "",
            "cd_paymenttype": "200",  # Credit card - MC
            "nm_accountnumber1": card[0:4],
            "nm_accountnumber2": card[4:8],
            "nm_accountnumber3": card[8:12],
            "nm_accountnumber4": card[12:16],  #2632136217836 ->> 7836 findindex(card)
            # Month without zero padding; "%-m" only works with glibc strftime.
            "cd_expiremonth": str(expiration.month),
            "cd_expireyear": expiration.strftime("%Y"),
            "nm_cardholdername": holder,
            "cd_bill": "Y",
        })

        # Flips to True once the site confirms payment, so that a later
        # failure is not reported as a failed payment.
        paid = False
        try:
            # NOTE(review): this prints the full card number to stdout.
            pp(form_data)
            g.setup(post=form_data)
            g.setup(hammer_mode=True, hammer_timeouts=((300, 300),))
            g.go('https://www.eastarjet.com/book/bookAjax.ajax')
            print(g.response.body)
            self.save_file(filename="EastarJet_ZE_0.html", body=g.response.body)

            post_data = {
                "method": "bookingStep4",
                "cd_paymenttype": 200,
            }
            g.setup(post=post_data)
            g.go("http://www.eastarjet.com/book/bookAjax.ajax")
            print(g.response.body)
            if "Payment is completed." not in g.response.body:
                self.output_error(msg=PAYMENT_FAILED)
            paid = True

            # NOTE(review): takes the first run of six capitals/digits found
            # anywhere in the response as the reservation code - confirm.
            pnr = re.findall(r'[A-Z0-9]{6}', g.response.body)[0]

            # payment process ...
            self.save_file(filename="EastarJet_ZE_1.html", body=g.response.body)
            g.go("https://www.eastarjet.com/book/book.htm?method=bookingStep5")
            self.save_file(filename="EastarJet_ZE_2.html", body=g.response.body)

            # res number
            self.output_res_number(pnr)
        except Exception:
            self.save_file(filename="EastarJet_ZE_payment_err.html", body=g.response.body)
            print(traceback.format_exc())
            self.output_error(msg=GETTING_RES_CODE_FAILED if paid else PAYMENT_FAILED)


if __name__ == '__main__':
    airline = EastarJet()
    airline.parse_args()
    airline.book_flight()
| { | |
| "passengers":[ | |
| { | |
| "bags":0, | |
| "firstname":"viktoria", | |
| "title":"Ms", | |
| "cardno":"UA3323123", | |
| "familyname":"stanova", | |
| "doctype":"P", | |
| "birthday":"1990-07-20", | |
| "expiration":"2018-05-08", | |
| "nationality":"SK", | |
| "visa":"" | |
| } | |
| ], | |
| "CVV":"666", | |
| "maxprice":"10000", | |
| "card_type":"MC", | |
| "phone":"777652838", | |
| "flights":[ | |
| { | |
| "arrival":"2015-09-12 12:50", | |
| "to":"CEB", | |
| "from":"ILO", | |
| "id":230136054, | |
| "departure":"2015-09-12 12:10" | |
| } | |
| ], | |
| "cardnumber":"5164652232068386", | |
| "airline":"5J", | |
| "exp":"12\/19", | |
| "login":"[email protected]", | |
| "password":"tramtararatata", | |
| "email":"[email protected]", | |
| "nameoncard":"skypicker skypicker" | |
| } |
| # -*- coding: utf-8 -*- | |
| import sys | |
| #sys.path.append('/srv/scrapers') | |
| #sys.path.append('/srv/scrapers/simple') | |
| import re | |
| import argparse | |
| import logging | |
| import csv | |
| import requests | |
| import json | |
| import lxml.html | |
| import pycurl | |
| import pytz | |
| import socket | |
| import traceback | |
| import ujson | |
| from pytz import country_timezones | |
| from random import choice | |
| from decimal import Decimal | |
| from itertools import islice | |
| from datetime import datetime, timedelta | |
| from dateutil.relativedelta import relativedelta | |
| from requests import Session | |
| from random import choice | |
| from collections import OrderedDict | |
| from grab import Grab | |
| from pprint import pprint as pp | |
class sGrabError(Exception):
    """Error raised by sGrab; also appended to the sGrab log file.

    :param msg: human-readable description.
    :param action_name: name of the request that failed (used in the log).
    :param e: optional underlying exception; when given, ``str()`` of this
        error is the text of *e* instead of *msg*.
    """

    def __init__(self, msg, action_name, e=None):
        super(sGrabError, self).__init__(msg)
        self.msg = msg
        try:
            _grab_log_error(msg, action_name)
        except Exception:
            # Logging is best effort. The handler deliberately binds no name:
            # the original reused `e` here and overwrote the caller's
            # exception with the logging failure.
            print(traceback.format_exc())
            print("not logged")
        self.e = e
        if e:
            print("".join(traceback.format_exception_only(type(e), e)))

    def __str__(self):
        # Always return text; the original returned the exception object.
        if self.e:
            return str(self.e)
        return self.msg
class sGrab(Grab):
    """
    Grab subclass that names, prints and optionally validates each request.

    Besides Grab's own options, the constructor, setup(), go() and submit()
    accept the keywords listed in ``additional_params``:

    action_name - Name request for easier debug.
    check - Check conditions ("expected_code", "expected_body_len", "expected_url")
    print_out - Print the source and target URL of each request.
    save_html - On failure, dump the page bodies from before and after the request.

    Keywords given to go()/submit() apply to that call only.
    """
    #sGrab optional
    _expected_code = 200
    _expected_body_len = 0
    _expected_url = ""
    _action_name = "booking_process"
    _print_out = True
    _check = False
    _save_html = False
    # Last non-empty "post" payload given to setup(); printed when a request fails.
    _post = None

    #Grab original
    make_request = True

    #list of params (if Python 3.. not needed)
    additional_params = ["expected_code", "expected_body_len", "expected_url", "action_name", "check", "print_out", "save_html"]

    def __init__(self, *args, **kwargs):
        self.set_additional(kwargs)
        original_kwargs = self.delete_additional_params(kwargs)
        Grab.__init__(self, *args, **original_kwargs)

    def monitor(self, func, *args, **kwargs):
        """Run *func* (the real go/submit) with logging and optional checks.

        sGrab-specific keywords override the instance settings for this call
        only; the previous values are restored afterwards, whether or not the
        call succeeds (the original skipped the restore when it failed).

        :raises sGrabError: when ``check`` is on and the status, URL or body
            length does not meet the expectation. Any exception from *func*
            is re-raised unchanged.
        """
        self.save_attributes()
        self.set_additional(kwargs)
        original_kwargs = self.delete_additional_params(kwargs)
        pre_body = None
        try:
            pre_body = self.response.body if self.response else None  # set pre body
            if self.make_request and self._print_out:
                pre_url = self.config['url']
                if len(args) == 1:
                    pre_url = args[0]
                print("%s : from : %s" % (self._action_name, pre_url))

            func(*args, **original_kwargs)  #call real go or submit

            if self.make_request and self._print_out:
                print("%s : %s : to : %s\n" % (self._action_name, self.response.code, self.response.url))

            if self._check:
                if str(self._expected_code) not in self.response.status:
                    raise sGrabError("Another http error code, expected is %s but get %s" % (self._expected_code, self.response.status), self._action_name)
                if self._expected_url not in self.response.url:
                    raise sGrabError("Expecting redirection to %s but redirected to %s" % (self._expected_url, self.response.url), self._action_name)
                if self._expected_body_len > len(self.response.body):
                    # Report actual size first, then the expected minimum;
                    # the original swapped them and printed the whole body.
                    raise sGrabError("Too small response body (%s bytes) expected more then %s bytes" % (len(self.response.body), self._expected_body_len), self._action_name)
        except Exception:
            if self._post:
                pp(self._post)
            if self._save_html:
                if pre_body:
                    self._grab_save_file(filename="airline-before-%s.html" % self._action_name, body=pre_body)
                # The request may have failed before any response existed.
                if self.response:
                    self._grab_save_file(filename="airline-after-%s.html" % self._action_name, body=self.response.body)
            raise
        finally:
            self.restore_attributes()

    def setup(self, *args, **kwargs):
        """Grab.setup() that also stores sGrab options and remembers "post"."""
        if kwargs:
            if kwargs.get("post", None):
                self._post = kwargs.get("post")
            self.set_additional(kwargs)
        original_kwargs = self.delete_additional_params(kwargs)
        super(sGrab, self).setup(*args, **original_kwargs)

    def submit(self, *args, **kwargs):
        """Grab.submit() wrapped by monitor()."""
        self.monitor(super(sGrab, self).submit, *args, **kwargs)

    def go(self, *args, **kwargs):
        """Grab.go() wrapped by monitor()."""
        self.monitor(super(sGrab, self).go, *args, **kwargs)

    def set_additional(self, kwargs):
        """Copy sGrab options (and ``make_request``) from *kwargs* onto self,
        keeping the current value of every option that is not given."""
        #sGrab optional
        for param in self.additional_params:
            setattr(self, "_" + param, kwargs.get(param, getattr(self, "_" + param)))
        #original from Grab
        self.make_request = kwargs.get("make_request", self.make_request)

    def save_attributes(self):
        """Back up the current sGrab options."""
        for param in self.additional_params:
            setattr(self, "_back_up_" + param, getattr(self, "_" + param))

    def restore_attributes(self):
        """Restore the options stored by save_attributes()."""
        for param in self.additional_params:
            setattr(self, "_" + param, getattr(self, "_back_up_" + param))

    def delete_additional_params(self, kwargs):
        """Return a copy of *kwargs* without the sGrab-only options."""
        originals = {}
        for key, val in kwargs.items():
            if key not in self.additional_params:
                originals[key] = val
        return originals

    def _grab_save_file(self, filename="test.html", path="/srv/Scrapers/booking/airlines/html/", body=""):
        """Save html page to file, with timestamp in filename"""
        tz = pytz.timezone(country_timezones("CZ")[0])
        final_name = "%s%s_%s" % (path, filename, datetime.now(tz).strftime("%H-%M_%d-%m-%Y"))
        final_name = final_name.replace(".html", "")
        final_name += ".html"
        print("saving page as %s ..." % final_name)
        print("url %s/last_page_of_booking/%s" % ("www3.skypicker.com:12555", final_name.split("/")[-1]))
        with open(final_name, "wb+") as f:
            f.write(body)
def _grab_log_error(msg, action_name):
    """Append ``<dd-mm-YYYY HH:MM>:<action_name>: <msg>`` to /var/log/s_grab.log.

    The timestamp uses the first timezone pytz lists for country code "CZ".
    """
    cz_zone = pytz.timezone(country_timezones("CZ")[0])
    with open("/var/log/s_grab.log", 'a') as log:
        stamp = datetime.now(cz_zone).strftime("%d-%m-%Y %H:%M")
        log.write("%s:%s: %s\n" % (stamp, action_name, msg))
Create the object:
g = sGrab(expected_code=200, print_out=True, save_html=True)
Set it up:
g.setup(expected_code=200, print_out=True, save_html=True)
Turn on checking:
g.setup(check=True)
Make requests:
g.go("www.ryanair.com/Search", action_name="search", expected_url="Selection")
g.go("www.ryanair.com/not_found", expected_code=404)