Sample input files generator: https://scrapers.skypicker.com:5000/generate_sample_data?airline=5J helen/pajicek
Please use sGrab (sGrab.py) and Error codes (airlines_exceptions.py)
Sample input files generator: https://scrapers.skypicker.com:5000/generate_sample_data?airline=5J helen/pajicek
Please use sGrab (sGrab.py) and Error codes (airlines_exceptions.py)
# -*- coding: utf-8 -*- | |
import sys | |
#sys.path.append('/srv/scrapers') | |
#sys.path.append('/srv/scrapers/simple') | |
import re | |
import argparse | |
import logging | |
import csv | |
import requests | |
import json | |
import lxml.html | |
import pycurl | |
import pytz | |
import socket | |
import traceback | |
import ujson | |
from pytz import country_timezones | |
import random | |
from random import choice | |
from decimal import Decimal | |
from itertools import islice | |
from datetime import datetime, timedelta | |
from dateutil.relativedelta import relativedelta | |
from requests import Session | |
from random import choice | |
from collections import OrderedDict | |
from grab import Grab | |
from pprint import pprint as pp | |
import os | |
from time import sleep | |
import time | |
sys.path.append('/root/Scrapers') | |
sys.path.append('/srv/Scrapers') | |
from scraperlib.s_grab import * | |
# HTTP proxies ("host:port"). prepare_input() picks one at random whenever the
# script runs on a machine whose local address is not listed in dev_ips.
booking_proxies = [
    "192.81.214.211:8888",
    "37.139.23.93:8888",
    "128.199.221.61:8888",
    "178.62.50.177:8888",
    "188.226.169.149:8888",
    "192.81.212.107:8888",
    "107.170.165.55:8888",
]

# Local addresses for which prepare_input() does not configure a proxy.
dev_ips = [
    "146.185.172.28",
    "188.166.6.171",
]

# First timezone pytz lists for country code "CZ"; used for the timestamps
# written into file names and JSON output.
tz = pytz.timezone(country_timezones("CZ")[0])
def save_file(filename = "test.html", path = "/srv/Scrapers/booking/airlines/html/", body = ""):
    """Save an HTML page to disk with a timestamp appended to its name.

    The page is written to ``<path>/<name>_<HH-MM_dd-mm-YYYY>.html`` where
    ``<name>`` is *filename* without ``.html`` and the timestamp is taken in
    the module timezone ``tz``.

    :param filename: base name of the page; ``.html`` is re-added after the
        timestamp.
    :param path: target directory, with or without a trailing slash.
    :param body: page content to write.
    """
    # Strip the extension from the file name only: stripping it from the whole
    # path would silently redirect the write if a directory contained ".html".
    stem = filename.replace(".html", "")
    stamp = datetime.now(tz).strftime("%H-%M_%d-%m-%Y")
    # os.path.join also copes with a path given without its trailing slash.
    final_name = os.path.join(path, "%s_%s.html" % (stem, stamp))
    print("saving page as %s ..." % final_name)
    with open(final_name, "wb+") as f:
        f.write(body)
class error_payment_failed(Exception):
    """Raised when a payment could not be completed.

    :param info: optional free-form detail about the failure; stored on the
        instance as ``info``.
    """

    def __init__(self, info=""):
        # Initialise the base class so that str(exc) and exc.args carry the
        # error code instead of being empty.
        super(error_payment_failed, self).__init__("payment_failed")
        self.message = "payment_failed"
        self.info = info
class BaseAirline(object):
    """Base class for all airline booking scrapers.

    Subclasses set ``code`` and ``childs_max_age`` and implement
    ``book_flight()``. The base class loads the booking request
    (``parse_args``), prepares the values a booking script needs
    (``prepare_input``), checks the price and writes the JSON result or error
    to stderr.
    """

    #! IATA airline code.
    code = None
    # Age limit in years: passengers born less than this many years ago are
    # treated as children by prepare_input().
    childs_max_age = None
    folder = '/srv/results/'
    output = ''
    # Path of the last page stored by save_file(); echoed in the JSON output.
    html_url = ""
    # Converted price accepted by check_price(); echoed by output_res_number().
    price = 0.0

    def __init__(self):
        """Initializes airline scraper."""
        assert self.code, "IATA airline code must be defined."
        self._session = Session()

    def _to_html(self, response):
        """Parses given HTTP response into HTML DOM object.

        :param response: HTTP response.
        :type response: :class:`requests.Response`
        """
        return lxml.html.fromstring(response.text)

    def _to_price(self, value, thousand_sep=',', dec_sep='.'):
        """Parses given string into Decimal object holding
        amount of currency. Separator defaults are set according to
        English customs.

        Everything that is not a digit or the decimal point (currency
        symbols, spaces, ...) is dropped before conversion.
        """
        value = value.replace(thousand_sep, '').replace(dec_sep, '.')
        value = re.sub(r'[^\d\.]', '', value)
        return Decimal(value)

    def parse_args(self):
        """Load the booking request named by ``--json_name`` into ``self.json_data``.

        Flight ``departure``/``arrival`` ("%Y-%m-%d %H:%M"), passenger
        ``birthday`` ("%Y-%m-%d") and the card ``exp`` ("%m/%y") strings are
        replaced by :class:`datetime.datetime` objects. When the script is
        started without any argument, the usage text is printed and the
        process exits with status 1.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--json_name", type=str,
                            help="path to json file")
        if len(sys.argv) == 1:
            parser.print_help()
            sys.exit(1)
        args = parser.parse_args()
        with open(args.json_name, "r") as f:
            self.json_data = json.loads(f.read())
        for flight in self.json_data["flights"]:
            flight['departure'] = datetime.strptime(flight['departure'], "%Y-%m-%d %H:%M")
            flight['arrival'] = datetime.strptime(flight['arrival'], "%Y-%m-%d %H:%M")
        for passenger in self.json_data["passengers"]:
            passenger['birthday'] = datetime.strptime(passenger['birthday'], "%Y-%m-%d")
        self.json_data["exp"] = datetime.strptime(self.json_data["exp"], "%m/%y")

    @staticmethod
    def _local_ip():
        """Return the local address the OS selects for reaching 8.8.8.8."""
        probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            # Connecting a datagram socket only fixes the peer address, which
            # is enough for getsockname() to report the local address.
            probe.connect(('8.8.8.8', 80))
            return probe.getsockname()[0]
        finally:
            probe.close()

    def prepare_input(self):
        """Prepare input data for a booking scraper (childs, return_flight, userfriendly names).

        Requires ``parse_args()`` to have filled ``self.json_data``.

        :returns: tuple ``(passengers, childs, baggage, card, cvc, expiration,
            max_price, email, phone, holder, departure_flight, return_flight,
            g, childs_b_days)`` where ``passengers`` is sorted by birthday,
            ``childs`` are the passengers younger than ``childs_max_age``,
            ``baggage`` is the total bag count, ``return_flight`` is ``None``
            for a single flight and ``g`` is a configured ``sGrab`` instance.
        """
        assert self.childs_max_age, "childs_max_age is not set."
        data = self.json_data
        passengers = sorted(data["passengers"], key=lambda x: x["birthday"])
        child_limit = datetime.now() - timedelta(days=365.25 * self.childs_max_age)
        childs = [p for p in passengers if p["birthday"] > child_limit]
        # Taken before the "default adult" substitution below, so it always
        # describes the children of the original request.
        childs_b_days = [p["birthday"] for p in childs]
        # TODO: infants (an infants_max_age limit) are not separated from
        # children yet.
        flights = data["flights"]
        baggage = sum(int(p["bags"]) for p in data["passengers"])
        card = data["cardnumber"]
        cvc = data["CVV"]
        expiration = data["exp"]
        max_price = data["maxprice"]
        email = data["email"]
        phone = data["phone"]
        holder = data["nameoncard"]

        # With two flights the earlier departure is the outbound leg.
        departure_flight = flights[0]
        return_flight = None
        if len(flights) > 1:
            if flights[1]["departure"] > flights[0]["departure"]:
                return_flight = flights[1]
            else:
                departure_flight, return_flight = flights[1], flights[0]

        g = sGrab()
        # NOTE(review): certificate and host verification are switched off for
        # every request made through this object.
        g.transport.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        g.transport.curl.setopt(pycurl.SSL_VERIFYHOST, 0)
        g.transport.curl.setopt(pycurl.SSL_CIPHER_LIST, 'SSLv3')
        if self._local_ip() not in dev_ips:
            proxy = choice(booking_proxies)
            print("I will use this proxy %s" % proxy)
            self.proxy = proxy
            g.setup(proxy=proxy, proxy_type='http', connect_timeout=5, timeout=5)
        # NOTE(review): assumed to apply with and without a proxy -- confirm.
        g.setup(hammer_mode=True, hammer_timeouts=((40, 40),))

        # set default adult if only child is in input and it is only check!
        if len(passengers) == len(childs) and int(max_price) == 1:
            childs = []
            passengers = [{
                "bags": baggage,
                "birthday": datetime.now() - timedelta(days=365.25 * 25),
                "cardno": "A9449115",
                "doctype": "P",
                "expiration": "2064-11-19",
                "familyname": "Novak",
                "firstname": "Jan",
                "nationality": "CZ",
                "title": "Mr",
                "visa": None,
            }]
        print("PASS %s" % ", ".join(["%s %s" % (p["firstname"], p["familyname"]) for p in passengers]))
        return (passengers, childs, baggage, card, cvc, expiration,
                max_price, email, phone, holder, departure_flight, return_flight, g, childs_b_days)

    def _to_datetime(self, value, format='%Y-%m-%dT%H:%M:%S'):
        """Parses :class:`datetime.datetime`. If both *value* and *format*
        given as iterables of two elements, they're considered
        separately as date and time.

        :raises TypeError: when only one of *value* and *format* is a string.
        """
        value_is_str = isinstance(value, basestring)
        format_is_str = isinstance(format, basestring)
        if value_is_str and format_is_str:
            # parse date and time together
            return datetime.strptime(value, format)
        if not value_is_str and not format_is_str:
            # parse date and time separately, then combine
            return datetime.combine(
                datetime.strptime(value[0], format[0]).date(),
                datetime.strptime(value[1], format[1]).timetz(),
            )
        raise TypeError("Unrecognized combination of arguments.")

    def save_file(self, filename = "page.html", path = "/srv/Scrapers/booking/airlines/html/", body = ""):
        """Save html page to file, with timestamp in filename.

        The page goes to ``<path>/<code>/<name>_<bid>_<HH-MM_dd-mm-YYYY>_<random>.html``
        (the directory is created when missing) and the resulting path is
        remembered in ``self.html_url``. ``bid`` is taken from the booking
        request and is 0 when the request has none or was not loaded.
        """
        directory = os.path.join(path, str(self.code))
        if not os.path.exists(directory):
            os.makedirs(directory)
        try:
            bid = self.json_data.get("bid", 0)
        except AttributeError:
            # parse_args() has not run yet: store the page without a bid.
            bid = 0
        # Strip the extension from the file name only, never from the
        # directory part that was just created.
        stem = filename.replace(".html", "")
        stamp = datetime.now(tz).strftime("%H-%M_%d-%m-%Y")
        final_name = os.path.join(
            directory, "%s_%s_%s_%s.html" % (stem, bid, stamp, random.random()))
        print("saving page as %s ..." % final_name)
        with open(final_name, "wb+") as f:
            f.write(body)
        self.html_url = final_name

    def output_error(self, msg = 'not_found', data = "", html_url = ""):
        """Write an error report as JSON to stderr and exit with status 1.

        :param msg: error code.
        :param data: additional detail.
        :param html_url: page to report; defaults to the last page stored by
            ``save_file()``.
        """
        html_url = html_url or self.html_url
        sys.stderr.write(ujson.dumps({
            'error': msg,
            'data': data,
            'html_url': html_url,
            'datetime': datetime.now(tz).strftime("%d-%m-%Y_%H:%M"),
        }))
        sys.exit(1)

    def output_pricechange(self, new_price = 1):
        """Deprecated, use ``check_price()``. Report a changed price on stderr and exit with status 1."""
        sys.stderr.write(json.dumps({
            "max_price": self.json_data["maxprice"],
            "fresh_price": float(new_price),
            "timestamp": datetime.now(tz).strftime("%d-%m-%Y_%H:%M"),
        }))
        sys.exit(1)

    def check_price(self, amount, currency):
        """Convert *amount* with the skypicker rates service and compare it with ``maxprice``.

        When the converted amount exceeds ``self.json_data["maxprice"]`` a
        ``price_change`` report is written to stderr and the process exits
        with status 1. Otherwise the converted amount is stored in
        ``self.price`` and ``self.start_time`` is set to the current time.

        :param amount: price as shown by the airline (number or string).
        :param currency: currency code understood by the rates service.
        """
        amount = self._to_price(str(amount))
        print("%s %s" % (amount, currency))
        skypicker_currency_url = "https://cz.skypicker.com/rates/"
        rates = self._session.get(skypicker_currency_url + currency.lower()).json()
        amount = float(rates['value']) * float(amount)
        if amount > float(self.json_data["maxprice"]):
            error_msg = {
                "ids": [f['id'] for f in self.json_data["flights"]],
                "max_price": self.json_data["maxprice"],
                "fresh_price": float(amount),
                "status": "price_change",
                "datetime": datetime.now(tz).strftime("%H-%M_%d-%m-%Y"),
            }
            sys.stderr.write(json.dumps(error_msg))
            # sys.exit, not the interactive-only exit() helper added by site.
            sys.exit(1)
        self.start_time = time.time()
        #self.waiting_on_semaphore()
        self.price = amount
        print("%s EUR" % amount)

    def waiting_on_semaphore(self):
        """Poll the booking-process status service until it stops reporting "pending".

        * "canceled" raises ``Exception("payment canceled")``;
        * "ok" merges the returned data into ``self.json_data``;
        * "pending" is polled again every 5 seconds; once more than five
          minutes have passed since ``self.start_time`` (set by
          ``check_price()``), ``self.book_flight()`` is called instead.
        """
        semaphore_url = "https://cz.skypicker.com/api/v0.1/automatic_booking_process_status"
        while True:
            status_data = self._session.get(
                semaphore_url + "?bid=%s&iata=%s" % (self.json_data["bid"], self.code)).json()
            # Compare by value: identity ("is") is never reliable for strings
            # decoded from JSON.
            status = status_data["status"]
            if status == "canceled":
                raise Exception("payment canceled")
            if status == "pending":
                if time.time() - self.start_time > (60 * 5):
                    self.book_flight()
                    return
                sleep(5)
                continue
            if status == "ok":
                self.json_data.update(status_data)  # update card data
            return

    def output_res_number(self, reservation_number, additional_info=None):
        """Write the successful booking result as JSON to stderr.

        :param reservation_number: reservation code obtained from the airline.
        :param additional_info: optional dict merged into the result.
        :returns: ``True``.
        """
        response = {
            "reservation_number": reservation_number,
            "price": str(self.price),
            'html_url': self.html_url,
            'datetime': datetime.now(tz).strftime("%d-%m-%Y_%H:%M"),
        }
        if additional_info:
            response.update(additional_info)
        sys.stderr.write(json.dumps(response))
        return True

    ## helpers
    def to_eur(self, curr, amount):
        """Deprecated. Multiply *amount* by the rate of *curr* from the rates service, rounded to 2 places."""
        rates = self._session.get("https://cz.skypicker.com/rates/" + curr.lower()).json()
        return round(rates['value'] * amount, 2)

    def parse_price(self, string):
        """Extract a price from free text as a float.

        The digit groups found in *string* are joined; the last group is
        taken as the decimal part unless it has exactly three digits, in
        which case it is read as a thousands group ("1,234" -> 1234.0,
        "1,234.56" -> 1234.56). Text without digits gives 0.0.
        """
        groups = re.findall(r"\d+", string)
        if not groups:
            return 0.0
        if len(groups) == 1 or len(groups[-1]) == 3:
            return float("".join(groups))
        # Build the literal first: adding the fraction to the whole part as
        # two floats can differ from the parsed value in the last bit.
        return float("%s.%s" % ("".join(groups[:-1]), groups[-1]))

    def compare_dicts(self, original, used):
        """Development helper: print every key of *original* that is missing from *used* or has a different str() value there."""
        for key, expected in original.items():
            if key not in used:
                print("KEY %s:%s is not in params" % (key, expected))
            elif str(expected) != str(used[key]):
                print("KEY: %s ... >%s<(orig) VS >%s<(script)" % (key, expected, used[key]))
# Error codes reported by the booking scripts (the "error" field of their
# JSON output).
SEARCH_FAILED = "search_failed"
PAYMENT_FAILED = "payment_failed"
PRICE_CHANGED = "price_changed"
GETTING_RES_CODE_FAILED = "getting_res_code_failed"
UNKNOW_CURRENCY = "unknow_currency"
FLIGHT_NOT_FOUND = "flight_not_found"
BOOKING_ON_MAIL = "booking_on_mail"
CANT_BOOK_BAGS = "cant_book_bags"
CANT_BOOK_BABY = "cant_book_baby"
DUPLICATE_NAMES = "duplicate_names"
UNEXPECTED_ERROR = "unexpected_error"
AIRLINE_WEB_DOWN = "airline_web_down"
LOGIN_FAILED = "login_failed"
USE_POLICY = "use_policy"

# Message template for every error code; each template takes exactly one
# "%s" argument carrying additional detail.
ERR_CODES = {
    SEARCH_FAILED: "Search failed. %s",
    PAYMENT_FAILED: "Payment failed. %s",
    PRICE_CHANGED: "Price changed. %s",
    GETTING_RES_CODE_FAILED: "Problem with get reservation code. %s",
    UNKNOW_CURRENCY: "Unknow currency. %s",
    FLIGHT_NOT_FOUND: "Flight not found. %s",
    BOOKING_ON_MAIL: "Booking on email. %s",
    CANT_BOOK_BAGS: "Can't book bags. %s",
    CANT_BOOK_BABY: "Can't book baby or child. %s",
    DUPLICATE_NAMES: "Passengers with same name did not pass validation. %s",
    UNEXPECTED_ERROR: "Unexpected error %s",
    AIRLINE_WEB_DOWN: "Arline website seems down %s",
    LOGIN_FAILED: "Can't log in on airline website %s",
    USE_POLICY: "Airline blocking our booking automatas %s",
}

# URL prefix under which stored pages are published; BookingError appends the
# file name of the stored page to it.
html_url_path = "http://www3.skypicker.com:12555/last_page_of_booking/"
class BookingError(Exception):
    """Error reported by a booking script, identified by one of the ERR_CODES keys.

    :param error_code: key of ``ERR_CODES``; an unknown code falls back to
        the ``UNEXPECTED_ERROR`` message.
    :param info: detail inserted into the message template.
    :param html_url: path of the stored page; only its file name is kept and
        appended to ``html_url_path``.
    """

    def __init__(self, error_code, info="", html_url=""):
        self.error_code = error_code
        if html_url != "":
            html_url = html_url_path + html_url.split("/")[-1]
        self.html_url = html_url
        try:
            template = ERR_CODES[error_code]
        except (KeyError, TypeError):
            # Unknown (or unhashable) code.
            print("%s not found!" % error_code)
            template = ERR_CODES[UNEXPECTED_ERROR]
        # Wrap info in a tuple so that tuples, None and dicts are formatted
        # as one value, and so the fallback template is formatted as well
        # instead of keeping a raw "%s".
        message = template % (info,)
        super(BookingError, self).__init__(message)
        self.message = message

    def __str__(self):
        return "%s: %s" % (self.error_code, self.message)
""" README
Feel free to add new error codes ;)

USAGE in booking scripts:
    self.output_error(SEARCH_FAILED)
    self.output_error(LOGIN_FAILED)

In core.py the reported code is used like this:
    if jsn.get("error") in ERR_CODES:
        raise BookingError(jsn.get("error"), jsn.get("data"))
"""
#!/usr/bin/env python | |
# -*- coding: utf-8 -*-\n | |
import ujson | |
import urllib | |
import pycurl | |
import re | |
import sys | |
import json | |
import ast | |
import lxml | |
import lxml.html | |
import traceback | |
from time import sleep | |
from pprint import pprint as pp | |
from datetime import datetime, timedelta | |
from airlines import * | |
from airlines_exceptions import * | |
reload(sys) | |
sys.setdefaultencoding('utf-8') | |
__author__ = "your name" | |
__editor__ = "" #TODO | |
class Airline(BaseAirline):
    """Template for a new airline booking script; fill in every TODO."""

    code = "" #TODO
    childs_max_age = 0 #TODO

    def book_flight(self):
        """Search, check the price and pay for the flights of the loaded request."""
        (passengers, childs, baggage, card, cvc, expiration, max_price, email,
         phone, holder, departure_flight, return_flight, g, childs_b_days) = self.prepare_input()
        #TODO magic
        contact_detail = {
            "title": "MR",
            "firstName": "Oliver",
            "lastName": "Dlouhy",
            "street": "Bakalovo nabrezi 2",
            "zipCode": "63900",
            "city": "Brno",
            "country": "CZ",
            "email": email,
            "repeatemail": email,
            "phoneNumber": "+380" + phone,
        }
        # TODO: `price` and `currency` must be scraped from the airline page
        # by the code that replaces "magic" above.
        self.check_price(price, currency)
        g.setup(hammer_mode=True, hammer_timeouts=((300, 300),))
        try:
            #TODO payment
            self.save_file(filename="airline.html", body=g.response.body)
            self.output_res_number("not parsed yet") #TODO ask me
        except Exception:
            # Keep the cause visible in the script output before reporting.
            print(traceback.format_exc())
            self.output_error(msg=PAYMENT_FAILED)
if __name__ == "__main__":
    # Script entry point: load the request given by --json_name, then book.
    booking = Airline()
    booking.parse_args()
    booking.book_flight()
#!/usr/bin/env python | |
# -*- coding: utf-8 -*-\n | |
import ujson | |
import urllib | |
import pycurl | |
import re | |
import sys | |
import json | |
from lxml import etree | |
from time import sleep | |
from pprint import pprint as pp | |
from datetime import datetime, timedelta | |
from grab import Grab | |
from airlines_exceptions import * | |
from airlines import * | |
from airlines_exceptions import BookingError | |
__author__ = 'Ladislav Radoň, [email protected]' | |
reload(sys) | |
sys.setdefaultencoding('utf-8') | |
class EastarJet(BaseAirline):
    """Booking script for Eastar Jet (IATA code ZE) on www.eastarjet.com."""

    code = "ZE"
    childs_max_age = 12

    def _find_journey(self, root, flight, verbose=False):
        """Return the last ``<list>`` element of *root* whose journeyKey contains
        the airports and the departure/arrival times of *flight*, or ``None``."""
        wanted = (
            flight["from"],
            flight["to"],
            flight["departure"].strftime("%m/%d/%Y %H:%M"),
            flight["arrival"].strftime("%m/%d/%Y %H:%M"),
        )
        found = None
        for item in root.findall('.//list'):
            jkey = item.find("journeyKey").text
            if verbose:
                print(jkey)
            if all(part in jkey for part in wanted):
                found = item
        return found

    def _leg_fields(self, item, flight, leg):
        """Build the bookingStep2 form fields of one leg.

        *leg* is ``"from"`` for the outbound and ``"to"`` for the return
        flight; it is embedded in every field name (``dt_fromstd`` /
        ``dt_tostd`` ...). The "secondaryLow" fare of *item* is selected.
        """
        return {
            "dt_%sstd" % leg: item.find('./std').text,
            "dt_%ssta" % leg: item.find('./sta').text,
            "nm_%sfn" % leg: item.find('./flightNumber').text,
            "nm_%sjkey" % leg: item.find("journeyKey").text,
            "nm_%sfkey" % leg: item.find("./secondaryLowFare/fareSellKey").text,
            "nm_%sFareName" % leg: "secondaryLow",
            "dt_%s" % leg: flight["departure"].strftime("%Y-%m-%d"),
            "nm_%sfare" % leg: item.find("./secondaryLowFare/amountView").text,
        }

    def book_flight(self):
        """Book the flights of the loaded request and report the result.

        Steps: search, select the requested flights by their journey key,
        send contact and passenger data, check the price, pay by card and
        output the reservation number. Failures are reported through
        ``output_error()``, which terminates the process.
        """
        (passengers, childs, baggage, card, cvc, expiration, max_price, email,
         phone, holder, departure_flight, return_flight, g, childs_b_days) = self.prepare_input()
        domestic = ["GMP", "KUV", "CJU", "CJJ", "ICN"]  # domestic airports (SOUTH KOREA)

        # get session
        g.setup(hammer_mode=True, hammer_timeouts=((200, 200),))
        g.go("http://www.eastarjet.com/book/index.htm")

        # ---- search -------------------------------------------------------
        post_data = {}
        if departure_flight["from"] in domestic and departure_flight["to"] in domestic:
            cd_station = "DOM"
            post_data.update({
                "cd_fromcountry": "KR",
                "cd_tocountry": "KR",
            })
        else:
            cd_station = "INT"
        post_data.update({
            "method": "quickStep",
            "cd_station": cd_station,
            "cd_return": 0,
            "cd_fromline": departure_flight["from"],
            "nm_fromline": '',
            "cd_toline": departure_flight["to"],
            "nm_toline": '',
            "dt_from": departure_flight["departure"].strftime("%Y-%m-%d"),
            "no_person_m": len(passengers) - len(childs),
            "no_person_p": len(childs),
            "no_person_b": 0,
        })
        if return_flight:
            post_data.update({
                "cd_return": 1,
                "dt_to": return_flight["departure"].strftime("%Y-%m-%d"),
            })
        pp(post_data)
        g.setup(post=post_data)
        g.go("http://www.eastarjet.com/book/book.htm")

        # ---- availability of the outbound flight --------------------------
        ajax_data = {
            "method": "availability",
            "dt_date": post_data["dt_from"],
            "is_departure": "true",
            "fromline": post_data["cd_fromline"],
            "toline": post_data["cd_toline"],
            "nmfromline": "",
            "nmtoline": "",
            "dt_from": post_data["dt_from"],
            "cd_station": post_data["cd_station"],
        }
        if return_flight:
            ajax_data.update({
                "dt_to": return_flight["departure"].strftime("%Y-%m-%d"),
            })
        g.setup(post=ajax_data)
        g.go("http://www.eastarjet.com/book/bookAjax.ajax")
        outbound = self._find_journey(etree.XML(g.response.body), departure_flight)
        if outbound is None:
            self.output_error(msg=FLIGHT_NOT_FOUND, data="Departure flight not found")
        post_data = {
            "method": "bookingStep2",
            "dt_tostd": '',
            "dt_tosta": '',
            "nm_tofn": '',
            "nm_tojkey": '',
            "nm_tofkey": '',
            "nm_toFareName": 'secondaryLow',
        }
        post_data.update(self._leg_fields(outbound, departure_flight, "from"))
        currency = outbound.find("./secondaryLowFare/currencyCode").text

        # ---- availability of the return flight ----------------------------
        if return_flight:
            ajax_data.update({
                "dt_date": return_flight["departure"].strftime("%Y-%m-%d"),
                "is_departure": "false",
            })
            pp(ajax_data)
            g.setup(post=ajax_data)
            g.go("http://www.eastarjet.com/book/bookAjax.ajax")
            inbound = self._find_journey(etree.XML(g.response.body), return_flight, verbose=True)
            if inbound is None:
                self.output_error(msg=FLIGHT_NOT_FOUND, data="Return flight not found")
            post_data.update(self._leg_fields(inbound, return_flight, "to"))

        pp(post_data)
        g.setup(post=post_data)
        g.go("http://www.eastarjet.com/book/bookAjax.ajax")
        g.go("http://www.eastarjet.com/book/book.htm?method=bookingStep3")

        # ---- contact and passengers ----------------------------------------
        # A list of pairs, because the passenger_* fields repeat per passenger.
        post_data = [
            ("method", "bookingStep3"),
            ("nm_lastname", "Dlouhy"),
            ("nm_firstname", "Oliver"),
            ("cd_gender", 0),
            ("nm_customernumber", ""),
            ("nm_jumin", ""),
            ("nm_phone1", "+420" + phone[0]),  # todo check this
            ("nm_phone2", phone[1:5]),
            ("nm_phone3", phone[5:9]),
            ("nm_mailid", email.split("@")[0]),
            ("nm_maildomain", email.split("@")[1]),
            ("se_maildomain", ""),
            ("ck_phone", "on"),
            ("emergency_nm_phone1", "+420" + phone[0]),
            ("emergency_nm_phone2", phone[1:5]),
            ("emergency_nm_phone3", phone[5:9]),
        ]
        for p in passengers:
            pax_type = "CHD" if p in childs else "ADT"
            post_data.extend([
                ("passenger_nm_paxtype", pax_type),
                ("passenger_nm_customernumber", ""),
                ("passenger_nm_lastname", p["familyname"]),
                ("passenger_nm_firstname", p["firstname"]),
                ("passenger_cd_gender", 1 if p["title"] == "Ms" else 0),
                ("passenger_nm_jumin", ""),
                ("passenger_nm_birthday", p["birthday"].strftime("%Y%m%d")),
                ("passenger_cd_paytype", pax_type),
                ("passenger_nm_paytype", "소아" if p in childs else "성인"),
            ])
            if cd_station == "INT":
                # Passport data is sent for international routes only.
                post_data.extend([
                    ("passport_country", ""),
                    ("passport_nationality", ""),
                    ("passport_docNo", p["cardno"]),
                    ("passport_expDate", p["expiration"].replace("-", "")),
                    ("passport_issued", p["nationality"]),
                ])
        pp(post_data)
        g.setup(post=post_data)
        g.go("https://www.eastarjet.com/book/bookAjax.ajax")
        g.go("https://www.eastarjet.com/book/book.htm?method=bookingStep4")

        # ---- price check and payment ---------------------------------------
        form_data = g.form_fields()
        #check price
        self.check_price(form_data['no_amount'], currency)
        if "cardbrand" in form_data:
            form_data.update({"cardbrand": "MC"})
        form_data.update({
            "method": "validatePayment",
            "departureStation": "",
            "ArrivalStation": "",
            "cd_paymenttype": "200",  # Credit card - MC
            "nm_accountnumber1": card[0:4],
            "nm_accountnumber2": card[4:8],
            "nm_accountnumber3": card[8:12],
            "nm_accountnumber4": card[12:16],
            # datetime.month instead of the glibc-only "%-m" strftime flag.
            "cd_expiremonth": str(expiration.month),
            "cd_expireyear": expiration.strftime("%Y"),
            "nm_cardholdername": holder,
            "cd_bill": "Y",
        })
        try:
            # NOTE(review): this prints the full card number to stdout.
            pp(form_data)
            g.setup(post=form_data)
            g.setup(hammer_mode=True, hammer_timeouts=((300, 300),))
            g.go('https://www.eastarjet.com/book/bookAjax.ajax')
            print(g.response.body)
            self.save_file(filename="EastarJet_ZE_0.html", body=g.response.body)
            post_data = {
                "method": "bookingStep4",
                "cd_paymenttype": 200,
            }
            g.setup(post=post_data)
            g.go("http://www.eastarjet.com/book/bookAjax.ajax")
            print(g.response.body)
            payment_body = g.response.body
        except Exception:
            self.save_file(filename="EastarJet_ZE_payment_err.html", body=g.response.body)
            print(traceback.format_exc())
            self.output_error(msg=PAYMENT_FAILED)
        if "Payment is completed." not in payment_body:
            self.output_error(msg=PAYMENT_FAILED)

        # The airline has reported the payment as completed, so nothing below
        # may be reported as payment_failed any more.
        pnr_candidates = re.findall(r'[A-Z0-9]{6}', payment_body)
        try:
            self.save_file(filename="EastarJet_ZE_1.html", body=payment_body)
            g.go("https://www.eastarjet.com/book/book.htm?method=bookingStep5")
            self.save_file(filename="EastarJet_ZE_2.html", body=g.response.body)
        except Exception:
            # Storing the confirmation pages is best-effort.
            print(traceback.format_exc())
        if not pnr_candidates:
            self.output_error(msg=GETTING_RES_CODE_FAILED)
        # res number
        self.output_res_number(pnr_candidates[0])
if __name__ == '__main__':
    # Script entry point: load the request given by --json_name, then book.
    booking = EastarJet()
    booking.parse_args()
    booking.book_flight()
{ | |
"passengers":[ | |
{ | |
"bags":0, | |
"firstname":"viktoria", | |
"title":"Ms", | |
"cardno":"UA3323123", | |
"familyname":"stanova", | |
"doctype":"P", | |
"birthday":"1990-07-20", | |
"expiration":"2018-05-08", | |
"nationality":"SK", | |
"visa":"" | |
} | |
], | |
"CVV":"666", | |
"maxprice":"10000", | |
"card_type":"MC", | |
"phone":"777652838", | |
"flights":[ | |
{ | |
"arrival":"2015-09-12 12:50", | |
"to":"CEB", | |
"from":"ILO", | |
"id":230136054, | |
"departure":"2015-09-12 12:10" | |
} | |
], | |
"cardnumber":"5164652232068386", | |
"airline":"5J", | |
"exp":"12\/19", | |
"login":"[email protected]", | |
"password":"tramtararatata", | |
"email":"[email protected]", | |
"nameoncard":"skypicker skypicker" | |
} |
# -*- coding: utf-8 -*- | |
import sys | |
#sys.path.append('/srv/scrapers') | |
#sys.path.append('/srv/scrapers/simple') | |
import re | |
import argparse | |
import logging | |
import csv | |
import requests | |
import json | |
import lxml.html | |
import pycurl | |
import pytz | |
import socket | |
import traceback | |
import ujson | |
from pytz import country_timezones | |
from random import choice | |
from decimal import Decimal | |
from itertools import islice | |
from datetime import datetime, timedelta | |
from dateutil.relativedelta import relativedelta | |
from requests import Session | |
from random import choice | |
from collections import OrderedDict | |
from grab import Grab | |
from pprint import pprint as pp | |
class sGrabError(Exception):
    """Error raised by sGrab when a response fails one of its checks.

    Creating the exception also appends the message to the sGrab log file
    (best-effort).

    :param msg: human-readable description.
    :param action_name: name of the request that failed (used in the log).
    :param e: optional underlying exception; when given, ``str()`` of this
        error shows it instead of *msg*.
    """

    def __init__(self, msg, action_name, e=None):
        super(sGrabError, self).__init__(msg)
        self.msg = msg
        # Stored before logging so that a logging failure can never replace
        # the exception handed in by the caller.
        self.e = e
        try:
            _grab_log_error(msg, action_name)
        except Exception:
            # Logging must not prevent the real error from being raised.
            print(traceback.format_exc())
            print("not logged")
        if e:
            print(traceback.format_exc())

    def __str__(self):
        if self.e:
            # __str__ has to return a string, not the exception object.
            return "%s" % (self.e,)
        return self.msg
class sGrab(Grab):
    """Grab with named, optionally verified requests.

    Extra keyword arguments accepted by the constructor, ``setup()``,
    ``go()`` and ``submit()`` (they are not passed on to Grab):

    action_name - Name request for easier debug.
    check - Check conditions ("expected_code", "expected_body_len", "expected_url")
    expected_code / expected_body_len / expected_url - values used by the check.
    print_out - print the source and target URL of every request.
    save_html - store the pages before/after a failed request.

    Values given to ``go()``/``submit()`` apply to that single request;
    values given to the constructor or ``setup()`` stay in effect.
    """

    #sGrab optional
    _expected_code = 200
    _expected_body_len = 0
    _expected_url = ""
    _action_name = "booking_process"
    _print_out = True
    _check = False
    _save_html = False
    _post = None
    #Grab original
    make_request = True
    #list of params (if Python 3.. not needed)
    additional_params = ["expected_code", "expected_body_len", "expected_url", "action_name", "check", "print_out", "save_html"]

    def __init__(self, *args, **kwargs):
        self.set_additional(kwargs)
        Grab.__init__(self, *args, **self.delete_additional_params(kwargs))

    def monitor(self, func, *args, **kwargs):
        """Run *func* (the real ``go``/``submit``) with logging and optional checks.

        :returns: whatever *func* returns.
        :raises sGrabError: when ``check`` is on and the response does not
            match the expected code, URL or minimal body length.
        """
        self.save_attributes()
        self.set_additional(kwargs)
        original_kwargs = self.delete_additional_params(kwargs)
        pre_body = None
        try:
            pre_body = self.response.body if self.response else None  # set pre body
            if self.make_request and self._print_out:
                pre_url = self.config['url']
                if len(args) == 1:
                    pre_url = args[0]
                print("%s : from : %s" % (self._action_name, pre_url))
            result = func(*args, **original_kwargs)  # call real go or submit
            if self.make_request and self._print_out:
                print("%s : %s : to : %s\n" % (self._action_name, self.response.code, self.response.url))
            if self._check:
                self._verify_response()
            return result
        except Exception:
            self._dump_failure(pre_body)
            raise
        finally:
            # Per-call overrides must not leak into later requests, whether
            # this one succeeded or not.
            self.restore_attributes()

    def _verify_response(self):
        """Raise sGrabError unless the last response matches the expected values."""
        if str(self._expected_code) not in self.response.status:
            raise sGrabError("Another http error code, expected is %s but get %s" % (self._expected_code, self.response.status), self._action_name)
        if self._expected_url not in self.response.url:
            raise sGrabError("Expecting redirection to %s but redirected to %s" % (self._expected_url, self.response.url), self._action_name)
        if self._expected_body_len > len(self.response.body):
            raise sGrabError("Too small response body (%s bytes) expected more then %s bytes" % (len(self.response.body), self._expected_body_len), self._action_name)

    def _dump_failure(self, pre_body):
        """Print the last POST data and, with ``save_html`` on, store the pages around a failed request."""
        if self._post:
            pp(self._post)
        if not self._save_html:
            return
        try:
            if pre_body:
                self._grab_save_file(filename="airline-before-%s.html" % self._action_name, body=pre_body)
            if self.response:
                self._grab_save_file(filename="airline-after-%s.html" % self._action_name, body=self.response.body)
        except Exception:
            # Diagnostics are best-effort and must not hide the request error.
            print(traceback.format_exc())

    def setup(self, *args, **kwargs):
        """Like ``Grab.setup`` but also accepts the sGrab options and remembers the last ``post`` data."""
        if kwargs.get("post", None):
            self._post = kwargs.get("post")
        self.set_additional(kwargs)
        super(sGrab, self).setup(*args, **self.delete_additional_params(kwargs))

    def submit(self, *args, **kwargs):
        return self.monitor(super(sGrab, self).submit, *args, **kwargs)

    def go(self, *args, **kwargs):
        return self.monitor(super(sGrab, self).go, *args, **kwargs)

    def set_additional(self, kwargs):
        """Copy the sGrab options present in *kwargs* onto the instance."""
        #sGrab optional
        for param in self.additional_params:
            setattr(self, "_" + param, kwargs.get(param, getattr(self, "_" + param)))
        #original from Grab
        self.make_request = kwargs.get("make_request", self.make_request)

    def save_attributes(self):
        """Back up the current sGrab options (see ``restore_attributes``)."""
        for param in self.additional_params:
            setattr(self, "_back_up_" + param, getattr(self, "_" + param))

    def restore_attributes(self):
        """Restore the sGrab options saved by ``save_attributes``."""
        for param in self.additional_params:
            setattr(self, "_" + param, getattr(self, "_back_up_" + param))

    def delete_additional_params(self, kwargs):
        """Return a copy of *kwargs* without the sGrab-only options."""
        return {key: val for key, val in kwargs.items() if key not in self.additional_params}

    def _grab_save_file(self, filename = "test.html", path = "/srv/Scrapers/booking/airlines/html/", body = ""):
        """Save html page to file, with timestamp in filename"""
        tz = pytz.timezone(country_timezones("CZ")[0])
        # Strip the extension from the file name only, not from the path.
        stem = filename.replace(".html", "")
        final_name = "%s%s_%s.html" % (path, stem, datetime.now(tz).strftime("%H-%M_%d-%m-%Y"))
        print("saving page as %s ..." % final_name)
        print("url %s/last_page_of_booking/%s" % ("www3.skypicker.com:12555", final_name.split("/")[-1]))
        with open(final_name, "wb+") as f:
            f.write(body)
def _grab_log_error(msg, action_name):
    """Append one line ``<dd-mm-YYYY HH:MM>:<action_name>: <msg>`` to /var/log/s_grab.log."""
    cz_zone = pytz.timezone(country_timezones("CZ")[0])
    log_path = "/var/log/s_grab.log"
    with open(log_path, 'a') as log:
        stamp = datetime.now(cz_zone).strftime("%d-%m-%Y %H:%M")
        log.write("%s:%s: %s\n" % (stamp, action_name, msg))
sGrab usage examples

Create the object:
g = sGrab(expected_code=200, print_out=True, save_html=True)
Set the defaults on an existing object:
g.setup(expected_code=200, print_out=True, save_html=True)
Turn on response checking:
g.setup(check=True)
Make requests (options given here apply to that request only):
g.go("www.ryanair.com/Search", action_name="search", expected_url="Selection")
g.go("www.ryanair.com/not_found", expected_code=404)