Created June 14, 2020 20:54
from bs4 import BeautifulSoup
from selenium import webdriver
import random
import time

file1 = open("Hotel_List.txt", "w")

headers = {
    'Accept': 'text/javascript, text/html, application/xml, text/xml, */*',
    'Accept-Encoding': 'gzip,deflate',
    'Accept-Language': 'en-US,en;q=0.5',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
    'Pragma': 'no-cache',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:28.0) Gecko/20100101 Firefox/28.0',
    'X-Requested-With': 'XMLHttpRequest'
}

UAS = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
    "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0'
)

# pick a random User-Agent for the request headers
ua = UAS[random.randrange(len(UAS))]
headers.update({'User-Agent': ua})

driver = webdriver.Chrome('/Users/bhushan/Downloads/chromedriver')
all_url = []


def get_hotel_name(soup):
    for a in soup.findAll('h1', {'class': 'detail-baseinfo_name'}):
        name = a.get_text().strip()
        print("Hotel Name: ", name)
        file1.write("Hotel Name: \t%s\n" % name)
    print("~" * 50)


def get_room_details(soup):
    """
    Method to get all details about a specific hotel.
    :return:
    """
    for a in soup.findAll('div', {'class': 'roomlist-baseroom-card'}):
        for b in a.findAll('div', {'class': 'roomname'}):
            print("Room Name: ", b.get_text().strip())
            file1.write("Room Name: \t%s\n" % b.get_text().strip())
        for b in a.findAll('div', {'class': 'roomcard'}):
            for c in b.findAll('div', {'class': 'salecard-flex'}):
                for d in c.findAll('div', {'class': 'salecard-bedfacility'}):
                    for e in d.findAll('div', {'class': 'facility'}):
                        for f in e.findAll('span', {'class': 'desc-text underline'}):
                            print("Amenities: ", f.get_text().strip())
                            file1.write("Amenities: \t%s\n" % f.get_text().strip())
                for d in c.findAll('div', {'class': 'bed'}):
                    for e in d.findAll('div', {'class': 'bed-content'}):
                        for f in e.findAll('span', {'class': 'underline'}):
                            print("Bed Type: ", f.get_text().strip())
                            file1.write("Bed Type: \t%s\n" % f.get_text().strip())
                for d in c.findAll('div', {'class': 'salecard-price'}):
                    for e in d.findAll('div', {'class': 'salecard-price-panel'}):
                        for f in e.findAll('div', {'class': 'note'}):
                            print("Price: ", f.get_text().strip())
                            file1.write("Price: \t%s\n" % f.get_text().strip())
    print('~' * 100)
    file1.write('\n')


def get_data():
    """
    Method to get the data from trip.com given checkin_date, checkout_date, city, no_of_people.
    :return:
    """
    url = 'https://www.trip.com/hotels/list?city=1&countryId=1&checkin=2020/11/18&checkout=2020/11/24&optionId=1' \
          '&optionType=City&display=Beijing&crn=1&adult=2&children=0&searchBoxArg=t&travelPurpose=0&ctm_ref=ix_sb_dl' \
          '&domestic=1'
    driver.get(url)
    time.sleep(2)
    # click every hotel card on the result page; each click opens the hotel in a new tab,
    # so remember the newest window handle
    window = []
    for i in driver.find_elements_by_class_name('list-card-title'):
        i.click()
        window.append(driver.window_handles[-1])
    # switch to each opened tab, record its url, and scrape the hotel name and room details
    for i in window:
        driver.switch_to.window(i)
        all_url.append(driver.current_url)
        html = driver.page_source
        soup = BeautifulSoup(html, 'lxml')
        get_hotel_name(soup)
        get_room_details(soup)


if __name__ == '__main__':
    get_data()
    # flush the output file and close the browser when done
    file1.close()
    driver.quit()
Download chromedriver from https://sites.google.com/a/chromium.org/chromedriver/downloads and update its path in the script at https://gist.github.com/BHushanRathod/d7942229914b04dfa7fb076efda011fb#file-trip_scrapper-py-L34.
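A minimal sketch of that change, assuming the chromedriver binary has been unpacked somewhere on your machine (the CHROMEDRIVER_PATH environment variable is only an example, not something the script defines):

    import os
    from selenium import webdriver

    # Point Selenium at your local chromedriver binary; adjust the fallback path
    # to wherever you saved it. CHROMEDRIVER_PATH is an optional override.
    chromedriver_path = os.environ.get('CHROMEDRIVER_PATH', '/Users/bhushan/Downloads/chromedriver')
    driver = webdriver.Chrome(executable_path=chromedriver_path)

Reading the location from an environment variable avoids editing the script on every machine where it runs.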