Created
August 30, 2020 03:48
-
-
Save janaki-sasidhar/92c5b6fe08793b10f6e2d65009b0dee7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import time | |
| from selenium import webdriver | |
| from selenium.webdriver.firefox.options import Options | |
| from PIL import Image | |
| import cv2 | |
# Location of the geckodriver binary used to drive Firefox on this machine.
GECKODRIVER_PATH = r"C:\Users\sasid\Downloads\geckodriver.exe"

# Page whose sections are screenshotted, and the shared buffer that
# get_images() fills with PNG byte strings.
URL = 'http://properties.inspecthoa.com/3080-21st-St-Long-Island-City-NY-11102'
final_list = []

# Run Firefox without a visible window.
options = Options()
options.headless = True

# Start the browser session at import time and load the target page so the
# functions below can query `driver` directly.
driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH, options=options)
driver.maximize_window()
driver.get(URL)
def get_concat_v(im1, im2):
    """Return a new RGB image with *im1* stacked on top of *im2*.

    Both images are pasted flush with the left edge. The canvas is as wide
    as the wider of the two inputs, so neither image is cropped; any area
    not covered by the narrower image keeps the default fill of
    ``Image.new`` (black for RGB).

    Args:
        im1: Image placed at the top (anything exposing ``width``/``height``
            that ``Image.paste`` accepts).
        im2: Image placed directly below ``im1``.

    Returns:
        The combined image, ``max(im1.width, im2.width)`` pixels wide and
        ``im1.height + im2.height`` pixels tall.
    """
    # Using only im1.width here would silently cut off the right-hand side
    # of im2 whenever the lower image is the wider one.
    canvas_width = max(im1.width, im2.width)
    dst = Image.new('RGB', (canvas_width, im1.height + im2.height))
    dst.paste(im1, (0, 0))
    dst.paste(im2, (0, im1.height))
    return dst
def standardize(pil_img, logo=None):
    """Fit *pil_img* onto a white 3508 x 2480 page with a 50 px margin.

    The image is first scaled to the full printable width. If that makes it
    taller than the printable height, it is scaled down again to fit the
    height and centred horizontally. Vertically the image always starts at
    the top margin.

    Args:
        pil_img: Source image; its ``mode`` is reused for the page.
        logo: Optional image pasted at the fixed position (3170, 2380),
            i.e. near the bottom-right corner of the page.

    Returns:
        A new page-sized image containing the scaled picture (and the logo
        when one is given).
    """
    margin = 50
    page_w, page_h = 3508, 2480
    box_w = page_w - margin * 2
    box_h = page_h - margin * 2

    src_w, src_h = pil_img.size

    # Step 1: scale to the printable width, keeping the aspect ratio.
    width_ratio = box_w / float(src_w)
    new_w = box_w
    new_h = int(float(src_h) * float(width_ratio))
    left = margin

    # Step 2: if the result is too tall, shrink to the printable height
    # instead and centre the (now narrower) image horizontally.
    if new_h > box_h:
        height_ratio = float(box_h / float(new_h))
        new_w = int(float(box_w) * float(height_ratio))
        new_h = box_h
        left = margin + int(float(box_w - new_w) / 2)

    scaled = pil_img.resize((new_w, new_h), resample=Image.LANCZOS)

    page = Image.new(pil_img.mode, (page_w, page_h), (255, 255, 255, 255))
    page.paste(scaled, (left, margin))
    if logo is not None:
        page.paste(logo, (3170, 2380))
    return page
def _click_and_wait(xpath, pause=5):
    """Click the element at *xpath*, then sleep *pause* seconds."""
    # find_element("xpath", ...) is accepted by both Selenium 3 and 4,
    # unlike the find_element_by_xpath shortcut.
    driver.find_element("xpath", xpath).click()
    time.sleep(pause)


def _capture(xpath):
    """Append a PNG screenshot of the element at *xpath* to ``final_list``."""
    final_list.append(driver.find_element("xpath", xpath).screenshot_as_png)


def _save_lower_part(xpath, extra_height, shot_path, out_path, offset,
                     debug=False):
    """Screenshot the whole window and keep only its lower part.

    The window is resized to 1920 px wide and as tall as the element at
    *xpath* plus *extra_height*, a full-window screenshot is written to
    *shot_path*, and everything below row ``height // 2 - offset`` is
    written to *out_path*. With *debug* set, the screenshot shape and the
    cut-off row are printed.
    """
    element = driver.find_element("xpath", xpath)
    driver.set_window_size(1920, element.size['height'] + extra_height)
    time.sleep(2)
    driver.save_screenshot(shot_path)

    frame = cv2.imread(shot_path)
    if frame is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError('Could not read screenshot {}'.format(shot_path))
    if debug:
        print(frame.shape)
    cutoff = frame.shape[0] // 2 - offset
    if debug:
        print(cutoff)
    cv2.imwrite(out_path, frame[cutoff:, :])


def get_images(link):
    """Screenshot the sections of the loaded page and bundle them as a PDF.

    Works on the page already open in the module-level ``driver``: it clicks
    through the tabs, stores element screenshots in ``final_list``, writes
    them to ``0.png`` .. ``9.png`` in the working directory, lays them out
    on pages via ``standardize`` and saves a multi-page PDF under ``tmp/``.

    Args:
        link: URL of the page; only its last path segment is used, as the
            PDF file name. The page itself is NOT loaded from ``link``.

    Side effects:
        Resets and refills ``final_list``, resizes the browser window,
        writes several PNG files to the working directory, reads
        ``hoa_logo.png`` from it, creates ``tmp/`` if needed and prints the
        path of the saved PDF.

    Raises:
        IOError: if a window screenshot cannot be read back from disk.
    """
    import os  # local import: only needed to create the output directory

    pdf_file_name = link.strip('/').split('/')[-1]+".pdf"

    # Start from an empty buffer so that calling this function twice does
    # not mix the previous call's screenshots into the new PDF.
    del final_list[:]

    # summary
    _capture('/html/body/div/main/div[1]/section/div/div[1]')
    time.sleep(5)

    # finances
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[1]/div[1]/button/span[1]')
    _capture('/html/body/div/main/div[1]/section/div/div[3]/div[3]/section/div')

    # fees
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[2]/div/div[2]/button')
    _capture('//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[1]')
    _capture('//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[2]/div')

    # expenses
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[2]/div/div[3]/button')
    _capture('//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[1]')
    _capture('//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[2]')

    # reserves
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[2]/div/div[4]/button')
    _capture('//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[1]')
    _capture('//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[2]')

    # bylaws: "must know" then "good to know"
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[1]/div[2]/button')
    _click_and_wait(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[2]/div/div[2]/button')
    _capture('//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div')
    _click_and_wait(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[2]/div/div[3]/button')
    _capture('//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div')

    # operations: captured as a full-window screenshot, lower part kept
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[1]/div[3]/button')
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[2]/div/div/button')
    _save_lower_part(
        '/html/body/div/main/div[1]/section/div/div[3]/div[3]/section/div/div',
        1000, 'operations_screenshot.png', "operations_half.png", 50,
        debug=True)

    # financing: same approach with different padding and cut-off
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[1]/div[4]/button')
    _save_lower_part(
        '/html/body/div/main/div[1]/section/div/div[3]',
        700, 'financing_screenshot.png', "financing_half.png", 75)

    # Dump the element screenshots to numbered PNG files.
    count = len(final_list)
    for index, png_bytes in enumerate(final_list):
        with open('{}.png'.format(index), 'wb') as f:
            f.write(png_bytes)

    # Stretch the first screenshot to the width of the second so the two
    # line up when stacked. Both files are closed before 0.png is rewritten.
    with Image.open('1.png') as second:
        target_width = second.width
    with Image.open('0.png') as first:
        resized_first = first.resize((target_width, first.height))
    resized_first.save('0.png')

    logo = Image.open('hoa_logo.png')

    # Screenshots 0-7 stay raw because they are paired up below; later ones
    # become a full page each.
    im_list = []
    for i in range(count):
        im = Image.open(str(i)+'.png').convert('RGB')
        im_list.append(standardize(im, logo) if i > 7 else im)

    for half_path in ('operations_half.png', 'financing_half.png'):
        im_list.append(standardize(Image.open(half_path).convert('RGB'), logo))

    # Pages 1-4: consecutive pairs of the first eight screenshots stacked
    # vertically; the remaining entries are already full pages.
    pages = [
        standardize(get_concat_v(im_list[k], im_list[k + 1]), logo)
        for k in range(0, 8, 2)
    ]
    pages.extend(im_list[8:])

    # The output directory may not exist yet; saving would otherwise fail.
    os.makedirs('tmp', exist_ok=True)
    pages[0].save("tmp/"+pdf_file_name, "PDF", resolution=150, quality=70,
                  optimize=True, save_all=True, append_images=pages[1:])
    print('Image saved at tmp/{}'.format(pdf_file_name))
# Run the capture for the configured page. The browser session is always
# shut down afterwards, even if a step fails, so no headless Firefox or
# geckodriver process is left running.
try:
    get_images(URL)
finally:
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment