Skip to content

Instantly share code, notes, and snippets.

@janaki-sasidhar
Created August 30, 2020 03:48
Show Gist options
  • Save janaki-sasidhar/92c5b6fe08793b10f6e2d65009b0dee7 to your computer and use it in GitHub Desktop.
Save janaki-sasidhar/92c5b6fe08793b10f6e2d65009b0dee7 to your computer and use it in GitHub Desktop.
import time
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from PIL import Image
import cv2
# Page to scrape, and the shared buffer that get_images() fills with the PNG
# bytes of each element screenshot.
URL = 'http://properties.inspecthoa.com/3080-21st-St-Long-Island-City-NY-11102'
final_list = []

# Headless Firefox session driven through a local geckodriver binary.
options = Options()
options.headless = True
driver = webdriver.Firefox(
    options=options,
    executable_path=r"C:\Users\sasid\Downloads\geckodriver.exe",
)

# Load the page once at import time; get_images() works on this session.
driver.maximize_window()
driver.get(URL)
def get_concat_v(im1, im2):
    """Stack two images vertically and return the combined RGB image.

    The new canvas takes its width from ``im1`` and its height from the sum
    of both heights. ``im1`` is pasted at the top-left corner and ``im2``
    directly beneath it, also flush with the left edge.
    """
    top_height = im1.height
    canvas_size = (im1.width, top_height + im2.height)
    stacked = Image.new('RGB', canvas_size)
    for picture, offset_y in ((im1, 0), (im2, top_height)):
        stacked.paste(picture, (0, offset_y))
    return stacked
def standardize(pil_img, logo=None):
    """Fit ``pil_img`` onto a fixed 3508x2480 page with a 50-pixel border.

    The image is first scaled so its width fills the usable area; if that
    makes it taller than the usable area it is scaled down again to fit the
    height and centred horizontally. It is always aligned to the top border.

    Args:
        pil_img: PIL image to place on the page.
        logo: optional PIL image pasted at (3170, 2380), without a mask.

    Returns:
        A new image in ``pil_img``'s mode, created with a
        (255, 255, 255, 255) fill, containing the resized image and,
        when given, the logo.
    """
    border = 50
    page_w, page_h = 3508, 2480
    logo_position = (3170, 2380)
    usable_w = page_w - 2 * border
    usable_h = page_h - 2 * border

    src_w, src_h = pil_img.size

    # First pass: stretch/shrink to the full usable width.
    new_w = usable_w
    new_h = int(float(src_h) * (usable_w / float(src_w)))

    # Second pass: if that overflows vertically, shrink to the usable height.
    if new_h > usable_h:
        shrink = usable_h / float(new_h)
        new_w = int(float(new_w) * shrink)
        new_h = usable_h

    # new_w never exceeds usable_w, so the horizontal slack is non-negative.
    offset = (border + (usable_w - new_w) // 2, border)

    scaled = pil_img.resize((new_w, new_h), resample=Image.LANCZOS)
    page = Image.new(scaled.mode, (page_w, page_h), (255, 255, 255, 255))
    page.paste(scaled, offset)
    if logo is not None:
        page.paste(logo, logo_position)
    return page
def _click_and_wait(xpath, delay=5):
    """Click the element at ``xpath`` in ``driver``, then sleep ``delay`` seconds."""
    driver.find_element_by_xpath(xpath).click()
    time.sleep(delay)


def _capture(xpath):
    """Append a PNG screenshot of the element at ``xpath`` to ``final_list``."""
    final_list.append(driver.find_element_by_xpath(xpath).screenshot_as_png)


def _capture_lower_part(xpath, extra_height, screenshot_path, cropped_path,
                        cutoff_offset):
    """Screenshot the whole window and save only its lower part.

    The window is resized to 1920 x (height of the element at ``xpath`` +
    ``extra_height``), the window screenshot is written to
    ``screenshot_path``, and the rows from ``height // 2 - cutoff_offset``
    downward are written to ``cropped_path``.
    """
    element = driver.find_element_by_xpath(xpath)
    driver.set_window_size(1920, element.size['height'] + extra_height)
    time.sleep(2)
    driver.save_screenshot(screenshot_path)

    shot = cv2.imread(screenshot_path)
    if shot is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('could not read screenshot {!r}'.format(screenshot_path))
    cutoff = shot.shape[0] // 2 - cutoff_offset
    cv2.imwrite(cropped_path, shot[cutoff:, :])


def get_images(link):
    """Screenshot each section of the loaded page and bundle them into a PDF.

    The page must already be loaded in the module-level ``driver``; ``link``
    is used only to derive the PDF file name (its last path segment).

    Side effects:
        * resets and refills the module-level ``final_list`` with PNG bytes;
        * writes ``0.png`` .. ``9.png``, ``operations_screenshot.png``,
          ``operations_half.png``, ``financing_screenshot.png`` and
          ``financing_half.png`` to the working directory;
        * reads ``hoa_logo.png`` from the working directory;
        * writes ``tmp/<name>.pdf`` (creating ``tmp/`` if needed).

    Args:
        link: URL of the property page.

    Raises:
        IOError: if a full-window screenshot cannot be read back from disk.
    """
    import os

    pdf_file_name = link.strip('/').split('/')[-1]+".pdf"

    # Start from an empty buffer so a repeated call does not mix in the
    # screenshots of an earlier run (the page layout below is index-based).
    final_list.clear()

    # summary
    _capture('/html/body/div/main/div[1]/section/div/div[1]')
    time.sleep(5)

    # finances
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[1]/div[1]/button/span[1]')
    _capture('/html/body/div/main/div[1]/section/div/div[3]/div[3]/section/div')

    # fees
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[2]/div/div[2]/button')
    _capture(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[1]')
    _capture(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[2]/div')

    # expenses
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[2]/div/div[3]/button')
    _capture(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[1]')
    _capture(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[2]')

    # reserves
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[2]/div/div[4]/button')
    _capture(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[1]')
    _capture(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div[2]')

    # bylaws: "must know" then "good to know"
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[1]/div[2]/button')
    _click_and_wait(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[2]/div/div[2]/button')
    _capture(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div')
    _click_and_wait(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[2]/div/div[3]/button')
    _capture(
        '//*[@id="root"]/main/div[1]/section/div/div[3]/div[3]/section/div/div')

    # operations: full-window screenshot, keep the lower part only
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[1]/div[3]/button')
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[2]/div/div/button')
    _capture_lower_part(
        '/html/body/div/main/div[1]/section/div/div[3]/div[3]/section/div/div',
        1000, 'operations_screenshot.png', 'operations_half.png', 50)

    # financing: same approach with its own padding and cut-off
    _click_and_wait(
        '/html/body/div/main/div[1]/section/div/div[3]/div[1]/div[4]/button')
    _capture_lower_part(
        '/html/body/div/main/div[1]/section/div/div[3]',
        700, 'financing_screenshot.png', 'financing_half.png', 75)

    # Dump the element screenshots to numbered PNG files.
    for index, png_bytes in enumerate(final_list):
        with open('{}.png'.format(index), 'wb') as f:
            f.write(png_bytes)
    count = len(final_list)

    # Give 0.png the width of 1.png: get_concat_v sizes the stacked canvas
    # from the first image's width only.
    with Image.open('1.png') as second:
        target_width = second.width
    with Image.open('0.png') as first:
        resized_first = first.resize((target_width, first.height))
    resized_first.save('0.png')

    logo = Image.open('hoa_logo.png')

    # Screenshots 0-7 are paired up below; the rest become pages of their own.
    im_list = []
    for index in range(count):
        im = Image.open(str(index)+'.png').convert('RGB')
        im_list.append(standardize(im, logo) if index > 7 else im)
    for half_path in ('operations_half.png', 'financing_half.png'):
        im_list.append(standardize(Image.open(half_path).convert('RGB'), logo))

    # Pages 1-4: consecutive pairs stacked vertically, then fitted to the page.
    pages = [
        standardize(get_concat_v(im_list[k], im_list[k + 1]), logo)
        for k in range(0, 8, 2)
    ]
    pages.extend(im_list[8:])

    # The output directory may not exist on a fresh checkout.
    os.makedirs('tmp', exist_ok=True)
    pages[0].save("tmp/"+pdf_file_name, "PDF", resolution=150, quality=70,
                  optimize=True, save_all=True, append_images=pages[1:])
    print('Image saved at tmp/{}'.format(pdf_file_name))
try:
    get_images(URL)
finally:
    # Always end the WebDriver session, even when scraping fails, so no
    # headless Firefox / geckodriver process is left running.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment