Created
September 27, 2023 07:54
-
-
Save SpotlightForBugs/5c8aa90d8e5d5e0c0caf97eea9a22ff2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import sys | |
import webbrowser | |
import customtkinter | |
import json2html | |
import requests | |
from selenium import webdriver | |
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities | |
# Inline CSS that is written in front of the generated table markup in
# result.html: collapsed black borders, padded cells, green header row.
style = """
<style>
table {
border-collapse: collapse;
border: 1px solid black;
}
th, td {
border: 1px solid black;
padding: 5px;
}
th {
background-color: #4CAF50;
color: white;
}
</style>
"""
def process_browser_log_entry(entry):
    """Decode a single raw browser log entry.

    The text stored under ``entry['message']`` is parsed as JSON, and the
    value found under the ``'message'`` key of the decoded object is
    returned.
    """
    decoded = json.loads(entry['message'])
    return decoded['message']
def find_json_url(url, driver):
    """Load *url* in *driver* and return the booking API request it triggers.

    The page is opened, the ``__hs_cookie_cat_pref`` cookie is set, and the
    page is refreshed. The performance log collected by the driver is then
    scanned for the first request whose URL contains the EU booking API
    prefix.

    Args:
        url: Address of the page to load.
        driver: Selenium WebDriver with performance logging available.

    Returns:
        The matching request URL, or ``None`` when no such request was logged.
    """
    api_prefix = 'https://app-eu1.hubspot.com/api/meetings-public/v1/book'

    driver.get(url)
    # NOTE(review): presumably this pre-accepts the cookie-consent
    # categories so the booking widget loads - confirm.
    driver.add_cookie({'name': '__hs_cookie_cat_pref', 'value': '1:true,2:true,3:true'})
    driver.refresh()

    for entry in driver.get_log('performance'):
        event = process_browser_log_entry(entry)
        # Compare against the request URL itself. Matching on str(event)
        # also hit events that merely mention the URL (e.g. responses) and
        # have no params.request, which raised KeyError on the lookup.
        request_url = event.get('params', {}).get('request', {}).get('url', "")
        if api_prefix in request_url:
            return request_url
    return None
def find_hubspot_meeting_api_url(events):
    """Return the first logged request URL that targets the meetings API.

    Args:
        events: Decoded performance-log events (dictionaries).

    Returns:
        The URL of the first event whose ``params.request.url`` contains
        ``https://app.hubspot.com/api/meetings-public``, or ``None`` when no
        event qualifies.
    """
    api_prefix = 'https://app.hubspot.com/api/meetings-public'
    for event in events:
        # Inspect the request URL directly: events that only mention the
        # prefix elsewhere (such as response events) carry no
        # params.request and used to raise KeyError here.
        request_url = event.get('params', {}).get('request', {}).get('url', "")
        if api_prefix in request_url:
            return request_url
    return None
def find_meetings_hubspot_url(events, driver):
    """Resolve the JSON endpoint via a meetings page referenced in *events*.

    Each event's ``params.request.url`` is compared against the known
    ``<hostname>/meetings/`` markers. Every match is handed to
    ``find_json_url`` together with *driver*; the first truthy result is
    returned. ``None`` is returned when nothing resolves.
    """
    meeting_markers = [
        host + "/meetings/"
        for host in (
            "meetings.hubspot.com", "meetings.hubspotqa.com",
            "meetings-eu1.hubspot.com", "meetings-eu1.hubspotqa.com",
            "app.hubspot.com", "app.hubspotqa.com",
            "app-eu1.hubspot.com", "app-eu1.hubspotqa.com",
        )
    ]
    for event in events:
        request_info = event.get('params', {}).get('request', {})
        candidate = request_info.get('url', "")
        for marker in meeting_markers:
            if marker not in candidate:
                continue
            resolved = find_json_url(candidate, driver)
            if resolved:
                return resolved
    return None
# TODO: add support for other HubSpot domains
# TODO: add https://forms.hsforms.com/embed/ to the list of URLs to check for the JSON endpoint
def get_endpoint(meeting_url):
    """Find the meetings JSON endpoint requested by a HubSpot meeting page.

    A headless Chrome session with performance logging is started, the page
    is loaded, the ``__hs_cookie_cat_pref`` cookie is set and the page is
    refreshed. The logged events are then searched: directly when the
    address contains ``meetings.hubspot.com/``, otherwise through
    ``find_meetings_hubspot_url``.

    Args:
        meeting_url: Page address, with or without an ``http(s)://`` scheme.

    Returns:
        The endpoint URL, or ``False`` (after printing a notice) when no
        endpoint was found.
    """
    url = meeting_url.strip()
    # Only prepend a scheme when none was supplied; otherwise a pasted
    # "https://..." address would turn into "https://https://...".
    if not url.startswith(('http://', 'https://')):
        url = f'https://{url}'

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    # Ask for performance logs on this session's own options instead of
    # mutating the shared DesiredCapabilities.CHROME dictionary.
    options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        driver.add_cookie({'name': '__hs_cookie_cat_pref', 'value': '1:true,2:true,3:true'})
        driver.refresh()
        browser_log = driver.get_log('performance')
        events = [process_browser_log_entry(entry) for entry in browser_log]
        if 'meetings.hubspot.com/' in url:
            json_url = find_hubspot_meeting_api_url(events)
        else:
            json_url = find_meetings_hubspot_url(events, driver)
    finally:
        # Shut the browser down even when loading or the lookup raises, so
        # no headless Chrome process is left behind.
        driver.quit()

    if json_url:
        return json_url
    print("JSON URL not found.")
    return False
def process_json_url(json_endpoint, timeout=30):
    """Download *json_endpoint* and return its decoded JSON body.

    Args:
        json_endpoint: URL to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        The object produced by decoding the response text as JSON.

    Raises:
        requests.exceptions.RequestException: The request failed or timed out.
        json.JSONDecodeError: The response body is not valid JSON.
    """
    # A browser-like user agent is sent to avoid a 403 response.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
    }
    response = requests.get(json_endpoint, headers=headers, timeout=timeout)
    return json.loads(response.text)
def run(json_endpoint):
    """Fetch *json_endpoint*, render it as an HTML table and open the result.

    The identifying keys ``linkId``, ``userId``, ``portalId`` and
    ``formGuid`` are removed from the top level of the payload before it is
    converted. The table, preceded by the module-level ``style`` block, is
    written to ``result.html`` in the current directory and then opened.

    Args:
        json_endpoint: URL of the JSON endpoint. A falsy value terminates
            the process with exit status 1.
    """
    # Local import: pathlib is not among the module-level imports.
    from pathlib import Path

    if not json_endpoint:
        sys.exit(1)
    print("fetching...")

    processed_url_json = process_json_url(json_endpoint)

    blocklist = {"linkId", 'userId', 'portalId', 'formGuid'}
    # Only mappings have keys to strip; any other payload is rendered as is.
    if isinstance(processed_url_json, dict):
        for key in blocklist:
            processed_url_json.pop(key, None)

    html = json2html.json2html.convert(json=processed_url_json, encode=True).decode("utf-8")

    result_path = Path("result.html")
    with open(result_path, "w", encoding="utf-8") as f:
        f.write(style + html)

    # os.system("result.html") only opens the file on Windows; elsewhere the
    # shell tries to execute it. Opening a file URI works on every platform.
    webbrowser.open(result_path.resolve().as_uri())
def on_url_input_button_click(entry, textfield, mode):
    """Handle a click on one of the two action buttons.

    Args:
        entry: Widget whose ``get()`` returns the meeting URL typed by the user.
        textfield: Label used to report progress and results.
        mode: ``0`` looks up the JSON endpoint and turns the label into a
            link to it; ``1`` looks it up and renders it to an HTML table
            via ``run``. Any other value prints ``"Invalid mode"``.

    Nothing happens when the entry is empty or contains only whitespace.
    """
    meeting_url = entry.get().strip()
    if not meeting_url:
        return

    if mode == 0:
        # Remove any click handler left by an earlier lookup so the label
        # can never open a stale endpoint.
        textfield.unbind("<Button-1>")
        textfield.configure(text="loading...")
        textfield.update()
        endpoint_url = get_endpoint(meeting_url)
        if endpoint_url:
            textfield.configure(text="Click here to open the JSON URL", text_color="blue", cursor="hand2")
            textfield.bind("<Button-1>", lambda e: webbrowser.open_new(endpoint_url))
        else:
            textfield.configure(text="JSON URL not found.")
        textfield.update()
    elif mode == 1:
        textfield.unbind("<Button-1>")
        textfield.configure(text="Fetching...")
        textfield.update()
        endpoint_url = get_endpoint(meeting_url)
        if endpoint_url:
            run(endpoint_url)
            textfield.configure(text="Done!")
        else:
            # run() exits the whole process on a falsy endpoint; report the
            # failed lookup in the window instead of closing the application.
            textfield.configure(text="JSON URL not found.")
        textfield.update()
    else:
        print("Invalid mode")
def gui_ask_for_url():
    """Build the scraper window and run its event loop.

    The window holds a URL entry, a "Find JSON URL" button (mode 0), a
    "Create Table" button (mode 1) and a status label, packed in that order.
    Both buttons forward to ``on_url_input_button_click``.
    """
    # Follow the system appearance and use the blue colour theme.
    customtkinter.set_appearance_mode("System")
    customtkinter.set_default_color_theme("blue")

    window = customtkinter.CTk()
    window.title("Hubspot Booking API Scraper")
    window.geometry("600x200")
    window.resizable(True, True)

    url_entry = customtkinter.CTkEntry(
        window, placeholder_text="Enter Hubspot Meeting URL", width=500
    )
    status_label = customtkinter.CTkLabel(window, text="No Data", width=500, height=500)

    def find_json_clicked():
        on_url_input_button_click(url_entry, status_label, 0)

    def create_table_clicked():
        on_url_input_button_click(url_entry, status_label, 1)

    find_json_button = customtkinter.CTkButton(
        window, text="Find JSON URL", command=find_json_clicked, width=20, height=1
    )
    create_table_button = customtkinter.CTkButton(
        window, text="Create Table", command=create_table_clicked, width=20, height=1
    )

    # Pack top to bottom with identical vertical padding.
    for widget in (url_entry, find_json_button, create_table_button, status_label):
        widget.pack(pady=10)

    window.mainloop()
# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    gui_ask_for_url()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment