# Source: GitHub Gist by @SpotlightForBugs
# Gist ID: 5c8aa90d8e5d5e0c0caf97eea9a22ff2 (created September 27, 2023 07:54)
import json
import os
import sys
import webbrowser
import customtkinter
import json2html
import requests
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Inline CSS that run() prepends to the generated HTML table before writing
# result.html: collapsed 1px black borders, 5px cell padding, and a green
# (#4CAF50) header row with white text.
style = """
<style>
table {
border-collapse: collapse;
border: 1px solid black;
}
th, td {
border: 1px solid black;
padding: 5px;
}
th {
background-color: #4CAF50;
color: white;
}
</style>
"""
def process_browser_log_entry(entry):
    """Decode one browser log entry and return its inner ``message`` payload.

    Args:
        entry: Mapping whose ``'message'`` value is a JSON document (as text)
            that itself has a top-level ``'message'`` key.

    Returns:
        The value stored under the decoded document's ``'message'`` key.

    Raises:
        KeyError: If either ``'message'`` key is missing.
        json.JSONDecodeError: If ``entry['message']`` is not valid JSON.
    """
    envelope = json.loads(entry['message'])
    return envelope['message']
def find_json_url(url, driver):
    """Load *url* in *driver* and return the first EU meetings booking API URL it requests.

    The page is opened, the ``__hs_cookie_cat_pref`` cookie is set, and the
    page is refreshed before the driver's ``'performance'`` log is read.
    (Presumably the cookie pre-accepts HubSpot's cookie categories so the
    booking widget loads; confirm against HubSpot's documentation.)

    Args:
        url: Page address to open.
        driver: Browser driver exposing ``get``, ``add_cookie``, ``refresh``
            and ``get_log('performance')``. Assumes performance logging was
            enabled when the driver was created.

    Returns:
        The first logged request URL containing
        ``https://app-eu1.hubspot.com/api/meetings-public/v1/book``, or
        ``None`` if no logged request matches.
    """
    api_prefix = 'https://app-eu1.hubspot.com/api/meetings-public/v1/book'

    driver.get(url)
    driver.add_cookie({'name': '__hs_cookie_cat_pref', 'value': '1:true,2:true,3:true'})
    driver.refresh()

    browser_log = driver.get_log('performance')
    events = [process_browser_log_entry(entry) for entry in browser_log]
    for event in events:
        # Inspect the request URL itself rather than str(event): an event can
        # mention the prefix elsewhere (e.g. a response or a Referer header)
        # without having params.request.url, which used to raise KeyError or
        # return an unrelated URL.
        request_url = event.get('params', {}).get('request', {}).get('url') or ""
        if api_prefix in request_url:
            return request_url
    return None
def find_hubspot_meeting_api_url(events):
    """Return the first logged request URL that targets the meetings-public API.

    Args:
        events: Iterable of decoded performance-log events (dicts). Only
            events carrying ``params.request.url`` are considered.

    Returns:
        The first request URL containing
        ``https://app.hubspot.com/api/meetings-public``, or ``None`` when no
        event matches.
    """
    api_prefix = 'https://app.hubspot.com/api/meetings-public'
    for event in events:
        # Match on the request URL itself, not on str(event): events that only
        # mention the prefix (responses, Referer headers) have no
        # params.request.url to return and previously raised KeyError or
        # yielded the wrong URL.
        request_url = event.get('params', {}).get('request', {}).get('url') or ""
        if api_prefix in request_url:
            return request_url
    return None
def find_meetings_hubspot_url(events, driver):
    """Follow logged requests to HubSpot ``/meetings/`` pages to find the JSON endpoint.

    Each event's ``params.request.url`` is compared against the known
    HubSpot hostnames followed by ``/meetings/``. Every match is handed to
    ``find_json_url`` together with *driver*; the first truthy result wins.

    Args:
        events: Iterable of decoded performance-log events (dicts).
        driver: Browser driver forwarded unchanged to ``find_json_url``.

    Returns:
        The first truthy value returned by ``find_json_url``, or ``None`` if
        no logged request leads to one.
    """
    path_markers = tuple(
        host + "/meetings/"
        for host in (
            "meetings.hubspot.com", "meetings.hubspotqa.com",
            "meetings-eu1.hubspot.com", "meetings-eu1.hubspotqa.com",
            "app.hubspot.com", "app.hubspotqa.com",
            "app-eu1.hubspot.com", "app-eu1.hubspotqa.com",
        )
    )
    for record in events:
        candidate = record.get('params', {}).get('request', {}).get('url', "")
        for marker in path_markers:
            if marker not in candidate:
                continue
            resolved = find_json_url(candidate, driver)
            if resolved:
                return resolved
    return None
# TODO: add support for other hubspot domains
# TODO: add https://forms.hsforms.com/embed/ to the list of urls to check for json endpoint
def get_endpoint(meeting_url):
    """Open a meeting page in headless Chrome and return the JSON API URL it requests.

    Args:
        meeting_url: Address of the meeting page, with or without a leading
            ``http://`` / ``https://`` scheme.

    Returns:
        The discovered JSON endpoint URL, or ``False`` (after printing
        ``"JSON URL not found."``) when none was seen in the performance log.
    """
    # Only add a scheme when the user did not supply one; prefixing
    # unconditionally turned "https://host/x" into "https://https://host/x".
    url = meeting_url.strip()
    if not url.lower().startswith(('http://', 'https://')):
        url = f'https://{url}'

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    # Request performance logging explicitly on this driver's options instead
    # of mutating the shared DesiredCapabilities.CHROME dict, which was never
    # passed to the driver and leaked the setting into global state.
    options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        driver.add_cookie({'name': '__hs_cookie_cat_pref', 'value': '1:true,2:true,3:true'})
        driver.refresh()
        browser_log = driver.get_log('performance')
        events = [process_browser_log_entry(entry) for entry in browser_log]
        if 'meetings.hubspot.com/' in url:
            json_url = find_hubspot_meeting_api_url(events)
        else:
            json_url = find_meetings_hubspot_url(events, driver)
    finally:
        # Always shut the browser down, even if navigation or log parsing
        # raised; otherwise a headless Chrome process is left running.
        driver.quit()

    if json_url:
        return json_url
    print("JSON URL not found.")
    return False
def process_json_url(json_endpoint, timeout=30):
    """Download *json_endpoint* and return its decoded JSON body.

    Args:
        json_endpoint: URL to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            caller (the GUI invokes this synchronously).

    Returns:
        The parsed JSON value (whatever ``json.loads`` produces for the body).

    Raises:
        requests.exceptions.RequestException: On connection errors or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Send a browser-like User-Agent to avoid a 403 response.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
    }
    response = requests.get(json_endpoint, headers=headers, timeout=timeout)
    return json.loads(response.text)
def run(json_endpoint):
    """Fetch *json_endpoint*, render it as an HTML table, and open the result.

    The JSON is downloaded via ``process_json_url``, a fixed set of
    top-level keys is removed, the remainder is converted with ``json2html``
    and written (prefixed with the module-level ``style`` CSS) to
    ``result.html`` in the current working directory, which is then opened
    in the default web browser.

    Args:
        json_endpoint: URL of the JSON document. A falsy value terminates the
            process with exit status 1.
    """
    # Local import keeps this function self-contained; pathlib is stdlib.
    from pathlib import Path

    if not json_endpoint:
        sys.exit(1)
    print("fetching...")

    processed_url_json = process_json_url(json_endpoint)

    # Top-level keys removed from the payload before rendering.
    BLOCKLIST = {
        "linkId", 'userId', 'portalId', 'formGuid'
    }
    # Only a JSON object has keys to strip; a list (or scalar) payload is
    # rendered as-is.
    if isinstance(processed_url_json, dict):
        for key in BLOCKLIST:
            processed_url_json.pop(key, None)

    html = json2html.json2html.convert(json=processed_url_json, encode=True).decode("utf-8")

    output_path = Path("result.html").resolve()
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(style + html)

    # os.system("result.html") only opens the file on Windows (elsewhere the
    # shell tries to execute it); webbrowser works on every platform.
    webbrowser.open(output_path.as_uri())
def on_url_input_button_click(entry, textfield, mode):
    """Handle a button press: look up the JSON endpoint for the entered URL.

    Args:
        entry: Input widget; its ``get()`` value is the meeting URL.
        textfield: Label widget used for status output.
        mode: ``0`` shows a clickable link to the JSON endpoint in
            *textfield*; ``1`` renders the endpoint's JSON as an HTML table
            via ``run``. Any other value prints ``"Invalid mode"``.
    """
    meeting_url = entry.get().strip()
    if meeting_url == "":
        return
    if mode not in (0, 1):
        print("Invalid mode")
        return

    # Drop the link handler left by an earlier successful lookup so that
    # clicking a later status message does not open a stale URL (and so that
    # repeated lookups do not stack several handlers).
    textfield.unbind("<Button-1>")

    # The lookup below blocks the UI thread, so push the status text to the
    # screen before starting it.
    textfield.configure(text="loading..." if mode == 0 else "Fetching...")
    textfield.update()

    endpoint_url = get_endpoint(meeting_url)

    if not endpoint_url:
        # Previously mode 1 passed the falsy result to run(), whose
        # sys.exit(1) terminated the whole GUI; report the failure instead.
        textfield.configure(text="JSON URL not found.")
    elif mode == 0:
        textfield.configure(text="Click here to open the JSON URL", text_color="blue", cursor="hand2")
        textfield.bind("<Button-1>", lambda e: webbrowser.open_new(endpoint_url))
    else:
        run(endpoint_url)
        textfield.configure(text="Done!")
    textfield.update()
def gui_ask_for_url():
    """Build the scraper window and run its main loop.

    The window holds a URL entry, a "Find JSON URL" button (mode 0), a
    "Create Table" button (mode 1) and a status label; both buttons delegate
    to ``on_url_input_button_click``.
    """
    customtkinter.set_appearance_mode("System")  # Modes: system (default), light, dark
    customtkinter.set_default_color_theme("blue")  # Themes: blue (default), dark-blue, green

    window = customtkinter.CTk()
    window.title("Hubspot Booking API Scraper")
    window.geometry("600x200")
    window.resizable(True, True)

    url_entry = customtkinter.CTkEntry(window, placeholder_text="Enter Hubspot Meeting URL", width=500)
    status_label = customtkinter.CTkLabel(window, text="No Data", width=500, height=500)

    def show_json_url():
        on_url_input_button_click(url_entry, status_label, 0)

    def build_table():
        on_url_input_button_click(url_entry, status_label, 1)

    find_button = customtkinter.CTkButton(
        window, text="Find JSON URL", command=show_json_url, width=20, height=1
    )
    table_button = customtkinter.CTkButton(
        window, text="Create Table", command=build_table, width=20, height=1
    )

    # Pack top to bottom: entry, the two buttons, then the status label.
    for widget in (url_entry, find_button, table_button, status_label):
        widget.pack(pady=10)

    window.mainloop()
# Launch the GUI only when executed as a script, not when imported.
if __name__ == '__main__':
    gui_ask_for_url()
# End of gist. (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")