Skip to content

Instantly share code, notes, and snippets.

@robertripoll
Created January 14, 2024 12:32
Show Gist options
  • Save robertripoll/43c39d8e5898b87e63833840f9f5941b to your computer and use it in GitHub Desktop.
Python script to retrieve the title of ChatGPT conversations
#!/usr/local/bin/python
import sys
import requests
from bs4 import BeautifulSoup
import json
def get_data_object(url) -> dict | None:
    """Fetch *url* and return the parsed ``__NEXT_DATA__`` JSON payload.

    Next.js pages embed their server-side data as JSON inside a
    ``<script id="__NEXT_DATA__">`` tag; this downloads the page and
    extracts that object.

    Args:
        url: Address of the page to fetch (a ChatGPT share link).

    Returns:
        The decoded JSON payload as a dict, or ``None`` when the request
        fails, the script tag is absent, or it has no content.
    """
    try:
        # Fetch HTML content from the URL; a timeout prevents the script
        # from hanging forever on an unresponsive host.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad responses

        # Parse HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the script tag with id="__NEXT_DATA__"
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if not script_tag:
            return None

        # Extract the inner content of the script tag (None when empty)
        inner_content = script_tag.contents[0] if script_tag.contents else None
        if inner_content is None:
            return None
        return json.loads(inner_content)
    except requests.RequestException as e:
        print(f"ERROR fetching content: {e}")
        return None
def get_page_title(data: dict | None) -> str | None:
if data == None:
return None
return data.get('props', {}).get('pageProps', {}).get('serverResponse', {}).get('data', {}).get('title')
if __name__ == "__main__":
    # Check if at least one URL is provided as a command-line argument
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <URL1> <URL2> ...")
        sys.exit(1)

    # Get the list of URLs from the command-line arguments
    urls = sys.argv[1:]
    results = {}

    for url in urls:
        print(f"Processing {url}...")
        data_object = get_data_object(url)
        page_title = get_page_title(data_object)

        # Record and acknowledge only URLs whose title was resolved;
        # failed fetches are silently skipped from the output mapping.
        if page_title:
            results[url] = page_title
            print("OK!")

    # Emit the url -> title mapping as JSON on stdout.
    print(json.dumps(results))
beautifulsoup4==4.12.2
requests==2.31.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment