Skip to content

Instantly share code, notes, and snippets.

@robertripoll
Created January 14, 2024 12:32
Show Gist options
  • Save robertripoll/43c39d8e5898b87e63833840f9f5941b to your computer and use it in GitHub Desktop.
Python script to retrieve the title of ChatGPT conversations
#!/usr/local/bin/python
import sys
import requests
from bs4 import BeautifulSoup
import json
def get_data_object(url) -> dict | None:
    """Fetch *url* and return the parsed ``__NEXT_DATA__`` JSON payload.

    Next.js pages embed their server-side data as JSON inside a
    ``<script id="__NEXT_DATA__">`` tag; this downloads the page and
    extracts that object.

    Args:
        url: Address of the page to fetch (a ChatGPT share link).

    Returns:
        The decoded JSON payload as a dict, or ``None`` when the request
        fails, the script tag is absent, or it has no content.
    """
    try:
        # Fetch HTML content from the URL; a timeout prevents the script
        # from hanging forever on an unresponsive host.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad responses

        # Parse HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the script tag with id="__NEXT_DATA__"
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if not script_tag:
            return None

        # Extract the inner content of the script tag (None when empty)
        inner_content = script_tag.contents[0] if script_tag.contents else None
        if inner_content is None:
            return None
        return json.loads(inner_content)
    except requests.RequestException as e:
        print(f"ERROR fetching content: {e}")
        return None
def get_page_title(data: dict | None) -> str | None:
if data == None:
return None
return data.get('props', {}).get('pageProps', {}).get('serverResponse', {}).get('data', {}).get('title')
if __name__ == "__main__":
    # Check if at least one URL is provided as a command-line argument
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <URL1> <URL2> ...")
        sys.exit(1)

    # Get the list of URLs from the command-line arguments
    urls = sys.argv[1:]
    results = {}

    for url in urls:
        print(f"Processing {url}...")
        data_object = get_data_object(url)
        page_title = get_page_title(data_object)

        # Record and acknowledge only URLs whose title was resolved;
        # failed fetches are silently skipped from the output mapping.
        if page_title:
            results[url] = page_title
            print("OK!")

    # Emit the url -> title mapping as JSON on stdout.
    print(json.dumps(results))
beautifulsoup4==4.12.2
requests==2.31.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment