import os

import click
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from openai import OpenAI

# Load the API key from a .env file and create the OpenAI client.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)
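# Expected .env file next to this script (a sketch; the key value is a
# placeholder, and the variable name must match the os.getenv lookup above):
#   OPENAI_API_KEY=sk-...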
@click.command()
@click.option('--url', default='https://news.ycombinator.com', help='URL to summarize')
@click.option('--about', default='', help='Description of the page to summarize, e.g., "This is a news website about technology." It helps the model understand the context.')
@click.option('--output-file', default='output.txt', help='Path to the output summary file')
def process_file(url, about, output_file):
    print("Starting the process...")
    info = scrap_url(url)
    # print(f"Scraped info from {url}:\n{info[:500]}...")  # First 500 characters, for debugging
    print(f"Scraped info from {url} successfully.")
    summary = summarize_info(info, url, about)
    print("Summary generated successfully.")
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(summary)
    print(f"Summary written to {output_file} successfully.")
def summarize_info(info, url, about):
    # Build the prompt ("prompt" avoids shadowing the built-in "input").
    prompt = """
Please summarize the following information from the web page:
{url}
If the information is very long, summarize the most important lines.
If the information is short, summarize all of it.
Here is the body of the information:
{info}
""".format(info=info, url=url)
    if about:
        prompt += f"\n\nContext: {about}"
    # print(f"Prompt: {prompt}")
    response = client.responses.create(
        model="gpt-4.1",
        input=prompt
    )
    return response.output_text
def scrap_url(url):
    """
    Scrapes the main text content from the given URL.
    Returns the extracted text as a string.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Remove script and style elements
        for script in soup(["script", "style"]):
            script.decompose()
        # Get text and clean up whitespace
        text = soup.get_text(separator=' ', strip=True)
        lines = [line.strip() for line in text.splitlines() if line.strip()]
        return '\n'.join(lines)
    except Exception as e:
        return f"Error scraping URL: {e}"
if __name__ == '__main__':
    process_file()
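
# Usage (a sketch, assuming the script is saved as summarize.py and a .env
# file with OPENAI_API_KEY exists in the working directory; the flag names
# come from the click options above, so --help lists them all):
#   python summarize.py --help
#   python summarize.py --url https://news.ycombinator.com \
#       --about "This is a news website about technology." \
#       --output-file output.txt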