Created
August 28, 2025 04:00
-
-
Save JupyterJones/fb8f3b248006eaf30c7c350039908857 to your computer and use it in GitHub Desktop.
Step into the future of news with our AI-powered News Narrator!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flask import Flask, request, render_template_string, redirect, url_for | |
import sqlite3 | |
import requests | |
from bs4 import BeautifulSoup | |
from datetime import datetime | |
import re, os | |
from icecream import ic | |
import chromadb | |
from chromadb.utils import embedding_functions | |
# ==== CONFIG ====
# --- Local storage ---
DB_FILE = "news.db"
NARRATION_DIR = "static/narrations"
# Make sure the narration folder exists before any request tries to write to it.
os.makedirs(NARRATION_DIR, exist_ok=True)

# --- Local model services ---
TTS_API_URL = "http://localhost:8880/v1/audio/speech"  # endpoint generate_tts() posts to
ASSISTANT_VOICE = "af_sky"  # sent as the "voice" field of the TTS payload
PHI_URL = "http://localhost:11434/api/generate"  # endpoint summarize_with_phi() posts to
MAX_RESULTS = 12  # top chunks for Chroma query

# --- Outgoing HTTP ---
# Browser-like User-Agent sent with the DuckDuckGo search request.
_BROWSER_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/115.0.0.0 Safari/537.36"
)
HEADERS = {"User-Agent": _BROWSER_USER_AGENT}

app = Flask(__name__)
# ==== DATABASE HELPERS ==== | |
def init_db():
    """Create the ``articles`` and ``narrations`` tables if they are missing.

    Opens the SQLite database at ``DB_FILE``, issues both
    ``CREATE TABLE IF NOT EXISTS`` statements in one transaction, and always
    closes the connection, even when a statement fails.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        # ``with conn`` commits on success and rolls back on error; it does
        # not close the connection, hence the surrounding try/finally.
        with conn:
            conn.execute("""CREATE TABLE IF NOT EXISTS articles
                (id INTEGER PRIMARY KEY, query TEXT, url TEXT, html TEXT, date TEXT)""")
            conn.execute("""CREATE TABLE IF NOT EXISTS narrations
                (id INTEGER PRIMARY KEY, query TEXT, summary TEXT,
                text_file TEXT, audio_file TEXT, date TEXT)""")
    finally:
        conn.close()
    ic("Database initialized")
def save_article(query, url, html):
    """Store one fetched article in the ``articles`` table.

    Args:
        query: Search query that produced this article.
        url: Address the article was downloaded from.
        html: Raw page body as text.

    The row is stamped with the current local time in ISO-8601 format.

    Raises:
        sqlite3.Error: If the insert fails; the transaction is rolled back
            and the connection is still closed.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        # Commit on success, roll back on failure; closing happens in finally.
        with conn:
            conn.execute(
                "INSERT INTO articles (query,url,html,date) VALUES (?,?,?,?)",
                (query, url, html, datetime.now().isoformat()),
            )
    finally:
        conn.close()
    ic(f"Saved article: {url}")
def save_narration(query, summary, text_file, audio_file):
    """Store one generated narration in the ``narrations`` table.

    Args:
        query: Search query the narration was generated for.
        summary: Narration text.
        text_file: File name of the saved narration text.
        audio_file: File name of the saved audio, or ``None`` when no audio
            was produced.

    The row is stamped with the current local time in ISO-8601 format.

    Raises:
        sqlite3.Error: If the insert fails; the transaction is rolled back
            and the connection is still closed.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        # Commit on success, roll back on failure; closing happens in finally.
        with conn:
            conn.execute(
                "INSERT INTO narrations (query,summary,text_file,audio_file,date) VALUES (?,?,?,?,?)",
                (query, summary, text_file, audio_file, datetime.now().isoformat()),
            )
    finally:
        conn.close()
    ic(f"Saved narration: {text_file}, {audio_file}")
def get_narrations(limit=3):
    """Return the most recent narrations, newest first.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        A list of ``(id, query, summary, text_file, audio_file, date)``
        tuples ordered by descending ``id``.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        cursor = conn.execute(
            "SELECT id,query,summary,text_file,audio_file,date FROM narrations ORDER BY id DESC LIMIT ?",
            (limit,),
        )
        return cursor.fetchall()
    finally:
        # Release the connection even if the SELECT raised.
        conn.close()
def get_older_narrations(offset=3, limit=10):
    """Return a page of narrations that follow the newest ``offset`` rows.

    Args:
        offset: Number of newest rows to skip.
        limit: Maximum number of rows to return after skipping.

    Returns:
        A list of ``(id, query, summary, text_file, audio_file, date)``
        tuples ordered by descending ``id``.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        cursor = conn.execute(
            "SELECT id,query,summary,text_file,audio_file,date FROM narrations ORDER BY id DESC LIMIT ? OFFSET ?",
            (limit, offset),
        )
        return cursor.fetchall()
    finally:
        # Release the connection even if the SELECT raised.
        conn.close()
# ==== UTILITIES ==== | |
def safe_filename(text):
    """Derive a file-name stem from the beginning of *text*.

    Everything except ASCII letters, digits and spaces is dropped, the first
    25 remaining characters are kept, and the words in that slice are joined
    with underscores. If no word survives, the stem is ``narration``. The
    current local time (``YYYYMMDD_HHMM``) is appended after an underscore.

    Args:
        text: Source text, e.g. the narration itself.

    Returns:
        A string of the form ``<stem>_<YYYYMMDD_HHMM>`` with no extension.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M")
    cleaned = re.sub(r'[^a-zA-Z0-9 ]', '', text)
    words = cleaned[:25].split()
    stem = "narration"
    if words:
        stem = "_".join(words)
    return f"{stem}_{stamp}"
def duckduckgo_search(query, max_results=5):
    """Search DuckDuckGo's HTML endpoint and download the linked pages.

    Each successfully downloaded page is stored through ``save_article`` and
    collected for the caller.

    Args:
        query: Search terms to submit.
        max_results: Maximum number of pages to download. A value of zero or
            less downloads nothing.

    Returns:
        A list of ``(url, html_text)`` tuples, at most ``max_results`` long.
        The list is empty when the search request itself fails.
    """
    url = "https://html.duckduckgo.com/html/"
    params = {"q": query}
    try:
        resp = requests.post(url, data=params, headers=HEADERS, timeout=15)
        resp.raise_for_status()
    except Exception as e:
        ic(f"Error fetching search results: {e}")
        return []

    soup = BeautifulSoup(resp.text, "html.parser")
    links = []
    for div in soup.select("div.result"):
        # Check the limit before fetching so that no extra page is downloaded
        # (the limit used to be tested only after a fetch).
        if len(links) >= max_results:
            break
        a = div.find("a", href=True)
        if not a or not a["href"].startswith("http"):
            continue
        href = a["href"]
        try:
            # Send the same browser-like headers as the search request.
            page = requests.get(href, headers=HEADERS, timeout=10)
            # Skip 4xx/5xx responses instead of storing their error pages.
            page.raise_for_status()
            save_article(query, href, page.text)
            links.append((href, page.text))
        except Exception as e:
            # Deliberate best effort: one bad page must not abort the search.
            ic(f"Failed to fetch {href}: {e}")

    ic(f"Found {len(links)} articles for query: {query}")
    return links
def build_chroma(articles):
    """Build a fresh ``temp_news`` Chroma collection from downloaded pages.

    Any existing ``temp_news`` collection on the client is removed first.
    Each page's HTML is reduced to plain text; pages with 50 characters of
    text or fewer are skipped.

    Args:
        articles: Iterable of ``(url, html_content)`` pairs.

    Returns:
        The newly created collection. Each document is one page's text, its
        metadata is ``{"url": url}``, and its id is the page's position in
        ``articles`` as a string.
    """
    client = chromadb.Client()
    embedding_func = embedding_functions.DefaultEmbeddingFunction()

    # --- Remove old temp collection if it exists ---
    try:
        client.delete_collection("temp_news")
        ic("Deleted existing temp_news collection")
    except Exception as e:
        # Expected on the first run, when there is nothing to delete. Kept
        # broad because the exception type is not pinned down here, but
        # logged rather than silently dropped.
        ic(f"No temp_news collection removed: {e}")

    collection = client.create_collection("temp_news", embedding_function=embedding_func)

    documents, metadatas, ids = [], [], []
    for idx, (url, html_content) in enumerate(articles):
        text = BeautifulSoup(html_content, "html.parser").get_text(" ", strip=True)
        if len(text) > 50:
            documents.append(text)
            metadatas.append({"url": url})
            ids.append(str(idx))

    # Add everything in one call; skip the call when nothing qualified.
    if documents:
        collection.add(documents=documents, metadatas=metadatas, ids=ids)

    # Report how many pages were actually indexed, not how many were offered.
    ic(f"Chroma collection built with {len(documents)} articles")
    return collection
def summarize_with_phi(query, collection):
    """Turn the excerpts most relevant to *query* into a narration via Phi3.

    Args:
        query: The user's search terms, used as the Chroma query text.
        collection: Chroma collection holding the article texts, each with a
            ``url`` metadata entry.

    Returns:
        The narration text on success. Returns ``"⚠ No results found."`` when
        the collection is empty or the query yields no documents, and
        ``"⚠ Error generating narration."`` when the request to the model
        fails.
    """
    # Never ask for more neighbours than the collection holds; an empty
    # collection cannot be queried at all.
    available = collection.count()
    if not available:
        return "⚠ No results found."

    # Collect text chunks from Chroma
    results = collection.query(query_texts=[query], n_results=min(MAX_RESULTS, available))
    chunks = results.get("documents", [[]])[0]
    metadatas = results.get("metadatas", [[]])[0]
    if not chunks:
        return "⚠ No results found."

    # Prefix each excerpt with its source URL.
    raw_news_text = "\n\n".join(
        f"[{meta.get('url','No URL')}] {chunk}" for chunk, meta in zip(chunks, metadatas)
    )
    prompt = (
        f"You are a professional YouTube news narrator.\n"
        f"Write an engaging narration from these news excerpts:\n{raw_news_text}\n\n"
        f"Final narration:"
    )
    payload = {
        "model": "phi3:latest",
        "prompt": prompt,
        "stream": False
    }
    try:
        r = requests.post(PHI_URL, json=payload, timeout=560)
        r.raise_for_status()
        response_data = r.json()
        narration = response_data.get("response", "⚠ Error: no response from Phi3").strip()
        ic("Narration generated via Phi3")
        return narration
    except Exception as e:
        # Boundary with an external service: log and return a fallback text.
        ic(f"❌ Error contacting Phi3 API: {e}")
        return "⚠ Error generating narration."
def generate_tts(text, filename_base):
    """Save the narration text and request a spoken version from the TTS API.

    Args:
        text: Narration to save and synthesize.
        filename_base: File-name stem (no extension) used for both output
            files inside ``NARRATION_DIR``.

    Returns:
        ``(txt_path, mp3_path)``. ``mp3_path`` is ``None`` when synthesis or
        saving the audio failed; the text file is written either way.

    Raises:
        OSError: If the text file cannot be written.
    """
    txt_path = os.path.join(NARRATION_DIR, filename_base + ".txt")
    mp3_path = os.path.join(NARRATION_DIR, filename_base + ".mp3")

    # Explicit UTF-8: narrations can contain characters (such as "⚠") that
    # the platform's default codec may be unable to encode.
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(text)

    payload = {"input": text, "voice": ASSISTANT_VOICE}
    try:
        resp = requests.post(TTS_API_URL, json=payload, timeout=500)
        resp.raise_for_status()
        with open(mp3_path, "wb") as f:
            f.write(resp.content)
        ic(f"TTS generated: {mp3_path}")
        return txt_path, mp3_path
    except Exception as e:
        # Audio is optional: log the failure and let the caller continue.
        ic(f"TTS generation failed: {e}")
        return txt_path, None
# ==== FLASK ROUTES ==== | |
@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the home page and handle new narration requests.

    GET renders the search form and the rows returned by
    ``get_narrations()``.

    POST reads the ``query`` form field, searches for articles, indexes them,
    generates a narration, synthesizes audio, stores the result, and then
    redirects back to this page. A missing or blank query redirects without
    doing any work. When the search yields no articles, a plain warning
    message is returned instead.
    """
    if request.method == "POST":
        # ``.get`` keeps a missing field on the same path as an empty one,
        # instead of aborting the request with a 400.
        query = request.form.get("query", "").strip()
        if not query:
            return redirect(url_for("index"))
        ic(f"Query submitted: {query}")

        articles = duckduckgo_search(query)
        if not articles:
            return "⚠ No results found for this query. Try manual refresh later."

        collection = build_chroma(articles)
        summary = summarize_with_phi(query, collection)

        # Only the narration text goes to TTS; the query and date are stored
        # separately in the database row.
        base = safe_filename(summary)
        txt_path, mp3_path = generate_tts(summary, base)
        save_narration(
            query,
            summary,
            os.path.basename(txt_path),
            os.path.basename(mp3_path) if mp3_path else None,
        )
        return redirect(url_for("index"))

    narrations = get_narrations()
    return render_template_string("""
    <h1 style="color:orange;font-size:5vw;">FlaskArchitect News Assistant</h1>
    <form method="post">
    <input name="query" placeholder="Enter your query" style="width:300px">
    <button type="submit">Search / Refresh</button>
    </form>
    <h2>Latest Narrations</h2>
    {% for n in narrations %}
    <div style="margin-bottom:20px;">
    <b>{{n[1]}}</b> ({{n[5]}})<br>
    <a href="{{ url_for('static', filename='narrations/' + n[3]) }}" target="_blank">View Text</a> |
    {% if n[4] %}
    <audio controls src="{{ url_for('static', filename='narrations/' + n[4]) }}"></audio>
    {% endif %}
    <p>{{n[2]}}</p>
    </div>
    {% endfor %}
    <a href="{{ url_for('older_versions') }}">Older Versions</a>
    """, narrations=narrations)
@app.route("/older_versions")
def older_versions():
    """Render the archive page listing the rows from ``get_older_narrations()``.

    Each entry shows the query, the date, a link to the saved text, an audio
    player when an audio file was recorded, and the narration itself.
    """
    page = """
    <h1>Older Versions</h1>
    {% for n in narrations %}
    <div style="margin-bottom:20px;">
    <b>{{n[1]}}</b> ({{n[5]}})<br>
    <a href="{{ url_for('static', filename='narrations/' + n[3]) }}" target="_blank">View Text</a> |
    {% if n[4] %}
    <audio controls src="{{ url_for('static', filename='narrations/' + n[4]) }}"></audio>
    {% endif %}
    <p>{{n[2]}}</p>
    </div>
    {% endfor %}
    <a href="{{ url_for('index') }}">Back to Latest</a>
    """
    archived_rows = get_older_narrations()
    return render_template_string(page, narrations=archived_rows)
if __name__ == "__main__":
    init_db()
    # Debug mode enables Werkzeug's interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. It is therefore
    # opt-in via FLASK_DEBUG, and when enabled the server binds to localhost
    # only instead of every interface.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    bind_host = "127.0.0.1" if debug_enabled else "0.0.0.0"
    app.run(host=bind_host, port=5000, debug=debug_enabled)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Title: “AI News Narrator: Turning Web Articles into Spoken Stories!” | |
Description: | |
Step into the future of news with our AI-powered News Narrator! 📰✨ | |
This Flask web app lets you enter any news query and instantly generates: | |
Automated web searches using DuckDuckGo | |
Article collection, with the most relevant excerpts retrieved using ChromaDB embeddings
Professional narration text generated by the powerful Phi3 AI model | |
High-quality TTS audio of the narration for immediate listening | |
Gone are the days of reading endless articles — now you can hear the story in a natural, human-like voice, perfect for catching up on news while on the go. | |
💡 Features: | |
Dynamic news collection and AI summarization | |
Clean TTS output without distracting URLs or HTML | |
Database storage of narrations for quick playback | |
Latest and older versions easily accessible | |
Fully self-contained, open-source Flask application | |
Whether you want AI-generated storytelling for current events, science news, or any topic you choose, this app turns text into audio instantly. | |
🔗 Explore, listen, and experience the future of news today! |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment