Skip to content

Instantly share code, notes, and snippets.

@bbelderbos
Created May 7, 2026 18:03
Show Gist options
  • Select an option

  • Save bbelderbos/af7097f98d5d0e4baee003367b472b56 to your computer and use it in GitHub Desktop.

Select an option

Save bbelderbos/af7097f98d5d0e4baee003367b472b56 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "anthropic>=0.40",
# "httpx>=0.27",
# "openai>=1.50",
# "python-decouple>=3.8",
# ]
# ///
# AI trend digest: fetch HN + Reddit concurrently, rank with an LLM, print markdown.
import asyncio
from collections.abc import Awaitable
from dataclasses import dataclass
from datetime import datetime, timezone
from enum import StrEnum
from typing import Protocol, assert_never
from anthropic import AsyncAnthropic
from anthropic.types import TextBlock
from decouple import config
from openai import AsyncOpenAI
import httpx
class Provider(StrEnum):
ANTHROPIC = "anthropic"
OPENAI = "openai"
# Which LLM backend ranks the digest. Read from the AI_PROVIDER setting and
# normalized (surrounding whitespace stripped, lowercased) so that values such
# as "OpenAI" or " anthropic " resolve to a Provider member instead of raising
# ValueError at import time. Falls back to Anthropic when the setting is absent.
AI_PROVIDER = Provider(
    str(config("AI_PROVIDER", default=Provider.ANTHROPIC)).strip().lower()
)
# User-Agent header sent with every Reddit request. No default is supplied,
# so this setting has to be configured,
# e.g. "script:ai-trend-digest:v0.1 (by /u/yourname)"
REDDIT_USER_AGENT = config("REDDIT_USER_AGENT")
# Search terms for Hacker News; each term is fetched with its own request.
HN_QUERIES = [
    "claude",
    "gpt",
    "llm agents",
    "anthropic",
    "openai",
    "mcp",
]

# Subreddits whose top posts of the day are collected.
SUBREDDITS = [
    "LocalLLaMA",
    "LLMDevs",
    "Rag",
    "AI_Agents",
]

# Posts with fewer points than this are dropped before ranking.
MIN_POINTS = 30

# Model names used when CLAUDE_MODEL / OPENAI_MODEL are not configured.
DEFAULT_ANTHROPIC_MODEL = "claude-haiku-4-5"
DEFAULT_OPENAI_MODEL = "gpt-4o-mini"
# System prompt handed to the ranker together with the rendered candidate
# list. It fixes the output contract: one markdown line per kept post in the
# form "<score>/10 <category> [title](url) — why", sorted by score, nothing
# below 7, and the literal "(no signal today)" when no post qualifies.
RANK_PROMPT = """You are curating an AI engineering daily digest. Each input line is a candidate
post from Hacker News or Reddit.
For each post, decide if it would actually make a working AI engineer more
effective tomorrow. DROP launches, hype, opinion pieces, "is AI a bubble"
takes, and recycled news. KEEP techniques, capabilities, tools, and research.
Output one line per kept post, exact format:
<score>/10 <category> [<title>](<url>) — <one-line why>
Categories: research | tool | technique | capability
Sort by score desc. Drop anything below 7. If nothing qualifies, output:
(no signal today)
"""
@dataclass(frozen=True)
class Post:
    """Immutable record for one candidate post from any source."""

    # Origin label: "hn" for Hacker News, "r/<subreddit>" for Reddit.
    source: str
    # Headline of the post; may be empty when the source supplies none.
    title: str
    # Link to the discussion page (HN item page or Reddit permalink).
    url: str
    # Popularity score: HN points or Reddit score.
    points: int
    # Creation time as a timezone-aware UTC datetime.
    created_at: datetime
async def fetch_hn(client: httpx.AsyncClient, query: str) -> list[Post]:
    """Fetch the most recent Hacker News stories matching *query*.

    Calls the Algolia ``search_by_date`` endpoint restricted to stories
    (up to 30 hits, 10 second timeout) and turns each hit into a ``Post``
    whose URL points at the HN discussion page.

    Args:
        client: Shared HTTP client used to issue the request.
        query: Search term passed to the HN search API.

    Returns:
        One ``Post`` per hit that carries an ``objectID``.

    Raises:
        Whatever ``raise_for_status`` raises on an error response, plus any
        transport error raised by the client.
    """
    resp = await client.get(
        "https://hn.algolia.com/api/v1/search_by_date",
        params={"query": query, "tags": "story", "hitsPerPage": 30},
        timeout=10,
    )
    resp.raise_for_status()

    stories: list[Post] = []
    for hit in resp.json().get("hits", []):
        item_id = hit.get("objectID")
        if not item_id:
            # Without an id there is no discussion page to link to.
            continue
        # Missing or null timestamps fall back to the Unix epoch.
        created = datetime.fromtimestamp(
            int(hit.get("created_at_i") or 0), tz=timezone.utc
        )
        stories.append(
            Post(
                source="hn",
                title=hit.get("title") or hit.get("story_title") or "",
                url=f"https://news.ycombinator.com/item?id={item_id}",
                points=int(hit.get("points") or 0),
                created_at=created,
            )
        )
    return stories
async def fetch_reddit(client: httpx.AsyncClient, sub: str) -> list[Post]:
    """Fetch today's top posts from the subreddit *sub*.

    Requests ``/r/<sub>/top.json`` for the last day (up to 30 posts,
    10 second timeout) and converts each non-stickied listing child into a
    ``Post`` whose URL is the Reddit permalink.

    Args:
        client: Shared HTTP client used to issue the request.
        sub: Subreddit name without the ``r/`` prefix.

    Returns:
        One ``Post`` per usable child. Stickied posts and children that lack
        a title or permalink are skipped.

    Raises:
        Whatever ``raise_for_status`` raises on an error response, plus any
        transport error raised by the client.
    """
    response = await client.get(
        f"https://www.reddit.com/r/{sub}/top.json",
        # raw_json=1 opts out of Reddit's legacy HTML-escaping of "<", ">"
        # and "&" in JSON bodies, so titles arrive as plain text instead of
        # containing "&amp;" and friends.
        params={"t": "day", "limit": 30, "raw_json": 1},
        headers={"User-Agent": REDDIT_USER_AGENT},
        timeout=10,
    )
    response.raise_for_status()

    posts: list[Post] = []
    for child in response.json().get("data", {}).get("children", []):
        data = child.get("data") or {}
        title = data.get("title")
        permalink = data.get("permalink")
        # Skip pinned posts, and skip malformed entries individually so one
        # bad child does not discard the whole subreddit.
        if data.get("stickied") or not title or not permalink:
            continue
        posts.append(
            Post(
                source=f"r/{sub}",
                title=title,
                url=f"https://reddit.com{permalink}",
                points=int(data.get("score") or 0),
                # Missing or null timestamps fall back to the Unix epoch.
                created_at=datetime.fromtimestamp(
                    float(data.get("created_utc") or 0), tz=timezone.utc
                ),
            )
        )
    return posts
async def safe_fetch(coro: Awaitable[list[Post]], label: str) -> list[Post]:
    """Await *coro*, turning any failure into an empty result.

    Keeps one failing source from aborting the whole digest: the error is
    reported on stdout and an empty list is returned in its place.

    Args:
        coro: Awaitable producing the posts of a single source.
        label: Short source name included in the failure message.

    Returns:
        The awaited posts, or ``[]`` if awaiting raised an ``Exception``.
    """
    try:
        return await coro
    except Exception as e:
        # Deliberately broad: this is the best-effort boundary for a source.
        # The exception class name is printed as well because some errors
        # (transport timeouts in particular) have an empty message, which
        # would otherwise leave the report blank.
        print(f"# fetch failed ({label}): {type(e).__name__}: {e}")
        return []
def dedupe_and_filter(posts: list[Post], min_points: int) -> list[Post]:
    """Return unique posts with at least *min_points*, highest score first.

    Args:
        posts: Candidate posts, possibly containing repeated URLs.
        min_points: Minimum score a post needs to be kept.

    Returns:
        Posts ordered by descending points. For a repeated URL only the
        first occurrence in that ordering is kept.
    """
    by_score = sorted(posts, key=lambda post: post.points, reverse=True)
    # A dict keyed by URL both remembers what was seen and keeps the order
    # in which posts were accepted.
    unique: dict[str, Post] = {}
    for post in by_score:
        if post.points >= min_points and post.url not in unique:
            unique[post.url] = post
    return list(unique.values())
def render(posts: list[Post]) -> str:
    """Format *posts* as a markdown section.

    Args:
        posts: Posts to list, in the order they should appear.

    Returns:
        A heading with the post count, a blank line, then one bullet per
        post with a linked title, its source and its points, joined by
        newlines.
    """
    heading = f"## Candidates ({len(posts)})"
    bullets = [
        f"- [{post.title}]({post.url}) — {post.source} · {post.points} pts"
        for post in posts
    ]
    return "\n".join([heading, "", *bullets])
class Ranker(Protocol):
    """Structural interface shared by the LLM-backed rankers.

    Any object with a ``model`` attribute and a matching async ``rank``
    method satisfies it; no inheritance is needed.
    """

    # Name of the model the ranker sends requests to.
    model: str

    async def rank(self, digest: str, system: str) -> str:
        """Return the model's text reply to *digest* under prompt *system*."""
        ...
class AnthropicRanker:
    """Ranker that sends the digest to an Anthropic model."""

    def __init__(self, api_key: str, model: str) -> None:
        """Create the async Anthropic client and remember the model name.

        Args:
            api_key: Anthropic API key.
            model: Name of the model to query.
        """
        self.client = AsyncAnthropic(api_key=api_key)
        self.model = model

    async def rank(self, digest: str, system: str) -> str:
        """Ask the model to rank *digest* and return its text reply.

        Args:
            digest: Rendered candidate list, sent as the user message.
            system: System prompt describing the ranking task.

        Returns:
            The text of all text blocks in the response, concatenated.

        Raises:
            RuntimeError: If the response contains no text block.
        """
        msg = await self.client.messages.create(
            model=self.model,
            max_tokens=2048,
            system=system,
            messages=[{"role": "user", "content": digest}],
        )
        # The response content is a list of blocks. Collect every text block
        # rather than indexing [0], which raised IndexError on an empty list
        # and rejected replies whose first block was not text.
        texts = [block.text for block in msg.content if isinstance(block, TextBlock)]
        if not texts:
            kinds = ", ".join(block.type for block in msg.content) or "no content"
            raise RuntimeError(f"expected text block, got {kinds}")
        return "".join(texts)
class OpenAIRanker:
    """Ranker that sends the digest to an OpenAI chat model."""

    def __init__(self, api_key: str, model: str) -> None:
        """Create the async OpenAI client and remember the model name.

        Args:
            api_key: OpenAI API key.
            model: Name of the chat model to query.
        """
        self.client = AsyncOpenAI(api_key=api_key)
        self.model = model

    async def rank(self, digest: str, system: str) -> str:
        """Ask the model to rank *digest* and return its text reply.

        Args:
            digest: Rendered candidate list, sent as the user message.
            system: System prompt describing the ranking task.

        Returns:
            The content of the first choice, or ``""`` when it has none.
        """
        resp = await self.client.chat.completions.create(
            model=self.model,
            # max_tokens is deprecated for Chat Completions and is rejected
            # by reasoning models; max_completion_tokens is the replacement
            # and keeps a user-configured OPENAI_MODEL working.
            max_completion_tokens=2048,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": digest},
            ],
        )
        return resp.choices[0].message.content or ""
def make_ranker(provider: Provider) -> Ranker:
    """Build the ranker for *provider* from configuration.

    The API key is read from ``ANTHROPIC_API_KEY`` or ``OPENAI_API_KEY``;
    the model name comes from ``CLAUDE_MODEL`` or ``OPENAI_MODEL`` and falls
    back to the module's default model for that provider.

    Args:
        provider: Which backend to construct.

    Returns:
        A ranker bound to the configured API key and model.
    """
    if provider == Provider.ANTHROPIC:
        api_key = config("ANTHROPIC_API_KEY")
        model = config("CLAUDE_MODEL", default=DEFAULT_ANTHROPIC_MODEL)
        return AnthropicRanker(api_key, model)

    if provider == Provider.OPENAI:
        api_key = config("OPENAI_API_KEY")
        model = config("OPENAI_MODEL", default=DEFAULT_OPENAI_MODEL)
        return OpenAIRanker(api_key, model)

    # Every Provider member is handled above; reaching this line means a
    # new member was added without a matching branch.
    assert_never(provider)
async def main() -> None:
    """Fetch candidates, print them, then print the LLM-ranked digest.

    All Hacker News queries and subreddits are fetched concurrently; a
    failing source contributes no posts instead of aborting the run. The
    surviving posts are deduplicated, filtered by ``MIN_POINTS`` and printed
    as markdown, followed by the ranker's output and a UTC run timestamp.
    """
    async with httpx.AsyncClient() as http:
        tasks = [safe_fetch(fetch_hn(http, q), f"hn:{q}") for q in HN_QUERIES]
        tasks += [safe_fetch(fetch_reddit(http, s), f"r/{s}") for s in SUBREDDITS]
        results = await asyncio.gather(*tasks)

    posts = [p for batch in results for p in batch]
    candidates = dedupe_and_filter(posts, MIN_POINTS)
    digest = render(candidates)
    print(digest, "\n")

    if candidates:
        ranker = make_ranker(AI_PROVIDER)
        print(f"## Ranked ({AI_PROVIDER}: {ranker.model})\n")
        print(await ranker.rank(digest, RANK_PROMPT))
    else:
        # Nothing survived filtering (or every fetch failed): do not require
        # an API key or pay for an LLM call on an empty list. Emit the same
        # marker the prompt asks the model to use when nothing qualifies.
        print("## Ranked\n")
        print("(no signal today)")

    print(f"\n_run: {datetime.now(timezone.utc).isoformat(timespec='seconds')}_")


if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment