Skip to content

Instantly share code, notes, and snippets.

@Arefu
Created February 24, 2026 23:23
Show Gist options
  • Select an option

  • Save Arefu/f2998431c7a68393c8d2f334fd53c894 to your computer and use it in GitHub Desktop.

Select an option

Save Arefu/f2998431c7a68393c8d2f334fd53c894 to your computer and use it in GitHub Desktop.
Free news for the masses - Bypass stupid paywalls easily with your own horribly written Python script!
from flask import Flask, request, Response, render_template_string
import requests
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

# Single-module Flask app that proxies and de-paywalls news articles.
app = Flask(__name__)

# Domains the /proxy endpoint will fetch; exact match or any subdomain
# (e.g. "www.odt.co.nz") is accepted. Everything else gets a 403.
ALLOWED_DOMAINS = [ "odt.co.nz", "nzherald.co.nz", "thepress.co.nz"]
# Browser-like User-Agent shared by the requests fetch and Playwright context;
# the default "python-requests/x.y" UA is commonly blocked by news sites.
_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/122.0.0.0 Safari/537.36"
)

# URL-ish attribute values that must never be run through urljoin.
_SKIP_SCHEMES = ("javascript:", "data:", "mailto:", "tel:")


def _base_url(parsed_url):
    """Scheme + authority of the requested URL, e.g. 'https://www.odt.co.nz'."""
    return f"{parsed_url.scheme}://{parsed_url.netloc}"


def _absolutize(soup, base_url):
    """Rewrite href/src/action and srcset attributes to absolute URLs.

    Skips non-string values (multi-valued attrs) and javascript:/data:/
    mailto:/tel: pseudo-URLs, which urljoin would mangle.
    """
    for tag in soup.find_all(True):
        for attr in ("href", "src", "action"):
            val = tag.attrs.get(attr)
            if isinstance(val, str) and not val.startswith(_SKIP_SCHEMES):
                tag[attr] = urljoin(base_url, val)
        if "srcset" in tag.attrs:
            # srcset is "url [descriptor], url [descriptor], ..." — rewrite
            # each candidate URL, keeping its width/density descriptor.
            candidates = []
            for candidate in str(tag["srcset"]).split(","):
                parts = candidate.strip().split()
                if not parts:
                    continue
                url_part, rest = parts[0], parts[1:]
                if not url_part.startswith(_SKIP_SCHEMES):
                    url_part = urljoin(base_url, url_part)
                candidates.append(" ".join([url_part] + rest))
            tag["srcset"] = ", ".join(candidates)


def _render_thepress(url, base_url):
    """Render a JS-built page in headless Chromium and return cleaned HTML.

    The Press is a client-side app, so a plain HTTP fetch returns an empty
    shell; we wait for the article body to hydrate before capturing.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
        try:
            context = browser.new_context(user_agent=_UA, locale="en-NZ")
            page = context.new_page()
            page.set_default_timeout(20000)
            page.goto(url, wait_until="domcontentloaded")
            # Wait until the article content exists post-hydration.
            page.wait_for_selector("article, [itemprop='articleBody'], #root h1")
            rendered_html = page.content()
            context.close()
        finally:
            # Always release the browser, even if rendering raised.
            browser.close()
    soup = BeautifulSoup(rendered_html, "html.parser")
    # Ensure a <head> with a <base> tag so any URL we miss still resolves.
    head = soup.head or soup.new_tag("head")
    if not soup.head:
        (soup.html or soup).insert(0, head)
    if not head.find("base"):
        head.insert(0, soup.new_tag("base", href=base_url))
    _absolutize(soup, base_url)
    return str(soup)


def _strip_odt(soup):
    """ODT: drop every <script> (the paywall is script-driven) and remove the
    content:encoded marker that hides the article body."""
    for script in soup.find_all("script"):
        script.decompose()
    for div in soup.select('div.field-item.even'):
        if div.get('property') == 'content:encoded':
            del div['property']


def _strip_herald(soup):
    """NZ Herald: drop paywall-related scripts and unhide the article body."""
    blocked = ("tailwind", "queryly.v4", "client", "appear", "image", "gpt", "react")
    for script in soup.find_all("script"):
        src = (script.get("src") or "").lower()
        if any(keyword in src for keyword in blocked):
            script.decompose()
    div = soup.select_one("div.article-paywall-hide")
    if div:
        # Strip the classes/styles that keep the paywalled text hidden.
        for tag in div.find_all(True):
            for attr in ("class", "style", "data-test-ui"):
                tag.attrs.pop(attr, None)
        div.attrs.pop("class", None)


@app.route('/proxy')
def proxy():
    """Fetch an allow-listed news article, strip its paywall markup/scripts,
    and return the cleaned HTML.

    Query params:
        url: absolute http(s) URL whose host is on ALLOWED_DOMAINS (or a
             subdomain of one).

    Returns:
        200 with text/html on success; 400 for a missing URL or bad scheme;
        403 for a disallowed domain; 500 if fetching/processing fails.
    """
    url = request.args.get('url')
    if not url:
        return "Missing URL parameter", 400

    # Validate scheme and domain before touching the network.
    parsed_url = urlparse(url)
    if parsed_url.scheme not in ('http', 'https'):
        return "Invalid URL scheme", 400
    hostname = parsed_url.hostname or ""  # hostname can be None for odd URLs
    if hostname not in ALLOWED_DOMAINS and not any(
        hostname.endswith("." + d) for d in ALLOWED_DOMAINS
    ):
        return "URL domain not allowed", 403

    base_url = _base_url(parsed_url)
    try:
        # The Press needs a real browser; go straight to Playwright instead of
        # making (and discarding) a requests fetch first, as the original did.
        if hostname == "www.thepress.co.nz":
            return Response(_render_thepress(url, base_url), content_type="text/html")

        res = requests.get(url, headers={"User-Agent": _UA}, timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        if hostname == "www.odt.co.nz":
            _strip_odt(soup)
        elif hostname == "www.nzherald.co.nz":
            _strip_herald(soup)

        # NOTE(review): the original re-parsed via str(soup).replace("&", "&"),
        # a no-op (possibly a garbled "&amp;" unescape) — removed.
        # Fix all relative URLs so assets load through the origin site.
        _absolutize(soup, base_url)
        return Response(str(soup), content_type='text/html')
    except Exception as e:
        # Broad by design: any fetch/parse failure becomes a 500 for the caller.
        return f"Error fetching or processing URL: {e}", 500
@app.route('/')
def index():
    """Serve the landing page: a single form that submits a premium article
    URL to /proxy via GET.

    Fixes the user-facing typo "NZ Herlad" -> "NZ Herald"; markup otherwise
    unchanged.
    """
    return render_template_string('''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>ODT Free</title>
<!-- Bootstrap CSS CDN -->
<link
href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css"
rel="stylesheet"
/>
</head>
<body class="bg-light">
<div class="container py-5">
<h1 class="mb-4 text-center">ODT Free</h1>
<form id="urlForm" class="mx-auto" style="max-width: 480px;" action="/proxy" method="get">
<div class="mb-3">
<p>Otago Daily Times has full support, the web-page <i>should</i> appear mostly normal, minus the pay wall, NZ Herald has partial support, currently images for articles will not display.</p>
<label for="urlInput" class="form-label">Enter a premium link from <a href="https://odt.co.nz" target="_blank">odt.co.nz</a>, <a href="https://nzherald.co.nz" target="_blank">nzherald.co.nz</a>, or <a href="https://thepress.co.nz" target="_blank">thepress.co.nz</a></label>
<input
type="url"
class="form-control"
id="urlInput"
name="url"
placeholder="https://www.odt.co.nz/news/dunedin/far-running-company-they-were-awful"
required
/>
</div>
<button type="submit" class="btn btn-primary w-100">Submit</button>
</form>
</div>
<!-- Bootstrap JS Bundle with Popper (optional) -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
''')
if __name__ == "__main__":
    # Local/dev entry point; debug stays off so tracebacks aren't exposed.
    app.run(debug=False)
@Arefu
Copy link
Author

Arefu commented Feb 24, 2026

This was vibe coded.
I don't do Python, I don't do paying for stuff either.

It was written originally to bypass the paywall on ODT, then expanded to Herald (Yeah, I still hold a grudge), and most recently The Press.

You need the following installed

beautifulsoup4==4.14.3
blinker==1.9.0
bs4==0.0.2
certifi==2026.1.4
charset-normalizer==3.4.4
click==8.3.1
Flask==3.1.2
greenlet==3.3.2
idna==3.11
itsdangerous==2.2.0
Jinja2==3.1.6
MarkupSafe==3.0.3
playwright==1.58.0
pyee==13.0.1
requests==2.32.5
soupsieve==2.8.3
typing_extensions==4.15.0
urllib3==2.6.3
Werkzeug==3.1.5

Or at the least, ensure feature sets are the same.

Then throw your link into the box, and it will handle the rest — so long as they don't change their selectors or their JS/CSS trickery. The "cheeto lock" meme (https://knowyourmeme.com/memes/cheeto-lock) is what I would use here to explain why they've done a bad, bad thing.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment