Created
February 24, 2026 23:23
-
-
Save Arefu/f2998431c7a68393c8d2f334fd53c894 to your computer and use it in GitHub Desktop.
Free news for the masses - Bypass stupid paywalls easily with your own horribly written Python script!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from flask import Flask, request, Response, render_template_string | |
| import requests | |
| from playwright.sync_api import sync_playwright | |
| from bs4 import BeautifulSoup | |
| from urllib.parse import urljoin, urlparse | |
# The Flask application object; the route decorators below register on it.
app = Flask(__name__)

# Registrable domains the proxy is willing to fetch. A request is accepted
# when its hostname equals one of these entries or is a subdomain of one.
ALLOWED_DOMAINS = [
    "odt.co.nz",
    "nzherald.co.nz",
    "thepress.co.nz",
]
# URL prefixes that are never resolved against the site origin.
_SKIPPED_SCHEMES = ("javascript:", "data:", "mailto:", "tel:")

# Substrings of a <script src> that mark an NZ Herald script for removal.
_HERALD_SCRIPT_KEYWORDS = (
    "tailwind", "queryly.v4", "client", "appear", "image", "gpt", "react",
)


def _is_allowed_host(hostname):
    """Return True if *hostname* is an allowed domain or a subdomain of one."""
    return any(
        hostname == domain or hostname.endswith("." + domain)
        for domain in ALLOWED_DOMAINS
    )


def _absolutize_srcset(srcset, base_url):
    """Return *srcset* with every candidate URL resolved against *base_url*.

    Candidates are tokenised the way the HTML srcset grammar describes: a URL
    is a run of non-whitespace characters, so commas inside a URL (``data:``
    URIs, CDN paths such as ``w_300,h_200``) are not treated as separators.
    A naive ``split(",")`` would cut such URLs in half.
    """
    candidates = []
    pos = 0
    end = len(srcset)
    while pos < end:
        # Skip the whitespace and commas that separate candidates.
        while pos < end and (srcset[pos].isspace() or srcset[pos] == ","):
            pos += 1
        if pos >= end:
            break

        # The URL runs up to the next whitespace character.
        start = pos
        while pos < end and not srcset[pos].isspace():
            pos += 1
        url_part = srcset[start:pos]

        descriptors = []
        if url_part.endswith(","):
            # A trailing comma ends the candidate: it has no descriptors.
            url_part = url_part.rstrip(",")
        else:
            # Everything up to the next comma is the descriptor list.
            start = pos
            while pos < end and srcset[pos] != ",":
                pos += 1
            descriptors = srcset[start:pos].split()

        if not url_part.startswith(_SKIPPED_SCHEMES):
            url_part = urljoin(base_url, url_part)
        candidates.append(" ".join([url_part, *descriptors]))
    return ", ".join(candidates)


def _absolutize_urls(soup, base_url, attrs=("href", "src", "action"),
                     rewrite_srcset=False):
    """Rewrite URL-bearing attributes in *soup* to absolute URLs, in place.

    Args:
        soup: Parsed document to modify.
        base_url: ``scheme://netloc`` that relative URLs are joined onto.
        attrs: Attribute names whose value is a single URL.
        rewrite_srcset: Also rewrite each candidate of ``srcset`` attributes.
    """
    for tag in soup.find_all(True):
        for attr in attrs:
            if attr not in tag.attrs:
                continue
            value = tag[attr]
            if isinstance(value, str) and not value.startswith(_SKIPPED_SCHEMES):
                tag[attr] = urljoin(base_url, value)
        if rewrite_srcset and "srcset" in tag.attrs:
            tag["srcset"] = _absolutize_srcset(str(tag["srcset"]), base_url)


def _ensure_base_tag(soup, base_url):
    """Make sure *soup* has a <head> containing a <base> element."""
    head = soup.head
    if head is None:
        head = soup.new_tag("head")
        if soup.html:
            soup.html.insert(0, head)
        else:
            soup.insert(0, head)
    if not head.find("base"):
        head.insert(0, soup.new_tag("base", href=base_url))


def _render_with_browser(url):
    """Load *url* in headless Chromium and return the rendered page HTML."""
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
        try:
            context = browser.new_context(
                user_agent=("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                            "AppleWebKit/537.36 (KHTML, like Gecko) "
                            "Chrome/122.0.0.0 Safari/537.36"),
                locale="en-NZ",
            )
            page = context.new_page()
            page.set_default_timeout(20000)
            page.goto(url, wait_until="domcontentloaded")
            page.wait_for_selector("article, [itemprop='articleBody'], #root h1")
            return page.content()
        finally:
            # Closing the browser also closes its contexts and pages, and it
            # now happens even when navigation or the selector wait fails.
            browser.close()


@app.route('/proxy')
def proxy():
    """Fetch an allow-listed article URL and return its HTML, cleaned up.

    Reads the ``url`` query parameter. The URL must use http/https and its
    hostname must be an entry of ``ALLOWED_DOMAINS`` or a subdomain of one.

    Returns:
        400 if the parameter is missing, unparsable, or not http/https;
        403 if the hostname is not allow-listed;
        500 (plain text) if fetching or processing raises;
        otherwise the processed page as ``text/html``.
    """
    url = request.args.get('url')
    if not url:
        return "Missing URL parameter", 400

    # Validate domain
    try:
        parsed_url = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. an unbalanced "[").
        return "Invalid URL", 400
    if parsed_url.scheme not in ('http', 'https'):
        return "Invalid URL scheme", 400
    # hostname is None for URLs such as "http:///path"; treat that as an
    # empty host so it is refused instead of raising AttributeError.
    hostname = parsed_url.hostname or ""
    if not _is_allowed_host(hostname):
        return "URL domain not allowed", 403

    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

    #@#zephr-html-paywall
    try:
        #The Press
        if hostname == "www.thepress.co.nz":
            # This branch only uses the browser-rendered HTML, so the plain
            # HTTP fetch is skipped: its result was discarded, and an error
            # status there would abort before the browser ever ran.
            soup = BeautifulSoup(_render_with_browser(url), "html.parser")
            _ensure_base_tag(soup, base_url)
            _absolutize_urls(soup, base_url, rewrite_srcset=True)
            return Response(str(soup), content_type="text/html")

        res = requests.get(url, timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        #Otago Daily Times
        if hostname == "www.odt.co.nz":
            for script in soup.find_all("script"):
                script.decompose()
            for div in soup.select('div.field-item.even'):
                if div.get('property') == 'content:encoded':
                    del div['property']

        #NZ Herald
        if hostname == "www.nzherald.co.nz":
            for script in soup.find_all("script"):
                src = (script.get("src") or "").lower()
                if any(keyword in src for keyword in _HERALD_SCRIPT_KEYWORDS):
                    script.decompose()
            div = soup.select_one("div.article-paywall-hide")
            if div is not None:
                # Strip the attributes from everything inside the hidden
                # article container, then the container's own class.
                for tag in div.find_all(True):
                    for attr in ('class', 'style', 'data-test-ui'):
                        tag.attrs.pop(attr, None)
                del div.attrs['class']
            # NOTE(review): as written this replace() substitutes "&" with
            # itself, so the line only re-parses the document. It looks like
            # an entity un-escape was intended -- confirm before changing.
            soup = BeautifulSoup(str(soup).replace("&", "&"), "html.parser")

        #Fix all Relative URLs.
        _absolutize_urls(soup, base_url, attrs=("href", "src", "action", "img"))
        return Response(str(soup), content_type='text/html')
    except Exception as e:
        # Sent as plain text so exception text (which can echo parts of the
        # requested URL) is never interpreted as HTML by the browser.
        return Response(
            f"Error fetching or processing URL: {e}",
            status=500,
            mimetype="text/plain",
        )
# Landing page: a single form that submits the article URL to /proxy via GET.
_INDEX_TEMPLATE = '''
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>ODT Free</title>
  <!-- Bootstrap CSS CDN -->
  <link
    href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css"
    rel="stylesheet"
  />
</head>
<body class="bg-light">
  <div class="container py-5">
    <h1 class="mb-4 text-center">ODT Free</h1>
    <form id="urlForm" class="mx-auto" style="max-width: 480px;" action="/proxy" method="get">
      <div class="mb-3">
        <p>Otago Daily Times has full support, the web-page <i>should</i> appear mostly normal, minus the pay wall. NZ Herald has partial support, currently images for articles will not display.</p>
        <label for="urlInput" class="form-label">Enter a premium link from <a href="https://odt.co.nz" target="_blank">odt.co.nz</a>, <a href="https://nzherald.co.nz" target="_blank">nzherald.co.nz</a>, or <a href="https://thepress.co.nz" target="_blank">thepress.co.nz</a></label>
        <input
          type="url"
          class="form-control"
          id="urlInput"
          name="url"
          placeholder="https://www.odt.co.nz/news/dunedin/far-running-company-they-were-awful"
          required
        />
      </div>
      <button type="submit" class="btn btn-primary w-100">Submit</button>
    </form>
  </div>
  <!-- Bootstrap JS Bundle with Popper (optional) -->
  <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
'''


@app.route('/')
def index():
    """Serve the landing page containing the URL submission form.

    The form's ``url`` field is sent as a GET query parameter to ``/proxy``.
    """
    return render_template_string(_INDEX_TEMPLATE)
# Script entry point: start the Flask server with debug mode off. Nothing is
# started when this module is merely imported.
if __name__ == "__main__":
    app.run(debug=False)
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This was vibe coded.
I don't do Python, I don't do paying for stuff either.
It was written originally to bypass the paywall on ODT, then expanded to Herald (Yeah, I still hold a grudge), and most recently The Press.
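You need the following Python packages installed (these are what the script imports): Flask, requests, Playwright (including its Chromium browser), and beautifulsoup4.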
Or at the least, ensure feature sets are the same.
Then, throw your link into the box, and it will handle the rest, so long as they don't update their selectors or JS / CSS trickery. The Cheeto lock meme (https://knowyourmeme.com/memes/cheeto-lock) is what I would use here to explain why they've done a bad, bad thing.