Arefu · February 24, 2026 23:23 · Arefu · Feb 24, 2026
diff --git a/ODT.py b/ODT.py
 from flask import Flask, request, Response, render_template_string
 import requests
 from playwright.sync_api import sync_playwright
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin, urlparse

 # WSGI application object; the route decorators in this module register on it.
 app = Flask(__name__)

 # Registrable domains the proxy may fetch from. The /proxy handler accepts
 # these hosts and any of their subdomains.
 ALLOWED_DOMAINS = [
     "odt.co.nz",
     "nzherald.co.nz",
     "thepress.co.nz",
 ]

 # Schemes that do not name a fetchable location; such values are left as-is.
 _NON_REWRITABLE_SCHEMES = ("javascript:", "data:", "mailto:", "tel:")

 # Substrings of <script src> values that are stripped from NZ Herald pages.
 _HERALD_SCRIPT_MARKERS = (
     "tailwind", "queryly.v4", "client", "appear", "image", "gpt", "react",
 )


 def _is_site(hostname, domain):
     """Return True if *hostname* is *domain* itself or its ``www.`` host."""
     return hostname in (domain, "www." + domain)


 def _absolutize_urls(soup, document_url):
     """Resolve relative URL attributes in *soup* against *document_url*, in place.

     Rewrites ``href``, ``src`` and ``action`` plus every candidate URL in
     ``srcset``. Non-string values and ``javascript:``/``data:``/``mailto:``/
     ``tel:`` values are left untouched.
     """
     for tag in soup.find_all(True):
         for attr in ("href", "src", "action"):
             value = tag.attrs.get(attr)
             if isinstance(value, str) and not value.startswith(_NON_REWRITABLE_SCHEMES):
                 tag[attr] = urljoin(document_url, value)

         if "srcset" in tag.attrs:
             # NOTE: candidates are split on "," so a URL that itself contains
             # a comma is not handled correctly.
             candidates = []
             for candidate in str(tag["srcset"]).split(","):
                 parts = candidate.split()
                 if not parts:
                     continue
                 if not parts[0].startswith(_NON_REWRITABLE_SCHEMES):
                     parts[0] = urljoin(document_url, parts[0])
                 candidates.append(" ".join(parts))
             tag["srcset"] = ", ".join(candidates)


 def _ensure_base_tag(soup, href):
     """Insert ``<base href=...>`` (and a ``<head>`` if missing) unless one exists."""
     head = soup.head
     if head is None:
         head = soup.new_tag("head")
         (soup.html or soup).insert(0, head)
     if head.find("base") is None:
         head.insert(0, soup.new_tag("base", href=href))


 def _render_with_browser(url):
     """Load *url* in headless Chromium and return the rendered HTML."""
     with sync_playwright() as p:
         browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
         try:
             context = browser.new_context(
                 user_agent=("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                             "AppleWebKit/537.36 (KHTML, like Gecko) "
                             "Chrome/122.0.0.0 Safari/537.36"),
                 locale="en-NZ",
             )
             page = context.new_page()
             page.set_default_timeout(20000)
             page.goto(url, wait_until="domcontentloaded")
             page.wait_for_selector("article, [itemprop='articleBody'], #root h1")
             return page.content()
         finally:
             # Runs on timeouts/navigation errors too, so Chromium never
             # outlives the request; closing the browser closes its pages.
             browser.close()


 def _clean_odt(soup):
     """Drop every script and the ``property="content:encoded"`` marker, in place."""
     for script in soup.find_all("script"):
         script.decompose()
     for div in soup.select('div.field-item.even'):
         if div.get('property') == 'content:encoded':
             del div['property']


 def _clean_herald(soup):
     """Strip selected scripts and the paywall wrapper's styling; return a new soup."""
     for script in soup.find_all("script"):
         src = (script.get("src") or "").lower()
         if any(marker in src for marker in _HERALD_SCRIPT_MARKERS):
             script.decompose()

     wrapper = soup.select_one("div.article-paywall-hide")
     if wrapper is not None:
         for tag in wrapper.find_all(True):
             for attr in ("class", "style", "data-test-ui"):
                 tag.attrs.pop(attr, None)
         # Only touch the wrapper when it was actually found.
         wrapper.attrs.pop("class", None)

     # Un-escape "&amp;" across the serialized document and re-parse it.
     return BeautifulSoup(str(soup).replace("&amp;", "&"), "html.parser")


 @app.route('/proxy')
 def proxy():
     """Fetch a page from an allowed news site and return it after clean-up.

     Query parameters:
         url: absolute http(s) URL whose host is one of ``ALLOWED_DOMAINS``
             or a subdomain of one.

     Returns:
         400 when ``url`` is missing, malformed or not http(s); 403 when the
         host is not allowed; a plain-text 500 when fetching or processing
         fails; otherwise the processed page as ``text/html``.
     """
     url = request.args.get('url')
     if not url:
         return "Missing URL parameter", 400

     # Validate scheme and domain before any network access.
     try:
         parsed_url = urlparse(url)
     except ValueError:
         # e.g. an unterminated IPv6 literal such as "http://[::1"
         return "Invalid URL", 400
     if parsed_url.scheme not in ('http', 'https'):
         return "Invalid URL scheme", 400

     hostname = parsed_url.hostname
     # hostname is None for URLs without a host (e.g. "http:///path").
     if not hostname or not any(
         hostname == d or hostname.endswith("." + d) for d in ALLOWED_DOMAINS
     ):
         return "URL domain not allowed", 403

     # Author's original marker, kept verbatim: #@#zephr-html-paywall
     # NOTE(review): requests and the browser follow redirects, so the final
     # host is not re-checked against ALLOWED_DOMAINS.
     try:
         # The Press: rendered in a browser, so no plain HTTP fetch is made.
         if _is_site(hostname, "thepress.co.nz"):
             soup = BeautifulSoup(_render_with_browser(url), "html.parser")
             _ensure_base_tag(soup, url)
             _absolutize_urls(soup, url)
             return Response(str(soup), content_type="text/html")

         res = requests.get(url, timeout=10)
         res.raise_for_status()
         soup = BeautifulSoup(res.text, 'html.parser')

         if _is_site(hostname, "odt.co.nz"):
             # Otago Daily Times
             _clean_odt(soup)
         elif _is_site(hostname, "nzherald.co.nz"):
             # NZ Herald
             soup = _clean_herald(soup)

         # Relative URLs resolve against the URL the document was served from.
         _absolutize_urls(soup, res.url)
         return Response(str(soup), content_type='text/html')

     except Exception as e:
         # Top-level boundary: keep the traceback in the log and send the
         # message as plain text so it is never interpreted as HTML.
         app.logger.exception("Failed to proxy %s", url)
         return Response(
             f"Error fetching or processing URL: {e}",
             status=500,
             content_type="text/plain; charset=utf-8",
         )

 @app.route('/')
 def index():
     """Serve the landing page: a form that submits the entered URL to ``/proxy`` via GET."""
     # The template contains no placeholders; it is rendered as static HTML.
     return render_template_string('''
 <!DOCTYPE html>
 <html lang="en">
 <head>
   <meta charset="UTF-8" />
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <title>ODT Free</title>
   <!-- Bootstrap CSS CDN -->
   <link
     href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css"
     rel="stylesheet"
   />
 </head>
 <body class="bg-light">
   <div class="container py-5">
     <h1 class="mb-4 text-center">ODT Free</h1>
     <form id="urlForm" class="mx-auto" style="max-width: 480px;" action="/proxy" method="get">
       <div class="mb-3">
         <p>Otago Daily Times has full support: the web page <i>should</i> appear mostly normal, minus the paywall. NZ Herald has partial support; currently, images for articles will not display.</p>
         <label for="urlInput" class="form-label">Enter a premium link from <a href="https://odt.co.nz" target="_blank">odt.co.nz</a>, <a href="https://nzherald.co.nz" target="_blank">nzherald.co.nz</a>, or <a href="https://thepress.co.nz" target="_blank">thepress.co.nz</a></label>
         <input
           type="url"
           class="form-control"
           id="urlInput"
           name="url"
           placeholder="https://www.odt.co.nz/news/dunedin/far-running-company-they-were-awful"
           required
         />
       </div>
       <button type="submit" class="btn btn-primary w-100">Submit</button>
     </form>
   </div>

   <!-- Bootstrap JS Bundle with Popper (optional) -->
   <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
 </body>
 </html>
     ''')

 # Script entry point: start Flask's built-in server with debug mode off.
 if __name__ == "__main__":
     app.run(debug=False)
	from flask import Flask, request, Response, render_template_string
	import requests
	from playwright.sync_api import sync_playwright
	from bs4 import BeautifulSoup
	from urllib.parse import urljoin, urlparse

	# WSGI application object; the route decorators in this module register on it.
	app = Flask(__name__)

	# Registrable domains the proxy may fetch from. The /proxy handler accepts
	# these hosts and any of their subdomains.
	ALLOWED_DOMAINS = [
	    "odt.co.nz",
	    "nzherald.co.nz",
	    "thepress.co.nz",
	]

	# Schemes that do not name a fetchable location; such values are left as-is.
	_NON_REWRITABLE_SCHEMES = ("javascript:", "data:", "mailto:", "tel:")

	# Substrings of <script src> values that are stripped from NZ Herald pages.
	_HERALD_SCRIPT_MARKERS = (
	    "tailwind", "queryly.v4", "client", "appear", "image", "gpt", "react",
	)


	def _is_site(hostname, domain):
	    """Return True if *hostname* is *domain* itself or its ``www.`` host."""
	    return hostname in (domain, "www." + domain)


	def _absolutize_urls(soup, document_url):
	    """Resolve relative URL attributes in *soup* against *document_url*, in place.

	    Rewrites ``href``, ``src`` and ``action`` plus every candidate URL in
	    ``srcset``. Non-string values and ``javascript:``/``data:``/``mailto:``/
	    ``tel:`` values are left untouched.
	    """
	    for tag in soup.find_all(True):
	        for attr in ("href", "src", "action"):
	            value = tag.attrs.get(attr)
	            if isinstance(value, str) and not value.startswith(_NON_REWRITABLE_SCHEMES):
	                tag[attr] = urljoin(document_url, value)

	        if "srcset" in tag.attrs:
	            # NOTE: candidates are split on "," so a URL that itself contains
	            # a comma is not handled correctly.
	            candidates = []
	            for candidate in str(tag["srcset"]).split(","):
	                parts = candidate.split()
	                if not parts:
	                    continue
	                if not parts[0].startswith(_NON_REWRITABLE_SCHEMES):
	                    parts[0] = urljoin(document_url, parts[0])
	                candidates.append(" ".join(parts))
	            tag["srcset"] = ", ".join(candidates)


	def _ensure_base_tag(soup, href):
	    """Insert ``<base href=...>`` (and a ``<head>`` if missing) unless one exists."""
	    head = soup.head
	    if head is None:
	        head = soup.new_tag("head")
	        (soup.html or soup).insert(0, head)
	    if head.find("base") is None:
	        head.insert(0, soup.new_tag("base", href=href))


	def _render_with_browser(url):
	    """Load *url* in headless Chromium and return the rendered HTML."""
	    with sync_playwright() as p:
	        browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
	        try:
	            context = browser.new_context(
	                user_agent=("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
	                            "AppleWebKit/537.36 (KHTML, like Gecko) "
	                            "Chrome/122.0.0.0 Safari/537.36"),
	                locale="en-NZ",
	            )
	            page = context.new_page()
	            page.set_default_timeout(20000)
	            page.goto(url, wait_until="domcontentloaded")
	            page.wait_for_selector("article, [itemprop='articleBody'], #root h1")
	            return page.content()
	        finally:
	            # Runs on timeouts/navigation errors too, so Chromium never
	            # outlives the request; closing the browser closes its pages.
	            browser.close()


	def _clean_odt(soup):
	    """Drop every script and the ``property="content:encoded"`` marker, in place."""
	    for script in soup.find_all("script"):
	        script.decompose()
	    for div in soup.select('div.field-item.even'):
	        if div.get('property') == 'content:encoded':
	            del div['property']


	def _clean_herald(soup):
	    """Strip selected scripts and the paywall wrapper's styling; return a new soup."""
	    for script in soup.find_all("script"):
	        src = (script.get("src") or "").lower()
	        if any(marker in src for marker in _HERALD_SCRIPT_MARKERS):
	            script.decompose()

	    wrapper = soup.select_one("div.article-paywall-hide")
	    if wrapper is not None:
	        for tag in wrapper.find_all(True):
	            for attr in ("class", "style", "data-test-ui"):
	                tag.attrs.pop(attr, None)
	        # Only touch the wrapper when it was actually found.
	        wrapper.attrs.pop("class", None)

	    # Un-escape "&amp;" across the serialized document and re-parse it.
	    return BeautifulSoup(str(soup).replace("&amp;", "&"), "html.parser")


	@app.route('/proxy')
	def proxy():
	    """Fetch a page from an allowed news site and return it after clean-up.

	    Query parameters:
	        url: absolute http(s) URL whose host is one of ``ALLOWED_DOMAINS``
	            or a subdomain of one.

	    Returns:
	        400 when ``url`` is missing, malformed or not http(s); 403 when the
	        host is not allowed; a plain-text 500 when fetching or processing
	        fails; otherwise the processed page as ``text/html``.
	    """
	    url = request.args.get('url')
	    if not url:
	        return "Missing URL parameter", 400

	    # Validate scheme and domain before any network access.
	    try:
	        parsed_url = urlparse(url)
	    except ValueError:
	        # e.g. an unterminated IPv6 literal such as "http://[::1"
	        return "Invalid URL", 400
	    if parsed_url.scheme not in ('http', 'https'):
	        return "Invalid URL scheme", 400

	    hostname = parsed_url.hostname
	    # hostname is None for URLs without a host (e.g. "http:///path").
	    if not hostname or not any(
	        hostname == d or hostname.endswith("." + d) for d in ALLOWED_DOMAINS
	    ):
	        return "URL domain not allowed", 403

	    # Author's original marker, kept verbatim: #@#zephr-html-paywall
	    # NOTE(review): requests and the browser follow redirects, so the final
	    # host is not re-checked against ALLOWED_DOMAINS.
	    try:
	        # The Press: rendered in a browser, so no plain HTTP fetch is made.
	        if _is_site(hostname, "thepress.co.nz"):
	            soup = BeautifulSoup(_render_with_browser(url), "html.parser")
	            _ensure_base_tag(soup, url)
	            _absolutize_urls(soup, url)
	            return Response(str(soup), content_type="text/html")

	        res = requests.get(url, timeout=10)
	        res.raise_for_status()
	        soup = BeautifulSoup(res.text, 'html.parser')

	        if _is_site(hostname, "odt.co.nz"):
	            # Otago Daily Times
	            _clean_odt(soup)
	        elif _is_site(hostname, "nzherald.co.nz"):
	            # NZ Herald
	            soup = _clean_herald(soup)

	        # Relative URLs resolve against the URL the document was served from.
	        _absolutize_urls(soup, res.url)
	        return Response(str(soup), content_type='text/html')

	    except Exception as e:
	        # Top-level boundary: keep the traceback in the log and send the
	        # message as plain text so it is never interpreted as HTML.
	        app.logger.exception("Failed to proxy %s", url)
	        return Response(
	            f"Error fetching or processing URL: {e}",
	            status=500,
	            content_type="text/plain; charset=utf-8",
	        )

	@app.route('/')
	def index():
	    """Serve the landing page: a form that submits the entered URL to ``/proxy`` via GET."""
	    # The template contains no placeholders; it is rendered as static HTML.
	    return render_template_string('''
	<!DOCTYPE html>
	<html lang="en">
	<head>
	  <meta charset="UTF-8" />
	  <meta name="viewport" content="width=device-width, initial-scale=1" />
	  <title>ODT Free</title>
	  <!-- Bootstrap CSS CDN -->
	  <link
	    href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css"
	    rel="stylesheet"
	  />
	</head>
	<body class="bg-light">
	  <div class="container py-5">
	    <h1 class="mb-4 text-center">ODT Free</h1>
	    <form id="urlForm" class="mx-auto" style="max-width: 480px;" action="/proxy" method="get">
	      <div class="mb-3">
	        <p>Otago Daily Times has full support: the web page <i>should</i> appear mostly normal, minus the paywall. NZ Herald has partial support; currently, images for articles will not display.</p>
	        <label for="urlInput" class="form-label">Enter a premium link from <a href="https://odt.co.nz" target="_blank">odt.co.nz</a>, <a href="https://nzherald.co.nz" target="_blank">nzherald.co.nz</a>, or <a href="https://thepress.co.nz" target="_blank">thepress.co.nz</a></label>
	        <input
	          type="url"
	          class="form-control"
	          id="urlInput"
	          name="url"
	          placeholder="https://www.odt.co.nz/news/dunedin/far-running-company-they-were-awful"
	          required
	        />
	      </div>
	      <button type="submit" class="btn btn-primary w-100">Submit</button>
	    </form>
	  </div>

	  <!-- Bootstrap JS Bundle with Popper (optional) -->
	  <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
	</body>
	</html>
	    ''')

	# Script entry point: start Flask's built-in server with debug mode off.
	if __name__ == "__main__":
	    app.run(debug=False)
No results found