Skip to content

Instantly share code, notes, and snippets.

@jerieljan
Created July 20, 2025 14:33
Show Gist options
  • Save jerieljan/36cbb9bf71b9061efa122b41c902bdf2 to your computer and use it in GitHub Desktop.
Save jerieljan/36cbb9bf71b9061efa122b41c902bdf2 to your computer and use it in GitHub Desktop.
Cloudflare Browser Rendering - Open WebUI Tool

Cloudflare Browser Rendering

This is also available at: https://openwebui.com/t/jerieljan/cloudflare_browser_rendering

Enables LLMs to read webpages by processing them into Markdown with the Cloudflare Browser Rendering API.

This tool focuses on the /markdown endpoint of the API, since Markdown output works nicely with LLMs. It does not use the HTML-oriented /content or /scrape endpoints, but the Markdown output is still sufficient for reading website content.

Read more at: https://developers.cloudflare.com/browser-rendering/

Requirements

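You need a Cloudflare account ID and a custom Cloudflare API token that can access the Browser Rendering API. Both are entered in the tool's Valves (see Usage below).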

Usage

  • Add this under Workspace -> Tools
  • Configure Valves and define your account ID and custom API token
  • Start a new chat, select the tool under (+) and provide a URL.

Limitations

  • This tool is subject to the limits and restrictions of Cloudflare's Browser Rendering service. Observe its rate limits and scraping restrictions.
"""
title: Cloudflare Browser Rendering
author: jerieljan
version: 0.3
license: MIT License
description: Enables LLMs to read webpages by processing them into Markdown with the Cloudflare Browser Rendering API
"""
from pydantic import BaseModel, Field
from typing import Optional, Callable, Any, Dict, List
import requests
import asyncio
class Tools:
    """Open WebUI tool that turns webpages or raw HTML into Markdown.

    Both public methods POST to the ``/browser-rendering/markdown`` endpoint
    of the Cloudflare API, using the credentials stored in ``self.valves``.
    """

    # Settings needed to reach the Cloudflare API. Both the account ID and
    # the API token must be non-empty before either tool method will run.
    class Valves(BaseModel):
        CLOUDFLARE_ACCOUNT_ID: str = Field(
            default="", description="The Cloudflare Account ID"
        )
        CLOUDFLARE_API_TOKEN: str = Field(
            default="", description="The API token to access Cloudflare services"
        )
        CLOUDFLARE_API_BASE_URL: str = Field(
            default="https://api.cloudflare.com/client/v4",
            description="(Optional) The base URL for Cloudflare API endpoints",
        )

    def __init__(self):
        """Create default valves and the static function specifications."""
        self.valves = self.Valves()
        # The methods below emit their own "citation" events, so the
        # citation flag is switched off here.
        self.citation = False
        self.tools = [
            {
                "type": "function",
                "function": {
                    "name": "extract_markdown",
                    "description": "Extract markdown content from a webpage using Cloudflare Browser Rendering",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "url": {
                                "type": "string",
                                "description": "The URL of the webpage to extract markdown from",
                            },
                            "reject_pattern": {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "Optional regex patterns to reject certain requests (e.g., CSS files)",
                                "default": [],
                            },
                        },
                        "required": ["url"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "extract_markdown_from_html",
                    "description": "Convert raw HTML content to markdown using Cloudflare Browser Rendering",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "html": {
                                "type": "string",
                                "description": "The raw HTML content to convert to markdown",
                            }
                        },
                        "required": ["html"],
                    },
                },
            },
        ]

    async def extract_markdown(
        self,
        url: str,
        reject_pattern: Optional[List[str]] = None,
        __event_emitter__: Optional[Callable[[Dict], Any]] = None,
    ) -> str:
        """
        Uses the Cloudflare Browser Rendering service to fetch a webpage and provide the content in Markdown format.

        :param url: The URL of the webpage to extract markdown from
        :param reject_pattern: Optional regex patterns to reject certain requests (e.g., CSS files)
        :param __event_emitter__: Optional async callback that receives status and citation event dicts
        :return: The Markdown content, or a human-readable error message if the request failed
        :raises ValueError: If the API token or account ID valve is empty
        """
        payload: Dict[str, Any] = {"url": url}
        if reject_pattern:
            payload["rejectRequestPattern"] = reject_pattern

        return await _render_markdown(
            self.valves,
            payload,
            start_status=f"Extracting markdown from: {url}...",
            success_status="Markdown extraction completed successfully",
            action="extracting markdown",
            citation={
                "metadata": [{"source": url}],
                "source": {"name": url, "url": url},
            },
            event_emitter=__event_emitter__,
        )

    async def extract_markdown_from_html(
        self,
        html: str,
        __event_emitter__: Optional[Callable[[Dict], Any]] = None,
    ) -> str:
        """
        Uses the Cloudflare Browser Rendering service to process the user's provided HTML code and reformat it to Markdown.

        :param html: The raw HTML content to convert to markdown
        :param __event_emitter__: Optional async callback that receives status and citation event dicts
        :return: The Markdown content, or a human-readable error message if the request failed
        :raises ValueError: If the API token or account ID valve is empty
        """
        return await _render_markdown(
            self.valves,
            {"html": html},
            start_status="Converting HTML to markdown...",
            success_status="HTML to markdown conversion completed successfully",
            action="converting HTML to markdown",
            citation={
                "metadata": [{"source": "Raw HTML Content"}],
                "source": {"name": "HTML Content"},
            },
            event_emitter=__event_emitter__,
        )


# The helpers below live at module level (not on ``Tools``) so that they are
# never picked up as additional methods of the tool class.

# Seconds to wait for Cloudflare before giving up on a request.
_REQUEST_TIMEOUT_SECONDS = 30

# Fallback text when the API reports failure without a usable error entry.
_UNKNOWN_ERROR = "Unknown error occurred"


def _first_error_message(result: Dict[str, Any]) -> str:
    """Return a readable message for the first entry of ``result["errors"]``.

    Handles a missing, ``None`` or empty ``errors`` value (which previously
    raised ``IndexError``/``TypeError``), error objects carrying ``message``
    and optionally ``code``, and plain string entries.
    """
    errors = result.get("errors")
    if not errors:
        return _UNKNOWN_ERROR

    first = errors[0] if isinstance(errors, (list, tuple)) else errors
    if isinstance(first, dict):
        message = first.get("message")
        if message:
            code = first.get("code")
            return f"{message} (code {code})" if code is not None else str(message)
    return str(first)


def _post_json(endpoint: str, headers: Dict[str, str], payload: Dict[str, Any]):
    """Send a blocking JSON POST request; intended to run in a worker thread."""
    return requests.post(
        endpoint,
        headers=headers,
        json=payload,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )


async def _emit_event(
    event_emitter: Optional[Callable[[Dict], Any]],
    event_type: str,
    data: Dict[str, Any],
) -> None:
    """Await ``event_emitter`` with a ``{"type", "data"}`` event, if one was given."""
    if event_emitter:
        await event_emitter({"type": event_type, "data": data})


async def _render_markdown(
    valves: Any,
    payload: Dict[str, Any],
    *,
    start_status: str,
    success_status: str,
    action: str,
    citation: Dict[str, Any],
    event_emitter: Optional[Callable[[Dict], Any]],
) -> str:
    """POST ``payload`` to the Browser Rendering markdown endpoint.

    :param valves: Object exposing the three ``CLOUDFLARE_*`` settings
    :param payload: JSON body for the request
    :param start_status: Status text emitted before the request is sent
    :param success_status: Status text emitted after a successful conversion
    :param action: Phrase inserted into error messages ("Error <action>: ...")
    :param citation: Extra citation fields merged after the ``document`` key
    :param event_emitter: Optional async callback for status/citation events
    :return: The Markdown content, or an error message string on failure
    :raises ValueError: If the API token or account ID is empty
    """
    if not valves.CLOUDFLARE_API_TOKEN:
        raise ValueError("CLOUDFLARE_API_TOKEN not provided in valves")
    if not valves.CLOUDFLARE_ACCOUNT_ID:
        raise ValueError("CLOUDFLARE_ACCOUNT_ID not provided in valves")

    async def emit_status(
        description: str, status: str = "in_progress", done: bool = False
    ) -> None:
        await _emit_event(
            event_emitter,
            "status",
            {"description": description, "status": status, "done": done},
        )

    await emit_status(start_status, "processing")

    # Tolerate a trailing slash in the configured base URL.
    base_url = valves.CLOUDFLARE_API_BASE_URL.rstrip("/")
    endpoint = (
        f"{base_url}/accounts/{valves.CLOUDFLARE_ACCOUNT_ID}"
        "/browser-rendering/markdown"
    )
    headers = {
        "Authorization": f"Bearer {valves.CLOUDFLARE_API_TOKEN}",
        "Content-Type": "application/json",
    }

    try:
        # ``requests`` is synchronous; run it in the default executor so the
        # event loop keeps serving other tasks while Cloudflare renders.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, _post_json, endpoint, headers, payload
        )
        response.raise_for_status()
        result = response.json()

        if not result.get("success", False):
            error_msg = _first_error_message(result)
            await emit_status(f"Error: {error_msg}", status="error", done=True)
            return f"Error {action}: {error_msg}"

        # Guard against an explicit ``null`` result so a str is always returned.
        markdown_content = result.get("result") or ""

        await _emit_event(
            event_emitter,
            "citation",
            {"document": [markdown_content], **citation},
        )
        await emit_status(success_status, status="complete", done=True)
        return markdown_content
    except requests.exceptions.RequestException as e:
        error_msg = f"Network error {action}: {str(e)}"
        await emit_status(error_msg, status="error", done=True)
        return error_msg
    except Exception as e:
        # Tool boundary: report the failure as text instead of crashing the chat.
        error_msg = f"Error {action}: {str(e)}"
        await emit_status(error_msg, status="error", done=True)
        return error_msg
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment