jerieljan · July 20, 2025 14:33
diff --git a/README.md b/README.md
diff --git a/cf_browser_rendering.py b/cf_browser_rendering.py
 """
 title: Cloudflare Browser Rendering
 author: jerieljan
 version: 0.3
 license: MIT License
 description: Enables LLMs to read webpages by processing them into Markdown with the Cloudflare Browser Rendering API
 """

 from pydantic import BaseModel, Field
 from typing import Optional, Callable, Any, Dict, List
 import requests
 import asyncio


 def _require_credentials(valves: Any) -> None:
    """Ensure both Cloudflare credential valves are filled in.

    :param valves: object exposing ``CLOUDFLARE_API_TOKEN`` and ``CLOUDFLARE_ACCOUNT_ID``
    :raises Exception: if either value is empty (the token is checked first)
    """
    if not valves.CLOUDFLARE_API_TOKEN:
        raise Exception("CLOUDFLARE_API_TOKEN not provided in valves")

    if not valves.CLOUDFLARE_ACCOUNT_ID:
        raise Exception("CLOUDFLARE_ACCOUNT_ID not provided in valves")


 def _first_error_message(result: Dict[str, Any]) -> str:
    """Describe the first entry of the response's ``errors`` field.

    A missing, ``null`` or empty list yields a generic message instead of
    raising ``IndexError``.  Entries that are dicts carrying a ``message``
    key (presumably Cloudflare's usual error shape -- confirm against the API
    reference) are reduced to that message, plus the ``code`` when present.

    :param result: decoded JSON body of the API response
    :return: a one-line, human-readable error description
    """
    errors = result.get("errors") or []
    if not errors:
        return "Unknown error occurred"
    if not isinstance(errors, (list, tuple)):
        return str(errors)

    first = errors[0]
    if isinstance(first, dict) and first.get("message"):
        code = first.get("code")
        if code is None:
            return str(first["message"])
        return f"{first['message']} (code {code})"
    return str(first)


 async def _emit_event(
    emitter: Optional[Callable[[Dict], Any]], event_type: str, data: Dict[str, Any]
 ) -> None:
    """Send ``{"type": event_type, "data": data}`` to ``emitter``; do nothing without one."""
    if emitter:
        await emitter({"type": event_type, "data": data})


 async def _emit_status(
    emitter: Optional[Callable[[Dict], Any]],
    description: str,
    status: str = "in_progress",
    done: bool = False,
 ) -> None:
    """Send a ``status`` event carrying ``description``, ``status`` and ``done``."""
    await _emit_event(
        emitter,
        "status",
        {"description": description, "status": status, "done": done},
    )


 async def _request_markdown(valves: Any, payload: Dict[str, Any]) -> Any:
    """POST ``payload`` to the Browser Rendering markdown endpoint and decode the JSON reply.

    ``requests`` is synchronous, so the call runs on the event loop's default
    executor; awaiting it no longer stalls every other coroutine for up to the
    30 second timeout.

    :param valves: object exposing the base URL, account ID and API token
    :param payload: JSON body to send
    :return: the decoded JSON response body
    :raises requests.exceptions.RequestException: on connection problems,
        timeouts, or a 4xx/5xx status (via ``raise_for_status``)
    """
    # Tolerate a base URL configured with a trailing slash.
    endpoint = (
        f"{valves.CLOUDFLARE_API_BASE_URL.rstrip('/')}"
        f"/accounts/{valves.CLOUDFLARE_ACCOUNT_ID}/browser-rendering/markdown"
    )
    headers = {
        "Authorization": f"Bearer {valves.CLOUDFLARE_API_TOKEN}",
        "Content-Type": "application/json",
    }

    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None,
        lambda: requests.post(endpoint, headers=headers, json=payload, timeout=30),
    )
    response.raise_for_status()
    return response.json()


 async def _run_conversion(
    valves: Any,
    payload: Dict[str, Any],
    emitter: Optional[Callable[[Dict], Any]],
    action: str,
    start_message: str,
    success_message: str,
    citation_metadata: Dict[str, Any],
    citation_source: Dict[str, Any],
 ) -> str:
    """Run one markdown conversion and report progress through ``emitter``.

    :param valves: configuration object passed on to the request helper
    :param payload: JSON body for the markdown endpoint
    :param emitter: optional async callback receiving status/citation events
    :param action: phrase spliced into error texts, e.g. ``"extracting markdown"``
    :param start_message: description of the initial ``processing`` status
    :param success_message: description of the final ``complete`` status
    :param citation_metadata: single metadata entry for the citation event
    :param citation_source: ``source`` mapping for the citation event
    :return: the Markdown text, or an error message if anything failed
    """
    # Emitted exactly once; the previous code sent this status twice.
    await _emit_status(emitter, start_message, "processing")

    try:
        result = await _request_markdown(valves, payload)

        if not result.get("success", False):
            error_msg = _first_error_message(result)
            await _emit_status(emitter, f"Error: {error_msg}", status="error", done=True)
            return f"Error {action}: {error_msg}"

        # A null "result" would otherwise leak None out of a "-> str" function.
        markdown_content = result.get("result") or ""

        await _emit_event(
            emitter,
            "citation",
            {
                "document": [markdown_content],
                "metadata": [citation_metadata],
                "source": citation_source,
            },
        )
        await _emit_status(emitter, success_message, status="complete", done=True)
        return markdown_content

    except requests.exceptions.RequestException as e:
        error_msg = f"Network error {action}: {str(e)}"
        await _emit_status(emitter, error_msg, status="error", done=True)
        return error_msg
    except Exception as e:
        # Last-resort boundary: the caller gets a readable string, not a traceback.
        error_msg = f"Error {action}: {str(e)}"
        await _emit_status(emitter, error_msg, status="error", done=True)
        return error_msg


 class Tools:
    """Tools that turn a webpage or raw HTML into Markdown via Cloudflare Browser Rendering.

    Settings live in ``self.valves``.  Both public coroutines return the
    Markdown text on success and a human-readable error string when the
    request or the API call fails.  An empty credential valve raises instead.
    """

    class Valves(BaseModel):
        # Credentials and endpoint settings read by the tool methods.
        CLOUDFLARE_ACCOUNT_ID: str = Field(
            default="", description="The Cloudflare Account ID"
        )

        CLOUDFLARE_API_TOKEN: str = Field(
            default="", description="The API token to access Cloudflare services"
        )

        CLOUDFLARE_API_BASE_URL: str = Field(
            default="https://api.cloudflare.com/client/v4",
            description="(Optional) The base URL for Cloudflare API endpoints",
        )

    def __init__(self):
        """Create default valves and the function schemas for the two tools."""
        self.valves = self.Valves()
        # NOTE(review): presumably disables host-generated citations because the
        # methods emit their own "citation" events -- confirm against the host.
        self.citation = False
        # Function-calling style descriptions of the two public methods below.
        self.tools = [
            {
                "type": "function",
                "function": {
                    "name": "extract_markdown",
                    "description": "Extract markdown content from a webpage using Cloudflare Browser Rendering",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "url": {
                                "type": "string",
                                "description": "The URL of the webpage to extract markdown from",
                            },
                            "reject_pattern": {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "Optional regex patterns to reject certain requests (e.g., CSS files)",
                                "default": [],
                            },
                        },
                        "required": ["url"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "extract_markdown_from_html",
                    "description": "Convert raw HTML content to markdown using Cloudflare Browser Rendering",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "html": {
                                "type": "string",
                                "description": "The raw HTML content to convert to markdown",
                            }
                        },
                        "required": ["html"],
                    },
                },
            },
        ]

    async def extract_markdown(
        self,
        url: str,
        reject_pattern: Optional[List[str]] = None,
        __event_emitter__: Optional[Callable[[Dict], Any]] = None,
    ) -> str:
        """
        Uses the Cloudflare Browser Rendering service to fetch a webpage and provide the content in Markdown format.

        :param url: The URL of the webpage to extract markdown from
        :param reject_pattern: Optional regex patterns to reject certain requests (e.g., CSS files)
        :return: The page content as Markdown, or an error message if the extraction failed
        """
        # __event_emitter__ is an optional async callback that receives the
        # status and citation events produced during the conversion.
        _require_credentials(self.valves)

        payload: Dict[str, Any] = {"url": url}
        if reject_pattern:
            payload["rejectRequestPattern"] = reject_pattern

        return await _run_conversion(
            self.valves,
            payload,
            __event_emitter__,
            action="extracting markdown",
            start_message=f"Extracting markdown from: {url}...",
            success_message="Markdown extraction completed successfully",
            citation_metadata={"source": url},
            citation_source={"name": url, "url": url},
        )

    async def extract_markdown_from_html(
        self,
        html: str,
        __event_emitter__: Optional[Callable[[Dict], Any]] = None,
    ) -> str:
        """
        Uses the Cloudflare Browser Rendering service to process the user's provided HTML code and reformat it to Markdown.

        :param html: The raw HTML content to convert to markdown
        :return: The HTML rendered as Markdown, or an error message if the conversion failed
        """
        # __event_emitter__ is an optional async callback that receives the
        # status and citation events produced during the conversion.
        _require_credentials(self.valves)

        return await _run_conversion(
            self.valves,
            {"html": html},
            __event_emitter__,
            action="converting HTML to markdown",
            start_message="Converting HTML to markdown...",
            success_message="HTML to markdown conversion completed successfully",
            citation_metadata={"source": "Raw HTML Content"},
            citation_source={"name": "HTML Content"},
        )
	"""
	title: Cloudflare Browser Rendering
	author: jerieljan
	version: 0.3
	license: MIT License
	description: Enables LLMs to read webpages by processing them into Markdown with the Cloudflare Browser Rendering API
	"""

	from pydantic import BaseModel, Field
	from typing import Optional, Callable, Any, Dict, List
	import requests
	import asyncio


	def _require_credentials(valves: Any) -> None:
	    """Ensure both Cloudflare credential valves are filled in.

	    :param valves: object exposing ``CLOUDFLARE_API_TOKEN`` and ``CLOUDFLARE_ACCOUNT_ID``
	    :raises Exception: if either value is empty (the token is checked first)
	    """
	    if not valves.CLOUDFLARE_API_TOKEN:
	        raise Exception("CLOUDFLARE_API_TOKEN not provided in valves")

	    if not valves.CLOUDFLARE_ACCOUNT_ID:
	        raise Exception("CLOUDFLARE_ACCOUNT_ID not provided in valves")


	def _first_error_message(result: Dict[str, Any]) -> str:
	    """Describe the first entry of the response's ``errors`` field.

	    A missing, ``null`` or empty list yields a generic message instead of
	    raising ``IndexError``.  Entries that are dicts carrying a ``message``
	    key (presumably Cloudflare's usual error shape -- confirm against the API
	    reference) are reduced to that message, plus the ``code`` when present.

	    :param result: decoded JSON body of the API response
	    :return: a one-line, human-readable error description
	    """
	    errors = result.get("errors") or []
	    if not errors:
	        return "Unknown error occurred"
	    if not isinstance(errors, (list, tuple)):
	        return str(errors)

	    first = errors[0]
	    if isinstance(first, dict) and first.get("message"):
	        code = first.get("code")
	        if code is None:
	            return str(first["message"])
	        return f"{first['message']} (code {code})"
	    return str(first)


	async def _emit_event(
	    emitter: Optional[Callable[[Dict], Any]], event_type: str, data: Dict[str, Any]
	) -> None:
	    """Send ``{"type": event_type, "data": data}`` to ``emitter``; do nothing without one."""
	    if emitter:
	        await emitter({"type": event_type, "data": data})


	async def _emit_status(
	    emitter: Optional[Callable[[Dict], Any]],
	    description: str,
	    status: str = "in_progress",
	    done: bool = False,
	) -> None:
	    """Send a ``status`` event carrying ``description``, ``status`` and ``done``."""
	    await _emit_event(
	        emitter,
	        "status",
	        {"description": description, "status": status, "done": done},
	    )


	async def _request_markdown(valves: Any, payload: Dict[str, Any]) -> Any:
	    """POST ``payload`` to the Browser Rendering markdown endpoint and decode the JSON reply.

	    ``requests`` is synchronous, so the call runs on the event loop's default
	    executor; awaiting it no longer stalls every other coroutine for up to the
	    30 second timeout.

	    :param valves: object exposing the base URL, account ID and API token
	    :param payload: JSON body to send
	    :return: the decoded JSON response body
	    :raises requests.exceptions.RequestException: on connection problems,
	        timeouts, or a 4xx/5xx status (via ``raise_for_status``)
	    """
	    # Tolerate a base URL configured with a trailing slash.
	    endpoint = (
	        f"{valves.CLOUDFLARE_API_BASE_URL.rstrip('/')}"
	        f"/accounts/{valves.CLOUDFLARE_ACCOUNT_ID}/browser-rendering/markdown"
	    )
	    headers = {
	        "Authorization": f"Bearer {valves.CLOUDFLARE_API_TOKEN}",
	        "Content-Type": "application/json",
	    }

	    loop = asyncio.get_running_loop()
	    response = await loop.run_in_executor(
	        None,
	        lambda: requests.post(endpoint, headers=headers, json=payload, timeout=30),
	    )
	    response.raise_for_status()
	    return response.json()


	async def _run_conversion(
	    valves: Any,
	    payload: Dict[str, Any],
	    emitter: Optional[Callable[[Dict], Any]],
	    action: str,
	    start_message: str,
	    success_message: str,
	    citation_metadata: Dict[str, Any],
	    citation_source: Dict[str, Any],
	) -> str:
	    """Run one markdown conversion and report progress through ``emitter``.

	    :param valves: configuration object passed on to the request helper
	    :param payload: JSON body for the markdown endpoint
	    :param emitter: optional async callback receiving status/citation events
	    :param action: phrase spliced into error texts, e.g. ``"extracting markdown"``
	    :param start_message: description of the initial ``processing`` status
	    :param success_message: description of the final ``complete`` status
	    :param citation_metadata: single metadata entry for the citation event
	    :param citation_source: ``source`` mapping for the citation event
	    :return: the Markdown text, or an error message if anything failed
	    """
	    # Emitted exactly once; the previous code sent this status twice.
	    await _emit_status(emitter, start_message, "processing")

	    try:
	        result = await _request_markdown(valves, payload)

	        if not result.get("success", False):
	            error_msg = _first_error_message(result)
	            await _emit_status(emitter, f"Error: {error_msg}", status="error", done=True)
	            return f"Error {action}: {error_msg}"

	        # A null "result" would otherwise leak None out of a "-> str" function.
	        markdown_content = result.get("result") or ""

	        await _emit_event(
	            emitter,
	            "citation",
	            {
	                "document": [markdown_content],
	                "metadata": [citation_metadata],
	                "source": citation_source,
	            },
	        )
	        await _emit_status(emitter, success_message, status="complete", done=True)
	        return markdown_content

	    except requests.exceptions.RequestException as e:
	        error_msg = f"Network error {action}: {str(e)}"
	        await _emit_status(emitter, error_msg, status="error", done=True)
	        return error_msg
	    except Exception as e:
	        # Last-resort boundary: the caller gets a readable string, not a traceback.
	        error_msg = f"Error {action}: {str(e)}"
	        await _emit_status(emitter, error_msg, status="error", done=True)
	        return error_msg


	class Tools:
	    """Tools that turn a webpage or raw HTML into Markdown via Cloudflare Browser Rendering.

	    Settings live in ``self.valves``.  Both public coroutines return the
	    Markdown text on success and a human-readable error string when the
	    request or the API call fails.  An empty credential valve raises instead.
	    """

	    class Valves(BaseModel):
	        # Credentials and endpoint settings read by the tool methods.
	        CLOUDFLARE_ACCOUNT_ID: str = Field(
	            default="", description="The Cloudflare Account ID"
	        )

	        CLOUDFLARE_API_TOKEN: str = Field(
	            default="", description="The API token to access Cloudflare services"
	        )

	        CLOUDFLARE_API_BASE_URL: str = Field(
	            default="https://api.cloudflare.com/client/v4",
	            description="(Optional) The base URL for Cloudflare API endpoints",
	        )

	    def __init__(self):
	        """Create default valves and the function schemas for the two tools."""
	        self.valves = self.Valves()
	        # NOTE(review): presumably disables host-generated citations because the
	        # methods emit their own "citation" events -- confirm against the host.
	        self.citation = False
	        # Function-calling style descriptions of the two public methods below.
	        self.tools = [
	            {
	                "type": "function",
	                "function": {
	                    "name": "extract_markdown",
	                    "description": "Extract markdown content from a webpage using Cloudflare Browser Rendering",
	                    "parameters": {
	                        "type": "object",
	                        "properties": {
	                            "url": {
	                                "type": "string",
	                                "description": "The URL of the webpage to extract markdown from",
	                            },
	                            "reject_pattern": {
	                                "type": "array",
	                                "items": {"type": "string"},
	                                "description": "Optional regex patterns to reject certain requests (e.g., CSS files)",
	                                "default": [],
	                            },
	                        },
	                        "required": ["url"],
	                    },
	                },
	            },
	            {
	                "type": "function",
	                "function": {
	                    "name": "extract_markdown_from_html",
	                    "description": "Convert raw HTML content to markdown using Cloudflare Browser Rendering",
	                    "parameters": {
	                        "type": "object",
	                        "properties": {
	                            "html": {
	                                "type": "string",
	                                "description": "The raw HTML content to convert to markdown",
	                            }
	                        },
	                        "required": ["html"],
	                    },
	                },
	            },
	        ]

	    async def extract_markdown(
	        self,
	        url: str,
	        reject_pattern: Optional[List[str]] = None,
	        __event_emitter__: Optional[Callable[[Dict], Any]] = None,
	    ) -> str:
	        """
	        Uses the Cloudflare Browser Rendering service to fetch a webpage and provide the content in Markdown format.

	        :param url: The URL of the webpage to extract markdown from
	        :param reject_pattern: Optional regex patterns to reject certain requests (e.g., CSS files)
	        :return: The page content as Markdown, or an error message if the extraction failed
	        """
	        # __event_emitter__ is an optional async callback that receives the
	        # status and citation events produced during the conversion.
	        _require_credentials(self.valves)

	        payload: Dict[str, Any] = {"url": url}
	        if reject_pattern:
	            payload["rejectRequestPattern"] = reject_pattern

	        return await _run_conversion(
	            self.valves,
	            payload,
	            __event_emitter__,
	            action="extracting markdown",
	            start_message=f"Extracting markdown from: {url}...",
	            success_message="Markdown extraction completed successfully",
	            citation_metadata={"source": url},
	            citation_source={"name": url, "url": url},
	        )

	    async def extract_markdown_from_html(
	        self,
	        html: str,
	        __event_emitter__: Optional[Callable[[Dict], Any]] = None,
	    ) -> str:
	        """
	        Uses the Cloudflare Browser Rendering service to process the user's provided HTML code and reformat it to Markdown.

	        :param html: The raw HTML content to convert to markdown
	        :return: The HTML rendered as Markdown, or an error message if the conversion failed
	        """
	        # __event_emitter__ is an optional async callback that receives the
	        # status and citation events produced during the conversion.
	        _require_credentials(self.valves)

	        return await _run_conversion(
	            self.valves,
	            {"html": html},
	            __event_emitter__,
	            action="converting HTML to markdown",
	            start_message="Converting HTML to markdown...",
	            success_message="HTML to markdown conversion completed successfully",
	            citation_metadata={"source": "Raw HTML Content"},
	            citation_source={"name": "HTML Content"},
	        )