Skip to content

Instantly share code, notes, and snippets.

@cb109
Last active August 28, 2025 08:22
Show Gist options
  • Save cb109/a324a6bfb49f875ae5721929590909a8 to your computer and use it in GitHub Desktop.
Save cb109/a324a6bfb49f875ae5721929590909a8 to your computer and use it in GitHub Desktop.
Reusable Chrome Instance for Puppeteer using Django Model and Management Command
# systemd unit that runs the Django `chromeworker` management command as a
# long-lived service.
[Unit]
Description=Chrome Browser Worker
# Order this unit after network.target.
After=network.target
[Service]
# Disable Python's output buffering so log lines reach the journal immediately.
Environment=PYTHONUNBUFFERED=1
# NOTE(review): presumably read by the project's settings to select the
# production configuration - confirm against the settings module.
Environment=DJANGO_DEPLOYMENT_TYPE=prod
User=www-data
Group=www-data
WorkingDirectory=/opt/myproject
# manage.py is executed directly, so it has to be executable and carry a
# shebang line pointing at the intended Python interpreter.
ExecStart=/opt/myproject/manage.py chromeworker
# Restart the service automatically whenever it exits uncleanly.
Restart=on-failure
# Stop the service with SIGINT instead of the default SIGTERM. Python turns
# SIGINT into a KeyboardInterrupt, which lets the command run its cleanup.
KillSignal=SIGINT
# Terminate the service after seven days of runtime. systemd treats that as
# a failure, so together with Restart=on-failure this recycles the browser
# roughly once a week.
RuntimeMaxSec=7d
# NOTE(review): no [Install] section is visible here; one (e.g.
# WantedBy=multi-user.target) is needed for `systemctl enable` - confirm.
"""Launch a Chrome and make it available through the ORM.
Puppeteer scripts can make use of that running browser instance by
calling .connect() against the ws_endpoint_url. This lets them skip the
startup time involved in launching a new browser from scratch.
Note: This command blocks until cancelled and tries to clean up the
database afterwards, so 'dead' ChromeWorkers won't pile up. To make
this work, set up this script as a systemd service using
KillSignal=SIGINT (which translates to a KeyboardInterrupt in here)
instead of SIGTERM, see:
https://alexandra-zaharia.github.io/posts/stopping-python-systemd-service-cleanly
"""
import json
import logging
import socket
import subprocess
import tempfile
import threading
import time
import uuid
from typing import List, Optional
from django.core.management.base import BaseCommand
from django.db import close_old_connections
from django.template.loader import render_to_string
from myproject.myapp.models import ChromeWorker
logger = logging.getLogger(__name__)
class LaunchChromeThread(threading.Thread):
    """Thread that launches Chrome by running a rendered puppeteer script in node.

    The script is rendered from the
    ``chromeworker/puppeteer_launch_chrome.js`` template and executed via
    ``node --eval``. The template receives ``remote_info_filepath`` so the
    script can report the browser's connection details back through that
    file.

    Args:
        remote_info_filepath: Path handed to the template as the location
            the script should write its remote info to.
        headless: Passed to the template; ``False`` requests a visible
            browser window.
        *args, **kwargs: Forwarded unchanged to ``threading.Thread``.
    """

    def __init__(
        self, remote_info_filepath: str, *args, headless: bool = True, **kwargs
    ):
        super().__init__(*args, **kwargs)
        self.remote_info_filepath = remote_info_filepath
        self.headless = headless

    def run(self):
        """Render the launch script and run it with node, blocking until it exits.

        Raises:
            subprocess.CalledProcessError: If node exits with a non-zero
                status. The captured stderr is logged before re-raising.
        """
        javascript_code: str = render_to_string(
            "chromeworker/puppeteer_launch_chrome.js",
            {
                "headless": self.headless,
                "launch_arguments": (
                    # ATTENTION: Order matters here, it seems --user-data-dir MUST
                    # come before --disable-web-security, otherwise that flag is
                    # just silently ignored, so do not reorder!
                    "--user-data-dir=/tmp/",
                    "--disable-web-security",
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    # Additional render options below
                    "--hide-scrollbars",
                    "--browser-test",
                    "--disable-background-networking",
                    "--run-all-compositor-stages-before-draw",
                    "--disable-new-content-rendering-timeout",
                    "--disable-threaded-animation",
                    "--disable-threaded-scrolling",
                    "--disable-checker-imaging",
                    "--disable-image-animation-resync",
                    # Prevents Chrome from using /dev/shm (shared memory). Docker
                    # containers often have limited shared memory (64MB default),
                    # causing Chrome to crash when it runs out.
                    "--disable-dev-shm-usage",
                    # Disables GPU hardware acceleration. Prevents GPU-related
                    # crashes in headless environments.
                    "--disable-gpu",
                    # Disables CPU-based rendering fallback. Can prevent hangs.
                    "--disable-software-rasterizer",
                    # Disables site isolation. Reduces process count and memory
                    # usage.
                    "--disable-features=IsolateOrigins,site-per-process",
                    # Hides automation indicators from JavaScript detection.
                    "--disable-blink-features=AutomationControlled",
                ),
                "executable_path": "google-chrome",
                "remote_info_filepath": self.remote_info_filepath,
            },
        )
        options: List[str] = ["node", "--eval", javascript_code]
        # This blocks, but since it's within a thread the main
        # thread can analyse its output and continue.
        try:
            subprocess.run(options, capture_output=True, check=True)
        except subprocess.CalledProcessError as error:
            # The output is captured, so without this the reason for a failed
            # launch would be lost and the caller would only see a timeout.
            logger.error(
                "Launching Chrome via node failed with exit code %s: %s",
                error.returncode,
                (error.stderr or b"").decode(errors="replace").strip(),
            )
            raise
def launch_chrome_browser(worker_uuid: str, headless: bool = True) -> Optional[dict]:
    """Launch Chrome, register it as a ChromeWorker and block until interrupted.

    A ``LaunchChromeThread`` is started with the path of a temporary file.
    That file is polled every 0.5 seconds, for up to 10 seconds, until it
    holds JSON with the ``processId`` and ``wsEndpoint`` keys. These are
    stored on a ``ChromeWorker`` row together with this machine's hostname.

    Args:
        worker_uuid: Identifier stored on the ChromeWorker and embedded in
            the temporary file's name.
        headless: Forwarded to the launcher thread; ``False`` shows the
            browser GUI.

    Returns:
        Never returns normally: after registering the worker this function
        blocks until an exception (e.g. ``KeyboardInterrupt``) is raised.
        The annotation is kept for backward compatibility.

    Raises:
        RuntimeError: If no remote info could be read within the timeout.
    """
    # Start thread that launches a Chrome instance via our puppeteer script.
    print(f"Launching Chrome Worker '{worker_uuid}'...")
    # Keep a reference to the file object for the lifetime of this function:
    # a NamedTemporaryFile is deleted as soon as it gets closed.
    remote_info_file = tempfile.NamedTemporaryFile(
        prefix=f"remote_info.{worker_uuid}.", suffix=".json"
    )
    remote_info_filepath: str = remote_info_file.name
    thread = LaunchChromeThread(remote_info_filepath, headless=headless)
    thread.start()

    # Read remote_info data from the output file created by the thread.
    remote_info: Optional[dict] = None
    seconds_step: float = 0.5
    seconds_left: float = 10.0
    while remote_info is None:
        # '<=' rather than a truthiness test, so the timeout still triggers
        # if the step size ever stops dividing the budget exactly.
        if seconds_left <= 0:
            raise RuntimeError(
                f"Timed out trying to read Chrome Worker '{worker_uuid}' remote_info"
            )
        try:
            with open(remote_info_filepath) as f:
                remote_info = json.loads(f.read())
        except (FileNotFoundError, TypeError, json.decoder.JSONDecodeError):
            # The file is still empty or only partially written: retry.
            remote_info = None
        if remote_info is None:
            seconds_left -= seconds_step
            time.sleep(seconds_step)

    # Create a ChromeWorker instance.
    worker, _ = ChromeWorker.objects.get_or_create(
        uuid=worker_uuid,
        pid=remote_info["processId"],
        hostname=socket.gethostname(),
        ws_endpoint_url=remote_info["wsEndpoint"],
    )
    logger.info("Created ChromeWorker: %s", worker)

    # Block infinitely until exit is triggered from the outside. Sleeping
    # (instead of spinning) keeps the CPU idle while still letting a
    # KeyboardInterrupt through.
    while True:
        time.sleep(1.0)
class Command(BaseCommand):
    """Management command that keeps one reusable Chrome instance running."""

    help = (
        "Launch an instance of Google Chrome via puppeteer "
        "and keep it alive, allowing other services to reuse it "
        "via its browser.wsEndpoint(), so they don't have the "
        "browser startup time on every call."
    )

    def add_arguments(self, parser):
        """Register the optional ``--headful`` debugging switch."""
        headful_help = (
            "Start launcher in non-headless mode, "
            "aka with visible browser GUI, helpful with debugging"
        )
        parser.add_argument(
            "--headful",
            dest="headful",
            action="store_true",
            default=False,
            help=headful_help,
        )

    @staticmethod
    def _stop_worker_record(worker_uuid: str) -> None:
        """Mark the ChromeWorker with the given uuid as stopped, if one exists."""
        # Avoid 'MySQL server has gone away' error.
        close_old_connections()
        try:
            worker = ChromeWorker.objects.get(uuid=worker_uuid)
            message = (
                "Stopping ChromeWorker as it looks like its associated "
                "browser has been closed: %s"
            )
            logger.warning(message, worker)
            worker.stop()
        except ChromeWorker.DoesNotExist:
            # Nothing was registered for this run, so nothing to clean up.
            pass

    def handle(self, *args, **options):
        """Run the browser until interrupted, then flag its worker as stopped."""
        show_gui: bool = options["headful"]
        worker_uuid: str = str(uuid.uuid4())
        try:
            launch_chrome_browser(worker_uuid, headless=not show_gui)
        except KeyboardInterrupt:
            logger.info("The chromeworker command has been stopped on purpose.")
        finally:
            # Runs on every exit path, including unexpected errors.
            self._stop_worker_record(worker_uuid)
from datetime import datetime
from django.db import models
class ChromeWorker(models.Model):
    """Represent a running Chrome instance that can be connected to.

    Note: The Chrome instance needs to run on our main host, so that
    it is reachable on the same machine handling associated
    requests.
    """

    uuid = models.CharField(max_length=64)
    """Result of uuid.uuid4() as a string, e.g. to associate file names."""
    pid = models.PositiveIntegerField()
    """The process ID of the associated active Chrome process."""
    hostname = models.CharField(max_length=128)
    """The host where the process runs on."""
    ws_endpoint_url = models.CharField(max_length=512)
    """The Chrome debugging protocol URL used to connect to this Chrome instance."""
    num_used = models.PositiveIntegerField(default=0)
    """Track how often calls to this worker were made."""
    stopped_at = models.DateTimeField(blank=True, null=True, default=None)
    """When stop() was called on this worker; None until then."""

    def __str__(self) -> str:
        return f"ChromeWorker uuid={self.uuid} pid={self.pid}"

    def stop(self):
        """Record the current time in ``stopped_at`` and save only that field."""
        # Imported locally so this block does not depend on a module-level
        # import being added elsewhere in the file.
        from django.utils import timezone

        # timezone.now() honours USE_TZ: aware when time zone support is
        # enabled, naive otherwise. A plain datetime.now() is always naive
        # and triggers Django's naive-datetime warning under USE_TZ=True.
        self.stopped_at = timezone.now()
        self.save(update_fields=["stopped_at"])
/**
* Launch a long-lived Chrome meant to be reused by other clients.
*
* It will write its remote URL and process ID to the given filepath for
* the caller to inspect.
*/
const fs = require('fs');
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({
headless: {% if headless %}'new'{% else %}false{% endif %},
executablePath: '{{ executable_path }}',
args: [
{% for arg in launch_arguments %}
'{{ arg }}',{% endfor %}
],
});
const remoteInfo = {
processId: browser.process().pid,
wsEndpoint: browser.wsEndpoint(),
};
fs.writeFileSync(
'{{ remote_info_filepath }}', JSON.stringify(remoteInfo, null, 2)
);
// Leave browser alive/open.
await browser.disconnect();
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment