Skip to content

Instantly share code, notes, and snippets.

@netlooker
Created May 7, 2025 12:35
Show Gist options
  • Save netlooker/df9513493a8a85169321f76e998e3e5c to your computer and use it in GitHub Desktop.
Save netlooker/df9513493a8a85169321f76e998e3e5c to your computer and use it in GitHub Desktop.
Nix Docling Serve
{
config,
lib,
pkgs,
...
}:
let
inherit (lib) types mkEnableOption mkOption mkIf mkPackageOption getExe optional optionalString maintainers;
cfg = config.services.docling-serve;
# Render an optional boolean as a string:
# null -> "", true -> "True", false -> "False".
boolToStringOrEmpty = flag:
  let
    # Lazily evaluated, so it is only forced when `flag` is non-null.
    rendered = if flag then "True" else "False";
  in
  if flag != null then rendered else "";
in
{
options = {
services.docling-serve = {
# Master switch: the module's `config` section is wrapped in `mkIf cfg.enable`.
enable = mkEnableOption "Docling Serve server";
# Package providing the server executable; looked up as "docling-serve" in pkgs.
package = mkPackageOption pkgs "docling-serve" { };
# Bind address for the HTTP listener; forwarded to the server's --host flag.
host = mkOption {
  description = ''
    The host address to which the Docling Serve server HTTP interface binds.
  '';
  type = types.str;
  example = "0.0.0.0";
  default = "127.0.0.1";
};
# TCP port for the HTTP listener; forwarded to the server's --port flag and
# reused for the firewall rule when openFirewall is enabled.
port = mkOption {
  description = ''
    The port on which the Docling Serve server listens.
  '';
  type = types.port;
  example = 11111;
  default = 5001;
};
# Location of pre-downloaded model artifacts. The service always receives
# DOCLING_ARTIFACTS_PATH: this value when set, otherwise "<stateDir>/models".
artifactsPath = mkOption {
  type = types.nullOr types.path;
  default = null; # null selects the "<stateDir>/models" fallback
  example = "/var/lib/docling-models";
  description = ''
    Path to pre-downloaded Docling model artifacts (including standard models and VLMs like SmolDocling).
    The service receives this path via the DOCLING_ARTIFACTS_PATH environment variable,
    which allows offline/air-gapped operation and pinning specific model versions.
    When left at `null`, DOCLING_ARTIFACTS_PATH instead points at the `models` subdirectory of
    {option}`services.docling-serve.stateDir` (e.g., /var/lib/docling-serve/models),
    which is writable by the service's dynamic user.
  '';
};
# Optional CPU thread cap; exported as OMP_NUM_THREADS when non-null.
ompNumThreads = mkOption {
  description = ''
    Limits the number of CPU threads used by Docling (sets OMP_NUM_THREADS).
  '';
  type = with types; nullOr ints.positive;
  example = 8;
  # Left unset by default; the original author notes Docling then uses 4
  # threads (not verified in this module).
  default = null;
};
# Optional log level; exported as DOCLING_LOG_LEVEL when non-null.
logLevel = mkOption {
  description = ''
    Sets the log level for the Docling Serve application (e.g., via DOCLING_LOG_LEVEL or a similar docling-serve specific variable).
  '';
  type = with types; nullOr (enum [
    "DEBUG"
    "INFO"
    "WARNING"
    "ERROR"
    "CRITICAL"
  ]);
  example = "DEBUG";
  # null leaves the decision to the application (presumably INFO or WARNING).
  default = null;
};
# Tri-state switch: null keeps the application's own default, true/false are
# exported as "True"/"False" in DOCLING_ENABLE_REMOTE_SERVICES.
enableRemoteServices = mkOption {
  description = ''
    Whether to allow Docling to use remote services for certain operations (e.g., OCR, LLMs via API options).
    Sets an environment variable like DOCLING_ENABLE_REMOTE_SERVICES.
    This is critical for data privacy and air-gapped environments.
  '';
  type = with types; nullOr bool;
  # null defers to the application (presumably disabled).
  default = null;
};
# Tri-state switch: null keeps the application's own default, true/false are
# exported as "True"/"False" in DOCLING_ALLOW_EXTERNAL_PLUGINS.
allowExternalPlugins = mkOption {
  description = ''
    Whether to allow loading of third-party Docling plugins.
    Sets an environment variable like DOCLING_ALLOW_EXTERNAL_PLUGINS.
  '';
  type = with types; nullOr bool;
  # null defers to the application (presumably disabled).
  default = null;
};
# Optional server-wide pipeline choice; exported as
# DOCLING_SERVE_DEFAULT_PIPELINE when non-null.
defaultPipeline = mkOption {
  description = ''
    Sets a server-wide default processing pipeline (e.g., "standard", "vlm").
    This would be passed via an environment variable like DOCLING_SERVE_DEFAULT_PIPELINE.
    API requests can typically override this.
    Consult docling-serve documentation for the exact environment variable name.
  '';
  # The accepted values are assumed to be the primary pipeline kinds.
  type = with types; nullOr (enum [
    "standard"
    "vlm"
  ]);
  # null keeps the server application's own default.
  default = null;
};
# Optional server-wide VLM choice; exported as
# DOCLING_SERVE_DEFAULT_VLM_MODEL when non-null.
defaultVlmModel = mkOption {
  description = ''
    Sets a server-wide default Vision Language Model (VLM) to use if the defaultPipeline is "vlm".
    This would be passed via an environment variable like DOCLING_SERVE_DEFAULT_VLM_MODEL.
    API requests can typically override this.
    Ensure the model artifacts for the specified VLM are available via 'artifactsPath'.
    Consult docling-serve documentation for the exact environment variable name.
  '';
  type = with types; nullOr str;
  # Other identifiers such as "smoldocling_mlx" are possible as well.
  example = "smoldocling";
  # null keeps the server application's own default.
  default = null;
};
# Free-form environment variables for the service, merged with the variables
# derived from the typed options of this module.
extraEnvironment = mkOption {
  type = types.attrsOf types.str;
  # Empty by default; e.g. DOCLING_SERVE_ENABLE_UI = "True" could be set here.
  default = { };
  # A real attribute set (not a string) so the generated documentation renders
  # the example as Nix code matching the option's type.
  example = {
    DOCLING_SERVE_ENABLE_UI = "False";
    DOCLING_SERVE_MAX_UPLOAD_SIZE_MB = "100";
  };
  description = ''
    Extra environment variables specifically for the Docling Serve application.
    These are merged with variables derived from other options.
    For a full list of Docling Serve environment variables, refer to its official documentation:
    <https://github.com/docling-project/docling-serve/blob/main/docs/configuration.md>
  '';
};
# Optional file handed to systemd's EnvironmentFile=; omitted when null.
environmentFile = mkOption {
  type = with types; nullOr path;
  default = null;
  example = "/var/lib/secrets/doclingServeSecrets";
  description = ''
    Path to an environment file for the systemd service.
    Useful for passing secrets (e.g., API keys for remote services if enabled)
    without making them world-readable in the Nix store.
  '';
};
# Opt-in firewall rule for the configured port; closed by default.
openFirewall = mkOption {
  description = ''
    Whether to open the firewall for Docling Serve.
    This adds `services.docling-serve.port` to `networking.firewall.allowedTCPPorts`.
  '';
  type = types.bool;
  default = false;
};
# Writable state location. It is the base for the default model directory
# ("<stateDir>/models") and for XDG_CACHE_HOME ("<stateDir>/cache").
stateDir = mkOption {
  type = types.path;
  default = "/var/lib/docling-serve";
  description = ''
    Directory for Docling Serve to store persistent state, including cached models
    if 'artifactsPath' is not explicitly set to another location or if the application
    needs other writable state.
    The directory is created by systemd's StateDirectory= mechanism, which only manages
    directories below /var/lib; use a path directly under /var/lib
    (for example /var/lib/docling-serve) so that it is created with appropriate
    permissions for the service user.
  '';
};
};
};
config = mkIf cfg.enable {
systemd.services.docling-serve = {
# Ordered after network.target and started as part of multi-user.target.
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
description = "Docling API Service";
# Environment of the unit: user-supplied extraEnvironment merged with the
# variables derived from the typed options. Derived variables win on conflict,
# but only when they actually carry a value; an unset (null/empty) typed option
# no longer deletes a variable the user provided through extraEnvironment.
environment =
  let
    # Keep only entries that carry a usable value (drops null and "").
    isSet = _name: value: value != null && value != "";

    # Variables computed from the module's typed options.
    derived = {
      # Explicit artifactsPath if given, otherwise a 'models' subdirectory of
      # stateDir so the dynamic service user has a writable location.
      DOCLING_ARTIFACTS_PATH =
        if cfg.artifactsPath != null then cfg.artifactsPath else "${cfg.stateDir}/models";
      OMP_NUM_THREADS = optionalString (cfg.ompNumThreads != null) (toString cfg.ompNumThreads);
      # Assumes docling-serve honours the standard docling variables below
      # (or has equivalents of its own) - confirm against its documentation.
      DOCLING_LOG_LEVEL = cfg.logLevel;
      DOCLING_ENABLE_REMOTE_SERVICES = boolToStringOrEmpty cfg.enableRemoteServices;
      DOCLING_ALLOW_EXTERNAL_PLUGINS = boolToStringOrEmpty cfg.allowExternalPlugins;
      # NOTE: these two variable names are placeholders; replace them with
      # the actual names if docling-serve defines them.
      DOCLING_SERVE_DEFAULT_PIPELINE = cfg.defaultPipeline;
      DOCLING_SERVE_DEFAULT_VLM_MODEL = cfg.defaultVlmModel;
      # Directs general application caching into stateDir, provided the
      # application respects XDG_CACHE_HOME.
      XDG_CACHE_HOME = "${cfg.stateDir}/cache";
    };
  in
  (lib.filterAttrs isSet cfg.extraEnvironment) // (lib.filterAttrs isSet derived);
# systemd [Service] settings: command line, dynamic user with managed
# state/cache directories, and sandboxing.
serviceConfig =
  let
    # Absolute state directory without a trailing slash.
    stateDirPath = lib.removeSuffix "/" (toString cfg.stateDir);

    # StateDirectory= expects a path relative to /var/lib. Using the full
    # relative path (instead of only the last component) keeps the directory
    # systemd creates identical to cfg.stateDir for nested locations such as
    # /var/lib/apps/docling. Paths outside /var/lib keep the previous
    # last-component behaviour; systemd cannot provision those itself.
    stateDirName =
      if lib.hasPrefix "/var/lib/" stateDirPath then
        lib.removePrefix "/var/lib/" stateDirPath
      else
        baseNameOf stateDirPath;

    # True when the user pointed artifactsPath at a Nix store path.
    artifactsInStore = cfg.artifactsPath != null && lib.hasPrefix "/nix/store" cfg.artifactsPath;
  in
  {
    ExecStart = "${getExe cfg.package} run --host \"${cfg.host}\" --port ${toString cfg.port}";
    EnvironmentFile = optional (cfg.environmentFile != null) cfg.environmentFile;

    # User and state
    DynamicUser = true;
    StateDirectory = stateDirName; # systemd creates /var/lib/<StateDirectory>
    StateDirectoryMode = "0750";
    CacheDirectory = baseNameOf cfg.stateDir; # systemd creates /var/cache/<CacheDirectory>
    CacheDirectoryMode = "0750";
    # systemd only creates the top-level directories above; the 'models' and
    # 'cache' subdirectories referenced by DOCLING_ARTIFACTS_PATH and
    # XDG_CACHE_HOME must be created by the application or beforehand.

    # Security hardening
    PrivateTmp = true;
    ProtectSystem = "strict";
    ProtectHome = true;
    DevicePolicy = "closed";
    LockPersonality = true;
    NoNewPrivileges = true;
    PrivateUsers = true;
    ProtectHostname = true;
    ProtectKernelLogs = true;
    ProtectKernelModules = true;
    ProtectKernelTunables = true;
    ProtectControlGroups = true;
    RestrictAddressFamilies = [ "AF_INET" "AF_INET6" "AF_UNIX" ];
    RestrictNamespaces = true;
    RestrictRealtime = true;
    SystemCallArchitectures = "native";
    SystemCallFilter = [ "@system-service" ];
    UMask = "0077";
    CapabilityBoundingSet = "";
    AmbientCapabilities = "";

    # The service needs write access to its state directory, which also hosts
    # the default model location. A user-supplied artifactsPath is expected to
    # hold pre-downloaded models and is therefore not made writable.
    ReadWritePaths = [ cfg.stateDir ];
    # Only a Nix store artifactsPath is listed explicitly; any other
    # user-managed path is assumed to be readable by the service already.
    ReadOnlyPaths = optional artifactsInStore cfg.artifactsPath;

    ProtectClock = true;
    ProtectProc = "invisible";
    ProcSubset = "pid";
    StandardOutput = "journal";
    StandardError = "journal";
  };
};
# Open the service port in the firewall only when explicitly requested.
networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [ cfg.port ];
};
# Maintainers of this module, taken from lib.maintainers.
meta.maintainers = [ maintainers.drupol ];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment