James O'Doherty jodoherty

Enable larger GTT to fit models into memory.

options ttm pages_limit=31457280
options ttm page_pool_size=15728640

Download and stage the Gemma 4 model variants into local directories for llama-server.

mkdir -p /srv/models/{gemma-4-26B-A4B-it-GGUF,gemma-4-E2B-it-GGUF,gemma-4-31B-it-GGUF}

WARNING: This is intended only for headless machines: the Framework Desktop and other Ryzen AI Max+ 395 systems with 128GB of memory. When I tried it on my ASUS ROG Z13 with KDE running, it hard-crashed the system. If you run LLMs on a machine with a desktop environment, consider running the llama.cpp server with the Vulkan backend instead.

First you have to set up your Framework Desktop to allow a large amount of GTT memory.

This was tested with the following modprobe.conf settings:

# Maximize GTT for LLM usage on 128GB UMA system
options amdgpu gttsize=120000
options ttm pages_limit=31457280

	//
	// Rule.swift
	//
	// Created by James O'Doherty on 3/27/23.
	//

	import Foundation

	struct Rule: Codable, Identifiable {
	var id: String

	{
	"title": "PC Keys for terminals/VMs",
	"rules": [
	{
	"description": "Swap around modifiers",
	"manipulators": [
	{
	"type": "basic",
	"from": {
	"key_code": "left_command",

	#!/usr/sbin/nft -f

	flush ruleset

	table inet filter {
	chain input {
	type filter hook input priority 0;

	# accept any localhost traffic
	iif lo accept

	UXTerm.termName: xterm-256color

	!UXTerm*font: -misc-fixed-medium-r-semicondensed-*-13-*-*-*-*-*-iso10646-1
	UXTerm*font: -gnu-unifont-medium-r-normal-*-16-*-*-*-*-*-iso10646-1

	!UXTerm*reverseVideo: true
	UXTerm*loginShell: true
	UXTerm*visualBell: true
	UXTerm*visualBellLine: true
	UXTerm*altSendsEscape: true

	"""
	Prefect extra loggers with threading example.

	Run it like this:

	PREFECT_LOGGING_EXTRA_LOGGERS=__main__ PREFECT_API_URL=http://127.0.0.1:4200/api python main.py

	You should see the plain Python logging for the '__main__' package in the
	Prefect UI.

	evdev:atkbd:dmi:bvn*:bvr*:bd*:svnLENOVO*:pn*:pvrThinkPad*
	KEYBOARD_KEY_b7=rightmeta
	KEYBOARD_KEY_3a=leftctrl

	#!/bin/sh
	# Settings for a llama.cpp server image (only the variable assignments are
	# visible in this excerpt; the command that consumes them is not shown).

	# Backend tag for the image. Assignments run top to bottom, so only the
	# last one (vulkan) is in effect; the earlier lines are overridden.
	type=cuda
	type=rocm
	type=vulkan
	# Image reference with the backend tag selected above substituted in.
	image=ghcr.io/ggml-org/llama.cpp:server-$type
	model=gemma-4-26B-A4B-it
	# Presumably the quantization variant of the model (TODO confirm against
	# the rest of the script). As with type, only the last assignment
	# (UD-Q8_K_XL) is in effect.
	q=UD-Q4_K_XL
	q=MXFP4_MOE
	q=UD-Q8_K_XL

	#!/bin/sh

	# Assignments run top to bottom, so only the last one (rocm) is in
	# effect; the cuda line is overridden.
	type=cuda
	type=rocm
	model=google/gemma-4-26B-A4B-it
	# NOTE(review): the meaning of u is not visible in this excerpt; confirm
	# where it is consumed.
	u=0.9

	# When the script is invoked with no arguments, replace the (empty)
	# positional parameters with the defaults: -d --restart=unless-stopped.
	if [ "$#" -eq 0 ]; then
	set -- -d --restart=unless-stopped
	fi