rust-engine/Cargo.toml

@@ -3,7 +3,6 @@ name = "micro-expert-router"
 version = "0.1.0"
 edition = "2021"
 description = "MoE execution engine that streams experts from NVMe into RAM with O_DIRECT, an LRU expert cache, and a learned predictive prefetcher"
Copilot commented:
Removing the `license` field makes the crate fail `cargo publish` validation (Cargo requires `license` or `license-file`). If the intent is to keep the project unlicensed, consider adding `publish = false` under `[package]` to make that explicit and prevent accidental publish attempts.
Suggested change (rust-engine/Cargo.toml):

 version = "0.1.0"
 edition = "2021"
 description = "MoE execution engine that streams experts from NVMe into RAM with O_DIRECT, an LRU expert cache, and a learned predictive prefetcher"
+publish = false
 [dependencies]
 # Tokio drives the orchestration (scheduling, prefetch tasks, channels).
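For context on the two ways to satisfy that check, here is a minimal sketch of the `[package]` section with both alternatives spelled out. The SPDX expression shown is a hypothetical placeholder, not the project's actual license, which this diff does not specify:

```toml
[package]
name = "micro-expert-router"
version = "0.1.0"
edition = "2021"

# Option A: make the crate publishable again. Any SPDX expression (or a
# `license-file` path) satisfies `cargo publish` validation.
# The value below is only a placeholder for illustration.
# license = "MIT OR Apache-2.0"

# Option B: keep the crate unlicensed/private and say so explicitly, so
# `cargo publish` is refused locally instead of failing at the registry.
publish = false
```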
config.toml (comment on lines +73 to 80):
 # On-disk weight dtype: "f32" (4 B/weight, default, legacy), "f16"
 # (2 B/weight), "int8" (1 B/weight + per-tensor scales), "q4k" (Q4_K_M
 # 256-block, GGUF-compatible 4-bit), or "q4_0" (Q4_0 32-block, 4-bit).
 # Lower-precision dtypes halve/quarter SSD bytes per expert read, the
 # dominant energy term in this engine. Must match what
 # `extract_mixtral_experts.py` / `gguf-convert` / `gen-data` was invoked
 # with.
 dtype = "f32"
Copilot commented:
The sample value `dtype = "f32"` (and the surrounding comment listing lowercase strings) does not appear to match what the Rust config parser accepts: `ModelConfig.dtype` is `inference::WeightDtype` deserialized via `toml::from_str`, and `WeightDtype` has no serde renames/aliases, so it will deserialize from `"F32"`/`"F16"`/`"Int8"`/`"Q4K"`/`"Q4_0"` by default, not `"f32"`/`"q4k"`/etc. Either update this example to the serde variant spellings, or add serde `rename_all`/aliases for `WeightDtype` so the lowercase strings in config.toml work as documented.
Suggested change (config.toml):
 # Number of transformer layers. 1 for the legacy single-layer benchmark
 # data; 32 for full Mixtral-8x7B.
 num_layers = 1
-# On-disk weight dtype: "f32" (4 B/weight, default, legacy), "f16"
-# (2 B/weight), "int8" (1 B/weight + per-tensor scales), "q4k" (Q4_K_M
-# 256-block, GGUF-compatible 4-bit), or "q4_0" (Q4_0 32-block, 4-bit).
+# On-disk weight dtype: "F32" (4 B/weight, default, legacy), "F16"
+# (2 B/weight), "Int8" (1 B/weight + per-tensor scales), "Q4K" (Q4_K_M
+# 256-block, GGUF-compatible 4-bit), or "Q4_0" (Q4_0 32-block, 4-bit).
 # Lower-precision dtypes halve/quarter SSD bytes per expert read, the
 # dominant energy term in this engine. Must match what
 # `extract_mixtral_experts.py` / `gguf-convert` / `gen-data` was invoked
 # with.
-dtype = "f32"
+dtype = "F32"
 [storage]
 # LRU cache slots (per layer). The whole point of this engine is that
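The changeset above takes the first route (aligning the sample config with serde's default variant spellings). The second route, keeping the lowercase strings documented in config.toml, would mean annotating the enum itself. A rough sketch of that approach, assuming `WeightDtype` and `ModelConfig` are plain serde-derived types (the exact definitions in the `inference` module are not shown in this diff):

```rust
use serde::Deserialize;

/// On-disk weight dtype. `rename_all = "lowercase"` makes serde accept the
/// lowercase spellings documented in config.toml ("f32", "f16", "int8",
/// "q4k", "q4_0"), while the per-variant aliases keep the capitalized
/// default spellings working for any existing configs.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum WeightDtype {
    #[serde(alias = "F32")]
    F32,
    #[serde(alias = "F16")]
    F16,
    #[serde(alias = "Int8")]
    Int8,
    #[serde(alias = "Q4K")]
    Q4K,
    #[serde(alias = "Q4_0")]
    Q4_0,
}

#[derive(Debug, Deserialize)]
pub struct ModelConfig {
    pub num_layers: usize,
    pub dtype: WeightDtype,
}

fn main() -> Result<(), toml::de::Error> {
    // The lowercase spelling from the sample config now parses...
    let cfg: ModelConfig = toml::from_str(r#"
        num_layers = 1
        dtype = "q4k"
    "#)?;
    assert_eq!(cfg.dtype, WeightDtype::Q4K);

    // ...and the capitalized serde-default spelling still works via the alias.
    let cfg: ModelConfig = toml::from_str(r#"
        num_layers = 32
        dtype = "F16"
    "#)?;
    assert_eq!(cfg.dtype, WeightDtype::F16);
    Ok(())
}
```

Either fix works on its own; doing both (lowercase strings in the docs plus aliases on the enum) lets old and new config files keep parsing.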