esz135888 · May 24, 2026 02:36
diff --git a/acceptance-tests.md b/acceptance-tests.md
diff --git a/artifact-url-or-pr.md b/artifact-url-or-pr.md
diff --git a/data-model.md b/data-model.md
diff --git a/decision-record.md b/decision-record.md
diff --git a/e2e-verification.md b/e2e-verification.md
diff --git a/learning-memory.json b/learning-memory.json
 {
  "project": "Tokenmaxxing Value Metric Gate",
  "market_learning": [
    "Visible AI usage leaderboards can create perverse incentives and inflate token consumption without real productivity.",
    "Mature measurement practice uses balanced outcome and reliability metrics rather than single activity proxies.",
    "Token cost is still useful as diagnostic data, but it must be separated from performance or adoption ranking."
  ],
  "pls_next_checks": [
    "Does any AI adoption dashboard expose individual token or prompt rankings?",
    "Can HR/performance workflows select diagnostic-only metrics?",
    "Does every AI value metric have baseline, formula, owner, evidence and anti-gaming rule?",
    "Are token cost spikes compared with workflow outcome movement?"
  ],
  "assumptions": [
    "PLS can add a metric registry and gate review workflow to the Operating Console.",
    "Workflow owners can provide outcome evidence for at least three pilot workflows by D7."
  ],
  "next_iteration": "Build Operating Console UI selector and worker gaming detector for blocked metric enforcement."
 }
diff --git a/market-maturity.md b/market-maturity.md
diff --git a/people-sync.md b/people-sync.md
diff --git a/production-brief.md b/production-brief.md
diff --git a/production-readiness.md b/production-readiness.md
diff --git a/skill-usage.md b/skill-usage.md
diff --git a/solution-selection.md b/solution-selection.md
diff --git a/tokenmaxxing-value-metric-gate.html b/tokenmaxxing-value-metric-gate.html
 <!doctype html>
 <html lang="zh-Hant">
 <head>
  <!-- Document metadata: encoding, responsive viewport and page title. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Tokenmaxxing Value Metric Gate</title>
  <style>
    /* Colour tokens shared by every rule below. */
    :root {
      --ink: #14212b;
      --muted: #5f6f7f;
      --line: #d8e2eb;
      --paper: #f7f9fc;
      --card: #fff;
      --blue: #2255c7;
      --green: #08785e;
      --amber: #9a6200;
      --red: #b13a22;
      --violet: #6d3cc2;
    }

    /* Page frame. */
    * { box-sizing: border-box; }
    body { margin: 0; background: var(--paper); color: var(--ink); font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; line-height: 1.5; }
    header { background: #fff; border-bottom: 1px solid var(--line); padding: 30px clamp(18px, 4vw, 56px); }
    main { padding: 24px clamp(18px, 4vw, 56px) 48px; }
    .grid { display: grid; gap: 16px; }

    /* Typography. */
    h1 { margin: 0 0 12px; font-size: clamp(31px, 4vw, 54px); line-height: 1.04; letter-spacing: 0; max-width: 1120px; }
    h2 { margin: 0 0 12px; font-size: 22px; }
    h3 { margin: 0 0 6px; font-size: 16px; }
    p { margin-top: 0; }
    code { background: #eef3f8; border-radius: 4px; padding: 1px 5px; }
    .sub { max-width: 1120px; color: var(--muted); font-size: 17px; }

    /* Grid column presets; all collapse to one column in the media query at the end. */
    .kpis { grid-template-columns: repeat(4, minmax(0, 1fr)); margin-top: 22px; }
    .two { grid-template-columns: 1.05fr .95fr; }
    .three { grid-template-columns: repeat(3, minmax(0, 1fr)); }
    .four { grid-template-columns: repeat(4, minmax(0, 1fr)); }
    .flow { grid-template-columns: repeat(5, minmax(0, 1fr)); }

    /* Cards, headline metrics and pills. */
    .card { background: var(--card); border: 1px solid var(--line); border-radius: 8px; padding: 18px; box-shadow: 0 1px 2px rgba(20, 33, 43, .04); }
    .metric { font-size: 34px; font-weight: 780; }
    .label { color: var(--muted); font-size: 13px; }
    .pill { display: inline-flex; border: 1px solid var(--line); border-radius: 999px; padding: 4px 10px; font-size: 12px; background: #fff; margin: 0 6px 8px 0; white-space: nowrap; }

    /* Status text colours and card accent borders. */
    .ok { color: var(--green); }
    .warn { color: var(--amber); }
    .bad { color: var(--red); }
    .info { color: var(--blue); }
    .gate { border-left: 4px solid var(--blue); }
    .blocked { border-left: 4px solid var(--red); }
    .allowed { border-left: 4px solid var(--green); }

    /* Rules table. */
    table { width: 100%; border-collapse: collapse; font-size: 14px; }
    th, td { text-align: left; padding: 10px; border-bottom: 1px solid var(--line); vertical-align: top; }
    th { color: var(--muted); font-size: 12px; text-transform: uppercase; }
    .badcell { color: var(--red); font-weight: 730; }
    .goodcell { color: var(--green); font-weight: 730; }
    .warncell { color: var(--amber); font-weight: 730; }

    /* Flow steps and source links. */
    .step { border: 1px solid var(--line); border-radius: 8px; padding: 12px; min-height: 132px; background: #fbfdff; }
    .step strong { display: block; color: var(--violet); margin-bottom: 6px; }
    .source a { color: var(--blue); word-break: break-word; }

    /* Narrow viewports: stack every grid and shrink the title. */
    @media (max-width: 960px) {
      .kpis, .two, .three, .four, .flow { grid-template-columns: 1fr; }
      h1 { font-size: 34px; }
    }
  </style>
 </head>
 <body>
  <header>
    <!-- Page banner: status pills, title, summary paragraph and four headline KPI cards.
         The pills and title are English inside a zh-Hant document, so they carry lang="en". -->
    <span class="pill info" lang="en">PLS production delivery iteration</span><span class="pill ok" lang="en">Solution: governance / eval / system spec</span>
    <h1 lang="en">Tokenmaxxing Value Metric Gate</h1>
    <p class="sub">把「不要用 Token 消耗量衡量 AI 效益」升級成可落地的指標准入系統：任何 AI usage 指標進入績效、預算或採用榜單前，必須通過 outcome linkage、anti-gaming、資料可信度、權限稽核與例外審批。</p>
    <!-- The KPI strip has no heading of its own, so it is a plain grid wrapper rather than a section element. -->
    <div class="grid kpis">
      <div class="card"><div class="metric bad">BLOCK</div><div class="label">Token / prompt / leaderboard 不得進個人績效</div></div>
      <div class="card"><div class="metric ok">PASS</div><div class="label">任務完成、時間節省、品質、客戶結果</div></div>
      <div class="card"><div class="metric warn">3</div><div class="label">異常訊號：cost spike、no outcome lift、automation loop</div></div>
      <div class="card"><div class="metric">D30</div><div class="label">接入 PLS Operating Console / worker eval</div></div>
    </div>
  </header>
  <main class="grid">
    <!-- Two-column intro row. The wrapper has no heading, so it is a plain grid container;
         each card opens with its own h2 and is therefore the section. -->
    <div class="grid two">
      <section class="card gate">
        <h2>本輪 production 任務</h2>
        <p>上一版已定義 value scorecard；本輪把它推進成「metric gate」：管理者不能直接把 AI 使用量放進排行或獎懲，必須先證明該指標和真實 workflow outcome 有關，而且有反作弊、權限、稽核、例外與回滾。</p>
        <span class="pill">Owner: AI governance owner</span><span class="pill">Due: D7 metric gate pilot</span><span class="pill">Acceptance: blocked metric cannot be selected</span>
      </section>
      <section class="card">
        <h2>人會拿它做什麼</h2>
        <p>主管提交 AI adoption metric 時，系統判斷是 <strong>blocked</strong>、<strong>diagnostic-only</strong> 或 <strong>approved outcome metric</strong>；員工看到的不是「誰燒最多 Token」，而是「哪個 workflow 真的省時、提質、降風險」。</p>
      </section>
    </div>

    <!-- Rollout timeline: one card per milestone (D1, D7, D14, D30). -->
    <section class="card">
      <h2>D1 / D7 / D14 / D30</h2>
      <div class="grid four">
        <div class="card gate">
          <h3>D1</h3>
          <p>建立 blocked metric registry：Token consumption、prompt count、usage leaderboard 預設不得用於績效。</p>
        </div>
        <div class="card gate">
          <h3>D7</h3>
          <p>完成 metric gate pilot：3 個 AI workflow 都需有 baseline、outcome、quality rubric 與 owner。</p>
        </div>
        <div class="card gate">
          <h3>D14</h3>
          <p>加入 gaming detector：高 token 成本但 outcome 無提升、重複自動化 loop、低品質高用量。</p>
        </div>
        <div class="card gate">
          <h3>D30</h3>
          <p>接入 PLS Operating Console，將 blocked metric selector、exception audit、eval report 寫進後台流程。</p>
        </div>
      </div>
    </section>

    <!-- Five-step flow from the original purpose through to the money path. -->
    <section class="card">
      <h2>Purpose-to-Purpose E2E</h2>
      <div class="grid flow">
        <div class="step">
          <strong>原始目的</strong>避免 Tokenmaxxing 讓 AI 採用指標失真。
        </div>
        <div class="step">
          <strong>產出物</strong>Metric gate、schema、API、驗收、people sync、decision record。
        </div>
        <div class="step">
          <strong>人採用</strong>主管用 outcome scorecard 決策，員工不用為了榜單刷用量。
        </div>
        <div class="step">
          <strong>指標改善</strong>AI 成本與完成率、品質、滿意度、風險下降連動。
        </div>
        <div class="step">
          <strong>錢路徑</strong>停掉高成本低價值用法，把預算導向能省工時或創收的 workflow。
        </div>
      </div>
    </section>

    <!-- Rules table and production stack, side by side. The wrapper has no heading, so it is a
         plain grid container; each card opens with its own h2 and is the section. -->
    <div class="grid two">
      <section class="card">
        <h2 id="metric-gate-rules-heading">Metric Gate Rules</h2>
        <!-- The table takes its accessible name from the heading above; scope="col" ties each
             header cell to its column for assistive technology. -->
        <table aria-labelledby="metric-gate-rules-heading">
          <thead><tr><th scope="col">指標</th><th scope="col">狀態</th><th scope="col">准入規則</th></tr></thead>
          <tbody>
            <tr><td>Token consumption</td><td class="badcell">Blocked</td><td>只能作成本診斷，不得進個人績效、排行、獎金、裁員篩選。</td></tr>
            <tr><td>Prompt count</td><td class="badcell">Blocked</td><td>可作 troubleshooting，不得當 productivity proxy。</td></tr>
            <tr><td>Tool active days</td><td class="warncell">Diagnostic-only</td><td>只代表 adoption exposure，需搭配 outcome 才能進 scorecard。</td></tr>
            <tr><td>Task completion rate</td><td class="goodcell">Approved</td><td>需定義 done、review sample、baseline、例外處理。</td></tr>
            <tr><td>Validated time saved</td><td class="goodcell">Approved</td><td>需 workflow baseline、抽樣驗證、不可只自報。</td></tr>
            <tr><td>Quality / defect rate</td><td class="goodcell">Approved</td><td>需 rubric、reviewer、客戶或內部品質證據。</td></tr>
          </tbody>
        </table>
      </section>
      <section class="card">
        <h2>Production Stack</h2>
        <p><strong>資料表：</strong><code>metric_registry</code>, <code>metric_gate_reviews</code>, <code>workflow_outcome_snapshots</code>, <code>token_cost_diagnostics</code>, <code>gaming_signals</code>, <code>metric_exceptions</code>, <code>audit_events</code>.</p>
        <p><strong>API：</strong><code>POST /metric-gate/proposals</code>, <code>POST /metric-gate/reviews</code>, <code>GET /metric-gate/scorecard</code>, <code>POST /metric-gate/exceptions</code>.</p>
        <p><strong>權限：</strong>team owner 可提案；governance owner 可批准；finance 只看成本診斷；HR/performance admin 不可選 blocked metric。</p>
        <p><strong>稽核：</strong>每次 metric status change、exception approval、scorecard export 都寫入 <code>audit_events</code>。</p>
      </section>
    </div>

    <!-- Three-column value row. The wrapper has no heading, so it is a plain grid container;
         each card opens with its own h2 and is the section. -->
    <div class="grid three">
      <section class="card allowed"><h2>價值 / 錢路徑</h2><p>防止 AI 預算被「活動量」吞掉，將成本連到完成率、節省時間、缺陷下降、客戶滿意與收入或風險減少。</p></section>
      <section class="card allowed"><h2>人的能力提升</h2><p>管理者學會設計不可被輕易 gaming 的 AI 指標；員工知道 AI 採用是為了更好的成果，不是燒 Token。</p></section>
      <section class="card allowed"><h2>下一輪升級</h2><p>把 metric gate 做進 Operating Console UI：指標提案、blocked selector、exception approval、gaming alert。</p></section>
    </div>

    <!-- External references backing the policy. Each paragraph's text run stays on one line so
         inline spacing around the links is unchanged. -->
    <section class="card source">
      <h2>外部市場成熟度輸入</h2>
      <p>
        TechRadar 報導 Amazon 員工因 AI usage leaderboard 與 token 追蹤出現 tokenmaxxing 行為，顯示「用量排行」會產生錯誤誘因：<a href="https://www.techradar.com/pro/amazon-workers-are-apparently-tokenmaxxing-ai-platforms-to-hit-arbitrary-usage-targets">TechRadar, 2026-05-14</a>.
      </p>
      <p>
        Indeed 的 AI productivity 觀點主張應看 business outcomes，而不是 usage 或 token counts 這類 proxy：<a href="https://www.indeed.com/news/releases/measuring-ai-productivity-business-outcomes?co=US">Indeed, 2026</a>.
      </p>
      <p>
        SPACE framework 與 DORA metrics 是可比成熟做法：平衡多維工程效能與交付可靠性，而不是單一活動量指標。<a href="https://space-framework.com/">SPACE</a> / <a href="https://dora.dev/guides/dora-metrics/">DORA</a>
      </p>
    </section>
  </main>
 </body>
 </html>
Field	Type	Notes
id	uuid	Primary key
metric_key	text	token_consumption, task_completion_rate
metric_name	text	Human-readable name
policy_status	enum	blocked, diagnostic_only, approved, pending
allowed_use	text[]	cost_diagnostic, workflow_scorecard, executive_report
prohibited_use	text[]	performance_rank, bonus, layoff_screen, individual_leaderboard
rationale	text	Why this policy exists
owner_id	uuid	Governance owner
updated_at	timestamptz	Audit timestamp
Field	Type	Notes
id	uuid	Primary key
proposed_metric_key	text	Candidate metric
proposed_by	uuid	Team owner
workflow_id	uuid	Related workflow
outcome_linkage_score	numeric	0-100
gaming_risk_score	numeric	0-100
data_quality_score	numeric	0-100
decision	enum	approve, reject, diagnostic_only, needs_revision
decision_reason	text	Required
approved_by	uuid	Governance owner
	{
	"project": "Tokenmaxxing Value Metric Gate",
	"market_learning": [
	"Visible AI usage leaderboards can create perverse incentives and inflate token consumption without real productivity.",
	"Mature measurement practice uses balanced outcome and reliability metrics rather than single activity proxies.",
	"Token cost is still useful as diagnostic data, but it must be separated from performance or adoption ranking."
	],
	"pls_next_checks": [
	"Does any AI adoption dashboard expose individual token or prompt rankings?",
	"Can HR/performance workflows select diagnostic-only metrics?",
	"Does every AI value metric have baseline, formula, owner, evidence and anti-gaming rule?",
	"Are token cost spikes compared with workflow outcome movement?"
	],
	"assumptions": [
	"PLS can add a metric registry and gate review workflow to the Operating Console.",
	"Workflow owners can provide outcome evidence for at least three pilot workflows by D7."
	],
	"next_iteration": "Build Operating Console UI selector and worker gaming detector for blocked metric enforcement."
	}
	<!doctype html>
	<html lang="zh-Hant">
	<head>
	<!-- Document metadata: encoding, responsive viewport and page title. -->
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width, initial-scale=1">
	<title>Tokenmaxxing Value Metric Gate</title>
	<style>
	/* Colour tokens shared by every rule below. */
	:root {
	  --ink: #14212b;
	  --muted: #5f6f7f;
	  --line: #d8e2eb;
	  --paper: #f7f9fc;
	  --card: #fff;
	  --blue: #2255c7;
	  --green: #08785e;
	  --amber: #9a6200;
	  --red: #b13a22;
	  --violet: #6d3cc2;
	}

	/* Page frame. */
	* { box-sizing: border-box; }
	body { margin: 0; background: var(--paper); color: var(--ink); font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; line-height: 1.5; }
	header { background: #fff; border-bottom: 1px solid var(--line); padding: 30px clamp(18px, 4vw, 56px); }
	main { padding: 24px clamp(18px, 4vw, 56px) 48px; }
	.grid { display: grid; gap: 16px; }

	/* Typography. */
	h1 { margin: 0 0 12px; font-size: clamp(31px, 4vw, 54px); line-height: 1.04; letter-spacing: 0; max-width: 1120px; }
	h2 { margin: 0 0 12px; font-size: 22px; }
	h3 { margin: 0 0 6px; font-size: 16px; }
	p { margin-top: 0; }
	code { background: #eef3f8; border-radius: 4px; padding: 1px 5px; }
	.sub { max-width: 1120px; color: var(--muted); font-size: 17px; }

	/* Grid column presets; all collapse to one column in the media query at the end. */
	.kpis { grid-template-columns: repeat(4, minmax(0, 1fr)); margin-top: 22px; }
	.two { grid-template-columns: 1.05fr .95fr; }
	.three { grid-template-columns: repeat(3, minmax(0, 1fr)); }
	.four { grid-template-columns: repeat(4, minmax(0, 1fr)); }
	.flow { grid-template-columns: repeat(5, minmax(0, 1fr)); }

	/* Cards, headline metrics and pills. */
	.card { background: var(--card); border: 1px solid var(--line); border-radius: 8px; padding: 18px; box-shadow: 0 1px 2px rgba(20, 33, 43, .04); }
	.metric { font-size: 34px; font-weight: 780; }
	.label { color: var(--muted); font-size: 13px; }
	.pill { display: inline-flex; border: 1px solid var(--line); border-radius: 999px; padding: 4px 10px; font-size: 12px; background: #fff; margin: 0 6px 8px 0; white-space: nowrap; }

	/* Status text colours and card accent borders. */
	.ok { color: var(--green); }
	.warn { color: var(--amber); }
	.bad { color: var(--red); }
	.info { color: var(--blue); }
	.gate { border-left: 4px solid var(--blue); }
	.blocked { border-left: 4px solid var(--red); }
	.allowed { border-left: 4px solid var(--green); }

	/* Rules table. */
	table { width: 100%; border-collapse: collapse; font-size: 14px; }
	th, td { text-align: left; padding: 10px; border-bottom: 1px solid var(--line); vertical-align: top; }
	th { color: var(--muted); font-size: 12px; text-transform: uppercase; }
	.badcell { color: var(--red); font-weight: 730; }
	.goodcell { color: var(--green); font-weight: 730; }
	.warncell { color: var(--amber); font-weight: 730; }

	/* Flow steps and source links. */
	.step { border: 1px solid var(--line); border-radius: 8px; padding: 12px; min-height: 132px; background: #fbfdff; }
	.step strong { display: block; color: var(--violet); margin-bottom: 6px; }
	.source a { color: var(--blue); word-break: break-word; }

	/* Narrow viewports: stack every grid and shrink the title. */
	@media (max-width: 960px) {
	  .kpis, .two, .three, .four, .flow { grid-template-columns: 1fr; }
	  h1 { font-size: 34px; }
	}
	</style>
	</head>
	<body>
	<header>
	<!-- Page banner: status pills, title, summary paragraph and four headline KPI cards.
	     The pills and title are English inside a zh-Hant document, so they carry lang="en". -->
	<span class="pill info" lang="en">PLS production delivery iteration</span><span class="pill ok" lang="en">Solution: governance / eval / system spec</span>
	<h1 lang="en">Tokenmaxxing Value Metric Gate</h1>
	<p class="sub">把「不要用 Token 消耗量衡量 AI 效益」升級成可落地的指標准入系統：任何 AI usage 指標進入績效、預算或採用榜單前，必須通過 outcome linkage、anti-gaming、資料可信度、權限稽核與例外審批。</p>
	<!-- The KPI strip has no heading of its own, so it is a plain grid wrapper rather than a section element. -->
	<div class="grid kpis">
	<div class="card"><div class="metric bad">BLOCK</div><div class="label">Token / prompt / leaderboard 不得進個人績效</div></div>
	<div class="card"><div class="metric ok">PASS</div><div class="label">任務完成、時間節省、品質、客戶結果</div></div>
	<div class="card"><div class="metric warn">3</div><div class="label">異常訊號：cost spike、no outcome lift、automation loop</div></div>
	<div class="card"><div class="metric">D30</div><div class="label">接入 PLS Operating Console / worker eval</div></div>
	</div>
	</header>
	<main class="grid">
	<!-- Two-column intro row. The wrapper has no heading, so it is a plain grid container;
	     each card opens with its own h2 and is therefore the section. -->
	<div class="grid two">
	<section class="card gate">
	<h2>本輪 production 任務</h2>
	<p>上一版已定義 value scorecard；本輪把它推進成「metric gate」：管理者不能直接把 AI 使用量放進排行或獎懲，必須先證明該指標和真實 workflow outcome 有關，而且有反作弊、權限、稽核、例外與回滾。</p>
	<span class="pill">Owner: AI governance owner</span><span class="pill">Due: D7 metric gate pilot</span><span class="pill">Acceptance: blocked metric cannot be selected</span>
	</section>
	<section class="card">
	<h2>人會拿它做什麼</h2>
	<p>主管提交 AI adoption metric 時，系統判斷是 <strong>blocked</strong>、<strong>diagnostic-only</strong> 或 <strong>approved outcome metric</strong>；員工看到的不是「誰燒最多 Token」，而是「哪個 workflow 真的省時、提質、降風險」。</p>
	</section>
	</div>

	<!-- Rollout timeline: one card per milestone (D1, D7, D14, D30). -->
	<section class="card">
	  <h2>D1 / D7 / D14 / D30</h2>
	  <div class="grid four">
	    <div class="card gate">
	      <h3>D1</h3>
	      <p>建立 blocked metric registry：Token consumption、prompt count、usage leaderboard 預設不得用於績效。</p>
	    </div>
	    <div class="card gate">
	      <h3>D7</h3>
	      <p>完成 metric gate pilot：3 個 AI workflow 都需有 baseline、outcome、quality rubric 與 owner。</p>
	    </div>
	    <div class="card gate">
	      <h3>D14</h3>
	      <p>加入 gaming detector：高 token 成本但 outcome 無提升、重複自動化 loop、低品質高用量。</p>
	    </div>
	    <div class="card gate">
	      <h3>D30</h3>
	      <p>接入 PLS Operating Console，將 blocked metric selector、exception audit、eval report 寫進後台流程。</p>
	    </div>
	  </div>
	</section>

	<!-- Five-step flow from the original purpose through to the money path. -->
	<section class="card">
	  <h2>Purpose-to-Purpose E2E</h2>
	  <div class="grid flow">
	    <div class="step">
	      <strong>原始目的</strong>避免 Tokenmaxxing 讓 AI 採用指標失真。
	    </div>
	    <div class="step">
	      <strong>產出物</strong>Metric gate、schema、API、驗收、people sync、decision record。
	    </div>
	    <div class="step">
	      <strong>人採用</strong>主管用 outcome scorecard 決策，員工不用為了榜單刷用量。
	    </div>
	    <div class="step">
	      <strong>指標改善</strong>AI 成本與完成率、品質、滿意度、風險下降連動。
	    </div>
	    <div class="step">
	      <strong>錢路徑</strong>停掉高成本低價值用法，把預算導向能省工時或創收的 workflow。
	    </div>
	  </div>
	</section>

	<!-- Rules table and production stack, side by side. The wrapper has no heading, so it is a
	     plain grid container; each card opens with its own h2 and is the section. -->
	<div class="grid two">
	<section class="card">
	<h2 id="metric-gate-rules-heading">Metric Gate Rules</h2>
	<!-- The table takes its accessible name from the heading above; scope="col" ties each
	     header cell to its column for assistive technology. -->
	<table aria-labelledby="metric-gate-rules-heading">
	<thead><tr><th scope="col">指標</th><th scope="col">狀態</th><th scope="col">准入規則</th></tr></thead>
	<tbody>
	<tr><td>Token consumption</td><td class="badcell">Blocked</td><td>只能作成本診斷，不得進個人績效、排行、獎金、裁員篩選。</td></tr>
	<tr><td>Prompt count</td><td class="badcell">Blocked</td><td>可作 troubleshooting，不得當 productivity proxy。</td></tr>
	<tr><td>Tool active days</td><td class="warncell">Diagnostic-only</td><td>只代表 adoption exposure，需搭配 outcome 才能進 scorecard。</td></tr>
	<tr><td>Task completion rate</td><td class="goodcell">Approved</td><td>需定義 done、review sample、baseline、例外處理。</td></tr>
	<tr><td>Validated time saved</td><td class="goodcell">Approved</td><td>需 workflow baseline、抽樣驗證、不可只自報。</td></tr>
	<tr><td>Quality / defect rate</td><td class="goodcell">Approved</td><td>需 rubric、reviewer、客戶或內部品質證據。</td></tr>
	</tbody>
	</table>
	</section>
	<section class="card">
	<h2>Production Stack</h2>
	<p><strong>資料表：</strong><code>metric_registry</code>, <code>metric_gate_reviews</code>, <code>workflow_outcome_snapshots</code>, <code>token_cost_diagnostics</code>, <code>gaming_signals</code>, <code>metric_exceptions</code>, <code>audit_events</code>.</p>
	<p><strong>API：</strong><code>POST /metric-gate/proposals</code>, <code>POST /metric-gate/reviews</code>, <code>GET /metric-gate/scorecard</code>, <code>POST /metric-gate/exceptions</code>.</p>
	<p><strong>權限：</strong>team owner 可提案；governance owner 可批准；finance 只看成本診斷；HR/performance admin 不可選 blocked metric。</p>
	<p><strong>稽核：</strong>每次 metric status change、exception approval、scorecard export 都寫入 <code>audit_events</code>。</p>
	</section>
	</div>

	<!-- Three-column value row. The wrapper has no heading, so it is a plain grid container;
	     each card opens with its own h2 and is the section. -->
	<div class="grid three">
	<section class="card allowed"><h2>價值 / 錢路徑</h2><p>防止 AI 預算被「活動量」吞掉，將成本連到完成率、節省時間、缺陷下降、客戶滿意與收入或風險減少。</p></section>
	<section class="card allowed"><h2>人的能力提升</h2><p>管理者學會設計不可被輕易 gaming 的 AI 指標；員工知道 AI 採用是為了更好的成果，不是燒 Token。</p></section>
	<section class="card allowed"><h2>下一輪升級</h2><p>把 metric gate 做進 Operating Console UI：指標提案、blocked selector、exception approval、gaming alert。</p></section>
	</div>

	<!-- External references backing the policy. Each paragraph's text run stays on one line so
	     inline spacing around the links is unchanged. -->
	<section class="card source">
	  <h2>外部市場成熟度輸入</h2>
	  <p>
	    TechRadar 報導 Amazon 員工因 AI usage leaderboard 與 token 追蹤出現 tokenmaxxing 行為，顯示「用量排行」會產生錯誤誘因：<a href="https://www.techradar.com/pro/amazon-workers-are-apparently-tokenmaxxing-ai-platforms-to-hit-arbitrary-usage-targets">TechRadar, 2026-05-14</a>.
	  </p>
	  <p>
	    Indeed 的 AI productivity 觀點主張應看 business outcomes，而不是 usage 或 token counts 這類 proxy：<a href="https://www.indeed.com/news/releases/measuring-ai-productivity-business-outcomes?co=US">Indeed, 2026</a>.
	  </p>
	  <p>
	    SPACE framework 與 DORA metrics 是可比成熟做法：平衡多維工程效能與交付可靠性，而不是單一活動量指標。<a href="https://space-framework.com/">SPACE</a> / <a href="https://dora.dev/guides/dora-metrics/">DORA</a>
	  </p>
	</section>
	</main>
	</body>
	</html>