esz135888 · May 24, 2026 02:24
diff --git a/acceptance-tests.md b/acceptance-tests.md
diff --git a/artifact-url-or-pr.md b/artifact-url-or-pr.md
diff --git a/data-model.md b/data-model.md
diff --git a/decision-record.md b/decision-record.md
diff --git a/e2e-verification.md b/e2e-verification.md
diff --git a/learning-memory.json b/learning-memory.json
 {
  "project": "AI 自建專案：亞馬遜 Tokenmaxxing 案例提醒",
  "job_id": "d76edb7c-71f9-4bb9-8399-19776c74c1c6",
  "selected_solution": "governance/eval/scorecard",
  "learned_signal": "Using token consumption to measure AI benefit causes meaningless metric gaming; value metrics must focus on actual outcomes.",
  "market_learning": "Mature productivity measurement avoids simple activity metrics and balances outcome, quality, satisfaction, flow, stability, and cost diagnostics.",
  "next_run_bias": "Block token-based performance metrics by default; allow token count only as diagnostic cost data.",
  "must_check_next": [
    "Are token metrics used only for finance/cost diagnostics?",
    "Are prompt count and usage rankings excluded from performance decisions?",
    "Does each value metric have baseline and evidence?",
    "Are gaming alerts connected to workflow outcomes?"
  ]
 }

diff --git a/market-maturity.md b/market-maturity.md
diff --git a/people-sync.md b/people-sync.md
diff --git a/production-brief.md b/production-brief.md
diff --git a/production-readiness.md b/production-readiness.md
diff --git a/skill-usage.md b/skill-usage.md
diff --git a/solution-selection.md b/solution-selection.md
diff --git a/tokenmaxxing-value-metrics-guardrail.html b/tokenmaxxing-value-metrics-guardrail.html
 <!doctype html>
 <html lang="zh-Hant">
 <head>
  <!-- Document metadata plus the single inline stylesheet used by the whole page. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Tokenmaxxing 價值指標防護作戰台</title>
  <style>
    /* Colour tokens referenced through var() by the rules below. */
    :root {
      --ink: #17202a; --muted: #617080; --line: #d9e1e8; --paper: #f6f8fb; --card: #fff;
      --blue: #1d4ed8; --green: #0f7f5c; --amber: #a16207; --red: #b3361d; --teal: #0f766e;
    }

    /* Base box model, page chrome and typography. */
    * { box-sizing: border-box; }
    body { margin: 0; background: var(--paper); color: var(--ink); font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; line-height: 1.5; }
    header { background: #fff; border-bottom: 1px solid var(--line); padding: 28px clamp(20px, 4vw, 56px); }
    main { padding: 24px clamp(20px, 4vw, 56px) 48px; }
    h1 { margin: 0 0 12px; font-size: clamp(30px, 4vw, 52px); line-height: 1.05; max-width: 1100px; }
    h2 { margin: 0 0 12px; font-size: 22px; }
    h3 { margin: 0 0 6px; font-size: 16px; }
    p { margin-top: 0; }
    code { background: #eef3f8; padding: 1px 5px; border-radius: 4px; }
    .sub { max-width: 1080px; color: var(--muted); font-size: 17px; }

    /* Grid container and its column presets (4-up KPIs, two-up, three-up, 4-step timeline, 5-step flow). */
    .grid { display: grid; gap: 16px; }
    .kpis { grid-template-columns: repeat(4, minmax(0, 1fr)); margin-top: 22px; }
    .two { grid-template-columns: 1.08fr .92fr; }
    .three { grid-template-columns: repeat(3, minmax(0, 1fr)); }
    .timeline { grid-template-columns: repeat(4, minmax(0, 1fr)); }
    .flow { grid-template-columns: repeat(5, minmax(0, 1fr)); }

    /* Cards, KPI figures, pills and the status colour helpers. */
    .card { background: var(--card); border: 1px solid var(--line); border-radius: 8px; padding: 18px; box-shadow: 0 1px 2px rgba(23, 32, 42, .04); }
    .metric { font-size: 34px; font-weight: 780; }
    .label { color: var(--muted); font-size: 13px; }
    .pill { display: inline-flex; border: 1px solid var(--line); border-radius: 999px; padding: 4px 10px; font-size: 12px; background: #fff; margin: 0 6px 8px 0; white-space: nowrap; }
    .ok { color: var(--green); }
    .warn { color: var(--amber); }
    .bad { color: var(--red); }
    .info { color: var(--blue); }

    /* Policy table. */
    table { width: 100%; border-collapse: collapse; font-size: 14px; }
    th, td { text-align: left; padding: 10px; border-bottom: 1px solid var(--line); vertical-align: top; }
    th { color: var(--muted); font-size: 12px; text-transform: uppercase; }
    .badcell { color: var(--red); font-weight: 700; }
    .goodcell { color: var(--green); font-weight: 700; }

    /* Timeline cards, flow steps and source links. .day must stay after .card so its left border wins. */
    .day { border-left: 4px solid var(--teal); }
    .step { border: 1px solid var(--line); border-radius: 8px; padding: 12px; min-height: 126px; background: #fbfdff; }
    .step strong { display: block; color: var(--teal); margin-bottom: 6px; }
    .source a { color: var(--blue); word-break: break-word; }

    /* At 920px wide or narrower every grid preset collapses to one column and the h1 is fixed at 34px. */
    @media (max-width: 920px) {
      .kpis, .two, .three, .timeline, .flow { grid-template-columns: 1fr; }
      h1 { font-size: 34px; }
    }
  </style>
 </head>
 <body>
  <header>
    <!-- Page banner: two status pills, the page's only h1, the summary paragraph, then four KPI tiles. -->
    <!-- The pills are kept on one line with no whitespace between them so no extra inline gap is rendered. -->
    <span class="pill info">PLS production delivery pack</span><span class="pill ok">Solution: governance / eval / scorecard</span>
    <h1>Tokenmaxxing 價值指標防護作戰台</h1>
    <p class="sub">把「亞馬遜 Tokenmaxxing 案例提醒」變成可執行的 AI 指標防護系統：禁止用 Token 消耗量代表 AI 效益，將評估重心轉向實際產出價值、任務完成、節省時間、品質與客戶滿意，並建立 gaming signal 與治理例外流程。</p>
    <!-- The KPI row has no heading of its own, so it is a plain layout div rather than a section. -->
    <div class="grid kpis">
      <div class="card"><div class="metric bad">0</div><div class="label">Token 用量不得進績效排名</div></div>
      <div class="card"><div class="metric ok">4</div><div class="label">核心價值指標：完成、時間、品質、滿意</div></div>
      <div class="card"><div class="metric">D7</div><div class="label">完成 AI value scorecard v1</div></div>
      <div class="card"><div class="metric">D30</div><div class="label">導入 governance gate 與異常偵測</div></div>
    </div>
  </header>
  <main class="grid">
    <!-- Two-column layout row. The row itself has no heading, so it is a div; each headed card is the section. -->
    <div class="grid two">
      <section class="card">
        <h2>本輪問題</h2>
        <p>Tokenmaxxing 的風險是把成本或活動量誤當價值。當公司用 Token 消耗量衡量 AI 使用效益，員工會被誘導去刷 prompt、開工具、製造看似繁忙的 AI 用量，最後增加成本、降低信任，卻沒有提升客戶或任務結果。</p>
        <!-- Pills stay adjacent on one line so no extra inline whitespace is rendered between them. -->
        <span class="pill">Owner: AI governance owner</span><span class="pill">Due: D7 value scorecard</span><span class="pill">Acceptance: token metric blocked</span>
      </section>
      <section class="card">
        <h2>解法選型</h2>
        <p><strong>governance / eval / scorecard</strong>。這不是單純教育宣導，而是管理制度設計；必須有指標准入、反作弊訊號、例外審批、資料模型與驗收規則。</p>
      </section>
    </div>

    <!-- Rollout path: one card per checkpoint, laid out by the .timeline grid. -->
    <section class="card">
      <h2>D1 / D7 / D14 / D30 路徑</h2>
      <div class="grid timeline">
        <div class="card day">
          <h3>D1</h3>
          <p>定義禁止指標、替代價值指標、資料欄位、反作弊訊號與 owner。</p>
        </div>
        <div class="card day">
          <h3>D7</h3>
          <p>完成 AI value scorecard v1，套用到 3 個實際 AI 工作流，建立 baseline。</p>
        </div>
        <div class="card day">
          <h3>D14</h3>
          <p>導入 gaming detection：Token 暴增但任務/品質不升、工具啟動暴增但客戶滿意不升。</p>
        </div>
        <div class="card day">
          <h3>D30</h3>
          <p>把 value scorecard 接進 Operating Console 或 PLS 後台，支援預算、考核、加碼/停止決策。</p>
        </div>
      </div>
    </section>

    <!-- Purpose-to-purpose chain: five steps laid out by the .flow grid; each step's label is a block-level strong. -->
    <section class="card">
      <h2>Purpose-to-Purpose E2E</h2>
      <div class="grid flow">
        <div class="step">
          <strong>原始目的</strong>避免 AI 使用效益被 Token 消耗量綁架。
        </div>
        <div class="step">
          <strong>產出物</strong>價值指標防護台、metric registry、gaming signal、驗收規則。
        </div>
        <div class="step">
          <strong>人採用</strong>主管用 outcome metrics 評估，員工專注完成有價值任務。
        </div>
        <div class="step">
          <strong>指標改善</strong>成本浪費下降、完成率上升、品質提升、客戶滿意改善。
        </div>
        <div class="step">
          <strong>錢路徑</strong>AI 預算流向有效 workflow，停掉只燒 Token 不創值的使用場景。
        </div>
      </div>
    </section>

    <!-- Two-column layout row. The row itself has no heading, so it is a div; each headed card is the section. -->
    <div class="grid two">
      <section class="card">
        <h2>指標防護規則</h2>
        <!-- Metric policy table: blocked activity metrics first, then allowed outcome metrics. -->
        <table>
          <!-- scope="col" ties every data cell to its column header for assistive technology. -->
          <thead><tr><th scope="col">Metric</th><th scope="col">Policy</th><th scope="col">Production Rule</th></tr></thead>
          <tbody>
            <tr><td>Token consumption</td><td class="badcell">Blocked for performance</td><td>只能作成本診斷，不得用於績效、排行或獎金。</td></tr>
            <tr><td>Prompt count</td><td class="badcell">Blocked</td><td>高 prompt count 不代表成果；只可作 debug signal。</td></tr>
            <tr><td>AI usage leaderboard</td><td class="badcell">Blocked</td><td>禁止個人排行；改看 team outcome。</td></tr>
            <tr><td>Task completion rate</td><td class="goodcell">Allowed</td><td>需有完成定義、驗收證據與品質門檻。</td></tr>
            <tr><td>Actual time saved</td><td class="goodcell">Allowed</td><td>需 baseline 與抽樣驗證，不能自報即算。</td></tr>
            <tr><td>Output quality score</td><td class="goodcell">Allowed</td><td>需 rubric、reviewer 與樣本數。</td></tr>
            <tr><td>Customer satisfaction</td><td class="goodcell">Allowed</td><td>需連到具體 workflow 和客戶回饋來源。</td></tr>
          </tbody>
        </table>
      </section>
      <section class="card">
        <h2>資料 / API / 權限</h2>
        <p><strong>Tables:</strong> <code>ai_value_metrics</code>, <code>metric_policy_rules</code>, <code>ai_workflow_outcomes</code>, <code>token_cost_diagnostics</code>, <code>gaming_alerts</code>, <code>metric_exceptions</code>.</p>
        <p><strong>APIs:</strong> <code>POST /ai-value/metrics</code>, <code>POST /ai-value/outcomes</code>, <code>GET /ai-value/scorecard</code>, <code>POST /ai-value/exceptions</code>.</p>
        <!-- This paragraph is entirely English inside a zh-Hant document, so its language is declared explicitly. -->
        <p lang="en"><strong>Permissions:</strong> team owner can propose value metric; governance owner approves; finance can view token cost diagnostics; performance admins cannot use blocked metrics.</p>
      </section>
    </div>

    <!-- Three-column layout row. The row itself has no heading, so it is a div; each headed card is the section. -->
    <div class="grid three">
      <section class="card"><h2>價值 / 錢路徑</h2><p>把 AI 成本從「燒多少 Token」改成「每筆任務創造多少成果」，能降低浪費、提升預算配置品質，並避免錯誤考核造成行為扭曲。</p></section>
      <section class="card"><h2>人的能力提升</h2><p>主管學會設計不可被輕易操弄的價值指標；員工知道 AI 使用目標是完成更好的工作，而不是刷活動量。</p></section>
      <section class="card"><h2>下一輪升級</h2><p>接 Operating Console 指標設定 UI，新增 Token metric blocker、cost diagnostic dashboard 與 gaming alert。</p></section>
    </div>

    <!-- External references. The whole block is English inside a zh-Hant document, so lang="en" is set on the section. -->
    <section class="card source" lang="en">
      <h2>Market Maturity Inputs</h2>
      <p>McKinsey warns that productivity data can damage organizations if overly simple activity metrics are misused: <a href="https://www.mckinsey.com/industries/technology-media-and-telecommunications/our-insights/yes-you-can-measure-software-developer-productivity?cid=other-eml-mtg-mip-mck">McKinsey productivity measurement</a>.</p>
      <p>The SPACE framework balances satisfaction, performance, activity, communication, and efficiency to prevent over-optimization of one metric: <a href="https://space-framework.com/">SPACE framework</a>.</p>
      <p>DORA metrics focus on delivery performance and reliability rather than raw activity volume: <a href="https://dora.dev/guides/dora-metrics/">DORA metrics guide</a>.</p>
    </section>
  </main>
 </body>
 </html>
	{
	"project": "AI 自建專案：亞馬遜 Tokenmaxxing 案例提醒",
	"job_id": "d76edb7c-71f9-4bb9-8399-19776c74c1c6",
	"selected_solution": "governance/eval/scorecard",
	"learned_signal": "Using token consumption to measure AI benefit causes meaningless metric gaming; value metrics must focus on actual outcomes.",
	"market_learning": "Mature productivity measurement avoids simple activity metrics and balances outcome, quality, satisfaction, flow, stability, and cost diagnostics.",
	"next_run_bias": "Block token-based performance metrics by default; allow token count only as diagnostic cost data.",
	"must_check_next": [
	"Are token metrics used only for finance/cost diagnostics?",
	"Are prompt count and usage rankings excluded from performance decisions?",
	"Does each value metric have baseline and evidence?",
	"Are gaming alerts connected to workflow outcomes?"
	]
	}
	<!doctype html>
	<html lang="zh-Hant">
	<head>
	  <!-- Document metadata plus the single inline stylesheet used by the whole page. -->
	  <meta charset="utf-8">
	  <meta name="viewport" content="width=device-width, initial-scale=1">
	  <title>Tokenmaxxing 價值指標防護作戰台</title>
	  <style>
	    /* Colour tokens referenced through var() by the rules below. */
	    :root {
	      --ink: #17202a; --muted: #617080; --line: #d9e1e8; --paper: #f6f8fb; --card: #fff;
	      --blue: #1d4ed8; --green: #0f7f5c; --amber: #a16207; --red: #b3361d; --teal: #0f766e;
	    }

	    /* Base box model, page chrome and typography. */
	    * { box-sizing: border-box; }
	    body { margin: 0; background: var(--paper); color: var(--ink); font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; line-height: 1.5; }
	    header { background: #fff; border-bottom: 1px solid var(--line); padding: 28px clamp(20px, 4vw, 56px); }
	    main { padding: 24px clamp(20px, 4vw, 56px) 48px; }
	    h1 { margin: 0 0 12px; font-size: clamp(30px, 4vw, 52px); line-height: 1.05; max-width: 1100px; }
	    h2 { margin: 0 0 12px; font-size: 22px; }
	    h3 { margin: 0 0 6px; font-size: 16px; }
	    p { margin-top: 0; }
	    code { background: #eef3f8; padding: 1px 5px; border-radius: 4px; }
	    .sub { max-width: 1080px; color: var(--muted); font-size: 17px; }

	    /* Grid container and its column presets (4-up KPIs, two-up, three-up, 4-step timeline, 5-step flow). */
	    .grid { display: grid; gap: 16px; }
	    .kpis { grid-template-columns: repeat(4, minmax(0, 1fr)); margin-top: 22px; }
	    .two { grid-template-columns: 1.08fr .92fr; }
	    .three { grid-template-columns: repeat(3, minmax(0, 1fr)); }
	    .timeline { grid-template-columns: repeat(4, minmax(0, 1fr)); }
	    .flow { grid-template-columns: repeat(5, minmax(0, 1fr)); }

	    /* Cards, KPI figures, pills and the status colour helpers. */
	    .card { background: var(--card); border: 1px solid var(--line); border-radius: 8px; padding: 18px; box-shadow: 0 1px 2px rgba(23, 32, 42, .04); }
	    .metric { font-size: 34px; font-weight: 780; }
	    .label { color: var(--muted); font-size: 13px; }
	    .pill { display: inline-flex; border: 1px solid var(--line); border-radius: 999px; padding: 4px 10px; font-size: 12px; background: #fff; margin: 0 6px 8px 0; white-space: nowrap; }
	    .ok { color: var(--green); }
	    .warn { color: var(--amber); }
	    .bad { color: var(--red); }
	    .info { color: var(--blue); }

	    /* Policy table. */
	    table { width: 100%; border-collapse: collapse; font-size: 14px; }
	    th, td { text-align: left; padding: 10px; border-bottom: 1px solid var(--line); vertical-align: top; }
	    th { color: var(--muted); font-size: 12px; text-transform: uppercase; }
	    .badcell { color: var(--red); font-weight: 700; }
	    .goodcell { color: var(--green); font-weight: 700; }

	    /* Timeline cards, flow steps and source links. .day must stay after .card so its left border wins. */
	    .day { border-left: 4px solid var(--teal); }
	    .step { border: 1px solid var(--line); border-radius: 8px; padding: 12px; min-height: 126px; background: #fbfdff; }
	    .step strong { display: block; color: var(--teal); margin-bottom: 6px; }
	    .source a { color: var(--blue); word-break: break-word; }

	    /* At 920px wide or narrower every grid preset collapses to one column and the h1 is fixed at 34px. */
	    @media (max-width: 920px) {
	      .kpis, .two, .three, .timeline, .flow { grid-template-columns: 1fr; }
	      h1 { font-size: 34px; }
	    }
	  </style>
	</head>
	<body>
	<header>
	  <!-- Page banner: two status pills, the page's only h1, the summary paragraph, then four KPI tiles. -->
	  <!-- The pills are kept on one line with no whitespace between them so no extra inline gap is rendered. -->
	  <span class="pill info">PLS production delivery pack</span><span class="pill ok">Solution: governance / eval / scorecard</span>
	  <h1>Tokenmaxxing 價值指標防護作戰台</h1>
	  <p class="sub">把「亞馬遜 Tokenmaxxing 案例提醒」變成可執行的 AI 指標防護系統：禁止用 Token 消耗量代表 AI 效益，將評估重心轉向實際產出價值、任務完成、節省時間、品質與客戶滿意，並建立 gaming signal 與治理例外流程。</p>
	  <!-- The KPI row has no heading of its own, so it is a plain layout div rather than a section. -->
	  <div class="grid kpis">
	    <div class="card"><div class="metric bad">0</div><div class="label">Token 用量不得進績效排名</div></div>
	    <div class="card"><div class="metric ok">4</div><div class="label">核心價值指標：完成、時間、品質、滿意</div></div>
	    <div class="card"><div class="metric">D7</div><div class="label">完成 AI value scorecard v1</div></div>
	    <div class="card"><div class="metric">D30</div><div class="label">導入 governance gate 與異常偵測</div></div>
	  </div>
	</header>
	<main class="grid">
	<!-- Two-column layout row. The row itself has no heading, so it is a div; each headed card is the section. -->
	<div class="grid two">
	  <section class="card">
	    <h2>本輪問題</h2>
	    <p>Tokenmaxxing 的風險是把成本或活動量誤當價值。當公司用 Token 消耗量衡量 AI 使用效益，員工會被誘導去刷 prompt、開工具、製造看似繁忙的 AI 用量，最後增加成本、降低信任，卻沒有提升客戶或任務結果。</p>
	    <!-- Pills stay adjacent on one line so no extra inline whitespace is rendered between them. -->
	    <span class="pill">Owner: AI governance owner</span><span class="pill">Due: D7 value scorecard</span><span class="pill">Acceptance: token metric blocked</span>
	  </section>
	  <section class="card">
	    <h2>解法選型</h2>
	    <p><strong>governance / eval / scorecard</strong>。這不是單純教育宣導，而是管理制度設計；必須有指標准入、反作弊訊號、例外審批、資料模型與驗收規則。</p>
	  </section>
	</div>

	<!-- Rollout path: one card per checkpoint, laid out by the .timeline grid. -->
	<section class="card">
	  <h2>D1 / D7 / D14 / D30 路徑</h2>
	  <div class="grid timeline">
	    <div class="card day">
	      <h3>D1</h3>
	      <p>定義禁止指標、替代價值指標、資料欄位、反作弊訊號與 owner。</p>
	    </div>
	    <div class="card day">
	      <h3>D7</h3>
	      <p>完成 AI value scorecard v1，套用到 3 個實際 AI 工作流，建立 baseline。</p>
	    </div>
	    <div class="card day">
	      <h3>D14</h3>
	      <p>導入 gaming detection：Token 暴增但任務/品質不升、工具啟動暴增但客戶滿意不升。</p>
	    </div>
	    <div class="card day">
	      <h3>D30</h3>
	      <p>把 value scorecard 接進 Operating Console 或 PLS 後台，支援預算、考核、加碼/停止決策。</p>
	    </div>
	  </div>
	</section>

	<!-- Purpose-to-purpose chain: five steps laid out by the .flow grid; each step's label is a block-level strong. -->
	<section class="card">
	  <h2>Purpose-to-Purpose E2E</h2>
	  <div class="grid flow">
	    <div class="step">
	      <strong>原始目的</strong>避免 AI 使用效益被 Token 消耗量綁架。
	    </div>
	    <div class="step">
	      <strong>產出物</strong>價值指標防護台、metric registry、gaming signal、驗收規則。
	    </div>
	    <div class="step">
	      <strong>人採用</strong>主管用 outcome metrics 評估，員工專注完成有價值任務。
	    </div>
	    <div class="step">
	      <strong>指標改善</strong>成本浪費下降、完成率上升、品質提升、客戶滿意改善。
	    </div>
	    <div class="step">
	      <strong>錢路徑</strong>AI 預算流向有效 workflow，停掉只燒 Token 不創值的使用場景。
	    </div>
	  </div>
	</section>

	<!-- Two-column layout row. The row itself has no heading, so it is a div; each headed card is the section. -->
	<div class="grid two">
	  <section class="card">
	    <h2>指標防護規則</h2>
	    <!-- Metric policy table: blocked activity metrics first, then allowed outcome metrics. -->
	    <table>
	      <!-- scope="col" ties every data cell to its column header for assistive technology. -->
	      <thead><tr><th scope="col">Metric</th><th scope="col">Policy</th><th scope="col">Production Rule</th></tr></thead>
	      <tbody>
	        <tr><td>Token consumption</td><td class="badcell">Blocked for performance</td><td>只能作成本診斷，不得用於績效、排行或獎金。</td></tr>
	        <tr><td>Prompt count</td><td class="badcell">Blocked</td><td>高 prompt count 不代表成果；只可作 debug signal。</td></tr>
	        <tr><td>AI usage leaderboard</td><td class="badcell">Blocked</td><td>禁止個人排行；改看 team outcome。</td></tr>
	        <tr><td>Task completion rate</td><td class="goodcell">Allowed</td><td>需有完成定義、驗收證據與品質門檻。</td></tr>
	        <tr><td>Actual time saved</td><td class="goodcell">Allowed</td><td>需 baseline 與抽樣驗證，不能自報即算。</td></tr>
	        <tr><td>Output quality score</td><td class="goodcell">Allowed</td><td>需 rubric、reviewer 與樣本數。</td></tr>
	        <tr><td>Customer satisfaction</td><td class="goodcell">Allowed</td><td>需連到具體 workflow 和客戶回饋來源。</td></tr>
	      </tbody>
	    </table>
	  </section>
	  <section class="card">
	    <h2>資料 / API / 權限</h2>
	    <p><strong>Tables:</strong> <code>ai_value_metrics</code>, <code>metric_policy_rules</code>, <code>ai_workflow_outcomes</code>, <code>token_cost_diagnostics</code>, <code>gaming_alerts</code>, <code>metric_exceptions</code>.</p>
	    <p><strong>APIs:</strong> <code>POST /ai-value/metrics</code>, <code>POST /ai-value/outcomes</code>, <code>GET /ai-value/scorecard</code>, <code>POST /ai-value/exceptions</code>.</p>
	    <!-- This paragraph is entirely English inside a zh-Hant document, so its language is declared explicitly. -->
	    <p lang="en"><strong>Permissions:</strong> team owner can propose value metric; governance owner approves; finance can view token cost diagnostics; performance admins cannot use blocked metrics.</p>
	  </section>
	</div>

	<!-- Three-column layout row. The row itself has no heading, so it is a div; each headed card is the section. -->
	<div class="grid three">
	  <section class="card"><h2>價值 / 錢路徑</h2><p>把 AI 成本從「燒多少 Token」改成「每筆任務創造多少成果」，能降低浪費、提升預算配置品質，並避免錯誤考核造成行為扭曲。</p></section>
	  <section class="card"><h2>人的能力提升</h2><p>主管學會設計不可被輕易操弄的價值指標；員工知道 AI 使用目標是完成更好的工作，而不是刷活動量。</p></section>
	  <section class="card"><h2>下一輪升級</h2><p>接 Operating Console 指標設定 UI，新增 Token metric blocker、cost diagnostic dashboard 與 gaming alert。</p></section>
	</div>

	<!-- External references. The whole block is English inside a zh-Hant document, so lang="en" is set on the section. -->
	<section class="card source" lang="en">
	  <h2>Market Maturity Inputs</h2>
	  <p>McKinsey warns that productivity data can damage organizations if overly simple activity metrics are misused: <a href="https://www.mckinsey.com/industries/technology-media-and-telecommunications/our-insights/yes-you-can-measure-software-developer-productivity?cid=other-eml-mtg-mip-mck">McKinsey productivity measurement</a>.</p>
	  <p>The SPACE framework balances satisfaction, performance, activity, communication, and efficiency to prevent over-optimization of one metric: <a href="https://space-framework.com/">SPACE framework</a>.</p>
	  <p>DORA metrics focus on delivery performance and reliability rather than raw activity volume: <a href="https://dora.dev/guides/dora-metrics/">DORA metrics guide</a>.</p>
	</section>
	</main>
	</body>
	</html>