esz135888 · May 24, 2026 02:13
diff --git a/acceptance-tests.md b/acceptance-tests.md
diff --git a/anti-tokenmaxxing-metric-governance-console.html b/anti-tokenmaxxing-metric-governance-console.html
 <!doctype html>
 <html lang="zh-Hant">
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Operating Console 反 Tokenmaxxing 指標治理台</title>
  <style>
    /* Colour tokens; every rule below reads them through var(). */
    :root{--ink:#18212f;--muted:#627083;--line:#d9e1e8;--paper:#f6f8fb;--card:#fff;--blue:#1d4ed8;--green:#0f7f5c;--amber:#a16207;--red:#b3361d;--violet:#6d28d9}

    /* Page base and the two page-level regions. */
    *{box-sizing:border-box}
    body{margin:0;background:var(--paper);color:var(--ink);font-family:Inter,ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;line-height:1.5}
    header{background:#fff;border-bottom:1px solid var(--line);padding:28px clamp(20px,4vw,56px)}
    main{padding:24px clamp(20px,4vw,56px) 48px}

    /* Typography. */
    h1{margin:0 0 12px;font-size:clamp(30px,4vw,52px);line-height:1.05;max-width:1080px}
    h2{margin:0 0 12px;font-size:22px}
    h3{margin:0 0 6px;font-size:16px}
    p{margin-top:0}
    code{background:#eef3f8;padding:1px 5px;border-radius:4px}
    .sub{max-width:1080px;color:var(--muted);font-size:17px}

    /* Grids: .grid makes the container a grid, the second class picks the column template. */
    .grid{display:grid;gap:16px}
    .kpis{grid-template-columns:repeat(4,minmax(0,1fr));margin-top:22px}
    .two{grid-template-columns:1.08fr .92fr}
    .three{grid-template-columns:repeat(3,minmax(0,1fr))}
    .timeline{grid-template-columns:repeat(4,minmax(0,1fr))}
    .flow{grid-template-columns:repeat(5,minmax(0,1fr))}

    /* Cards and KPI tiles. */
    .card{background:var(--card);border:1px solid var(--line);border-radius:8px;padding:18px;box-shadow:0 1px 2px rgba(24,33,47,.04)}
    .metric{font-size:34px;font-weight:780}
    .label{color:var(--muted);font-size:13px}

    /* Pills and status text colours. */
    .pill{display:inline-flex;border:1px solid var(--line);border-radius:999px;padding:4px 10px;font-size:12px;background:#fff;margin:0 6px 8px 0;white-space:nowrap}
    .ok{color:var(--green)}
    .warn{color:var(--amber)}
    .bad{color:var(--red)}
    .info{color:var(--blue)}

    /* Registry table. */
    table{width:100%;border-collapse:collapse;font-size:14px}
    th,td{text-align:left;padding:10px;border-bottom:1px solid var(--line);vertical-align:top}
    th{color:var(--muted);font-size:12px;text-transform:uppercase}
    .badcell{color:var(--red);font-weight:700}
    .goodcell{color:var(--green);font-weight:700}

    /* Timeline cards and flow steps. */
    .day{border-left:4px solid var(--violet)}
    .step{border:1px solid var(--line);border-radius:8px;padding:12px;min-height:126px;background:#fbfdff}
    .step strong{display:block;color:var(--violet);margin-bottom:6px}

    /* Long source URLs may wrap at any point. overflow-wrap replaces the deprecated
       word-break:break-word; the break-word value is the fallback for browsers
       that do not know "anywhere". */
    .source a{color:var(--blue);overflow-wrap:break-word;overflow-wrap:anywhere}

    /* At 920px and narrower every multi-column grid collapses to a single column. */
    @media(max-width:920px){.kpis,.two,.three,.timeline,.flow{grid-template-columns:1fr}h1{font-size:34px}}
  </style>
 </head>
 <body>
  <header>
    <!-- Page banner: pack/solution pills, title, summary and four KPI tiles. -->
    <!-- The pills stay on one line with no whitespace between them so no extra inline gap is rendered. -->
    <span class="pill info">PLS production delivery pack</span><span class="pill ok">Solution: governance / eval / dashboard</span>
    <h1>Operating Console 反 Tokenmaxxing 指標治理台</h1>
    <p class="sub">把「反 Tokenmaxxing」從文件章節升級成可驗收的指標治理系統：明確禁止 Token 消耗數量、工具啟動次數、AI 使用人數排行等誘發灌水的活動指標，改用任務完成率、實際節省時間、客戶滿意度、產出品質評分與反作弊稽核。</p>
    <!-- KPI tiles. A div rather than a section: the group has no heading of its own. -->
    <div class="grid kpis">
      <div class="card">
        <div class="metric bad">3</div>
        <div class="label">禁止指標：Token、啟動次數、使用排行</div>
      </div>
      <div class="card">
        <div class="metric ok">4</div>
        <div class="label">替代指標：完成率、節省時間、CSAT、品質</div>
      </div>
      <div class="card">
        <div class="metric">D7</div>
        <div class="label">完成第一版 metric registry 與 gate</div>
      </div>
      <div class="card">
        <div class="metric">D30</div>
        <div class="label">接進 Operating Console 考核治理</div>
      </div>
    </div>
  </header>
  <main class="grid">
    <!-- Two-column row: problem statement and chosen solution type. -->
    <!-- The row wrapper is a div because it has no heading; each headed card is a section. -->
    <div class="grid two">
      <section class="card">
        <h2>本輪問題</h2>
        <p>Operating Console 若用 Token 消耗、工具啟動次數、AI 使用人數排行當成績效，會把人推向「看起來很 AI」而不是「真的完成任務」。這是典型 Goodhart's law：一旦指標成為目標，它就會失去衡量價值。</p>
        <span class="pill">Owner: Operating Console owner</span><span class="pill">Due: D7 metric registry</span><span class="pill">Acceptance: bad metrics blocked</span>
      </section>
      <section class="card">
        <h2>解法選型</h2>
        <p><strong>governance / eval / dashboard</strong>。這不是單篇規格補充，而是考核制度風險。需要 metric registry、禁止清單、替代指標、review workflow、稽核和例外批准。</p>
      </section>
    </div>

    <!-- Delivery path: one card per milestone (D1, D7, D14, D30), laid out by the .timeline grid. -->
    <section class="card">
      <h2>D1 / D7 / D14 / D30 路徑</h2>
      <div class="grid timeline">
        <div class="card day">
          <h3>D1</h3>
          <p>建立禁止指標與替代指標 registry，定義每個指標的 owner、公式、資料源、反作弊檢查。</p>
        </div>
        <div class="card day">
          <h3>D7</h3>
          <p>在 Operating Console 指標設定加入 governance gate，bad metric 不得上線，例外需 decision record。</p>
        </div>
        <div class="card day">
          <h3>D14</h3>
          <p>接 3 個實際 AI 工作流，用 outcome metrics 驗證儀表板是否能反映真價值。</p>
        </div>
        <div class="card day">
          <h3>D30</h3>
          <p>形成 AI performance governance：指標、品質、客戶滿意、節省時間、稽核異常同表決策。</p>
        </div>
      </div>
    </section>

    <!-- Five-step purpose chain. Each step's strong label is display:block, so the text after it starts on its own line. -->
    <section class="card">
      <h2>Purpose-to-Purpose E2E</h2>
      <div class="grid flow">
        <div class="step">
          <strong>原始目的</strong>
          Operating Console 要衡量 AI 對業務的真實價值。
        </div>
        <div class="step">
          <strong>風險</strong>
          Tokenmaxxing 把人推向增加消耗與表演式使用。
        </div>
        <div class="step">
          <strong>治理</strong>
          metric registry 阻擋壞指標，替代成 outcome/quality/time/customer metrics。
        </div>
        <div class="step">
          <strong>採用</strong>
          主管用可驗證結果考核；員工專注完成任務與提升品質。
        </div>
        <div class="step">
          <strong>結果</strong>
          降低浪費、提升任務完成率、改善客戶滿意、避免制度誘發錯誤行為。
        </div>
      </div>
    </section>

    <!-- Two-column row: metric registry table and the data/API/permission summary. -->
    <!-- The row wrapper is a div because it has no heading; each headed card is a section. -->
    <div class="grid two">
      <section class="card">
        <h2>Metric Registry Gate</h2>
        <!-- aria-label names the table for assistive technology without adding visible text; scope="col" ties each header to its column. -->
        <table aria-label="Metric Registry Gate">
          <thead>
            <tr>
              <th scope="col">Metric</th>
              <th scope="col">Status</th>
              <th scope="col">Reason / Replacement</th>
            </tr>
          </thead>
          <tbody>
            <tr><td>Token consumed</td><td class="badcell">Blocked</td><td>誘發灌水與低效率；替代為 task completed per verified outcome。</td></tr>
            <tr><td>Tool launches</td><td class="badcell">Blocked</td><td>啟動不等於採用；替代為 workflow completion rate。</td></tr>
            <tr><td>AI usage leaderboard</td><td class="badcell">Blocked</td><td>誘發排名焦慮和表演；替代為 team outcome score。</td></tr>
            <tr><td>Task completion rate</td><td class="goodcell">Allowed</td><td>需定義任務完成證據與品質門檻。</td></tr>
            <tr><td>Actual time saved</td><td class="goodcell">Allowed</td><td>需 baseline 與抽樣驗證。</td></tr>
            <tr><td>Customer satisfaction</td><td class="goodcell">Allowed</td><td>需與 AI-assisted workflow 連結，避免單點歸因。</td></tr>
            <tr><td>Output quality score</td><td class="goodcell">Allowed</td><td>需 rubric、reviewer、sample size 與異議流程。</td></tr>
          </tbody>
        </table>
      </section>
      <section class="card">
        <h2>資料 / API / 權限</h2>
        <!-- These paragraphs are English inside a zh-Hant page, so they are marked lang="en". -->
        <p lang="en"><strong>Tables:</strong> <code>metric_registry</code>, <code>metric_reviews</code>, <code>metric_observations</code>, <code>gaming_signals</code>, <code>governance_exceptions</code>.</p>
        <p lang="en"><strong>APIs:</strong> <code>POST /console/metrics/register</code>, <code>POST /console/metrics/:id/review</code>, <code>GET /console/metrics/governance-scorecard</code>.</p>
        <p lang="en"><strong>Permissions:</strong> team owner can propose metrics; governance owner approves; Louis can override with reason; blocked metrics require exception audit.</p>
      </section>
    </div>

    <!-- Three-column row: value path, capability uplift, next iteration. -->
    <!-- The row wrapper is a div because it has no heading; each headed card is a section. -->
    <div class="grid three">
      <section class="card">
        <h2>價值 / 錢路徑</h2>
        <p>避免把預算花在 Token 和工具啟動次數上，將投資導向節省時間、提升品質、客戶滿意和任務完成，降低制度性浪費。</p>
      </section>
      <section class="card">
        <h2>人的能力提升</h2>
        <p>主管學會設計不易被操弄的指標；員工知道 AI 使用的目標是交付成果，而不是堆活動量。</p>
      </section>
      <section class="card">
        <h2>下一輪升級</h2>
        <p>接實際 Operating Console 指標設定 UI，加入 bad metric blocker、metric review workflow 和 gaming alert。</p>
      </section>
    </div>

    <!-- External references. The whole section is English inside a zh-Hant page, hence lang="en". -->
    <section class="card source" lang="en">
      <h2>Market Maturity Inputs</h2>
      <p>McKinsey notes productivity data can damage organizations if simple activity metrics such as lines of code or commit counts are misused: <a href="https://www.mckinsey.com/industries/technology-media-and-telecommunications/our-insights/yes-you-can-measure-software-developer-productivity?cid=other-eml-mtg-mip-mck">McKinsey developer productivity measurement</a>.</p>
      <p>The SPACE framework balances Satisfaction, Performance, Activity, Communication, and Efficiency to avoid over-optimizing one visible activity metric: <a href="https://space-framework.com/">SPACE framework</a>.</p>
      <p>DORA metrics connect delivery performance with reliability and stability rather than raw activity volume: <a href="https://dora.dev/guides/dora-metrics/">DORA metrics guide</a>.</p>
    </section>
  </main>
 </body>
 </html>
diff --git a/artifact-url-or-pr.md b/artifact-url-or-pr.md
diff --git a/data-model.md b/data-model.md
diff --git a/decision-record.md b/decision-record.md
diff --git a/e2e-verification.md b/e2e-verification.md
diff --git a/learning-memory.json b/learning-memory.json
 {
  "project": "AI 自建專案：Operating Console 工具規格",
  "job_id": "a766341b-1d53-4a61-9363-e6c74790cea2",
  "selected_solution": "governance/eval/dashboard",
  "learned_signal": "Operating Console spec added anti-Tokenmaxxing guidance: do not use token consumption, tool launch count, or AI usage leaderboard as performance metrics.",
  "market_learning": "Mature productivity measurement avoids simple activity metrics and uses balanced outcome, quality, satisfaction, flow, and reliability metrics.",
  "next_run_bias": "Treat metric design as incentive governance; block bad metrics before they reach scorecards.",
  "must_check_next": [
    "Is token consumption used only for cost diagnostics?",
    "Are activity metrics excluded from performance evaluation?",
    "Does each approved metric have formula, data source, owner, baseline, quality gate, and anti-gaming check?",
    "Who can approve metric exceptions?"
  ]
 }

diff --git a/market-maturity.md b/market-maturity.md
diff --git a/people-sync.md b/people-sync.md
diff --git a/production-brief.md b/production-brief.md
diff --git a/production-readiness.md b/production-readiness.md
diff --git a/skill-usage.md b/skill-usage.md
diff --git a/solution-selection.md b/solution-selection.md
	<!doctype html>
	<html lang="zh-Hant">
	<head>
	  <meta charset="utf-8">
	  <meta name="viewport" content="width=device-width, initial-scale=1">
	  <title>Operating Console 反 Tokenmaxxing 指標治理台</title>
	  <style>
	    /* Colour tokens; every rule below reads them through var(). */
	    :root{--ink:#18212f;--muted:#627083;--line:#d9e1e8;--paper:#f6f8fb;--card:#fff;--blue:#1d4ed8;--green:#0f7f5c;--amber:#a16207;--red:#b3361d;--violet:#6d28d9}

	    /* Page base and the two page-level regions. */
	    *{box-sizing:border-box}
	    body{margin:0;background:var(--paper);color:var(--ink);font-family:Inter,ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;line-height:1.5}
	    header{background:#fff;border-bottom:1px solid var(--line);padding:28px clamp(20px,4vw,56px)}
	    main{padding:24px clamp(20px,4vw,56px) 48px}

	    /* Typography. */
	    h1{margin:0 0 12px;font-size:clamp(30px,4vw,52px);line-height:1.05;max-width:1080px}
	    h2{margin:0 0 12px;font-size:22px}
	    h3{margin:0 0 6px;font-size:16px}
	    p{margin-top:0}
	    code{background:#eef3f8;padding:1px 5px;border-radius:4px}
	    .sub{max-width:1080px;color:var(--muted);font-size:17px}

	    /* Grids: .grid makes the container a grid, the second class picks the column template. */
	    .grid{display:grid;gap:16px}
	    .kpis{grid-template-columns:repeat(4,minmax(0,1fr));margin-top:22px}
	    .two{grid-template-columns:1.08fr .92fr}
	    .three{grid-template-columns:repeat(3,minmax(0,1fr))}
	    .timeline{grid-template-columns:repeat(4,minmax(0,1fr))}
	    .flow{grid-template-columns:repeat(5,minmax(0,1fr))}

	    /* Cards and KPI tiles. */
	    .card{background:var(--card);border:1px solid var(--line);border-radius:8px;padding:18px;box-shadow:0 1px 2px rgba(24,33,47,.04)}
	    .metric{font-size:34px;font-weight:780}
	    .label{color:var(--muted);font-size:13px}

	    /* Pills and status text colours. */
	    .pill{display:inline-flex;border:1px solid var(--line);border-radius:999px;padding:4px 10px;font-size:12px;background:#fff;margin:0 6px 8px 0;white-space:nowrap}
	    .ok{color:var(--green)}
	    .warn{color:var(--amber)}
	    .bad{color:var(--red)}
	    .info{color:var(--blue)}

	    /* Registry table. */
	    table{width:100%;border-collapse:collapse;font-size:14px}
	    th,td{text-align:left;padding:10px;border-bottom:1px solid var(--line);vertical-align:top}
	    th{color:var(--muted);font-size:12px;text-transform:uppercase}
	    .badcell{color:var(--red);font-weight:700}
	    .goodcell{color:var(--green);font-weight:700}

	    /* Timeline cards and flow steps. */
	    .day{border-left:4px solid var(--violet)}
	    .step{border:1px solid var(--line);border-radius:8px;padding:12px;min-height:126px;background:#fbfdff}
	    .step strong{display:block;color:var(--violet);margin-bottom:6px}

	    /* Long source URLs may wrap at any point. overflow-wrap replaces the deprecated
	       word-break:break-word; the break-word value is the fallback for browsers
	       that do not know "anywhere". */
	    .source a{color:var(--blue);overflow-wrap:break-word;overflow-wrap:anywhere}

	    /* At 920px and narrower every multi-column grid collapses to a single column. */
	    @media(max-width:920px){.kpis,.two,.three,.timeline,.flow{grid-template-columns:1fr}h1{font-size:34px}}
	  </style>
	</head>
	<body>
	<header>
	  <!-- Page banner: pack/solution pills, title, summary and four KPI tiles. -->
	  <!-- The pills stay on one line with no whitespace between them so no extra inline gap is rendered. -->
	  <span class="pill info">PLS production delivery pack</span><span class="pill ok">Solution: governance / eval / dashboard</span>
	  <h1>Operating Console 反 Tokenmaxxing 指標治理台</h1>
	  <p class="sub">把「反 Tokenmaxxing」從文件章節升級成可驗收的指標治理系統：明確禁止 Token 消耗數量、工具啟動次數、AI 使用人數排行等誘發灌水的活動指標，改用任務完成率、實際節省時間、客戶滿意度、產出品質評分與反作弊稽核。</p>
	  <!-- KPI tiles. A div rather than a section: the group has no heading of its own. -->
	  <div class="grid kpis">
	    <div class="card">
	      <div class="metric bad">3</div>
	      <div class="label">禁止指標：Token、啟動次數、使用排行</div>
	    </div>
	    <div class="card">
	      <div class="metric ok">4</div>
	      <div class="label">替代指標：完成率、節省時間、CSAT、品質</div>
	    </div>
	    <div class="card">
	      <div class="metric">D7</div>
	      <div class="label">完成第一版 metric registry 與 gate</div>
	    </div>
	    <div class="card">
	      <div class="metric">D30</div>
	      <div class="label">接進 Operating Console 考核治理</div>
	    </div>
	  </div>
	</header>
	<main class="grid">
	<!-- Two-column row: problem statement and chosen solution type. -->
	<!-- The row wrapper is a div because it has no heading; each headed card is a section. -->
	<div class="grid two">
	  <section class="card">
	    <h2>本輪問題</h2>
	    <p>Operating Console 若用 Token 消耗、工具啟動次數、AI 使用人數排行當成績效，會把人推向「看起來很 AI」而不是「真的完成任務」。這是典型 Goodhart's law：一旦指標成為目標，它就會失去衡量價值。</p>
	    <span class="pill">Owner: Operating Console owner</span><span class="pill">Due: D7 metric registry</span><span class="pill">Acceptance: bad metrics blocked</span>
	  </section>
	  <section class="card">
	    <h2>解法選型</h2>
	    <p><strong>governance / eval / dashboard</strong>。這不是單篇規格補充，而是考核制度風險。需要 metric registry、禁止清單、替代指標、review workflow、稽核和例外批准。</p>
	  </section>
	</div>

	<!-- Delivery path: one card per milestone (D1, D7, D14, D30), laid out by the .timeline grid. -->
	<section class="card">
	  <h2>D1 / D7 / D14 / D30 路徑</h2>
	  <div class="grid timeline">
	    <div class="card day">
	      <h3>D1</h3>
	      <p>建立禁止指標與替代指標 registry，定義每個指標的 owner、公式、資料源、反作弊檢查。</p>
	    </div>
	    <div class="card day">
	      <h3>D7</h3>
	      <p>在 Operating Console 指標設定加入 governance gate，bad metric 不得上線，例外需 decision record。</p>
	    </div>
	    <div class="card day">
	      <h3>D14</h3>
	      <p>接 3 個實際 AI 工作流，用 outcome metrics 驗證儀表板是否能反映真價值。</p>
	    </div>
	    <div class="card day">
	      <h3>D30</h3>
	      <p>形成 AI performance governance：指標、品質、客戶滿意、節省時間、稽核異常同表決策。</p>
	    </div>
	  </div>
	</section>

	<!-- Five-step purpose chain. Each step's strong label is display:block, so the text after it starts on its own line. -->
	<section class="card">
	  <h2>Purpose-to-Purpose E2E</h2>
	  <div class="grid flow">
	    <div class="step">
	      <strong>原始目的</strong>
	      Operating Console 要衡量 AI 對業務的真實價值。
	    </div>
	    <div class="step">
	      <strong>風險</strong>
	      Tokenmaxxing 把人推向增加消耗與表演式使用。
	    </div>
	    <div class="step">
	      <strong>治理</strong>
	      metric registry 阻擋壞指標，替代成 outcome/quality/time/customer metrics。
	    </div>
	    <div class="step">
	      <strong>採用</strong>
	      主管用可驗證結果考核；員工專注完成任務與提升品質。
	    </div>
	    <div class="step">
	      <strong>結果</strong>
	      降低浪費、提升任務完成率、改善客戶滿意、避免制度誘發錯誤行為。
	    </div>
	  </div>
	</section>

	<!-- Two-column row: metric registry table and the data/API/permission summary. -->
	<!-- The row wrapper is a div because it has no heading; each headed card is a section. -->
	<div class="grid two">
	  <section class="card">
	    <h2>Metric Registry Gate</h2>
	    <!-- aria-label names the table for assistive technology without adding visible text; scope="col" ties each header to its column. -->
	    <table aria-label="Metric Registry Gate">
	      <thead>
	        <tr>
	          <th scope="col">Metric</th>
	          <th scope="col">Status</th>
	          <th scope="col">Reason / Replacement</th>
	        </tr>
	      </thead>
	      <tbody>
	        <tr><td>Token consumed</td><td class="badcell">Blocked</td><td>誘發灌水與低效率；替代為 task completed per verified outcome。</td></tr>
	        <tr><td>Tool launches</td><td class="badcell">Blocked</td><td>啟動不等於採用；替代為 workflow completion rate。</td></tr>
	        <tr><td>AI usage leaderboard</td><td class="badcell">Blocked</td><td>誘發排名焦慮和表演；替代為 team outcome score。</td></tr>
	        <tr><td>Task completion rate</td><td class="goodcell">Allowed</td><td>需定義任務完成證據與品質門檻。</td></tr>
	        <tr><td>Actual time saved</td><td class="goodcell">Allowed</td><td>需 baseline 與抽樣驗證。</td></tr>
	        <tr><td>Customer satisfaction</td><td class="goodcell">Allowed</td><td>需與 AI-assisted workflow 連結，避免單點歸因。</td></tr>
	        <tr><td>Output quality score</td><td class="goodcell">Allowed</td><td>需 rubric、reviewer、sample size 與異議流程。</td></tr>
	      </tbody>
	    </table>
	  </section>
	  <section class="card">
	    <h2>資料 / API / 權限</h2>
	    <!-- These paragraphs are English inside a zh-Hant page, so they are marked lang="en". -->
	    <p lang="en"><strong>Tables:</strong> <code>metric_registry</code>, <code>metric_reviews</code>, <code>metric_observations</code>, <code>gaming_signals</code>, <code>governance_exceptions</code>.</p>
	    <p lang="en"><strong>APIs:</strong> <code>POST /console/metrics/register</code>, <code>POST /console/metrics/:id/review</code>, <code>GET /console/metrics/governance-scorecard</code>.</p>
	    <p lang="en"><strong>Permissions:</strong> team owner can propose metrics; governance owner approves; Louis can override with reason; blocked metrics require exception audit.</p>
	  </section>
	</div>

	<!-- Three-column row: value path, capability uplift, next iteration. -->
	<!-- The row wrapper is a div because it has no heading; each headed card is a section. -->
	<div class="grid three">
	  <section class="card">
	    <h2>價值 / 錢路徑</h2>
	    <p>避免把預算花在 Token 和工具啟動次數上，將投資導向節省時間、提升品質、客戶滿意和任務完成，降低制度性浪費。</p>
	  </section>
	  <section class="card">
	    <h2>人的能力提升</h2>
	    <p>主管學會設計不易被操弄的指標；員工知道 AI 使用的目標是交付成果，而不是堆活動量。</p>
	  </section>
	  <section class="card">
	    <h2>下一輪升級</h2>
	    <p>接實際 Operating Console 指標設定 UI，加入 bad metric blocker、metric review workflow 和 gaming alert。</p>
	  </section>
	</div>

	<!-- External references. The whole section is English inside a zh-Hant page, hence lang="en". -->
	<section class="card source" lang="en">
	  <h2>Market Maturity Inputs</h2>
	  <p>McKinsey notes productivity data can damage organizations if simple activity metrics such as lines of code or commit counts are misused: <a href="https://www.mckinsey.com/industries/technology-media-and-telecommunications/our-insights/yes-you-can-measure-software-developer-productivity?cid=other-eml-mtg-mip-mck">McKinsey developer productivity measurement</a>.</p>
	  <p>The SPACE framework balances Satisfaction, Performance, Activity, Communication, and Efficiency to avoid over-optimizing one visible activity metric: <a href="https://space-framework.com/">SPACE framework</a>.</p>
	  <p>DORA metrics connect delivery performance with reliability and stability rather than raw activity volume: <a href="https://dora.dev/guides/dora-metrics/">DORA metrics guide</a>.</p>
	</section>
	</main>
	</body>
	</html>
	{
	"project": "AI 自建專案：Operating Console 工具規格",
	"job_id": "a766341b-1d53-4a61-9363-e6c74790cea2",
	"selected_solution": "governance/eval/dashboard",
	"learned_signal": "Operating Console spec added anti-Tokenmaxxing guidance: do not use token consumption, tool launch count, or AI usage leaderboard as performance metrics.",
	"market_learning": "Mature productivity measurement avoids simple activity metrics and uses balanced outcome, quality, satisfaction, flow, and reliability metrics.",
	"next_run_bias": "Treat metric design as incentive governance; block bad metrics before they reach scorecards.",
	"must_check_next": [
	"Is token consumption used only for cost diagnostics?",
	"Are activity metrics excluded from performance evaluation?",
	"Does each approved metric have formula, data source, owner, baseline, quality gate, and anti-gaming check?",
	"Who can approve metric exceptions?"
	]
	}