esz135888 · May 23, 2026 20:33
diff --git a/acceptance-tests.md b/acceptance-tests.md
diff --git a/artifact-url-or-pr.md b/artifact-url-or-pr.md
diff --git a/d14-correction-router-scorecard.html b/d14-correction-router-scorecard.html
 <!doctype html>
 <html lang="zh-Hant">
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!-- Document title; the ampersand is written as an entity so it can never be parsed as the start of a character reference. -->
  <title>AI 預測驗證 D14 Correction Router &amp; Weekly Scorecard</title>
  <style>
    /* Colour tokens referenced through var() by the rules below. */
    :root {
      --ink: #18222d;
      --muted: #607080;
      --line: #d7dde5;
      --bg: #f7f8fb;
      --panel: #fff;
      --blue: #2258c9;
      --green: #147a53;
      --amber: #9c6200;
      --red: #b3261e;
      --violet: #6d3fc7;
    }

    /* Element defaults. */
    * { box-sizing: border-box; }
    body { margin: 0; background: var(--bg); color: var(--ink); font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; line-height: 1.5; }
    header { padding: 24px clamp(18px, 4vw, 48px); background: #fff; border-bottom: 1px solid var(--line); }
    h1 { margin: 0; font-size: clamp(24px, 3vw, 36px); letter-spacing: 0; }
    h2 { margin: 0 0 12px; font-size: 18px; }
    h3 { margin: 0 0 8px; font-size: 15px; }
    p { margin: 0 0 10px; }
    main { padding: 22px clamp(18px, 4vw, 48px) 48px; display: grid; gap: 16px; }

    /* Layout helpers and panels. */
    .sub { color: var(--muted); max-width: 1100px; margin-top: 8px; }
    .grid { display: grid; gap: 16px; }
    .cols4 { grid-template-columns: repeat(4, minmax(0, 1fr)); }
    .cols3 { grid-template-columns: repeat(3, minmax(0, 1fr)); }
    .cols2 { grid-template-columns: repeat(2, minmax(0, 1fr)); }
    .panel { background: var(--panel); border: 1px solid var(--line); border-radius: 8px; padding: 16px; }
    .metric { min-height: 118px; display: flex; flex-direction: column; justify-content: space-between; }

    /* Metric typography, status colours, and header tags. */
    .label { font-size: 13px; color: var(--muted); }
    .value { font-size: 30px; font-weight: 760; }
    .ok { color: var(--green); }
    .warn { color: var(--amber); }
    .stop { color: var(--red); }
    .tag { display: inline-flex; align-items: center; height: 24px; padding: 0 8px; border: 1px solid var(--line); border-radius: 999px; background: #fbfcfe; color: var(--muted); font-size: 12px; margin-right: 6px; }

    /* Tables and inline code. */
    table { width: 100%; border-collapse: collapse; font-size: 13px; }
    th, td { border-bottom: 1px solid var(--line); padding: 10px 8px; text-align: left; vertical-align: top; }
    th { background: #fbfcfe; color: var(--muted); }
    code { background: #eef2f7; border-radius: 4px; padding: 2px 5px; font-size: 12px; }

    /* Roadmap lanes: the border colour depends on the lane's position among its siblings. */
    .lane { border-left: 4px solid var(--blue); padding-left: 12px; }
    .lane:nth-child(2) { border-color: var(--amber); }
    .lane:nth-child(3) { border-color: var(--violet); }
    .lane:nth-child(4) { border-color: var(--green); }

    /* Lists. */
    ul { padding-left: 18px; margin: 0; }
    li { margin: 6px 0; }

    /* At 980px and below every multi-column grid becomes a single column. */
    @media (max-width: 980px) {
      .cols4, .cols3, .cols2 { grid-template-columns: 1fr; }
    }
  </style>
 </head>
 <body>
  <header>
    <!-- Page banner: title, scope summary, then owner / reviewer / due-date tags. -->
    <h1>D14 Correction Router &amp; Weekly Scorecard</h1>
    <p class="sub">接續 D7 calibration run：把 miss、partial、unknown、reviewer dispute 轉成可派工的修復路由，並定義 D30 weekly scorecard 上線前的資料、權限、稽核與採用門檻。</p>
    <!-- The tag spans stay on one line: whitespace between these inline-flex elements would add visible gaps. -->
    <p><span class="tag">Owner: Louis</span><span class="tag">Review: zihrou / iron</span><span class="tag">Due: 2026-06-07</span><span class="tag">No route, no close</span></p>
  </header>
  <main>
    <!-- Headline gates for D14. The group has no heading of its own, so it is a plain wrapper rather than a section. -->
    <div class="grid cols4">
      <div class="panel metric"><span class="label">D14 Goal</span><span class="value">100%</span><span class="label">non-hit items routed</span></div>
      <div class="panel metric"><span class="label">Source Gap SLA</span><span class="value warn">7d</span><span class="label">adapter owner must respond</span></div>
      <div class="panel metric"><span class="label">Re-run Cohort</span><span class="value ok">&gt;=10</span><span class="label">fixed cases before D30 scorecard</span></div>
      <div class="panel metric"><span class="label">Release Gate</span><span class="value stop">Block</span><span class="label">if unresolved gap &gt; 20%</span></div>
    </div>
    <section class="panel">
      <!-- 30-day roadmap: one lane per checkpoint (D1, D7, D14, D30). -->
      <h2>30 天推進路徑</h2>
      <div class="grid cols4">
        <div class="lane">
          <h3>D1</h3>
          <p>讀取 D7 run scorecard，鎖定所有 miss、partial、unknown、dispute。</p>
        </div>
        <div class="lane">
          <h3>D7</h3>
          <p>完成路由分類：rubric fix、source adapter gap、owner follow-up、model prompt fix。</p>
        </div>
        <div class="lane">
          <h3>D14</h3>
          <p>每個路由有 owner、due、evidence、repair action，並啟動 re-run cohort。</p>
        </div>
        <div class="lane">
          <h3>D30</h3>
          <p>週報 scorecard 上線：accuracy trend、unknown trend、repair cycle time、adoption gate。</p>
        </div>
      </div>
    </section>
    <section class="panel">
      <h2>目的到目的 E2E</h2>
      <!-- One row per stage: what goes in, what comes out, who adopts it, and which metric improves. -->
      <table>
        <thead>
          <tr><th scope="col">階段</th><th scope="col">輸入</th><th scope="col">產出</th><th scope="col">人如何採用</th><th scope="col">指標改善</th></tr>
        </thead>
        <tbody>
          <tr><td>原始目的</td><td>上次 review 預測與多來源 evidence</td><td>D7 labels</td><td>Louis 看是否可信</td><td>降低 false confidence</td></tr>
          <!-- The input lists partial as well, matching the non-hit categories named in the page summary and the D1 lane. -->
          <tr><td>D14 修復</td><td>miss/partial/unknown/dispute</td><td><code>correction_route</code></td><td>zihrou/iron 確認責任歸因</td><td>縮短人工追查時間</td></tr>
          <!-- The output uses the same entity name as the Data / API panel. -->
          <tr><td>D30 採用</td><td>已修復 cohort 與 re-run 結果</td><td><code>weekly_prediction_scorecard</code></td><td>PLS 推送週報與下一輪派工</td><td>提高 AI review 可治理性</td></tr>
        </tbody>
      </table>
    </section>
    <!-- Two-column group without a heading of its own, so it is a plain wrapper rather than a section. -->
    <div class="grid cols2">
      <div class="panel">
        <h2>Correction Router</h2>
        <!-- NOTE(review): the route_type enum in this pack's data model also lists ignore_with_reason, which has no row here; confirm whether that is intentional. -->
        <table>
          <thead>
            <tr><th scope="col">Route</th><th scope="col">When</th><th scope="col">Owner</th><th scope="col">Acceptance</th></tr>
          </thead>
          <tbody>
            <tr><td>rubric_fix</td><td>prediction wording or success criteria was ambiguous</td><td>zihrou</td><td>new rubric tested on &gt;=10 historical cases</td></tr>
            <tr><td>source_adapter_gap</td><td>evidence exists but signal/action-item sync missed it</td><td>iron</td><td>adapter maps source id, timestamp, extractor version</td></tr>
            <tr><td>owner_followup</td><td>human action status is unknown</td><td>Louis delegate</td><td>owner replies done / blocked / rejected</td></tr>
            <tr><td>model_prompt_fix</td><td>model overpredicted from weak signal</td><td>PLS worker</td><td>prompt version and before/after eval recorded</td></tr>
          </tbody>
        </table>
      </div>
      <div class="panel">
        <h2>價值 / 錢路徑</h2>
        <ul>
          <li>營收：只把通過驗證的 AI review 模式放進業務或管理流程，避免錯誤建議拉低成交與決策品質。</li>
          <li>省成本：用 route taxonomy 取代逐案人工討論，讓 reviewer 只處理高風險樣本。</li>
          <li>降風險：未知率與 source gap 未修前禁止 dashboard 化，避免漂亮圖表掩蓋資料缺口。</li>
          <li>釋放人力：把每個 miss 直接變成 owner/due/acceptance 的下一步，不再靠人追問。</li>
        </ul>
      </div>
    </div>
    <!-- Three-column group without a heading of its own, so it is a plain wrapper rather than a section. -->
    <div class="grid cols3">
      <!-- Endpoint paths are written in full, including the repair-actions endpoint, to match the pack's API table. -->
      <div class="panel"><h2>Data / API</h2><p>新增 <code>correction_route</code>、<code>repair_action</code>、<code>rerun_cohort</code>、<code>weekly_prediction_scorecard</code>。API: <code>POST /ai-prediction/correction-routes/bulk</code>、<code>PATCH /ai-prediction/correction-routes/:id</code>、<code>POST /ai-prediction/repair-actions</code>、<code>POST /ai-prediction/rerun-cohorts</code>、<code>GET /ai-prediction/weekly-scorecards/:project_id</code>。</p></div>
      <div class="panel"><h2>權限 / 稽核</h2><p>Owner 可 close route；reviewer 可改 dispute status；worker 可寫路由與 re-run 結果。每筆修改保留 evidence hash、actor、timestamp、model/prompt version。</p></div>
      <div class="panel"><h2>提升人的能力</h2><p>Louis 得到 go/no-go 節奏；zihrou 把模糊預測變 rubric；iron 把資料缺口變 adapter backlog；PLS worker 知道下一輪要修哪一類。</p></div>
    </div>
    <section class="panel">
      <h2>LINE 草稿</h2>
      <!-- Draft message text; the greater-than sign is written as an entity and renders as a literal character. -->
      <p>AI 預測驗證進入 D14 修復路由。請 Louis 確認 2026-06-07 前所有 D7 non-hit 都要有 owner/due/next action；zihrou 看 rubric_fix；iron 看 source_adapter_gap。若 unresolved gap &gt;20%，D30 weekly scorecard 不上線，只派 source/rubric 修復。</p>
    </section>
  </main>
 </body>
 </html>
diff --git a/data-model.md b/data-model.md
diff --git a/decision-record.md b/decision-record.md
diff --git a/learning-memory.json b/learning-memory.json
 {
  "job_id": "28e3e2ec-b385-42a7-92a7-421129fe81a7",
  "project_topic": "AI prediction verification module for signals and action-item evidence",
  "current_artifact": "D14 Correction Router and Weekly Scorecard Pack",
  "previous_artifact": "D7 Calibration Run Control Tower",
  "owner": "Louis",
  "reviewers": ["zihrou", "iron"],
  "due": "2026-06-07",
  "market_learning": [
    "Current AI observability practice combines traces, evals, reviewer annotations, production examples, experiments, and monitoring scorecards.",
    "OpenTelemetry GenAI conventions suggest preserving standard trace/evidence attributes rather than vendor-specific logs only.",
    "Phoenix, Evidently, and LangSmith all point toward continuous evaluation and production monitoring rather than one-time reports."
  ],
  "next_worker_rule": {
    "if_no_d7_run": "Return to D7 Calibration Run Control Tower and create calibration_run first.",
    "if_non_hits_unrouted": "Create correction_route records for every miss, partial, unknown, and dispute.",
    "if_unresolved_gap_rate_gt_20_percent": "Do not ship weekly scorecard; dispatch source_adapter_gap and rubric_fix tasks.",
    "if_routes_repaired_but_not_rerun": "Create rerun_cohort and compare before/after unknown and miss rates.",
    "if_gates_pass": "Build D30 weekly scorecard/dashboard for PLS backend and LINE cadence."
  },
  "acceptance_gate": {
    "routed_non_hit_rate": 1.0,
    "unresolved_gap_rate_max": 0.2,
    "rerun_cohort_min": 10,
    "reviewer_agreement_target": 0.8
  },
  "do_not_repeat": [
    "Do not build another generic AI prediction verification pack.",
    "Do not build dashboard before correction routes and unresolved gap gate are checked.",
    "Do not close a route without owner, due date, evidence refs, and acceptance rule."
  ]
 }
diff --git a/production-brief.md b/production-brief.md
diff --git a/sources.md b/sources.md
	<!doctype html>
	<html lang="zh-Hant">
	<head>
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width, initial-scale=1">
	<!-- Document title; the ampersand is written as an entity so it can never be parsed as the start of a character reference. -->
	<title>AI 預測驗證 D14 Correction Router &amp; Weekly Scorecard</title>
	<style>
	/* Colour tokens referenced through var() by the rules below. */
	:root {
	--ink: #18222d;
	--muted: #607080;
	--line: #d7dde5;
	--bg: #f7f8fb;
	--panel: #fff;
	--blue: #2258c9;
	--green: #147a53;
	--amber: #9c6200;
	--red: #b3261e;
	--violet: #6d3fc7;
	}
	/* Element defaults. */
	* { box-sizing: border-box; }
	body { margin: 0; background: var(--bg); color: var(--ink); font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; line-height: 1.5; }
	header { padding: 24px clamp(18px, 4vw, 48px); background: #fff; border-bottom: 1px solid var(--line); }
	h1 { margin: 0; font-size: clamp(24px, 3vw, 36px); letter-spacing: 0; }
	h2 { margin: 0 0 12px; font-size: 18px; }
	h3 { margin: 0 0 8px; font-size: 15px; }
	p { margin: 0 0 10px; }
	main { padding: 22px clamp(18px, 4vw, 48px) 48px; display: grid; gap: 16px; }
	/* Layout helpers and panels. */
	.sub { color: var(--muted); max-width: 1100px; margin-top: 8px; }
	.grid { display: grid; gap: 16px; }
	.cols4 { grid-template-columns: repeat(4, minmax(0, 1fr)); }
	.cols3 { grid-template-columns: repeat(3, minmax(0, 1fr)); }
	.cols2 { grid-template-columns: repeat(2, minmax(0, 1fr)); }
	.panel { background: var(--panel); border: 1px solid var(--line); border-radius: 8px; padding: 16px; }
	.metric { min-height: 118px; display: flex; flex-direction: column; justify-content: space-between; }
	/* Metric typography, status colours, and header tags. */
	.label { font-size: 13px; color: var(--muted); }
	.value { font-size: 30px; font-weight: 760; }
	.ok { color: var(--green); }
	.warn { color: var(--amber); }
	.stop { color: var(--red); }
	.tag { display: inline-flex; align-items: center; height: 24px; padding: 0 8px; border: 1px solid var(--line); border-radius: 999px; background: #fbfcfe; color: var(--muted); font-size: 12px; margin-right: 6px; }
	/* Tables and inline code. */
	table { width: 100%; border-collapse: collapse; font-size: 13px; }
	th, td { border-bottom: 1px solid var(--line); padding: 10px 8px; text-align: left; vertical-align: top; }
	th { background: #fbfcfe; color: var(--muted); }
	code { background: #eef2f7; border-radius: 4px; padding: 2px 5px; font-size: 12px; }
	/* Roadmap lanes: the border colour depends on the lane's position among its siblings. */
	.lane { border-left: 4px solid var(--blue); padding-left: 12px; }
	.lane:nth-child(2) { border-color: var(--amber); }
	.lane:nth-child(3) { border-color: var(--violet); }
	.lane:nth-child(4) { border-color: var(--green); }
	/* Lists. */
	ul { padding-left: 18px; margin: 0; }
	li { margin: 6px 0; }
	/* At 980px and below every multi-column grid becomes a single column. */
	@media (max-width: 980px) {
	.cols4, .cols3, .cols2 { grid-template-columns: 1fr; }
	}
	</style>
	</head>
	<body>
	<header>
	<!-- Page banner: title, scope summary, then owner / reviewer / due-date tags. -->
	<h1>D14 Correction Router &amp; Weekly Scorecard</h1>
	<p class="sub">接續 D7 calibration run：把 miss、partial、unknown、reviewer dispute 轉成可派工的修復路由，並定義 D30 weekly scorecard 上線前的資料、權限、稽核與採用門檻。</p>
	<!-- The tag spans stay on one line: whitespace between these inline-flex elements would add visible gaps. -->
	<p><span class="tag">Owner: Louis</span><span class="tag">Review: zihrou / iron</span><span class="tag">Due: 2026-06-07</span><span class="tag">No route, no close</span></p>
	</header>
	<main>
	<!-- Headline gates for D14. The group has no heading of its own, so it is a plain wrapper rather than a section. -->
	<div class="grid cols4">
	<div class="panel metric"><span class="label">D14 Goal</span><span class="value">100%</span><span class="label">non-hit items routed</span></div>
	<div class="panel metric"><span class="label">Source Gap SLA</span><span class="value warn">7d</span><span class="label">adapter owner must respond</span></div>
	<div class="panel metric"><span class="label">Re-run Cohort</span><span class="value ok">&gt;=10</span><span class="label">fixed cases before D30 scorecard</span></div>
	<div class="panel metric"><span class="label">Release Gate</span><span class="value stop">Block</span><span class="label">if unresolved gap &gt; 20%</span></div>
	</div>
	<section class="panel">
	<!-- 30-day roadmap: one lane per checkpoint (D1, D7, D14, D30). -->
	<h2>30 天推進路徑</h2>
	<div class="grid cols4">
	<div class="lane">
	<h3>D1</h3>
	<p>讀取 D7 run scorecard，鎖定所有 miss、partial、unknown、dispute。</p>
	</div>
	<div class="lane">
	<h3>D7</h3>
	<p>完成路由分類：rubric fix、source adapter gap、owner follow-up、model prompt fix。</p>
	</div>
	<div class="lane">
	<h3>D14</h3>
	<p>每個路由有 owner、due、evidence、repair action，並啟動 re-run cohort。</p>
	</div>
	<div class="lane">
	<h3>D30</h3>
	<p>週報 scorecard 上線：accuracy trend、unknown trend、repair cycle time、adoption gate。</p>
	</div>
	</div>
	</section>
	<section class="panel">
	<h2>目的到目的 E2E</h2>
	<!-- One row per stage: what goes in, what comes out, who adopts it, and which metric improves. -->
	<table>
	<thead>
	<tr><th scope="col">階段</th><th scope="col">輸入</th><th scope="col">產出</th><th scope="col">人如何採用</th><th scope="col">指標改善</th></tr>
	</thead>
	<tbody>
	<tr><td>原始目的</td><td>上次 review 預測與多來源 evidence</td><td>D7 labels</td><td>Louis 看是否可信</td><td>降低 false confidence</td></tr>
	<!-- The input lists partial as well, matching the non-hit categories named in the page summary and the D1 lane. -->
	<tr><td>D14 修復</td><td>miss/partial/unknown/dispute</td><td><code>correction_route</code></td><td>zihrou/iron 確認責任歸因</td><td>縮短人工追查時間</td></tr>
	<!-- The output uses the same entity name as the Data / API panel. -->
	<tr><td>D30 採用</td><td>已修復 cohort 與 re-run 結果</td><td><code>weekly_prediction_scorecard</code></td><td>PLS 推送週報與下一輪派工</td><td>提高 AI review 可治理性</td></tr>
	</tbody>
	</table>
	</section>
	<!-- Two-column group without a heading of its own, so it is a plain wrapper rather than a section. -->
	<div class="grid cols2">
	<div class="panel">
	<h2>Correction Router</h2>
	<!-- NOTE(review): the route_type enum in this pack's data model also lists ignore_with_reason, which has no row here; confirm whether that is intentional. -->
	<table>
	<thead>
	<tr><th scope="col">Route</th><th scope="col">When</th><th scope="col">Owner</th><th scope="col">Acceptance</th></tr>
	</thead>
	<tbody>
	<tr><td>rubric_fix</td><td>prediction wording or success criteria was ambiguous</td><td>zihrou</td><td>new rubric tested on &gt;=10 historical cases</td></tr>
	<tr><td>source_adapter_gap</td><td>evidence exists but signal/action-item sync missed it</td><td>iron</td><td>adapter maps source id, timestamp, extractor version</td></tr>
	<tr><td>owner_followup</td><td>human action status is unknown</td><td>Louis delegate</td><td>owner replies done / blocked / rejected</td></tr>
	<tr><td>model_prompt_fix</td><td>model overpredicted from weak signal</td><td>PLS worker</td><td>prompt version and before/after eval recorded</td></tr>
	</tbody>
	</table>
	</div>
	<div class="panel">
	<h2>價值 / 錢路徑</h2>
	<ul>
	<li>營收：只把通過驗證的 AI review 模式放進業務或管理流程，避免錯誤建議拉低成交與決策品質。</li>
	<li>省成本：用 route taxonomy 取代逐案人工討論，讓 reviewer 只處理高風險樣本。</li>
	<li>降風險：未知率與 source gap 未修前禁止 dashboard 化，避免漂亮圖表掩蓋資料缺口。</li>
	<li>釋放人力：把每個 miss 直接變成 owner/due/acceptance 的下一步，不再靠人追問。</li>
	</ul>
	</div>
	</div>
	<!-- Three-column group without a heading of its own, so it is a plain wrapper rather than a section. -->
	<div class="grid cols3">
	<!-- Endpoint paths are written in full, including the repair-actions endpoint, to match the pack's API table. -->
	<div class="panel"><h2>Data / API</h2><p>新增 <code>correction_route</code>、<code>repair_action</code>、<code>rerun_cohort</code>、<code>weekly_prediction_scorecard</code>。API: <code>POST /ai-prediction/correction-routes/bulk</code>、<code>PATCH /ai-prediction/correction-routes/:id</code>、<code>POST /ai-prediction/repair-actions</code>、<code>POST /ai-prediction/rerun-cohorts</code>、<code>GET /ai-prediction/weekly-scorecards/:project_id</code>。</p></div>
	<div class="panel"><h2>權限 / 稽核</h2><p>Owner 可 close route；reviewer 可改 dispute status；worker 可寫路由與 re-run 結果。每筆修改保留 evidence hash、actor、timestamp、model/prompt version。</p></div>
	<div class="panel"><h2>提升人的能力</h2><p>Louis 得到 go/no-go 節奏；zihrou 把模糊預測變 rubric；iron 把資料缺口變 adapter backlog；PLS worker 知道下一輪要修哪一類。</p></div>
	</div>
	<section class="panel">
	<h2>LINE 草稿</h2>
	<!-- Draft message text; the greater-than sign is written as an entity and renders as a literal character. -->
	<p>AI 預測驗證進入 D14 修復路由。請 Louis 確認 2026-06-07 前所有 D7 non-hit 都要有 owner/due/next action；zihrou 看 rubric_fix；iron 看 source_adapter_gap。若 unresolved gap &gt;20%，D30 weekly scorecard 不上線，只派 source/rubric 修復。</p>
	</section>
	</main>
	</body>
	</html>
欄位	型別	必填	說明
id	uuid	yes	route id
calibration_run_id	uuid	yes	來源 D7 run
calibration_run_item_id	uuid	yes	non-hit/dispute item
route_type	enum	yes	`rubric_fix`, `source_adapter_gap`, `owner_followup`, `model_prompt_fix`, `ignore_with_reason`
severity	enum	yes	`P0`, `P1`, `P2`
owner_user_id	uuid	yes	route owner
due_at	datetime	yes	修復期限
evidence_refs	jsonb	yes	signals/action items/reviewer sample refs
acceptance_rule	text	yes	如何判定修好
status	enum	yes	`open`, `in_progress`, `ready_for_rerun`, `verified`, `closed`, `blocked`
audit_ref	text	yes	decision record / worker run id
欄位	型別	必填	說明
id	uuid	yes	action id
correction_route_id	uuid	yes	parent route
action_type	enum	yes	`edit_rubric`, `fix_adapter`, `ask_owner`, `update_prompt`, `document_ignore`
before_state	jsonb	yes	修復前 evidence/rubric/prompt/source 狀態
after_state	jsonb	no	修復後狀態
actor_user_or_worker_id	text	yes	人或 worker
completed_at	datetime	no	完成時間
欄位	型別	必填	說明
id	uuid	yes	cohort id
source_calibration_run_id	uuid	yes	原 D7 run
route_ids	uuid[]	yes	要重跑的 routes
status	enum	yes	`planned`, `running`, `passed`, `failed`
before_unknown_rate	decimal	yes	修復前 unknown
after_unknown_rate	decimal	no	修復後 unknown
before_miss_rate	decimal	yes	修復前 miss
after_miss_rate	decimal	no	修復後 miss
欄位	型別	必填	說明
week_start	date	yes	週期
project_id	uuid	yes	PLS project
calibration_run_id	uuid	yes	最新 run
routed_non_hit_rate	decimal	yes	目標 1.0
unresolved_gap_rate	decimal	yes	必須 <=0.2
reviewer_agreement_rate	decimal	yes	目標 >=0.8
rerun_improvement_delta	decimal	yes	re-run 改善幅度
adoption_gate	enum	yes	`ship_weekly_scorecard`, `repair_first`, `blocked`
API	Method	用途
`/ai-prediction/correction-routes/bulk`	POST	從 D7 run 批次建立 routes
`/ai-prediction/correction-routes/:id`	PATCH	更新 owner、status、acceptance、evidence
`/ai-prediction/repair-actions`	POST	寫入修復動作
`/ai-prediction/rerun-cohorts`	POST	建立修復後 re-run cohort
`/ai-prediction/weekly-scorecards/:project_id`	GET	給 PLS 後台/LINE 取週報
	{
	"job_id": "28e3e2ec-b385-42a7-92a7-421129fe81a7",
	"project_topic": "AI prediction verification module for signals and action-item evidence",
	"current_artifact": "D14 Correction Router and Weekly Scorecard Pack",
	"previous_artifact": "D7 Calibration Run Control Tower",
	"owner": "Louis",
	"reviewers": ["zihrou", "iron"],
	"due": "2026-06-07",
	"market_learning": [
	"Current AI observability practice combines traces, evals, reviewer annotations, production examples, experiments, and monitoring scorecards.",
	"OpenTelemetry GenAI conventions suggest preserving standard trace/evidence attributes rather than vendor-specific logs only.",
	"Phoenix, Evidently, and LangSmith all point toward continuous evaluation and production monitoring rather than one-time reports."
	],
	"next_worker_rule": {
	"if_no_d7_run": "Return to D7 Calibration Run Control Tower and create calibration_run first.",
	"if_non_hits_unrouted": "Create correction_route records for every miss, partial, unknown, and dispute.",
	"if_unresolved_gap_rate_gt_20_percent": "Do not ship weekly scorecard; dispatch source_adapter_gap and rubric_fix tasks.",
	"if_routes_repaired_but_not_rerun": "Create rerun_cohort and compare before/after unknown and miss rates.",
	"if_gates_pass": "Build D30 weekly scorecard/dashboard for PLS backend and LINE cadence."
	},
	"acceptance_gate": {
	"routed_non_hit_rate": 1.0,
	"unresolved_gap_rate_max": 0.2,
	"rerun_cohort_min": 10,
	"reviewer_agreement_target": 0.8
	},
	"do_not_repeat": [
	"Do not build another generic AI prediction verification pack.",
	"Do not build dashboard before correction routes and unresolved gap gate are checked.",
	"Do not close a route without owner, due date, evidence refs, and acceptance rule."
	]
	}
時點	應長成什麼樣子	驗收
D1	讀取 D7 run scorecard，鎖定所有 non-hit/dispute item。	D7 run id、scorecard、non-hit list 可追溯。
D7	完成 correction route taxonomy。	100% non-hit 有 route type、owner、due、evidence。
D14	啟動修復 action 與 re-run cohort。	至少 10 件修復後 re-run，未解 gap 不超過 20%。
D30	Weekly scorecard 可進 PLS 後台與 LINE 節奏。	accuracy/unknown/repair cycle/adoption gate 可週更。
層	交付內容
脈絡框架	D7 non-hit/dispute -> D14 correction route -> D30 weekly scorecard。
作業流程	scorecard ingest -> route assignment -> owner repair -> re-run cohort -> adoption gate。
資料模型	`correction_route`、`repair_action`、`rerun_cohort`、`weekly_prediction_scorecard`。
可操作工具	HTML control tower、資料模型、驗收測試、LINE 草稿、learning memory。
驗收指標	100% non-hit routed、unresolved gap <=20%、re-run cohort >=10、scorecard schema ready。
採用升級	通過 D14 後進 PLS 後台週報；未通過則只派 source/rubric 修復。