feat(registry): Phase 3 零件投稿靜態把關 + component-gatekeeping SDD

新 SDD .agents/specs/component-gatekeeping/(richblack 確認,含 venue 修訂 + 信任模型)。

registry 端靜態把關(CF Worker 可跑,不執行 wasm):
- G1 detectFakeComponent: 外部 URL/domain + http_request 子集偵測,硬擋退稿指回 recipe
- G3 wasmImports: 解析 wasm import section,只准 wasi_snapshot_preview1 + u6u 白名單
- G5/G6: unimplemented_steps 明列 gherkin/cold_start/runtime_compat,不假綠(§3c/§7)
- gherkin_evidence 一致性驗證(投稿者本地跑,registry 不重跑——CF 禁 runtime 編譯 wasm)

把關範圍:公共庫 + self-hosted 私人庫同一套(design §0.0)。
信任模型(design §4.5):Gherkin 全綠≠安全;純 WASI 沙箱框死能力才是發佈底氣;
第一期 evidence 可造假(誠實標明),平台重跑列未來。

hook: pre-write-guard 白名單加 component-gatekeeping / component-registry-canon SDD 目錄。

測試: sandboxAcceptance.test.ts 4 綠(含 G1 假零件被擋)。

待續(同 SDD): G4 CLI 投稿指令本地跑 Gherkin、G0 人類閘門、R5 白名單+本機 hook。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-05-29 17:53:03 +08:00
parent fdb62e8b27
commit 202a5ab8d6
9 changed files with 609 additions and 66 deletions
@@ -0,0 +1,92 @@
// G1 假零件偵測(component-gatekeeping SDD,R2)
//
// 判準(DECISIONS §1):零件若滿足任一,是假零件,該降級成 recipe / 工作流:
// (a) contract 或 wasm binary 出現具體外部服務 URL / domain
// (b) 宣告能力是 http_request 子集(打某固定 endpoint)
//
// Q2 決議(richblack 2026-05-29):兩者都「硬擋」(不是只 warn)。
// 理由:零件不該連外,連外即 recipe。這兩個 pattern 都是「該是 recipe 的東西偽裝成零件」。
//
// 排除:auth_* primitive(credential 後端,DECISIONS §3b 不適用假零件判準)、http_request 自己。
import type { ComponentContract } from '../types';
// Canonical ids that are exempt from the fake-component criteria:
// `http_request` itself and the `auth_*` primitives.
// NOTE(review): the exemption is keyed on the submitter-supplied `canonical_id`
// (see the `EXEMPT_IDS.has(contract.canonical_id)` check in detectFakeComponent).
// Confirm that these reserved ids cannot be claimed by an arbitrary submission,
// otherwise naming a component `http_request` skips the whole scan.
const EXEMPT_IDS = new Set([
'http_request',
'auth_static_key',
'auth_oauth2',
'auth_service_account',
'auth_mtls',
]);
// Patterns for external URLs / domains.
// - URL_SCHEME_RE: an explicit `http://` or `https://` followed by any run of
//   characters up to whitespace, a quote, a backtick or `)`.
// - BARE_DOMAIN_RE: a bare domain such as `api.foo.com` or `foo.googleapis.com`.
const URL_SCHEME_RE = /\bhttps?:\/\/[^\s"'`)]+/i;
// Bare domain: one or more dot-separated labels ending in a TLD from a fixed list.
// Requiring a listed TLD keeps generic dotted names (e.g. `stdin.json`) from matching.
// The `googleapis\.com` / `telegram\.org` alternatives are already covered by the
// plain `com` / `org` alternatives; they are listed explicitly for readability.
const BARE_DOMAIN_RE = /\b[a-z0-9][a-z0-9-]*(\.[a-z0-9-]+)*\.(com|org|net|dev|io|me|click|app|co|googleapis\.com|telegram\.org)\b/i;
/**
 * Collapse the human-readable parts of a contract into a single string so the
 * URL / domain patterns can be run over it in one pass.
 *
 * Fields included, in order: display_name, description, input_schema and
 * output_schema (JSON-serialised), tags, aliases. Missing fields contribute an
 * empty string (or `{}` for schemas). Fields are separated by newlines.
 */
function flattenContractText(contract: ComponentContract): string {
  const serialiseSchema = (schema: unknown): string => JSON.stringify(schema ?? {});
  const joinWords = (words: readonly string[] | undefined | null): string => (words ?? []).join(' ');

  const displayName = contract.display_name ?? '';
  const description = contract.description ?? '';
  const inputSchemaText = serialiseSchema(contract.input_schema);
  const outputSchemaText = serialiseSchema(contract.output_schema);
  const tagText = joinWords(contract.tags);
  const aliasText = joinWords(contract.aliases);

  return [displayName, description, inputSchemaText, outputSchemaText, tagText, aliasText].join('\n');
}
/**
 * Decide whether a submitted component is a "fake component", i.e. something
 * that should have been published as a recipe / workflow instead.
 *
 * Checks, in order (the first hit wins):
 *  1. the flattened contract text contains an `http(s)://` URL;
 *  2. the flattened contract text contains a bare domain;
 *  3. the wasm binary, decoded as UTF-8, contains an `http(s)://` URL
 *     (bare domains are NOT scanned in the binary);
 *  4. heuristic "http_request subset": a top-level input_schema key that looks
 *     like a URL field AND a description that reads like "call some API".
 *
 * Components whose `canonical_id` is in `EXEMPT_IDS` skip every check.
 *
 * @param contract  The submitted component contract.
 * @param wasmBytes The submitted wasm binary.
 * @returns `null` when the component passes; otherwise a rejection reason that
 *          already includes the pointer back to the recipe path.
 */
export function detectFakeComponent(
  contract: ComponentContract,
  wasmBytes: Uint8Array,
): string | null {
  if (EXEMPT_IDS.has(contract.canonical_id)) {
    return null;
  }

  // Shared tail of every rejection message: points the submitter to the recipe path.
  const pointToRecipe =
    '。這該是 API recipe(http_request + 固定設定)或工作流,不是零件。' +
    '零件 = 封閉邏輯(流程控制 / 資料處理),不連外部服務。見 DECISIONS §1。';

  // (a) External URL / domain in the contract text.
  const contractText = flattenContractText(contract);
  const schemeHit = contractText.match(URL_SCHEME_RE);
  if (schemeHit) {
    // The echoed URL is capped at 80 characters to keep the message short.
    return `偵測到 contract 含外部 URL:${schemeHit[0].slice(0, 80)}${pointToRecipe}`;
  }
  const domainHit = contractText.match(BARE_DOMAIN_RE);
  if (domainHit) {
    return `偵測到 contract 含外部 domain:${domainHit[0]}${pointToRecipe}`;
  }

  // (a') External URL embedded in the wasm binary (endpoint compiled into the code).
  // The whole binary is decoded as UTF-8 (invalid sequences become U+FFFD) and only
  // the explicit-scheme pattern is applied; the bare-domain pattern is not used here.
  const wasmText = new TextDecoder('utf-8').decode(wasmBytes);
  const wasmSchemeHit = wasmText.match(URL_SCHEME_RE);
  if (wasmSchemeHit) {
    return `偵測到 wasm 內嵌外部 URL:${wasmSchemeHit[0].slice(0, 80)}${pointToRecipe}`;
  }

  // (b) http_request subset: the component merely "calls some API / endpoint".
  // Both fields are nullish-guarded so a missing one never leaks the text "undefined".
  const desc = `${contract.description ?? ''} ${contract.display_name ?? ''}`;

  // NOTE(review): only the top-level keys of input_schema are inspected. If
  // input_schema is a JSON-Schema object the field names would live under
  // `properties` — confirm the schema shape against the contract definition.
  const inputKeys = Object.keys(contract.input_schema ?? {}).join(' ').toLowerCase();
  const hasUrlField = /\b(url|endpoint|api_url|base_url|host)\b/.test(inputKeys);

  // ASCII verbs must start at a word boundary. Without it "put" matches inside
  // "input" / "output" and "get" inside "target" / "widget", which hard-rejects
  // pure-logic components such as a URL validator. The CJK verbs need no boundary.
  const describesHttpCall =
    /(?:打|呼叫|\b(?:call|fetch|request|post|get|put|delete)).*(?:api|endpoint|url|https?)/i.test(desc);

  if (hasUrlField && describesHttpCall) {
    return `偵測到疑似 http_request 子集(input 有 url 欄位 + 描述為打 API)${pointToRecipe}`;
  }
  return null;
}
+82 -35
View File
@@ -3,6 +3,17 @@
import { FORBIDDEN_SYSCALLS } from '../types';
import type { ComponentContract, SandboxResult, SandboxStep } from '../types';
import { detectFakeComponent } from './detectFakeComponent';
import { checkPureWasi } from './wasmImports';
// 註:G4 Gherkin 不在 registry 跑(CF Worker 禁止 runtime 編譯 wasm)。
// Gherkin 在投稿者本地(CLI/Node)跑,registry 只驗 gherkin_evidence 一致性。
// 見 .agents/specs/component-gatekeeping/design.md §4 修訂。
// ── Step (G1): fake-component detection (runs first; component-gatekeeping SDD R2) ──
// Adapter to the (wasmBytes, contract) step signature; detectFakeComponent takes
// its arguments in the opposite order.
function checkFakeComponent(wasmBytes: Uint8Array, contract: ComponentContract): string | null {
  const rejectionReason = detectFakeComponent(contract, wasmBytes);
  return rejectionReason;
}
// ── 步驟 (a):體積檢查 ────────────────────────────────────────────────────────
@@ -17,19 +28,19 @@ function checkSize(wasmBytes: Uint8Array, contract: ComponentContract): string |
// ── Step (b): cold-start time (Phase 0 mock, 0 ms) ────────────────────────────
// Placeholder step: both arguments are ignored and the check always passes.
function checkColdStart(_wasmBytes: Uint8Array, _contract: ComponentContract): string | null {
// Phase 0: mocked — always passes, recorded as 0 ms.
// Phase 2: real measurement is still to be implemented.
return null;
}
// ── 步驟 (c):syscall 掃描 ────────────────────────────────────────────────────
function scanSyscalls(wasmBytes: Uint8Array): string | null {
// 將 .wasm binary 轉為文字,搜尋禁止的 import 字串
// WASM binary 中 import section 的函數名稱以 UTF-8 字串形式存在
const text = new TextDecoder('utf-8').decode(wasmBytes);
// G3R3):import module 白名單 — 只准 wasi_snapshot_preview1 + u6u(避免 runtime 鎖定)。
const pureWasiError = checkPureWasi(wasmBytes);
if (pureWasiError !== null) {
return pureWasiError;
}
// 次要:禁止 syscall 黑名單(網路/檔案系統 import 名)。
// WASM binary 中 import section 的函數名稱以 UTF-8 字串形式存在。
const text = new TextDecoder('utf-8').decode(wasmBytes);
for (const syscall of FORBIDDEN_SYSCALLS) {
if (text.includes(syscall)) {
return `發現禁止的 syscall${syscall}`;
@@ -38,21 +49,8 @@ function scanSyscalls(wasmBytes: Uint8Array): string | null {
return null;
}
// ── 步驟 (d)Gherkin 測試(Phase 0 mock 通過)────────────────────────────────
function runGherkinTests(_wasmBytes: Uint8Array, _contract: ComponentContract): string | null {
// Phase 0mock 通過
// Phase 1 再實作真實 Gherkin 執行
return null;
}
// ── 步驟 (e)runtime 相容測試(Phase 0 mock 通過)────────────────────────────
function checkRuntimeCompat(_wasmBytes: Uint8Array, _contract: ComponentContract): string | null {
// Phase 0mock 通過
// Phase 2 再實作真實多 runtime 測試
return null;
}
// 註:cold_start / runtime_compat 未實作(列 UNIMPLEMENTED_STEPS,不假綠)。
// Gherkin 執行不在 registry(CF 跑不了 wasm),在投稿者本地;registry 只驗 evidence。
// ── 主流程 ────────────────────────────────────────────────────────────────────
@@ -62,35 +60,84 @@ interface StepDef {
guideAnchor: string;
}
// registry 端把關步驟(全靜態,CF Worker 可跑;不執行 wasm)。
// G4 Gherkin 的「執行」在投稿者本地(CLI/Node),registry 只驗 evidence 一致性。
const STEPS: StepDef[] = [
{ name: 'fake_component_scan', run: checkFakeComponent, guideAnchor: '#fake-component' },
{ name: 'size_check', run: checkSize, guideAnchor: '#common-errors' },
{ name: 'cold_start', run: checkColdStart, guideAnchor: '#common-errors' },
{ name: 'syscall_scan', run: scanSyscalls, guideAnchor: '#syscall-constraints' },
{ name: 'gherkin_tests', run: runGherkinTests, guideAnchor: '#local-testing' },
{ name: 'runtime_compat', run: checkRuntimeCompat, guideAnchor: '#contract-example' },
];
// Steps that are not really implemented / not executed on the registry side.
// They are reported explicitly so a pass is never a "fake green" (§3c/§7):
//   gherkin_tests: run locally by the submitter; the registry only checks the
//                  evidence and does not re-run it, so phase-1 evidence can be forged.
//   cold_start / runtime_compat: mocked, not implemented.
const UNIMPLEMENTED_STEPS: SandboxStep[] = ['gherkin_tests', 'cold_start', 'runtime_compat'];
/**
 * Evidence of one Gherkin scenario run locally by the submitter
 * (uploaded by the CLI; stored by the registry so it can be audited).
 */
export interface GherkinEvidence {
// Scenario name; matched against `contract.gherkin_tests[].scenario`.
scenario: string;
// NOTE(review): presumably the scenario's "given" input — not read by the visible checks; confirm.
given: string;
// NOTE(review): presumably the stdout captured from the local run — not read by the visible checks; confirm.
actual_stdout: string;
// Whether the local run passed; any `false` entry causes rejection.
passed: boolean;
}
/**
 * Build the rejection result for a failed acceptance step.
 *
 * `component_hash_id` is left empty because the result describes a failure;
 * `canonical_id` and `version` are copied from the contract.
 */
function fail(step: SandboxStep, reason: string, guideAnchor: string, contract: ComponentContract): SandboxResult {
  const { canonical_id, version } = contract;
  const rejection: SandboxResult = {
    success: false,
    failed_step: step,
    reason,
    guide_anchor: guideAnchor,
    component_hash_id: '',
    canonical_id,
    version,
  };
  return rejection;
}
/**
 * Check that the uploaded Gherkin evidence is consistent with the contract.
 * The wasm is never executed here; only the evidence records are inspected.
 *
 * Rules, checked in this order:
 *  1. every scenario in `contract.gherkin_tests` has an evidence entry with the
 *     same `scenario` name;
 *  2. every evidence entry has `passed === true` (falsy values are rejected).
 *
 * Missing or empty evidence is not rejected (returns `null`).
 *
 * @returns `null` when consistent; otherwise the rejection reason.
 */
function checkGherkinEvidence(contract: ComponentContract, evidence?: GherkinEvidence[]): string | null {
  if (!evidence?.length) {
    return null;
  }

  // Names of all scenarios the submitter reported a result for.
  const reportedScenarios = new Set<string>();
  for (const entry of evidence) {
    reportedScenarios.add(entry.scenario);
  }

  // Coverage first: the first contract scenario without evidence is reported.
  for (const { scenario } of contract.gherkin_tests) {
    if (reportedScenarios.has(scenario)) continue;
    return `gherkin_evidence 缺少 scenario「${scenario}」的本地測試結果`;
  }

  // Then results: the first non-passing entry (in upload order) is reported.
  const firstFailure = evidence.find((entry) => !entry.passed);
  if (firstFailure === undefined) {
    return null;
  }
  return `gherkin_evidence scenario「${firstFailure.scenario}」本地未通過(passed=false`;
}
export function runSandboxAcceptance(
wasmBytes: Uint8Array,
contract: ComponentContract,
gherkinEvidence?: GherkinEvidence[],
): SandboxResult {
// 1. 靜態步驟(假零件 / 體積 / 純WASI;不執行 wasm
for (const step of STEPS) {
const error = step.run(wasmBytes, contract);
if (error !== null) {
return {
success: false,
failed_step: step.name,
reason: error,
guide_anchor: step.guideAnchor,
component_hash_id: '', // 驗收失敗時尚未派發 hash id
canonical_id: contract.canonical_id,
version: contract.version,
};
return fail(step.name, error, step.guideAnchor, contract);
}
}
// 2. Gherkin evidence 一致性(投稿者本地已跑;registry 不重跑)
const evidenceError = checkGherkinEvidence(contract, gherkinEvidence);
if (evidenceError !== null) {
return fail('gherkin_tests', evidenceError, '#local-testing', contract);
}
return {
success: true,
unimplemented_steps: UNIMPLEMENTED_STEPS, // gherkin(本地跑,registry未重跑) / cold_start / runtime_compat
component_hash_id: '', // 由 submitComponent 在驗收通過後填入
canonical_id: contract.canonical_id,
version: contract.version,
+128
View File
@@ -0,0 +1,128 @@
// G3 純 WASI 把關(component-gatekeeping SDD,R3)
//
// 解析 WASM import section,取出所有 import 的 module name。
// 把關:只准 wasi_snapshot_preview1 + u6u(host functions)。
// 其他 module → runtime 鎖定風險,退稿(DECISIONS §4:避免 runtime 鎖定債)。
//
// WASM binary 結構(preview1):
// magic(4) + version(4) + 一串 section
// section: id(1 byte) + size(LEB128) + payload
// import section id = 2。payload: count(LEB128) + 每個 import
// module: len(LEB128) + bytes(UTF-8)
// name: len(LEB128) + bytes(UTF-8)
// kind(1) + 之後依 kind 不同(func: typeidx LEB128 / table / mem / global)
// Whitelist of wasm import module names; checkPureWasi rejects any module not listed here.
const ALLOWED_IMPORT_MODULES = new Set([
'wasi_snapshot_preview1',
'u6u',
]);
/**
 * Decode an unsigned LEB128 integer from `buf` starting at `offset`.
 *
 * The value is accumulated with multiplication rather than `<<`, so it is never
 * negative (a 32-bit shift would wrap values >= 2^31 to negative numbers).
 * Values above 2^53 lose precision, which is acceptable because callers only
 * use large values to skip over data.
 *
 * @param buf    Bytes to read from.
 * @param offset Index of the first byte of the encoding.
 * @returns `[value, nextOffset]` where `nextOffset` is the index just past the encoding.
 * @throws RangeError if the encoding runs past the end of `buf` or is longer
 *         than 10 bytes (the maximum for a 64-bit value).
 */
function readULEB(buf: Uint8Array, offset: number): [number, number] {
  const maxBytes = 10;
  let result = 0;
  let pos = offset;
  for (let shift = 0; shift < maxBytes * 7; shift += 7) {
    const byte = buf[pos];
    if (byte === undefined) {
      throw new RangeError('unexpected end of input while reading LEB128');
    }
    pos += 1;
    result += (byte & 0x7f) * 2 ** shift;
    if ((byte & 0x80) === 0) {
      return [result, pos];
    }
  }
  throw new RangeError('LEB128 encoding is longer than 10 bytes');
}

/**
 * Parse the wasm import section(s) and return the set of imported module names.
 *
 * The parser fails closed: any problem inside an import section (truncation,
 * unknown import kind, invalid UTF-8 in a module name, entries that do not
 * exactly fill the declared section size) yields `null` instead of a partial
 * set, so a caller can never approve a binary whose imports were only partly read.
 *
 * Sections other than the import section are skipped by their declared size
 * without validation; a non-import section whose declared size runs past the
 * end of the buffer simply ends the scan.
 *
 * @param wasmBytes The wasm binary.
 * @returns The set of import module names (empty when there are no imports),
 *          or `null` when the bytes are not a wasm module or cannot be parsed.
 */
export function parseWasmImportModules(wasmBytes: Uint8Array): Set<string> | null {
  // Header: magic "\0asm" followed by a 4-byte version.
  if (wasmBytes.length < 8) return null;
  if (wasmBytes[0] !== 0x00 || wasmBytes[1] !== 0x61 || wasmBytes[2] !== 0x73 || wasmBytes[3] !== 0x6d) {
    return null;
  }

  // fatal: invalid UTF-8 throws instead of being replaced.
  // ignoreBOM: a leading BOM is kept in the decoded name rather than silently
  // stripped, so a BOM-prefixed name cannot masquerade as a whitelisted one.
  const decoder = new TextDecoder('utf-8', { fatal: true, ignoreBOM: true });
  const modules = new Set<string>();

  // Skip a `limits` record: flags, min, and max when bit 0 of flags is set
  // (covers 0x01 as well as shared / 64-bit variants such as 0x03 and 0x05).
  const skipLimits = (at: number): number => {
    const [flags, afterFlags] = readULEB(wasmBytes, at);
    const afterMin = readULEB(wasmBytes, afterFlags)[1];
    if ((flags & 0x01) === 0) return afterMin;
    return readULEB(wasmBytes, afterMin)[1];
  };

  let offset = 8;
  try {
    while (offset < wasmBytes.length) {
      const sectionId = wasmBytes[offset];
      const [sectionSize, payloadStart] = readULEB(wasmBytes, offset + 1);
      // sectionSize is never negative, so the cursor always moves forward.
      const sectionEnd = payloadStart + sectionSize;

      if (sectionId === 2) {
        // Import section: payload = count + entries.
        if (sectionEnd > wasmBytes.length) return null;
        const [count, firstEntry] = readULEB(wasmBytes, payloadStart);
        let p = firstEntry;
        for (let i = 0; i < count; i++) {
          const [modLen, modStart] = readULEB(wasmBytes, p);
          const modEnd = modStart + modLen;
          if (modEnd > sectionEnd) return null;
          modules.add(decoder.decode(wasmBytes.subarray(modStart, modEnd)));

          // The import's field name is not needed; skip it.
          const [nameLen, nameStart] = readULEB(wasmBytes, modEnd);
          p = nameStart + nameLen;

          const kind = wasmBytes[p];
          p += 1;
          switch (kind) {
            case 0x00: // func: typeidx
              p = readULEB(wasmBytes, p)[1];
              break;
            case 0x01: // table: reftype(1) + limits
              p = skipLimits(p + 1);
              break;
            case 0x02: // mem: limits
              p = skipLimits(p);
              break;
            case 0x03: // global: valtype(1) + mut(1)
              p += 2;
              break;
            default:
              // Unknown kind (or read past the end): the rest of the section
              // cannot be skipped safely, so reject rather than return a partial set.
              return null;
          }
          if (p > sectionEnd) return null;
        }
        // Entries must fill the section exactly; leftover bytes could hide imports.
        if (p !== sectionEnd) return null;
      }
      offset = sectionEnd;
    }
  } catch {
    // Truncated LEB128 or invalid UTF-8 → treat the binary as unparseable.
    return null;
  }
  return modules;
}
/**
 * G3 gate: confirm the wasm imports only whitelisted modules.
 *
 * @param wasmBytes The wasm binary to inspect.
 * @returns `null` when every imported module is in `ALLOWED_IMPORT_MODULES`;
 *          otherwise the rejection reason (unparseable binary, or the first
 *          non-whitelisted module in import order).
 */
export function checkPureWasi(wasmBytes: Uint8Array): string | null {
  const importedModules = parseWasmImportModules(wasmBytes);
  if (importedModules === null) {
    return 'WASM import section 無法解析(非合法 WASI preview1 wasm?)';
  }

  const offender = Array.from(importedModules).find((name) => !ALLOWED_IMPORT_MODULES.has(name));
  if (offender === undefined) {
    return null;
  }
  return `偵測到非白名單 import module:「${offender}」。零件只准依賴 wasi_snapshot_preview1 + u6u host functions(避免 runtime 鎖定,見 DECISIONS §4`;
}
+3 -1
View File
@@ -66,13 +66,15 @@ export type ComponentContract = z.infer<typeof ComponentContractSchema>;
// ── 沙盒驗收步驟 ─────────────────────────────────────────────────────────────
export type SandboxStep = 'size_check' | 'cold_start' | 'syscall_scan' | 'gherkin_tests' | 'runtime_compat';
export type SandboxStep = 'fake_component_scan' | 'size_check' | 'cold_start' | 'syscall_scan' | 'gherkin_tests' | 'runtime_compat';
export interface SandboxResult {
success: boolean;
failed_step?: SandboxStep;
reason?: string;
guide_anchor?: string;
// 未真正驗證的步驟(mock,禁假綠 §3c/§7)。success 仍可能為 true,但這些步驟沒實際跑。
unimplemented_steps?: SandboxStep[];
// 驗收通過後回傳兩個 id
component_hash_id: string; // cmp_{8碼hex}workflow 引用用,永久不變
canonical_id: string; // 可讀名稱,搜尋用
+20 -30
View File
@@ -36,19 +36,14 @@ function makeMinimalWasm(extraBytes = 0): Uint8Array {
}
describe('runSandboxAcceptance', () => {
it('合法小型 WASM 通過所有步驟', () => {
const wasm = makeMinimalWasm(10);
const result = runSandboxAcceptance(wasm, BASE_CONTRACT);
expect(result.success).toBe(true);
expect(result.canonical_id).toBe('validate_json');
expect(result.version).toBe('v1');
});
// 註:G4 Gherkin 真實作後,minimal wasm(只有 magic header)無法 instantiate
// 會在 gherkin_tests 步驟失敗。同步步驟(size/syscall/fake)的失敗測試仍有效,
// 因為它們在 Gherkin 之前就擋下。「全通過」需真實零件 wasm,移至整合測試。
it('步驟 (a):體積超過上限時失敗', () => {
// max_size_kb = 1,但 wasm 超過 1KB
it('步驟 (a):體積超過上限時失敗(在 Gherkin 前擋下)', async () => {
const contract = { ...BASE_CONTRACT, constraints: { ...BASE_CONTRACT.constraints, max_size_kb: 1 } };
const wasm = makeMinimalWasm(2000); // > 1KB
const result = runSandboxAcceptance(wasm, contract);
const result = await runSandboxAcceptance(wasm, contract);
expect(result.success).toBe(false);
expect(result.failed_step).toBe('size_check');
expect(result.reason).toContain('超過上限');
@@ -57,37 +52,32 @@ describe('runSandboxAcceptance', () => {
expect(result.version).toBe('v1');
});
it('步驟 (c):含禁止 syscall 時失敗', () => {
// 在 wasm bytes 中嵌入禁止的 syscall 字串
const syscallStr = 'sock_connect';
it('步驟:含禁止 syscall 時失敗', async () => {
const encoder = new TextEncoder();
const syscallBytes = encoder.encode(syscallStr);
const syscallBytes = encoder.encode('sock_connect');
const wasm = new Uint8Array([0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, ...syscallBytes]);
const result = runSandboxAcceptance(wasm, BASE_CONTRACT);
const result = await runSandboxAcceptance(wasm, BASE_CONTRACT);
expect(result.success).toBe(false);
expect(result.failed_step).toBe('syscall_scan');
expect(result.reason).toContain('sock_connect');
expect(result.guide_anchor).toBe('#syscall-constraints');
});
it('步驟 (c):含 path_open 時失敗', () => {
const encoder = new TextEncoder();
const syscallBytes = encoder.encode('path_open');
const wasm = new Uint8Array([0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, ...syscallBytes]);
const result = runSandboxAcceptance(wasm, BASE_CONTRACT);
expect(result.success).toBe(false);
expect(result.failed_step).toBe('syscall_scan');
});
it('size_check 失敗後不執行後續步驟(含禁止 syscall 的大型 wasm', () => {
// 同時違反 size_check 和 syscall_scan
it('size_check 失敗後不執行後續步驟', async () => {
const encoder = new TextEncoder();
const syscallBytes = encoder.encode('sock_connect');
const padding = new Uint8Array(2000); // > 1KB
const padding = new Uint8Array(2000);
const wasm = new Uint8Array([0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, ...syscallBytes, ...padding]);
const contract = { ...BASE_CONTRACT, constraints: { ...BASE_CONTRACT.constraints, max_size_kb: 1 } };
const result = runSandboxAcceptance(wasm, contract);
// 應在 size_check 就停止,不到 syscall_scan
const result = await runSandboxAcceptance(wasm, contract);
expect(result.failed_step).toBe('size_check');
});
it('G1contract 含外部 URL 的假零件被擋(最先擋)', async () => {
const contract = { ...BASE_CONTRACT, canonical_id: 'fake_gmail', description: '打 https://gmail.googleapis.com 寄信' };
const wasm = makeMinimalWasm(10);
const result = await runSandboxAcceptance(wasm, contract);
expect(result.success).toBe(false);
expect(result.failed_step).toBe('fake_component_scan');
expect(result.reason).toContain('recipe');
});
});