feat(registry): component_hash_id — stable id system for workflow references

Problem: canonical_id is readable but mutable; if a component is renamed,
all workflows referencing it by canonical_id break.

Solution: dual-id system
- component_hash_id: cmp_{sha256(canonical_id).slice(0,8)}, derived deterministically,
  never changes, safe for workflow references
- canonical_id: human-readable name, used for search and display
- idx:{canonical_id} KV key: reverse-lookup index for resolving canonical_id → hash_id

Changes:
- types.ts: SandboxResult.component_id → component_hash_id + canonical_id,
  added 'data' to category enum
- submitComponent.ts: deriveHashId(), writes idx: reverse-lookup on submit
- queryComponents.ts: full rewrite — removed KBDB dependency, uses SUBMISSIONS_KV;
  supports both cmp_* and canonical_id as query id; Phase 0 keyword search
  with note to upgrade to Vectorize in Phase 2
- sandboxAcceptance.ts: updated field names, fixed TextDecoder TS type
- ensureTemplate.ts: removed KBDB dependency, now a KV health check
- tests: updated component_id → canonical_id
- CONTRIBUTING.md: explain hash_id derivation and dual-id workflow reference syntax

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-16 14:41:22 +08:00
parent d8028eabe0
commit 8e2c32e466
7 changed files with 177 additions and 178 deletions
+6
View File
@@ -53,6 +53,12 @@ registry/components/my_component/
所有語言共用相同的合約格式:
```yaml
# component_hash_id 由 Registry 在提交時自動派發,格式為 cmp_{8碼hex}
# 提交者不需要填這個欄位,Registry 會根據 canonical_id 確定性生成
# Workflow 引用零件時,用 component_hash_id 才能保證永久不壞:
# component://cmp_a3f9b2c1 ← 推薦,即使 canonical_id 改名也不受影響
# component://string_reverse ← 方便,AI 寫 workflow 時用這個,Registry 自動解析
canonical_id: "string_reverse" # 見下方命名規範
display_name: "字串反轉" # 人類可讀,可中文,供 UI 顯示用
description: > # 語意搜尋用,見下方說明
+12 -57
View File
@@ -1,66 +1,21 @@
// 確保 KBDB 中存在 tpl-component Template Block
// ensureTemplate — 確保 SUBMISSIONS_KV 可正常存取(健康檢查用)
// Requirements: 12.1
//
// 原本此模組負責在 KBDB 建立 tpl-component Template Block。
// 已改為 SUBMISSIONS_KV 模式後,不再需要預建 Template。
// 此函式改為驗證 KV binding 是否正常,供 /init 端點呼叫。
import type { Bindings } from '../types';
const TEMPLATE_ID = 'tpl-component';
const SLOT_KEYS = [
'canonical_id',
'display_name',
'category',
'version',
'wasi_target',
'stability',
'runtime_compat',
'component_type',
'max_size_kb',
'max_cold_start_ms',
'no_network_syscall',
'input_schema',
'output_schema',
'gherkin_tests',
'wasm_r2_key',
'cypher_binding_url',
'service_binding_key',
'description',
'tags',
'success_rate',
'avg_duration_ms',
'call_count',
'status',
'deprecated_at',
];
export async function ensureTemplate(env: Bindings): Promise<{ created: boolean; template_id: string }> {
const kbdbUrl = env.KBDB_URL || 'https://kbdb.finally.click';
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${env.KBDB_INTERNAL_TOKEN}`,
};
// 寫入並讀取一個測試 key,確認 KV binding 正常
const testKey = '_init_health_check';
await env.SUBMISSIONS_KV.put(testKey, '1', { expirationTtl: 60 });
const val = await env.SUBMISSIONS_KV.get(testKey);
// 先嘗試取得現有 template
const getRes = await fetch(`${kbdbUrl}/templates/${TEMPLATE_ID}`, { headers });
if (getRes.ok) {
return { created: false, template_id: TEMPLATE_ID };
if (val !== '1') {
throw new Error('SUBMISSIONS_KV binding 異常:寫入後無法讀取');
}
// 不存在則建立
const createRes = await fetch(`${kbdbUrl}/templates`, {
method: 'POST',
headers,
body: JSON.stringify({
template_id: TEMPLATE_ID,
name: 'Component',
description: 'u6u 零件合約 Template,每個零件版本對應一個 Block',
slot_keys: SLOT_KEYS,
}),
});
if (!createRes.ok) {
const errText = await createRes.text();
throw new Error(`建立 tpl-component 失敗(${createRes.status}):${errText.slice(0, 200)}`);
}
return { created: true, template_id: TEMPLATE_ID };
return { created: true, template_id: 'submissions_kv' };
}
+100 -100
View File
@@ -1,9 +1,13 @@
// queryComponents — 查詢零件合約與語意搜尋
// queryComponents — 查詢零件合約
// 支援兩種查詢 id
// component_hash_id(cmp_xxxxxxxx)— 永久穩定,workflow 引用用
// canonical_id(小寫底線) — 可讀名稱,透過 idx: 反查索引解析
// Requirements: 12.2, 12.3
import type { Bindings } from '../types';
export interface ComponentVersion {
export interface ComponentRecord {
component_hash_id: string;
canonical_id: string;
display_name: string;
version: string;
@@ -11,137 +15,135 @@ export interface ComponentVersion {
stability: string;
status: string;
description: string;
aliases: string[];
tags: string[];
success_rate: number;
avg_duration_ms: number;
call_count: number;
wasm_r2_key?: string;
cypher_binding_url?: string;
score: number;
}
/** 從 KBDB 取得零件的最優版本合約 */
// ── id 解析:支援 hash_id 和 canonical_id 兩種格式 ──────────────────────────
/**
 * Resolve a caller-supplied component id to its stable hash id.
 *
 * Two id forms are accepted:
 * - a hash id (`cmp_` followed by 8 lowercase hex digits) is returned unchanged;
 * - anything else is treated as a canonical_id and looked up through the
 *   `idx:{canonical_id}` reverse index in SUBMISSIONS_KV.
 *
 * The hash-id check matches the full `cmp_` + 8-hex shape instead of only the
 * `cmp_` prefix, because a canonical_id is allowed to start with `cmp_`
 * (e.g. `cmp_utils`) and must still be resolved through the index.
 * A canonical_id that is itself exactly `cmp_` + 8 hex digits cannot be told
 * apart from a hash id and is returned as-is.
 *
 * @param id  Hash id or canonical_id as written in a workflow reference.
 * @param env Worker bindings; only SUBMISSIONS_KV is read.
 * @returns The hash id, or `null` when the canonical_id has no index entry.
 */
async function resolveHashId(id: string, env: Bindings): Promise<string | null> {
  // Already a hash id: no KV round-trip needed.
  if (/^cmp_[0-9a-f]{8}$/.test(id)) return id;

  // canonical_id -> hash id via the reverse index written on submit.
  return env.SUBMISSIONS_KV.get(`idx:${id}`);
}
// ── 取得零件的所有版本 ────────────────────────────────────────────────────────
/**
 * Load every stored, non-tombstoned version record of one component.
 *
 * Keys are enumerated under the `comp:{hashId}:` prefix. The listing is
 * followed across pages via the returned cursor so versions beyond the first
 * page are not silently dropped, and the values of each page are fetched
 * concurrently instead of one awaited `get` per key.
 *
 * Entries are skipped (not reported) when the value is missing, is not valid
 * JSON, fails conversion, or has `status === 'tombstone'`.
 *
 * @param hashId Component hash id used to build the key prefix.
 * @param env    Worker bindings; only SUBMISSIONS_KV is read.
 * @returns Records in key-listing order; empty when nothing matches.
 */
async function listVersions(hashId: string, env: Bindings): Promise<ComponentRecord[]> {
  const prefix = `comp:${hashId}:`;
  const records: ComponentRecord[] = [];
  let cursor: string | undefined;

  do {
    const page = await env.SUBMISSIONS_KV.list(cursor ? { prefix, cursor } : { prefix });

    // Promise.all preserves input order, so records keep the listing order.
    const raws = await Promise.all(
      page.keys.map(key => env.SUBMISSIONS_KV.get(key.name)),
    );

    for (const raw of raws) {
      if (!raw) continue;
      try {
        const v = JSON.parse(raw);
        if (v.status === 'tombstone') continue;
        records.push(toComponentRecord(v));
      } catch {
        // Corrupt or unconvertible entry: skip it, keep the remaining versions.
        continue;
      }
    }

    cursor = page.list_complete ? undefined : page.cursor;
  } while (cursor);

  return records;
}
// ── 公開 API ──────────────────────────────────────────────────────────────────
/** 取得零件最優版本(floating 策略:成功率 × 速度 × log(使用次數)) */
export async function getComponent(
canonicalId: string,
id: string,
env: Bindings,
): Promise<ComponentVersion | null> {
const kbdbUrl = env.KBDB_URL || 'https://kbdb.finally.click';
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${env.KBDB_INTERNAL_TOKEN}`,
};
): Promise<ComponentRecord | null> {
const hashId = await resolveHashId(id, env);
if (!hashId) return null;
// 搜尋所有版本(block_id 前綴 comp-{id}-
const res = await fetch(
`${kbdbUrl}/records/search?template_id=tpl-component&canonical_id=${encodeURIComponent(canonicalId)}&limit=20`,
{ headers },
);
const versions = await listVersions(hashId, env);
if (versions.length === 0) return null;
if (!res.ok) return null;
const data = await res.json() as { records?: Array<{ record_id: string; values: Record<string, string> }> };
const records = (data.records ?? []).filter(r =>
r.values.canonical_id === canonicalId && r.values.status !== 'tombstone'
);
if (records.length === 0) return null;
// 選取評分最高的版本(floating 策略)
const scored = records.map(r => ({
...r.values,
score: computeScore(r.values),
}));
scored.sort((a, b) => b.score - a.score);
const best = scored[0];
return toComponentVersion(best);
versions.sort((a, b) => b.score - a.score);
return versions[0];
}
/** 取得零件所有版本清單(含評分排序) */
export async function getComponentVersions(
canonicalId: string,
id: string,
env: Bindings,
): Promise<ComponentVersion[]> {
const kbdbUrl = env.KBDB_URL || 'https://kbdb.finally.click';
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${env.KBDB_INTERNAL_TOKEN}`,
};
): Promise<ComponentRecord[]> {
const hashId = await resolveHashId(id, env);
if (!hashId) return [];
const res = await fetch(
`${kbdbUrl}/records/search?template_id=tpl-component&canonical_id=${encodeURIComponent(canonicalId)}&limit=20`,
{ headers },
);
if (!res.ok) return [];
const data = await res.json() as { records?: Array<{ record_id: string; values: Record<string, string> }> };
const records = (data.records ?? []).filter(r =>
r.values.canonical_id === canonicalId && r.values.status !== 'tombstone'
);
return records
.map(r => ({ ...r.values, score: computeScore(r.values) }))
.sort((a, b) => b.score - a.score)
.slice(0, 10)
.map(toComponentVersion);
const versions = await listVersions(hashId, env);
versions.sort((a, b) => b.score - a.score);
return versions.slice(0, 10);
}
/** 語意搜尋零件(透過 KBDB Vectorize */
/** 關鍵字搜尋(掃描 KV prefix comp:,比對 canonical_id / display_name / description / aliases / tags)
*
* 注意:這是 Phase 0 的純文字比對版本。
* Phase 2 接入 Cloudflare Vectorize 後改為語意搜尋,API 介面不變。
*/
export async function searchComponents(
query: string,
env: Bindings,
): Promise<ComponentVersion[]> {
const kbdbUrl = env.KBDB_URL || 'https://kbdb.finally.click';
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${env.KBDB_INTERNAL_TOKEN}`,
};
): Promise<ComponentRecord[]> {
const q = query.toLowerCase();
// 透過 KBDB 語意搜尋(Vectorize
const res = await fetch(`${kbdbUrl}/search`, {
method: 'POST',
headers,
body: JSON.stringify({
query,
type: 'suggest',
topK: 10,
filter: { template_id: 'tpl-component' },
}),
});
// 列出所有 comp: 前綴的 key(只取最新一頁,最多 1000 個
const list = await env.SUBMISSIONS_KV.list({ prefix: 'comp:' });
if (!res.ok) return [];
const seen = new Set<string>(); // 每個 hash_id 只取最優版本
const candidates: ComponentRecord[] = [];
const data = await res.json() as { matches?: Array<{ block_id: string; score: number; metadata?: Record<string, string> }> };
const matches = data.matches ?? [];
for (const key of list.keys) {
const raw = await env.SUBMISSIONS_KV.get(key.name);
if (!raw) continue;
let v: Record<string, unknown>;
try { v = JSON.parse(raw); } catch { continue; }
// 取得每個匹配的完整合約
const results: ComponentVersion[] = [];
for (const match of matches.slice(0, 10)) {
const blockRes = await fetch(`${kbdbUrl}/records/${match.block_id}`, { headers });
if (!blockRes.ok) continue;
const block = await blockRes.json() as { values: Record<string, string> };
if (block.values.status === 'tombstone') continue;
results.push(toComponentVersion({ ...block.values, score: match.score }));
if (v.status === 'tombstone' || v.visibility !== 'public') continue;
// 比對:canonical_id / display_name / description / aliases
const searchable = [
String(v.canonical_id ?? ''),
String(v.display_name ?? ''),
String(v.description ?? ''),
...(Array.isArray(v.aliases) ? v.aliases.map(String) : []),
...(Array.isArray(v.tags) ? v.tags.map(String) : []),
].join(' ').toLowerCase();
if (!searchable.includes(q)) continue;
const hashId = String(v.component_hash_id ?? '');
if (seen.has(`${hashId}:${v.version}`)) continue;
seen.add(`${hashId}:${v.version}`);
candidates.push(toComponentRecord(v));
}
return results;
candidates.sort((a, b) => b.score - a.score);
return candidates.slice(0, 10);
}
// ── 內部工具函數 ──────────────────────────────────────────────────────────────
/** 計算零件評分:成功率 × 速度評分 × log(被調用次數 + 2) */
function computeScore(v: Record<string, string>): number {
const successRate = parseFloat(v.success_rate ?? '1');
const avgDuration = parseFloat(v.avg_duration_ms ?? '10');
const callCount = parseInt(v.call_count ?? '0', 10);
// 速度評分:越快越高,50ms 為基準
function computeScore(v: Record<string, unknown>): number {
const successRate = parseFloat(String(v.success_rate ?? '1'));
const avgDuration = parseFloat(String(v.avg_duration_ms ?? '10'));
const callCount = parseInt(String(v.call_count ?? '0'), 10);
const speedScore = Math.max(0, 1 - avgDuration / 1000);
return successRate * speedScore * Math.log(callCount + 2);
}
function toComponentVersion(v: Record<string, string | number>): ComponentVersion {
function toComponentRecord(v: Record<string, unknown>): ComponentRecord {
return {
component_hash_id: String(v.component_hash_id ?? ''),
canonical_id: String(v.canonical_id ?? ''),
display_name: String(v.display_name ?? ''),
version: String(v.version ?? 'v1'),
@@ -149,14 +151,12 @@ function toComponentVersion(v: Record<string, string | number>): ComponentVersio
stability: String(v.stability ?? 'floating'),
status: String(v.status ?? 'active'),
description: String(v.description ?? ''),
tags: (() => {
try { return JSON.parse(String(v.tags ?? '[]')); } catch { return []; }
})(),
aliases: Array.isArray(v.aliases) ? v.aliases.map(String) : [],
tags: Array.isArray(v.tags) ? v.tags.map(String) : [],
success_rate: parseFloat(String(v.success_rate ?? '1')),
avg_duration_ms: parseFloat(String(v.avg_duration_ms ?? '0')),
call_count: parseInt(String(v.call_count ?? '0'), 10),
wasm_r2_key: v.wasm_r2_key ? String(v.wasm_r2_key) : undefined,
cypher_binding_url: v.cypher_binding_url ? String(v.cypher_binding_url) : undefined,
score: typeof v.score === 'number' ? v.score : parseFloat(String(v.score ?? '0')),
score: computeScore(v),
};
}
+5 -3
View File
@@ -28,7 +28,7 @@ function checkColdStart(_wasmBytes: Uint8Array, _contract: ComponentContract): s
function scanSyscalls(wasmBytes: Uint8Array): string | null {
// 將 .wasm binary 轉為文字,搜尋禁止的 import 字串
// WASM binary 中 import section 的函數名稱以 UTF-8 字串形式存在
const text = new TextDecoder('utf-8', { fatal: false }).decode(wasmBytes);
const text = new TextDecoder('utf-8').decode(wasmBytes);
for (const syscall of FORBIDDEN_SYSCALLS) {
if (text.includes(syscall)) {
@@ -82,7 +82,8 @@ export function runSandboxAcceptance(
failed_step: step.name,
reason: error,
guide_anchor: step.guideAnchor,
component_id: contract.canonical_id,
component_hash_id: '', // 驗收失敗時尚未派發 hash id
canonical_id: contract.canonical_id,
version: contract.version,
};
}
@@ -90,7 +91,8 @@ export function runSandboxAcceptance(
return {
success: true,
component_id: contract.canonical_id,
component_hash_id: '', // 由 submitComponent 在驗收通過後填入
canonical_id: contract.canonical_id,
version: contract.version,
};
}
+40 -12
View File
@@ -1,13 +1,31 @@
// 零件提交:沙盒驗收 → 寫入 SUBMISSIONS_KV → 上傳 R2
// 零件提交:沙盒驗收 → 派發 hash id → 寫入 SUBMISSIONS_KV → 上傳 R2
// Requirements: 2.1, 2.2, 2.3
//
// arcrun registry 不依賴 KBDBInkStone 內部服務)。
// 零件元數據存入 SUBMISSIONS_KVkey = comp:{canonical_id}:{version})。
// WASM 二進位存入 WASM_BUCKET R2key = components/{id}/{version}.wasm)。
// KV key 設計:
// comp:{hash_id}:{version} → 零件元數據 JSON
// idx:{canonical_id} → hash_id 反查索引(canonical_id → hash_id)
//
// hash_id 派發規則:
// hash_id = 'cmp_' + sha256(canonical_id).slice(0, 8)
// 相同 canonical_id 永遠得到相同 hash_id(冪等)
// 不同 canonical_id 的 hash_id 碰撞機率極低(2^32 空間)
import { runSandboxAcceptance } from './sandboxAcceptance';
import type { ComponentContract, SandboxResult, Bindings } from '../types';
// ── hash id 生成 ─────────────────────────────────────────────────────────────
/**
 * Derive the component hash id from a canonical_id.
 *
 * The id is `cmp_` followed by the first 8 lowercase hex characters (i.e. the
 * first 4 bytes) of the SHA-256 digest of the UTF-8 encoded canonical_id, so
 * the same canonical_id always yields the same id.
 *
 * @param canonicalId Canonical component name to hash.
 * @returns The derived id, e.g. `cmp_ba7816bf` for `"abc"`.
 */
async function deriveHashId(canonicalId: string): Promise<string> {
  const digest = await crypto.subtle.digest(
    'SHA-256',
    new TextEncoder().encode(canonicalId),
  );

  // 4 leading bytes -> 8 zero-padded hex characters.
  const prefixHex = Array.from(
    new Uint8Array(digest, 0, 4),
    byte => byte.toString(16).padStart(2, '0'),
  ).join('');

  return `cmp_${prefixHex}`;
}
// ── 主流程 ────────────────────────────────────────────────────────────────────
export async function submitComponent(
wasmBytes: Uint8Array,
contract: ComponentContract,
@@ -19,32 +37,36 @@ export async function submitComponent(
return sandboxResult;
}
const kvKey = `comp:${contract.canonical_id}:${contract.version}`;
const r2Key = `components/${contract.canonical_id}/${contract.version}.wasm`;
// 2. 派發 hash idcanonical_id 的確定性 hash,相同輸入永遠得到相同 id)
const hashId = await deriveHashId(contract.canonical_id);
// 2. 冪等:若已存在相同 (id, version) 直接回傳
const kvKey = `comp:${hashId}:${contract.version}`;
const r2Key = `components/${hashId}/${contract.version}.wasm`;
// 3. 冪等:若已存在相同 (hash_id, version) 直接回傳
const existing = await env.SUBMISSIONS_KV.get(kvKey);
if (existing) {
return {
success: true,
component_id: contract.canonical_id,
component_hash_id: hashId,
canonical_id: contract.canonical_id,
version: contract.version,
wasm_r2_key: r2Key,
};
}
// 3. 上傳 .wasm 至 R2
// 4. 上傳 .wasm 至 R2
await env.WASM_BUCKET.put(r2Key, wasmBytes, {
httpMetadata: { contentType: 'application/wasm' },
});
// 4. 寫入 SUBMISSIONS_KV(元數據 + 初始統計)
// 5. 寫入 SUBMISSIONS_KV(元數據 + 初始統計)
const record = {
component_hash_id: hashId,
canonical_id: contract.canonical_id,
display_name: contract.display_name,
category: contract.category,
version: contract.version,
author: contract.author ?? '',
wasi_target: contract.wasi_target,
stability: contract.stability,
runtime_compat: contract.runtime_compat,
@@ -55,6 +77,7 @@ export async function submitComponent(
gherkin_tests: contract.gherkin_tests,
wasm_r2_key: r2Key,
description: contract.description ?? '',
aliases: contract.aliases ?? [],
tags: contract.tags ?? [],
// 初始統計
success_rate: 1,
@@ -69,9 +92,14 @@ export async function submitComponent(
await env.SUBMISSIONS_KV.put(kvKey, JSON.stringify(record));
// 6. 寫入 canonical_id → hash_id 反查索引
// 同一個 canonical_id 的所有版本共用同一個 hash_id,索引只需存一份
await env.SUBMISSIONS_KV.put(`idx:${contract.canonical_id}`, hashId);
return {
success: true,
component_id: contract.canonical_id,
component_hash_id: hashId,
canonical_id: contract.canonical_id,
version: contract.version,
wasm_r2_key: r2Key,
};
+12 -4
View File
@@ -7,8 +7,11 @@ import { z } from 'zod';
export type Bindings = {
WASM_BUCKET: R2Bucket;
AI: Ai;
SUBMISSIONS_KV: KVNamespace; // 零件元數據 + 可見性狀態(key = comp:{id}:{version}
ANALYTICS_KV: KVNamespace; // 執行統計匯總(key = stats:{id}:{version}
// KV key 格式:
// comp:{hash_id}:{version} → 零件元數據(hash_id = cmp_ + sha256 前 8 碼)
// idx:{canonical_id} → canonical_id → hash_id 反查索引
SUBMISSIONS_KV: KVNamespace;
ANALYTICS_KV: KVNamespace; // 執行統計匯總(key = stats:{hash_id}:{version}
ENVIRONMENT: string;
};
@@ -28,9 +31,12 @@ export const GherkinTestSchema = z.object({
});
export const ComponentContractSchema = z.object({
// canonical_id:提交者填寫的可讀名稱(小寫底線),用於搜尋與 workflow 引用
// component_hash_id:由 Registry 在提交時派發,格式 cmp_{8碼hex},workflow 引用此 id 才能保證永久不壞
// 兩者都可以在 workflow 中引用,Registry 會互相解析
canonical_id: z.string().min(1).regex(/^[a-z][a-z0-9_]*$/, 'canonical_id 必須為小寫底線格式'),
display_name: z.string().min(1),
category: z.enum(['logic', 'api', 'ui', 'style', 'anim']),
category: z.enum(['logic', 'api', 'ui', 'style', 'anim', 'data']),
version: z.string().min(1).regex(/^v\d+$/, 'version 格式必須為 vN'),
wasi_target: z.literal('preview1'),
stability: z.enum(['floating', 'stable', 'pinned']),
@@ -64,7 +70,9 @@ export interface SandboxResult {
failed_step?: SandboxStep;
reason?: string;
guide_anchor?: string;
component_id: string;
// 驗收通過後回傳兩個 id
component_hash_id: string; // cmp_{8碼hex}workflow 引用用,永久不變
canonical_id: string; // 可讀名稱,搜尋用
version: string;
}
+2 -2
View File
@@ -40,7 +40,7 @@ describe('runSandboxAcceptance', () => {
const wasm = makeMinimalWasm(10);
const result = runSandboxAcceptance(wasm, BASE_CONTRACT);
expect(result.success).toBe(true);
expect(result.component_id).toBe('validate_json');
expect(result.canonical_id).toBe('validate_json');
expect(result.version).toBe('v1');
});
@@ -53,7 +53,7 @@ describe('runSandboxAcceptance', () => {
expect(result.failed_step).toBe('size_check');
expect(result.reason).toContain('超過上限');
expect(result.guide_anchor).toBeDefined();
expect(result.component_id).toBe('validate_json');
expect(result.canonical_id).toBe('validate_json');
expect(result.version).toBe('v1');
});