feat(registry): component_hash_id — stable id system for workflow references

Problem: canonical_id is readable but mutable; if a component is renamed,
all workflows referencing it by canonical_id break.

Solution: dual-id system
- component_hash_id: cmp_{sha256(canonical_id).slice(0,8)}, derived deterministically,
  never changes, safe for workflow references
- canonical_id: human-readable name, used for search and display
- idx:{canonical_id} KV key: reverse-lookup index for resolving canonical_id → hash_id

Changes:
- types.ts: SandboxResult.component_id → component_hash_id + canonical_id,
  added 'data' to category enum
- submitComponent.ts: deriveHashId(), writes idx: reverse-lookup on submit
- queryComponents.ts: full rewrite — removed KBDB dependency, uses SUBMISSIONS_KV;
  supports both cmp_* and canonical_id as query id; Phase 0 keyword search
  with note to upgrade to Vectorize in Phase 2
- sandboxAcceptance.ts: updated field names, fixed TextDecoder TS type
- ensureTemplate.ts: removed KBDB dependency, now a KV health check
- tests: updated component_id → canonical_id
- CONTRIBUTING.md: explain hash_id derivation and dual-id workflow reference syntax

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-16 14:41:22 +08:00
parent d8028eabe0
commit 8e2c32e466
7 changed files with 177 additions and 178 deletions
+40 -12
View File
@@ -1,13 +1,31 @@
// 零件提交:沙盒驗收 → 寫入 SUBMISSIONS_KV → 上傳 R2
// 零件提交:沙盒驗收 → 派發 hash id → 寫入 SUBMISSIONS_KV → 上傳 R2
// Requirements: 2.1, 2.2, 2.3
//
// arcrun registry 不依賴 KBDB(InkStone 內部服務)。
// 零件元數據存入 SUBMISSIONS_KV(key = comp:{canonical_id}:{version})。
// WASM 二進位存入 WASM_BUCKET R2(key = components/{id}/{version}.wasm)。
// KV key 設計:
// comp:{hash_id}:{version} → 零件元數據 JSON
// idx:{canonical_id} → hash_id 反查索引(canonical_id → hash_id)
//
// hash_id 派發規則:
// hash_id = 'cmp_' + sha256(canonical_id).slice(0, 8)
// 相同 canonical_id 永遠得到相同 hash_id(冪等)
// 不同 canonical_id 的 hash_id 碰撞機率極低(2^32 空間)
import { runSandboxAcceptance } from './sandboxAcceptance';
import type { ComponentContract, SandboxResult, Bindings } from '../types';
// ── hash id 生成 ─────────────────────────────────────────────────────────────
/**
 * Derives the hash id for a component from its canonical id.
 *
 * The canonical id is UTF-8 encoded and hashed with SHA-256; the first
 * 8 lowercase hex characters of the digest are appended to the `cmp_` prefix.
 * The result is a pure function of the input string, so the same canonical id
 * always yields the same hash id.
 *
 * @param canonicalId - Human-readable component identifier to hash.
 * @returns A string of the form `cmp_xxxxxxxx` (8 hex digits).
 */
async function deriveHashId(canonicalId: string): Promise<string> {
  const digest = await crypto.subtle.digest(
    'SHA-256',
    new TextEncoder().encode(canonicalId),
  );

  // Render each digest byte as two zero-padded lowercase hex digits.
  let hexDigest = '';
  for (const byte of new Uint8Array(digest)) {
    hexDigest += byte.toString(16).padStart(2, '0');
  }

  // Only the leading 8 hex characters (32 bits) are kept for the id.
  return `cmp_${hexDigest.slice(0, 8)}`;
}
// ── 主流程 ────────────────────────────────────────────────────────────────────
export async function submitComponent(
wasmBytes: Uint8Array,
contract: ComponentContract,
@@ -19,32 +37,36 @@ export async function submitComponent(
return sandboxResult;
}
const kvKey = `comp:${contract.canonical_id}:${contract.version}`;
const r2Key = `components/${contract.canonical_id}/${contract.version}.wasm`;
// 2. 派發 hash id(canonical_id 的確定性 hash,相同輸入永遠得到相同 id)
const hashId = await deriveHashId(contract.canonical_id);
// 2. 冪等:若已存在相同 (id, version) 直接回傳
const kvKey = `comp:${hashId}:${contract.version}`;
const r2Key = `components/${hashId}/${contract.version}.wasm`;
// 3. 冪等:若已存在相同 (hash_id, version) 直接回傳
const existing = await env.SUBMISSIONS_KV.get(kvKey);
if (existing) {
return {
success: true,
component_id: contract.canonical_id,
component_hash_id: hashId,
canonical_id: contract.canonical_id,
version: contract.version,
wasm_r2_key: r2Key,
};
}
// 3. 上傳 .wasm 至 R2
// 4. 上傳 .wasm 至 R2
await env.WASM_BUCKET.put(r2Key, wasmBytes, {
httpMetadata: { contentType: 'application/wasm' },
});
// 4. 寫入 SUBMISSIONS_KV(元數據 + 初始統計)
// 5. 寫入 SUBMISSIONS_KV(元數據 + 初始統計)
const record = {
component_hash_id: hashId,
canonical_id: contract.canonical_id,
display_name: contract.display_name,
category: contract.category,
version: contract.version,
author: contract.author ?? '',
wasi_target: contract.wasi_target,
stability: contract.stability,
runtime_compat: contract.runtime_compat,
@@ -55,6 +77,7 @@ export async function submitComponent(
gherkin_tests: contract.gherkin_tests,
wasm_r2_key: r2Key,
description: contract.description ?? '',
aliases: contract.aliases ?? [],
tags: contract.tags ?? [],
// 初始統計
success_rate: 1,
@@ -69,9 +92,14 @@ export async function submitComponent(
await env.SUBMISSIONS_KV.put(kvKey, JSON.stringify(record));
// 6. 寫入 canonical_id → hash_id 反查索引
// 同一個 canonical_id 的所有版本共用同一個 hash_id,索引只需存一份
await env.SUBMISSIONS_KV.put(`idx:${contract.canonical_id}`, hashId);
return {
success: true,
component_id: contract.canonical_id,
component_hash_id: hashId,
canonical_id: contract.canonical_id,
version: contract.version,
wasm_r2_key: r2Key,
};