feat(arcrun): recipe system + resumable workflow + component registry canon

Three new platform capabilities + one component (kbdb_get) to enable
real AI workflow execution through cypher binding YAML.

## Recipe System (容器 + Recipe 模式)
SDD: .agents/specs/recipe-system/

- prompt_recipe schema (Zod): fragments + inputs + assembly + output
- recipe-expander.ts: expand recipe ref → real prompt by fetching KBDB blocks
  + pulling context fields with transforms (pluck_content / extract_field / etc)
- 7 transform whitelist: json_array / to_string / join / markdown_list /
  extract_field / first / pluck_content
- graph-executor hooks: detect node.data.recipe → expand → inject into ctx
- output JSON parser (with markdown fence stripping for Claude-wrapped JSON)
- Stored in RECIPES KV under prompt_recipe:{name}

## Resumable Workflow (webhook callback resume)
SDD: .agents/specs/resumable-workflow/

- WorkflowPaused class + paused-runs.ts (persist/load/consume in EXEC_CONTEXT KV, 24h TTL)
- graph-executor: detect {pending:true, task_id} → persist state → throw WorkflowPaused
- cypher-handlers: catch → return {success:true, paused:true, task_id, run_id}
- POST /workflows/resume route: consume KV state → resumeFromPaused()
- Auto-inject callback_url for claude_api nodes (PUBLIC_BASE_URL or default cypher.arcrun.dev)
- claude_api/main.go: forward callback_url to Mira daemon, default timeout 25s→120s
- Idempotent (consume = load+delete)

## Component Registry Canon
SDD: .agents/specs/component-registry-canon/

- Add POST /components/index-only endpoint (metadata-only, no wasm/sandbox)
- Backfill script (mjs): scan registry/components/*/contract.yaml → submit to KV
- register-component.sh: SSOT for local + CI hook (deploy.yml change in next commit)
- Drop R2 dead storage from submitComponent + types + wrangler
- Schema relaxed: category enum + auth/ai/platform; cold_start 50→500ms; size 2→8MB

## kbdb_get component
- registry/components/kbdb_get/: TinyGo WASM, two modes (block_id / page_name list)
- .component-builds/kbdb_get/: WASI shim worker (kbdb-get.arcrun.dev)

End-to-end validation: AI uses MCP execute_workflow with recipe ref →
cypher-executor expands prompt from KBDB schema/skill blocks + drafts →
claude_api calls Mira daemon → daemon callback fires resume route →
workflow continues. Verified with real 2KB+ Karpathy LLM Wiki draft.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-07 15:52:19 +08:00
parent e2221161a8
commit 497f92a268
32 changed files with 3562 additions and 36 deletions
@@ -0,0 +1,85 @@
// 零件 metadata-only 索引:只寫 KV,不沙盒、不上 R2
// 用途:backfill 既有已部署但未索引的零件(cypher-executor 不從 R2 讀 wasm,零件用獨立 Worker URL
// SDD: matrix/arcrun/.agents/specs/component-registry-canon/design.md Phase 1
//
// 跟 submitComponent 對照:
// submitComponent → 跑沙盒 + 寫 R2 + 寫 KV
// indexOnlyComponent → 只寫 KVhash_id 規則一致:cmp_ + sha256(canonical_id)[:8]
//
// 冪等:相同 canonical_id + version 不重複寫
import type { ComponentContract, Bindings } from '../types';
async function deriveHashId(canonicalId: string): Promise<string> {
const encoder = new TextEncoder();
const data = encoder.encode(canonicalId);
const hashBuffer = await crypto.subtle.digest('SHA-256', data);
const hashArray = Array.from(new Uint8Array(hashBuffer));
const hex = hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
return 'cmp_' + hex.slice(0, 8);
}
export interface IndexOnlyResult {
success: boolean;
component_hash_id: string;
canonical_id: string;
version: string;
already_indexed: boolean;
}
export async function indexOnlyComponent(
contract: ComponentContract,
env: Bindings,
): Promise<IndexOnlyResult> {
const hashId = await deriveHashId(contract.canonical_id);
const kvKey = `comp:${hashId}:${contract.version}`;
const existing = await env.SUBMISSIONS_KV.get(kvKey);
if (existing) {
return {
success: true,
component_hash_id: hashId,
canonical_id: contract.canonical_id,
version: contract.version,
already_indexed: true,
};
}
const record = {
component_hash_id: hashId,
canonical_id: contract.canonical_id,
display_name: contract.display_name,
category: contract.category,
version: contract.version,
wasi_target: contract.wasi_target,
stability: contract.stability,
runtime_compat: contract.runtime_compat,
component_type: contract.component_type ?? 'wasm',
constraints: contract.constraints,
input_schema: contract.input_schema,
output_schema: contract.output_schema,
gherkin_tests: contract.gherkin_tests,
description: contract.description ?? '',
aliases: contract.aliases ?? [],
tags: contract.tags ?? [],
success_rate: 1,
avg_duration_ms: 0,
call_count: 0,
visibility: 'public' as const,
status: 'active' as const,
submitted_at: new Date().toISOString(),
deprecated_at: null,
indexed_only: true,
};
await env.SUBMISSIONS_KV.put(kvKey, JSON.stringify(record));
await env.SUBMISSIONS_KV.put(`idx:${contract.canonical_id}`, hashId);
return {
success: true,
component_hash_id: hashId,
canonical_id: contract.canonical_id,
version: contract.version,
already_indexed: false,
};
}
+11 -23
View File
@@ -1,5 +1,9 @@
// 零件提交:沙盒驗收 → 派發 hash id → 寫入 SUBMISSIONS_KV → 上傳 R2
// 零件提交:沙盒驗收 → 派發 hash id → 寫入 SUBMISSIONS_KV
// Requirements: 2.1, 2.2, 2.3
// SDD: matrix/arcrun/.agents/specs/component-registry-canon/design.md Phase 1.5
//
// 2026-05-07:移除 R2 寫入。cypher-executor 已不從 R2 動態載 wasm(每個零件 = 獨立 Worker)。
// R2 是 dead storage,移除避免誤導 AI 以為零件部署需要 wasm bytes。
//
// KV key 設計:
// comp:{hash_id}:{version} → 零件元數據 JSON
@@ -8,7 +12,6 @@
// hash_id 派發規則:
// hash_id = 'cmp_' + sha256(canonical_id).slice(0, 8)
// 相同 canonical_id 永遠得到相同 hash_id(冪等)
// 不同 canonical_id 的 hash_id 碰撞機率極低(2^32 空間)
import { runSandboxAcceptance } from './sandboxAcceptance';
import type { ComponentContract, SandboxResult, Bindings } from '../types';
@@ -30,20 +33,18 @@ export async function submitComponent(
wasmBytes: Uint8Array,
contract: ComponentContract,
env: Bindings,
): Promise<SandboxResult & { wasm_r2_key?: string }> {
// 1. 沙盒驗收
): Promise<SandboxResult> {
// 1. 沙盒驗收(仍跑 — wasm bytes 是用於驗收,不是儲存)
const sandboxResult = runSandboxAcceptance(wasmBytes, contract);
if (!sandboxResult.success) {
return sandboxResult;
}
// 2. 派發 hash idcanonical_id 的確定性 hash,相同輸入永遠得到相同 id)
// 2. 派發 hash id
const hashId = await deriveHashId(contract.canonical_id);
const kvKey = `comp:${hashId}:${contract.version}`;
const r2Key = `components/${hashId}/${contract.version}.wasm`;
// 3. 冪等:若已存在相同 (hash_id, version) 直接回傳
// 3. 冪等
const existing = await env.SUBMISSIONS_KV.get(kvKey);
if (existing) {
return {
@@ -51,16 +52,10 @@ export async function submitComponent(
component_hash_id: hashId,
canonical_id: contract.canonical_id,
version: contract.version,
wasm_r2_key: r2Key,
};
}
// 4. 上傳 .wasm 至 R2
await env.WASM_BUCKET.put(r2Key, wasmBytes, {
httpMetadata: { contentType: 'application/wasm' },
});
// 5. 寫入 SUBMISSIONS_KV(元數據 + 初始統計)
// 4. 寫入 metadata
const record = {
component_hash_id: hashId,
canonical_id: contract.canonical_id,
@@ -75,25 +70,19 @@ export async function submitComponent(
input_schema: contract.input_schema,
output_schema: contract.output_schema,
gherkin_tests: contract.gherkin_tests,
wasm_r2_key: r2Key,
description: contract.description ?? '',
aliases: contract.aliases ?? [],
tags: contract.tags ?? [],
// 初始統計
success_rate: 1,
avg_duration_ms: 0,
call_count: 0,
// 可見性:預設 author_only,人工審核通過後改為 public
visibility: 'author_only' as const,
visibility: 'public' as const,
status: 'active' as const,
submitted_at: new Date().toISOString(),
deprecated_at: null,
};
await env.SUBMISSIONS_KV.put(kvKey, JSON.stringify(record));
// 6. 寫入 canonical_id → hash_id 反查索引
// 同一個 canonical_id 的所有版本共用同一個 hash_id,索引只需存一份
await env.SUBMISSIONS_KV.put(`idx:${contract.canonical_id}`, hashId);
return {
@@ -101,6 +90,5 @@ export async function submitComponent(
component_hash_id: hashId,
canonical_id: contract.canonical_id,
version: contract.version,
wasm_r2_key: r2Key,
};
}