feat: KBDB self-hosted 查詢 + embed 模組 + thin-shell 收窄 + search_workflow(code done 待端到端)

按 issue 分段標明(檔 #5/#8 改動交疊處無法乾淨拆檔,故併一個 commit):

#4 thin-shell §3.1 自力救濟階梯 + code-node 規則(純文檔/規則,code-node 零件未實作)
#5 KBDB source filter(json_extract metadata_json 零建表)+ 能力對照;documents 聚合與
   DELETE proxy 部分擱置等頂層 T8
#7 base embed 模組(kbdb/src/embed.ts)+ vectorize 開關(deploy/config/wrangler.toml 註解範本)
   + 語義查詢降級閉環(mode=semantic 未開→LIKE+capability_hint)
#8 部分(workflow-discovery):
   - KBDB /entries/search 加 base 通用 entry_type filter(entry-crud/embed/route/kbdb-proxy 透傳)
   - /webhooks/named 強制 description(空→400,訊息要求操盤 AI 據實寫一句)
   - 部署雙寫 entry_type=workflow embeddable entry(waitUntil 非阻塞,供 search)
   - cypher GET /workflows/search + MCP u6u_search_workflows(優先語意、降級 hint)
   - cypher POST /workflows/backfill-search-entries(無 desc 列出不編造)
   - GET /webhooks/named 補回 description/created_at 欄位(為 list 來源收斂備)

⚠️ tsc 綠 = code done,非完成(mindset §7 禁假綠):
- #7/#8 端到端待 leo21c 部署驗(Vectorize 需官方憑證、CC 跑不了)
- #8 ①-a(MCP deploy 改打 /webhooks/named)未做、MCP deploy 那半仍 404
- #8 端到端(強制填擋空/語義命中/租戶隔離/降級 hint)未驗

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
uncle6me-web
2026-06-27 17:52:52 +08:00
parent 013b55e97e
commit 934b9265d9
16 changed files with 610 additions and 33 deletions
+9 -6
View File
@@ -121,17 +121,20 @@ kbdbProxyRouter.get('/kbdb/records/:recordId', async (c) => {
// ── search (scoped to the calling tenant) ─────────────────────────────────────
// GET /kbdb/search?q=&entry_type=&source=&mode= — searches entries, restricted to the tenant's owner_id.
// The optional entry_type / source / mode query parameters are forwarded as-is to KBDB /entries/search.
kbdbProxyRouter.get('/kbdb/search', async (c) => {
  const owner = tenant(c);
  if (!owner) return c.json(NEED_KEY, 401);
  const q = c.req.query('q');
  if (!q) return c.json({ error: 'q 必填' }, 400);
  const { base, headers } = kbdbBase(c.env);
  // owner_id is always the value returned by tenant(c); only the whitelisted keys
  // below are copied from the caller's query string, so it cannot be overridden.
  const params = new URLSearchParams({ q, owner_id: owner });
  for (const k of ['entry_type', 'source', 'mode']) {
    const v = c.req.query(k);
    if (v) params.set(k, v);
  }
  const res = await fetch(`${base}/entries/search?${params.toString()}`, { headers });
  // Relay the upstream status and body, labelling the response as JSON.
  return new Response(res.body, { status: res.status, headers: { 'Content-Type': 'application/json' } });
});
@@ -165,7 +168,7 @@ kbdbProxyRouter.get('/kbdb/entries', async (c) => {
const { base, headers } = kbdbBase(c.env);
const params = new URLSearchParams();
params.set('owner_id', owner); // 強制本租戶,不接受 caller 覆寫
for (const k of ['entry_type', 'parent_id', 'page_name', 'limit', 'offset']) {
for (const k of ['entry_type', 'parent_id', 'page_name', 'source', 'limit', 'offset']) {
const v = c.req.query(k);
if (v) params.set(k, v);
}
+140 -10
View File
@@ -53,6 +53,44 @@ function kvKey(apiKey: string, name: string): string {
return `${apiKey}:wf:${name}`;
}
/**
 * Writes a searchable KBDB entry for a deployed workflow (workflow-discovery R2 / Phase 2.1).
 *
 * This is the second half of a dual write: the WEBHOOKS KV record stays the source for
 * list/get/trigger, while this POSTs an `entry_type=workflow` entry to KBDB `/entries`
 * so the workflow can be found by search. `owner_id` is set to the API key, which scopes
 * the entry to the tenant. `content` carries the description, and `metadata_json` is sent
 * as a JSON string with `embed: true` (per the original notes, the flag KBDB's embed module
 * checks before indexing — not verifiable from this file).
 *
 * The KBDB endpoint is `env.KBDB_BASE_URL` (trailing slash stripped) with a hard-coded
 * fallback; a bearer token is attached only when `env.KBDB_INTERNAL_TOKEN` is set.
 *
 * @param env - Worker bindings; reads `KBDB_BASE_URL` and `KBDB_INTERNAL_TOKEN`.
 * @param apiKey - Tenant API key, stored as the entry's `owner_id`.
 * @param name - Workflow name, stored as `page_name` and `metadata.workflow_name`.
 * @param description - Human-readable summary, stored as the entry `content`.
 * @param workflowId - Optional workflow id; falls back to `name` when omitted.
 * @throws Error when the request itself fails or KBDB answers with a non-2xx status.
 *   Callers that treat the write as best-effort must catch it.
 */
async function writeWorkflowSearchEntry(
  env: Bindings,
  apiKey: string,
  name: string,
  description: string,
  workflowId?: string,
): Promise<void> {
  const base = (env.KBDB_BASE_URL ?? 'https://arcrun-kbdb.uncle6-me.workers.dev').replace(/\/$/, '');
  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
  if (env.KBDB_INTERNAL_TOKEN) headers['Authorization'] = `Bearer ${env.KBDB_INTERNAL_TOKEN}`;

  const res = await fetch(`${base}/entries`, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      entry_type: 'workflow',
      owner_id: apiKey, // tenant isolation: same identity the search endpoint filters on
      page_name: name,
      content: description, // the text that search matches against
      // Sent as a serialized string, not a nested object.
      metadata_json: JSON.stringify({
        embed: true,
        workflow_name: name,
        workflow_id: workflowId ?? name,
      }),
    }),
  });

  // fetch() only rejects on network-level failures; an HTTP error status must be
  // surfaced explicitly, otherwise callers would report a write that never happened.
  if (!res.ok) {
    const detail = await res.text().catch(() => '');
    const suffix = detail ? `: ${detail.slice(0, 200)}` : '';
    throw new Error(`KBDB POST /entries failed with HTTP ${res.status}${suffix}`);
  }
}
// POST /webhooks/named — 部署(acr push 呼叫)
webhooksNamedRouter.post('/webhooks/named', async (c) => {
const apiKey = c.req.header('X-Arcrun-API-Key');
@@ -72,6 +110,16 @@ webhooksNamedRouter.post('/webhooks/named', async (c) => {
return c.json({ error: '缺少必要欄位:name, graph' }, 400);
}
// workflow-discovery R1:description 強制非空(供語意搜尋,工作流可被發現)。
// 定位(Q2 定案):要求操盤的 AI 據實寫一句「這工作流能做什麼」,非逼 low-code 用戶手填、
// 非介面層機械塞佔位。空 → 擋下,由操盤 CC 據實補一句再部署(用戶可改)。
if (typeof body.description !== 'string' || body.description.trim() === '') {
return c.json({
error: 'description 必填:請操盤的 AI 據實寫一句「這工作流能做什麼」(如「呼叫可 Upsert Google Sheets」),用戶可再改。供語意搜尋用,不是寫文章。',
requires: 'description',
}, 400);
}
const name = body.name.trim();
if (!/^[\w-]+$/.test(name)) {
return c.json({ error: 'workflow name 只能包含英文字母、數字、底線和連字號' }, 400);
@@ -93,7 +141,7 @@ webhooksNamedRouter.post('/webhooks/named', async (c) => {
name,
graph: body.graph,
config: body.config,
description: typeof body.description === 'string' ? body.description : '',
description: body.description.trim(), // R1:已驗非空(見上),存 trim 後的值
created_at: new Date().toISOString(),
cron_expr: cronExpr ?? undefined,
// 法律憑證:存人類明示同意(本次新同意或沿用既有)
@@ -107,6 +155,12 @@ webhooksNamedRouter.post('/webhooks/named', async (c) => {
// (避免 push 改 yaml 拿掉 cron 後殘留)。scheduled() 每分鐘只 get 這一個 key。
await updateCronIndexEntry(c.env.WEBHOOKS, apiKey, name, cronExpr);
// workflow-discovery Phase 2.1:雙寫 embeddable search-entry(讓此 workflow 可被語意搜尋)。
// 非阻塞(waitUntil)+ 失敗不致命(catch):可發現性是加值,不擋部署成功(對齊 #7 embedOnWrite 慣例)。
c.executionCtx.waitUntil(
writeWorkflowSearchEntry(c.env, apiKey, name, record.description).catch(() => {}),
);
// Implicit telemetry (LI M1.2)
recordTelemetry(c.env, apiKey, {
event_type: 'deploy_success',
@@ -124,6 +178,75 @@ webhooksNamedRouter.post('/webhooks/named', async (c) => {
}, 201);
});
// GET /workflows/search?q=&mode= — workflow-discovery R2: search the calling tenant's workflows.
// Forwards to KBDB /entries/search with entry_type=workflow and owner_id fixed to the caller's
// API key. Semantic mode is requested unless the caller passes mode=keyword; per the original
// notes, KBDB itself falls back to keyword search plus a capability_hint when semantic search
// is unavailable. This handler only relays the upstream response.
webhooksNamedRouter.get('/workflows/search', async (c) => {
  const tenantKey = c.req.header('X-Arcrun-API-Key');
  if (!tenantKey) {
    return c.json({ error: '缺少 X-Arcrun-API-Key header' }, 401);
  }

  const queryText = c.req.query('q');
  if (!queryText) {
    return c.json({ error: 'q 必填:用自然語言描述要找的工作流(如「把資料寫進 Google Sheets」)' }, 400);
  }

  // Anything other than an explicit "keyword" is treated as a semantic request.
  const searchMode = c.req.query('mode') === 'keyword' ? 'keyword' : 'semantic';

  const kbdbOrigin = (c.env.KBDB_BASE_URL ?? 'https://arcrun-kbdb.uncle6-me.workers.dev').replace(/\/$/, '');
  const internalToken = c.env.KBDB_INTERNAL_TOKEN;
  const requestHeaders: Record<string, string> = {
    'Content-Type': 'application/json',
    ...(internalToken ? { Authorization: `Bearer ${internalToken}` } : {}),
  };

  const search = new URLSearchParams();
  search.append('q', queryText);
  search.append('owner_id', tenantKey); // tenant isolation: only this tenant's entries
  search.append('entry_type', 'workflow'); // restrict results to workflow entries
  search.append('mode', searchMode);

  const upstream = await fetch(`${kbdbOrigin}/entries/search?${search.toString()}`, {
    headers: requestHeaders,
  });
  // Pass the upstream status and body through, labelled as JSON.
  return new Response(upstream.body, {
    status: upstream.status,
    headers: { 'Content-Type': 'application/json' },
  });
});
// POST /workflows/backfill-search-entries — workflow-discovery R3: give already-deployed
// workflows a search entry.
// Records with a non-empty description get an entry written via writeWorkflowSearchEntry;
// records without one are only listed under needs_description. No description is ever
// invented — those workflows must be re-deployed with a real one.
// Meant to be called on demand, not on a schedule.
// NOTE(review): every run POSTs a fresh entry per workflow; whether KBDB de-duplicates
// repeated entries is not visible from this file — confirm before re-running.
webhooksNamedRouter.post('/workflows/backfill-search-entries', async (c) => {
  const apiKey = c.req.header('X-Arcrun-API-Key');
  if (!apiKey) return c.json({ error: '缺少 X-Arcrun-API-Key header' }, 401);

  const prefix = `${apiKey}:wf:`;
  const backfilled: string[] = [];
  const needsDescription: string[] = [];
  const errors: string[] = [];

  // KV list() is paginated: keep following the cursor until list_complete so tenants
  // with more keys than one page holds are not silently truncated.
  let cursor: string | undefined;
  do {
    const page = await c.env.WEBHOOKS.list(cursor ? { prefix, cursor } : { prefix });
    for (const k of page.keys) {
      const name = k.name.slice(prefix.length);
      // One bad record (unparseable JSON, failed KBDB write) is reported under `errors`
      // and must not abort the remaining workflows.
      try {
        const raw = await c.env.WEBHOOKS.get(k.name, 'text');
        if (!raw) continue;
        const rec = JSON.parse(raw) as NamedWorkflowRecord;
        const desc = rec.description?.trim();
        if (!desc) {
          needsDescription.push(name);
          continue;
        }
        await writeWorkflowSearchEntry(c.env, apiKey, name, desc);
        backfilled.push(name);
      } catch (e) {
        errors.push(`${name}: ${e instanceof Error ? e.message : String(e)}`);
      }
    }
    cursor = page.list_complete ? undefined : page.cursor;
  } while (cursor);

  return c.json({
    backfilled,
    backfilled_count: backfilled.length,
    needs_description: needsDescription,
    needs_description_count: needsDescription.length,
    errors,
    hint: needsDescription.length > 0
      ? `${needsDescription.length} 個工作流缺 description 無法被搜尋。請操盤的 AI re-deploy 它們時據實補一句「能做什麼」(不自動編造)。`
      : undefined,
  });
});
// POST /webhooks/named/migrate-cron-index — 一次性 migration(8.P0):把舊的 per-key
// cron-idx:{apiKey}:{name} 折進單一 cron-idx:_all(這裡才 list 一次,非每分鐘 tick)。
// 增量寫、不刪舊 key(重跑安全、冪等)。部署 8.P0 後跑一次,讓既有 cron workflow 不漏掉。
@@ -241,16 +364,23 @@ webhooksNamedRouter.get('/webhooks/named', async (c) => {
const prefix = `${apiKey}:wf:`;
const list = await c.env.WEBHOOKS.list({ prefix });
const workflows = list.keys.map(k => {
const name = k.name.slice(prefix.length);
return { name };
});
// workflow-discovery 方向①:list 回完整欄位(description/created_at),讓 MCP u6u_list_workflows
// 改讀本端點時欄位齊(取代舊的讀 workflow_metadata record)。需 get 每個 record 取 description。
const baseUrl = new URL(c.req.url).origin;
const result = workflows.map(w => ({
name: w.name,
webhook_url: `${baseUrl}/webhooks/named/${w.name}/trigger`,
}));
const result = await Promise.all(
list.keys.map(async (k) => {
const name = k.name.slice(prefix.length);
const raw = await c.env.WEBHOOKS.get(k.name, 'text');
const rec = raw ? (JSON.parse(raw) as NamedWorkflowRecord) : null;
return {
name,
description: rec?.description ?? '',
created_at: rec?.created_at ?? '',
cron_expr: rec?.cron_expr,
webhook_url: `${baseUrl}/webhooks/named/${name}/trigger`,
};
}),
);
return c.json({ workflows: result, total: result.length });
});