feat: KBDB self-hosted 查詢 + embed 模組 + thin-shell 收窄 + search_workflow(code done 待端到端)

按 issue 分段標明(檔 #5/#8 改動交疊處無法乾淨拆檔,故併一個 commit):

#4 thin-shell §3.1 自力救濟階梯 + code-node 規則(純文檔/規則,code-node 零件未實作)
#5 KBDB source filter(json_extract metadata_json 零建表)+ 能力對照;documents 聚合與
   DELETE proxy 部分擱置等頂層 T8
#7 base embed 模組(kbdb/src/embed.ts)+ vectorize 開關(deploy/config/wrangler.toml 註解範本)
   + 語義查詢降級閉環(mode=semantic 未開→LIKE+capability_hint)
#8 部分(workflow-discovery):
   - KBDB /entries/search 加 base 通用 entry_type filter(entry-crud/embed/route/kbdb-proxy 透傳)
   - /webhooks/named 強制 description(空→400,訊息要求操盤 AI 據實寫一句)
   - 部署雙寫 entry_type=workflow embeddable entry(waitUntil 非阻塞,供 search)
   - cypher GET /workflows/search + MCP u6u_search_workflows(優先語意、降級 hint)
   - cypher POST /workflows/backfill-search-entries(無 desc 列出不編造)
   - GET /webhooks/named 補回 description/created_at 欄位(為 list 來源收斂備)

⚠️ tsc 綠 = code done,非完成(mindset §7 禁假綠):
- #7/#8 端到端待 leo21c 部署驗(Vectorize 需官方憑證、CC 跑不了)
- #8 ①-a(MCP deploy 改打 /webhooks/named)未做、MCP deploy 那半仍 404
- #8 端到端(強制填擋空/語義命中/租戶隔離/降級 hint)未驗

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
uncle6me-web
2026-06-27 17:52:52 +08:00
parent 013b55e97e
commit 934b9265d9
16 changed files with 610 additions and 33 deletions
+25 -9
View File
@@ -170,23 +170,39 @@ export function registerQuery(server: McpServer, env: Env) {
);
}
/** kbdb_search — 對 entries 做 D1 LIKE 關鍵字搜尋(基本盤,非語義)。 */
/**
* kbdb_search — 對 entries 做搜尋。mode=keyword(D1 LIKE,基本盤永遠可用)或 semantic(向量,需開 embed 模組)。
* 語義/關鍵字都在同一 KBDB MCP(用戶資料 RAG),不分散(issue #7 / D17 邊界)。
* mode=semantic 但沒開 vectorize → base 自動降級 keyword + 回 capability_hint(發現閉環,叫 CC 幫開)。
*/
export function registerSearch(server: McpServer, env: Env) {
server.tool(
"kbdb_search",
" KBDB 內容做關鍵字搜尋(D1 LIKE,基本盤層;語義搜尋是另外的 embed 模組,基本盤沒有)。" +
"回命中的 entries。要按 template 取整批結構化資料用 kbdb_query。",
"搜尋 KBDB 內容。mode='keyword'(預設,D1 LIKE 關鍵字,基本盤永遠可用)或 'semantic'AI 向量語義搜尋," +
"需先開 embed 模組)。語義沒開時會自動降級關鍵字並告訴你怎麼開。要按 template 取整批結構化資料用 kbdb_query。",
{
q: z.string().min(1).describe("搜尋關鍵字"),
q: z.string().min(1).describe("搜尋關鍵字 / 語義查詢句"),
owner_id: z.string().optional().describe("限定某歸屬範圍內搜(選填)"),
source: z.string().optional().describe("只搜某來源(ingest source.uri,選填)"),
mode: z.enum(["keyword", "semantic"]).optional().describe("keyword(預設)或 semantic(需開 vectorize"),
},
async ({ q, owner_id }) => {
async ({ q, owner_id, source, mode }) => {
try {
const path = `/entries/search?q=${encodeURIComponent(q)}` + (owner_id ? `&owner_id=${encodeURIComponent(owner_id)}` : "");
const res = await kbdbFetch(env, path);
const qs = new URLSearchParams({ q });
if (owner_id) qs.set("owner_id", owner_id);
if (source) qs.set("source", source);
if (mode) qs.set("mode", mode);
const res = await kbdbFetch(env, `/entries/search?${qs.toString()}`);
if (!res.ok) return errorResponse("search_failed", `搜尋失敗`, ["稍後重試"], await res.text().catch(() => ""));
const data = await res.json();
return successResponse(data, ["mode:keyword = D1 LIKE(基本盤)", "找不到時換個關鍵字,或用 kbdb_query 按 template 列出"]);
const data = (await res.json()) as { mode?: string; capability_hint?: string };
// base 回 capability_hint → 語義沒開、已降級 keyword。把它當 next-step 傳給 AI(發現閉環)。
const hints =
data.capability_hint
? [data.capability_hint, "要開:跟用戶確認後,CC 可代開(寫 config kbdb_embed:true + acr update"]
: data.mode === "semantic"
? ["mode:semantic = AI 向量語義搜尋"]
: ["mode:keyword = D1 LIKE(基本盤)", "想要語義搜尋:mode='semantic'(需先開 vectorize"];
return successResponse(data, hints);
} catch (e) {
return errorResponse("internal_error", e instanceof Error ? e.message : String(e), ["稍後重試"]);
}
+2
View File
@@ -6,6 +6,7 @@ import { registerDeployWorkflow } from "./u6u_deploy_workflow.js";
import { registerPublishComponent } from "./u6u_publish_component.js";
import { registerListWorkflows } from "./u6u_list_workflows.js";
import { registerGetWorkflow } from "./u6u_get_workflow.js";
import { registerSearchWorkflows } from "./u6u_search_workflows.js";
import { registerListComponents } from "./u6u_list_components.js";
import { registerGetComponent } from "./u6u_get_component.js";
import { registerGetComponentGuide } from "./u6u_get_component_guide.js";
@@ -30,6 +31,7 @@ export function registerAllTools(server: McpServer, env: Env, orgNamespace: stri
registerPublishComponent(server, env, orgNamespace);
registerListWorkflows(server, env, orgNamespace);
registerGetWorkflow(server, env, orgNamespace);
registerSearchWorkflows(server, env, orgNamespace, partnerToken); // workflow-discovery R2
registerListComponents(server, env, orgNamespace);
registerGetComponent(server, env, orgNamespace);
registerGetComponentGuide(server, env, orgNamespace);
+86
View File
@@ -0,0 +1,86 @@
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";
import { Env } from "../types.js";
/** Shape of the JSON body this tool reads from the executor's `/workflows/search` response. */
type WorkflowSearchResponse = {
  entries?: Array<{ page_name?: string; content?: string }>;
  count?: number;
  mode?: string;
  capability_hint?: string;
};

/** Wraps a message in the MCP text-content result shape, optionally flagged as an error. */
function textResult(text: string, isError = false) {
  const content = [{ type: "text" as const, text }];
  // Successful results deliberately carry no `isError` key at all.
  return isError ? { content, isError: true } : { content };
}

/**
 * u6u_search_workflows — find an existing workflow from a natural-language
 * description (workflow-discovery R2).
 *
 * Intended entry point for "is there already a workflow that does this?": the
 * AI should search first and reuse a hit instead of deploying a duplicate.
 *
 * Issues a single GET to the cypher executor's `/workflows/search?q=…` through
 * the CYPHER_EXECUTOR service binding, authenticated with `X-Arcrun-API-Key`.
 * Per the original design notes the executor forwards to KBDB `/entries/search`
 * scoped to entry_type=workflow and the calling tenant, preferring semantic
 * search and downgrading to keyword search with a `capability_hint` when
 * Vectorize is not enabled — that behaviour lives in the executor, not here.
 *
 * Thin shell (rule 07): parameter conversion + call + formatting only, no
 * business logic. One call per invocation; no polling or scheduling.
 *
 * @param server MCP server the tool is registered on.
 * @param env Worker bindings; `CYPHER_EXECUTOR` must be configured for the tool to work.
 * @param orgNamespace Accepted so the signature lines up with the other register* functions; not read here.
 * @param partnerToken API key sent to the executor as `X-Arcrun-API-Key`.
 */
export function registerSearchWorkflows(
  server: McpServer,
  env: Env,
  orgNamespace: string,
  partnerToken: string,
) {
  server.tool(
    "u6u_search_workflows",
    "用自然語言找現成的工作流(先查有沒有現成的能做這件事,找到就用,別重造)。例如:「把資料寫進 Google Sheets」、「每天抓 RSS 發通知」、「webhook 轉發到別的 API」。回傳本帳號下符合的工作流清單。",
    {
      // min(1): an empty string can never describe a workflow, so reject it at the schema.
      query: z.string().min(1).describe("自然語言描述要找的工作流,如「把資料寫進 Google Sheets」"),
    },
    async ({ query }) => {
      try {
        // Whitespace-only input passes min(1) but is still an empty search;
        // stop before spending a round trip on `q=%20`.
        if (query.trim().length === 0) {
          return textResult("Error: query must not be empty.", true);
        }

        if (!env.CYPHER_EXECUTOR) {
          return textResult("Error: CYPHER_EXECUTOR service binding is not configured.", true);
        }

        const response = await env.CYPHER_EXECUTOR.fetch(
          `http://cypher-executor/workflows/search?q=${encodeURIComponent(query)}`,
          { method: "GET", headers: { "X-Arcrun-API-Key": partnerToken } },
        );

        if (!response.ok) {
          const errorText = await response.text();
          return textResult(`Search failed: ${errorText}`, true);
        }

        const result = (await response.json()) as WorkflowSearchResponse;
        const entries = result.entries ?? [];
        // Trust the reported count when present; otherwise fall back to what was returned.
        const count = result.count ?? entries.length;

        if (count === 0) {
          const hint = result.capability_hint ? `\n\n${result.capability_hint}` : "";
          return textResult(
            `找不到符合「${query}」的現成工作流。可以用 u6u_deploy_workflow 部署一個新的。${hint}`,
          );
        }

        // Pass capability_hint through so the AI can offer to enable Vectorize
        // when the search was downgraded to keyword mode (R2.3 discovery loop).
        const hintLine = result.capability_hint
          ? `\n\n⚠️ ${result.capability_hint}`
          : "";
        return textResult(
          `找到 ${count} 個工作流(mode: ${result.mode ?? "keyword"}):\n${JSON.stringify(entries, null, 2)}${hintLine}`,
        );
      } catch (error) {
        // Network failures and malformed JSON both land here.
        return textResult(
          `Internal Error: ${error instanceof Error ? error.message : String(error)}`,
          true,
        );
      }
    }
  );
}