feat: KBDB self-hosted 查詢 + embed 模組 + thin-shell 收窄 + search_workflow(code done 待端到端)
按 issue 分段標明(檔 #5/#8 改動交疊處無法乾淨拆檔,故併一個 commit): #4 thin-shell §3.1 自力救濟階梯 + code-node 規則(純文檔/規則,code-node 零件未實作) #5 KBDB source filter(json_extract metadata_json 零建表)+ 能力對照;documents 聚合與 DELETE proxy 部分擱置等頂層 T8 #7 base embed 模組(kbdb/src/embed.ts)+ vectorize 開關(deploy/config/wrangler.toml 註解範本) + 語義查詢降級閉環(mode=semantic 未開→LIKE+capability_hint) #8 部分(workflow-discovery): - KBDB /entries/search 加 base 通用 entry_type filter(entry-crud/embed/route/kbdb-proxy 透傳) - /webhooks/named 強制 description(空→400,訊息要求操盤 AI 據實寫一句) - 部署雙寫 entry_type=workflow embeddable entry(waitUntil 非阻塞,供 search) - cypher GET /workflows/search + MCP u6u_search_workflows(優先語意、降級 hint) - cypher POST /workflows/backfill-search-entries(無 desc 列出不編造) - GET /webhooks/named 補回 description/created_at 欄位(為 list 來源收斂備) ⚠️ tsc 綠 = code done,非完成(mindset §7 禁假綠): - #7/#8 端到端待 leo21c 部署驗(Vectorize 需官方憑證、CC 跑不了) - #8 ①-a(MCP deploy 改打 /webhooks/named)未做、MCP deploy 那半仍 404 - #8 端到端(強制填擋空/語義命中/租戶隔離/降級 hint)未驗 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -248,11 +248,21 @@ async function initSelfHosted(
|
||||
console.log(chalk.yellow(` ⚠ 查 subdomain 失敗(${e instanceof Error ? e.message : e}),稍後可手動補`));
|
||||
}
|
||||
|
||||
// 3.5 語義查詢開關(issue #7 / T2.4):問用戶要不要開(預設關,free-tier 友善)。
|
||||
// 開 → deploy 建 CF Vectorize index + 注入 binding。關 → base 維持 LIKE keyword,零花費。
|
||||
// 之後想開:跟 CC 說「幫我開語義查詢」或設 kbdb_embed:true + acr update(不必重 init)。
|
||||
const embedAns = (await prompt(
|
||||
rl,
|
||||
'要開語義查詢嗎?(KBDB 加 AI 向量搜尋;用 CF Vectorize,可能多花費;預設關,之後可隨時開) [y/N]',
|
||||
)).trim().toLowerCase();
|
||||
const kbdbEmbed = embedAns === 'y' || embedAns === 'yes';
|
||||
if (kbdbEmbed) console.log(chalk.gray(' → 已選開語義查詢:部署時會建 Vectorize index。'));
|
||||
|
||||
// 4. 下載 repo 部署物(含預編譯 wasm)+ 注入 KV id + wrangler deploy 全部 Worker
|
||||
console.log(chalk.gray('\n → 下載部署物 + 部署 Worker(從 GitHub 拉預編譯 wasm,用你的 CF token 部署)...'));
|
||||
// selfHosted: true → deploy 注入 MULTI_TENANT="false"(mcp-account-source §5.5,修 MCP 401)。
|
||||
// init.ts 這條本就是 --self-hosted 分支(config.mode 稍後寫 'self-hosted')。
|
||||
const deployCtx: DeployContext = { accountId, apiToken: cfApiToken, workerSubdomain, kvNamespaceIds, d1DatabaseId, selfHosted: true };
|
||||
const deployCtx: DeployContext = { accountId, apiToken: cfApiToken, workerSubdomain, kvNamespaceIds, d1DatabaseId, selfHosted: true, kbdbEmbed };
|
||||
const deploy = await downloadAndDeploy(deployCtx);
|
||||
const cypherUrl = deploy.cypherExecutorUrl
|
||||
?? (workerSubdomain ? `https://arcrun-cypher-executor.${workerSubdomain}.workers.dev` : '');
|
||||
@@ -274,6 +284,7 @@ async function initSelfHosted(
|
||||
webhooks_kv_namespace_id: kvNamespaceIds['WEBHOOKS'],
|
||||
credentials_kv_namespace_id: kvNamespaceIds['CREDENTIALS_KV'],
|
||||
multi_tenant: false,
|
||||
kbdb_embed: kbdbEmbed, // 語義查詢開關(issue #7);存進 config 讓後續 acr update 維持一致
|
||||
};
|
||||
saveConfig(config);
|
||||
createCredentialsYamlIfMissing();
|
||||
|
||||
@@ -82,6 +82,9 @@ export async function cmdUpdate(opts: { force?: boolean } = {}): Promise<void> {
|
||||
// self-hosted → 注入 MULTI_TENANT="false"(mcp-account-source §5.5,修 acr update 部署的 MCP 401)。
|
||||
// config 源頭:init 寫 multi_tenant:false + mode:'self-hosted'。acr update 只在 self-hosted 跑。
|
||||
selfHosted: config.mode === 'self-hosted' || config.multi_tenant === false,
|
||||
// 語義查詢開關(issue #7):config.kbdb_embed:true → 部署建 Vectorize index + 注入 binding。
|
||||
// 這也是「CC 幫開」的落地路徑:CC 寫 kbdb_embed:true 進 config → acr update redeploy 即生效。
|
||||
kbdbEmbed: config.kbdb_embed === true,
|
||||
};
|
||||
|
||||
const result = await downloadAndDeploy(ctx, 'main', { force: opts.force });
|
||||
|
||||
@@ -28,6 +28,12 @@ export interface ArcrunConfig {
|
||||
// SDD: sdk-and-website/mcp-account-source.md
|
||||
mcp_url?: string;
|
||||
multi_tenant?: boolean;
|
||||
// 語義查詢開關(issue #7 / SDD T2.4,self-hosted 從零做)。
|
||||
// true → deploy 時建 CF Vectorize index 並注入 kbdb worker 的 [[vectorize]]+[ai] binding;
|
||||
// kbdb embed 模組啟用(寫入時對標記 embed 的 entry embed、search 支援 mode=semantic)。
|
||||
// 未設/false → base 維持 LIKE keyword(free-tier 友善,不建 index、不花費)。
|
||||
// 開法:設 kbdb_embed:true → redeploy(acr update)。「CC 幫開」=CC 寫此欄 true + 跑 acr update。
|
||||
kbdb_embed?: boolean;
|
||||
// 資料外流警示:本機記住「已同意暴露 / 選擇不再警示」的資源,避免每次 push 重問(§3 首次問記住)。
|
||||
// key 格式:`{kind}:{resourceName}`(如 "webhook:contacts_lookup" / "recipe:kbdb_get")。
|
||||
// 注意:這只是 CLI 端 UX(不重問);server 端獨立存法律憑證並強制(防 CLI 被繞過)。
|
||||
@@ -160,6 +166,11 @@ function readEnvOverrides(): Partial<ArcrunConfig> {
|
||||
(out as Record<string, unknown>)[field] = v;
|
||||
}
|
||||
}
|
||||
// bool 開關(issue #7):env 可選覆蓋,'true'/'1' → true。
|
||||
const embedEnv = process.env.ARCRUN_KBDB_EMBED;
|
||||
if (embedEnv !== undefined && embedEnv !== '') {
|
||||
out.kbdb_embed = embedEnv === 'true' || embedEnv === '1';
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
+62
-1
@@ -102,8 +102,15 @@ export interface DeployContext {
|
||||
// 讓 MCP partner-auth 走 namespace 明碼分支(mcp-account-source §5.5)。
|
||||
// 未設 / false → 不注入(官方 SaaS 多租戶,行為不變)。
|
||||
selfHosted?: boolean;
|
||||
// 語義查詢開關(issue #7 / SDD T2.4)。true → 部署前建 CF Vectorize index 並注入 kbdb worker 的
|
||||
// [[vectorize]]+[ai] binding(取消 wrangler.toml 註解段)→ embed 模組啟用。未設/false → 不建、不注入,
|
||||
// base 維持 LIKE keyword(free-tier 友善)。
|
||||
kbdbEmbed?: boolean;
|
||||
}
|
||||
|
||||
/** Vectorize index 名(kbdb embed 模組用)。bge-base-en-v1.5 = 768 維、cosine。 */
|
||||
export const KBDB_VECTORIZE_INDEX = 'arcrun-kbdb-embed';
|
||||
|
||||
export interface DeployResult {
|
||||
implemented: boolean;
|
||||
cypherExecutorUrl?: string;
|
||||
@@ -186,11 +193,26 @@ export async function downloadAndDeploy(
|
||||
console.log(chalk.yellow(` ⚠ 共享安裝失敗,退回各 worker 自裝${tail ? `:${tail}` : ''}`));
|
||||
}
|
||||
|
||||
const failures: string[] = [];
|
||||
|
||||
// 2.6 語義查詢(issue #7 / T2.4):開 kbdb_embed → 先確保 Vectorize index 存在(REST,冪等),
|
||||
// 再由 injectWranglerConfig 取消 kbdb toml 的 [[vectorize]]+[ai] 註解 → embed 模組上線。
|
||||
// 失敗不致命(收進 failures,base 仍可部署、維持 keyword)。
|
||||
if (ctx.kbdbEmbed) {
|
||||
try {
|
||||
process.stdout.write(chalk.gray(' → 開語義查詢:確保 Vectorize index 存在...'));
|
||||
await ensureVectorizeIndex(ctx);
|
||||
console.log(chalk.green(' ✓'));
|
||||
} catch (e) {
|
||||
console.log(chalk.yellow(' ⚠'));
|
||||
failures.push(`Vectorize index (${KBDB_VECTORIZE_INDEX}): ${e instanceof Error ? e.message : String(e)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 對每個 worker:注入 KV id(+ cypher WORKER_SUBDOMAIN)→ wrangler deploy。tier1 先 tier2 後。
|
||||
// 逐 worker 串流進度(每個含 pnpm install + wrangler deploy,沉默會讓人以為卡住——
|
||||
// 壓測 2026-06-11 richblack 觀察:「D1 ✓」後停很久其實在這個迴圈靜默部署 20+ worker)。
|
||||
const allDirs = [...tier1, ...tier2];
|
||||
const failures: string[] = [];
|
||||
let deployed = 0;
|
||||
let skipped = 0;
|
||||
// 內容指紋 manifest:未變動且上次成功的 worker 跳過(key 用 worker 名,不用 temp 絕對路徑)。
|
||||
@@ -296,6 +318,33 @@ async function applyD1Migration(ctx: DeployContext, sql: string): Promise<void>
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Ensure the Vectorize index used by the KBDB embed module exists (issue #7 / T2.4).
 *
 * Sends `POST /accounts/{accountId}/vectorize/v2/indexes` with dimensions=768 and
 * metric=cosine (chosen to match bge-base-en-v1.5). The call is treated as idempotent:
 * an HTTP 409, or an error message mentioning "already exists" / "duplicate" / "conflict",
 * counts as success.
 *
 * @param ctx Deploy context; `accountId` and `apiToken` are used for the REST call.
 * @throws Error carrying the API error message(s) verbatim, or `HTTP <status>` when the
 *   response body has no parseable error list.
 */
async function ensureVectorizeIndex(ctx: DeployContext): Promise<void> {
  const url = `https://api.cloudflare.com/client/v4/accounts/${ctx.accountId}/vectorize/v2/indexes`;
  const res = await fetch(url, {
    method: 'POST',
    headers: { Authorization: `Bearer ${ctx.apiToken}`, 'Content-Type': 'application/json' },
    body: JSON.stringify({
      name: KBDB_VECTORIZE_INDEX,
      config: { dimensions: 768, metric: 'cosine' },
      description: 'arcrun KBDB optional embed module (issue #7)',
    }),
    signal: AbortSignal.timeout(60_000),
  });

  // Created, or already there (409): nothing more to do.
  if (res.ok || res.status === 409) return;

  // A non-JSON body is tolerated; we then fall back to the bare HTTP status.
  const json = (await res.json().catch(() => null)) as
    | { success?: boolean; errors?: Array<{ message?: string; code?: number }> }
    | null;
  const msg = json?.errors?.map((e) => e.message).filter(Boolean).join('; ') || `HTTP ${res.status}`;

  // Match case-insensitively, but keep the original casing for the thrown message so the
  // failure report shows the API text exactly as returned.
  if (/already exists|duplicate|conflict/i.test(msg)) return;
  throw new Error(msg);
}
|
||||
|
||||
/** 下載 codeload tarball 解壓到暫存目錄,回傳解壓出的 repo root 路徑。*/
|
||||
async function downloadRepoTarball(ref: string): Promise<string> {
|
||||
const url = `https://codeload.github.com/${ARCRUN_REPO}/tar.gz/${ref}`;
|
||||
@@ -411,6 +460,18 @@ function injectWranglerConfig(tomlPath: string, ctx: DeployContext): void {
|
||||
|
||||
toml = stripOfficialOnlyBindings(toml);
|
||||
|
||||
// 語義查詢(issue #7 / T2.4):開 kbdb_embed → 取消 kbdb toml 的 [[vectorize]]+[ai] 註解段(注入 active binding)。
|
||||
// **必須在 stripOfficialOnlyBindings 之後**:strip 會移除 [ai] 區塊(官方專屬),若先注入會被它清掉。
|
||||
// 只對含該註解段的 toml(= kbdb)生效;其餘 worker toml 無此段,replace 不命中、不動。
|
||||
// 未開 → 維持註解 → worker env 無 VECTORIZE/AI → embedEnabled()=false → base keyword(不花費)。
|
||||
if (ctx.kbdbEmbed) {
|
||||
toml = toml.replace(
|
||||
/# (\[\[vectorize\]\])\n# (binding = "VECTORIZE")\n# (index_name = "[^"]*")/,
|
||||
'$1\n$2\n$3',
|
||||
);
|
||||
toml = toml.replace(/# (\[ai\])\n# (binding = "AI")/, '$1\n$2');
|
||||
}
|
||||
|
||||
writeFileSync(tomlPath, toml, 'utf8');
|
||||
}
|
||||
|
||||
|
||||
@@ -121,17 +121,20 @@ kbdbProxyRouter.get('/kbdb/records/:recordId', async (c) => {
|
||||
|
||||
// ── search(限本租戶範圍內)────────────────────────────────────────────────────
|
||||
|
||||
// GET /kbdb/search?q=&entry_type=&source=&mode= — entry search, always scoped to the caller's tenant.
// `owner_id` is set from the authenticated tenant and cannot be supplied by the caller;
// `entry_type`, `source` and `mode` are forwarded to KBDB /entries/search only when present.
// The upstream status and body are passed through unchanged.
kbdbProxyRouter.get('/kbdb/search', async (c) => {
  const owner = tenant(c);
  if (!owner) return c.json(NEED_KEY, 401);

  const q = c.req.query('q');
  if (!q) return c.json({ error: 'q 必填' }, 400);

  const { base, headers } = kbdbBase(c.env);

  // Single upstream request: URLSearchParams handles the encoding of every value.
  const params = new URLSearchParams({ q, owner_id: owner });
  for (const k of ['entry_type', 'source', 'mode']) {
    const v = c.req.query(k);
    if (v) params.set(k, v);
  }

  const res = await fetch(`${base}/entries/search?${params.toString()}`, { headers });
  return new Response(res.body, { status: res.status, headers: { 'Content-Type': 'application/json' } });
});
|
||||
|
||||
@@ -165,7 +168,7 @@ kbdbProxyRouter.get('/kbdb/entries', async (c) => {
|
||||
const { base, headers } = kbdbBase(c.env);
|
||||
const params = new URLSearchParams();
|
||||
params.set('owner_id', owner); // 強制本租戶,不接受 caller 覆寫
|
||||
for (const k of ['entry_type', 'parent_id', 'page_name', 'limit', 'offset']) {
|
||||
for (const k of ['entry_type', 'parent_id', 'page_name', 'source', 'limit', 'offset']) {
|
||||
const v = c.req.query(k);
|
||||
if (v) params.set(k, v);
|
||||
}
|
||||
|
||||
@@ -53,6 +53,44 @@ function kvKey(apiKey: string, name: string): string {
|
||||
return `${apiKey}:wf:${name}`;
|
||||
}
|
||||
|
||||
/**
 * Write a searchable KBDB entry for a deployed workflow (workflow-discovery R2 / Phase 2.1).
 *
 * POSTs an `entry_type: 'workflow'` entry to `{KBDB_BASE_URL}/entries`:
 * - `owner_id` is the caller's API key, so search results stay tenant-scoped;
 * - `content` is the workflow description (the text that keyword / semantic search matches);
 * - `metadata_json` is a JSON string carrying `embed: true` plus the workflow name and id.
 *
 * @param env Worker bindings; `KBDB_BASE_URL` and the optional `KBDB_INTERNAL_TOKEN` are read.
 * @param apiKey Tenant API key, stored as the entry's `owner_id`.
 * @param name Workflow name, stored as `page_name` and `metadata.workflow_name`.
 * @param description Workflow description, stored as the entry content.
 * @param workflowId Optional workflow id; defaults to `name`.
 * @throws Error when the request fails at the network level or KBDB answers with a non-2xx
 *   status. Callers that treat this write as best-effort must catch the rejection themselves.
 */
async function writeWorkflowSearchEntry(
  env: Bindings,
  apiKey: string,
  name: string,
  description: string,
  workflowId?: string,
): Promise<void> {
  const base = (env.KBDB_BASE_URL ?? 'https://arcrun-kbdb.uncle6-me.workers.dev').replace(/\/$/, '');
  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
  if (env.KBDB_INTERNAL_TOKEN) headers['Authorization'] = `Bearer ${env.KBDB_INTERNAL_TOKEN}`;

  const res = await fetch(`${base}/entries`, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      entry_type: 'workflow',
      owner_id: apiKey, // tenant isolation
      page_name: name,
      content: description, // the searchable text
      // metadata is sent as a JSON string; `embed: true` marks the entry as embeddable.
      metadata_json: JSON.stringify({
        embed: true,
        workflow_name: name,
        workflow_id: workflowId ?? name,
      }),
    }),
  });

  // fetch() resolves on HTTP error statuses; without this check a rejected write would be
  // indistinguishable from a successful one for callers that report per-workflow results.
  if (!res.ok) {
    throw new Error(`KBDB POST /entries failed: HTTP ${res.status}`);
  }
}
|
||||
|
||||
// POST /webhooks/named — 部署(acr push 呼叫)
|
||||
webhooksNamedRouter.post('/webhooks/named', async (c) => {
|
||||
const apiKey = c.req.header('X-Arcrun-API-Key');
|
||||
@@ -72,6 +110,16 @@ webhooksNamedRouter.post('/webhooks/named', async (c) => {
|
||||
return c.json({ error: '缺少必要欄位:name, graph' }, 400);
|
||||
}
|
||||
|
||||
// workflow-discovery R1:description 強制非空(供語意搜尋,工作流可被發現)。
|
||||
// 定位(Q2 定案):要求操盤的 AI 據實寫一句「這工作流能做什麼」,非逼 low-code 用戶手填、
|
||||
// 非介面層機械塞佔位。空 → 擋下,由操盤 CC 據實補一句再部署(用戶可改)。
|
||||
if (typeof body.description !== 'string' || body.description.trim() === '') {
|
||||
return c.json({
|
||||
error: 'description 必填:請操盤的 AI 據實寫一句「這工作流能做什麼」(如「呼叫可 Upsert Google Sheets」),用戶可再改。供語意搜尋用,不是寫文章。',
|
||||
requires: 'description',
|
||||
}, 400);
|
||||
}
|
||||
|
||||
const name = body.name.trim();
|
||||
if (!/^[\w-]+$/.test(name)) {
|
||||
return c.json({ error: 'workflow name 只能包含英文字母、數字、底線和連字號' }, 400);
|
||||
@@ -93,7 +141,7 @@ webhooksNamedRouter.post('/webhooks/named', async (c) => {
|
||||
name,
|
||||
graph: body.graph,
|
||||
config: body.config,
|
||||
description: typeof body.description === 'string' ? body.description : '',
|
||||
description: body.description.trim(), // R1:已驗非空(見上),存 trim 後的值
|
||||
created_at: new Date().toISOString(),
|
||||
cron_expr: cronExpr ?? undefined,
|
||||
// 法律憑證:存人類明示同意(本次新同意或沿用既有)
|
||||
@@ -107,6 +155,12 @@ webhooksNamedRouter.post('/webhooks/named', async (c) => {
|
||||
// (避免 push 改 yaml 拿掉 cron 後殘留)。scheduled() 每分鐘只 get 這一個 key。
|
||||
await updateCronIndexEntry(c.env.WEBHOOKS, apiKey, name, cronExpr);
|
||||
|
||||
// workflow-discovery Phase 2.1:雙寫 embeddable search-entry(讓此 workflow 可被語意搜尋)。
|
||||
// 非阻塞(waitUntil)+ 失敗不致命(catch):可發現性是加值,不擋部署成功(對齊 #7 embedOnWrite 慣例)。
|
||||
c.executionCtx.waitUntil(
|
||||
writeWorkflowSearchEntry(c.env, apiKey, name, record.description).catch(() => {}),
|
||||
);
|
||||
|
||||
// Implicit telemetry (LI M1.2)
|
||||
recordTelemetry(c.env, apiKey, {
|
||||
event_type: 'deploy_success',
|
||||
@@ -124,6 +178,75 @@ webhooksNamedRouter.post('/webhooks/named', async (c) => {
|
||||
}, 201);
|
||||
});
|
||||
|
||||
// GET /workflows/search?q=&mode= — search the calling tenant's workflows (workflow-discovery R2).
// Thin forwarder to KBDB /entries/search: it pins entry_type=workflow and owner_id=<api key>,
// defaults to mode=semantic (only an explicit mode=keyword selects keyword search), and
// returns the upstream status and body unchanged.
webhooksNamedRouter.get('/workflows/search', async (c) => {
  const apiKey = c.req.header('X-Arcrun-API-Key');
  if (!apiKey) {
    return c.json({ error: '缺少 X-Arcrun-API-Key header' }, 401);
  }

  const q = c.req.query('q');
  if (!q) {
    return c.json({ error: 'q 必填:用自然語言描述要找的工作流(如「把資料寫進 Google Sheets」)' }, 400);
  }

  // Anything other than an explicit "keyword" falls back to semantic.
  const wantsKeyword = c.req.query('mode') === 'keyword';

  const kbdbOrigin = (c.env.KBDB_BASE_URL ?? 'https://arcrun-kbdb.uncle6-me.workers.dev').replace(/\/$/, '');
  const token = c.env.KBDB_INTERNAL_TOKEN;
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    ...(token ? { Authorization: `Bearer ${token}` } : {}),
  };

  const query = new URLSearchParams();
  query.set('q', q);
  query.set('owner_id', apiKey); // tenant isolation: only this tenant's workflows
  query.set('entry_type', 'workflow'); // only workflow entries
  query.set('mode', wantsKeyword ? 'keyword' : 'semantic');

  const upstream = await fetch(`${kbdbOrigin}/entries/search?${query.toString()}`, { headers });
  return new Response(upstream.body, {
    status: upstream.status,
    headers: { 'Content-Type': 'application/json' },
  });
});
|
||||
|
||||
// POST /workflows/backfill-search-entries — make already-deployed workflows searchable
// (workflow-discovery R3).
// For every stored workflow of the calling tenant:
//   - has a non-empty description → write a search entry and report it under `backfilled`;
//   - has no description          → report it under `needs_description` (never invented here);
//   - unreadable record or failed write → report it under `errors` and keep going.
// Intended to be called on demand, not on a schedule.
webhooksNamedRouter.post('/workflows/backfill-search-entries', async (c) => {
  const apiKey = c.req.header('X-Arcrun-API-Key');
  if (!apiKey) return c.json({ error: '缺少 X-Arcrun-API-Key header' }, 401);

  const prefix = `${apiKey}:wf:`;
  // NOTE(review): list() is called once; if the store pages its results, keys beyond the
  // first page are not processed — confirm against the expected number of workflows.
  const list = await c.env.WEBHOOKS.list({ prefix });
  const backfilled: string[] = [];
  const needsDescription: string[] = [];
  const errors: string[] = [];

  for (const k of list.keys) {
    const name = k.name.slice(prefix.length);
    const raw = await c.env.WEBHOOKS.get(k.name, 'text');
    if (!raw) continue;

    // A single corrupt record must not abort the whole backfill: report it and continue.
    let desc: string | undefined;
    try {
      const rec = JSON.parse(raw) as NamedWorkflowRecord | null;
      const stored: unknown = rec?.description;
      desc = typeof stored === 'string' ? stored.trim() : undefined;
    } catch (e) {
      errors.push(`${name}: invalid stored record (${e instanceof Error ? e.message : String(e)})`);
      continue;
    }

    if (!desc) {
      // Descriptions are never fabricated; the workflow is listed so it can be re-deployed with one.
      needsDescription.push(name);
      continue;
    }

    try {
      await writeWorkflowSearchEntry(c.env, apiKey, name, desc);
      backfilled.push(name);
    } catch (e) {
      errors.push(`${name}: ${e instanceof Error ? e.message : String(e)}`);
    }
  }

  return c.json({
    backfilled,
    backfilled_count: backfilled.length,
    needs_description: needsDescription,
    needs_description_count: needsDescription.length,
    errors,
    hint: needsDescription.length > 0
      ? `${needsDescription.length} 個工作流缺 description 無法被搜尋。請操盤的 AI re-deploy 它們時據實補一句「能做什麼」(不自動編造)。`
      : undefined,
  });
});
|
||||
|
||||
// POST /webhooks/named/migrate-cron-index — 一次性 migration(8.P0):把舊的 per-key
|
||||
// cron-idx:{apiKey}:{name} 折進單一 cron-idx:_all(這裡才 list 一次,非每分鐘 tick)。
|
||||
// 增量寫、不刪舊 key(重跑安全、冪等)。部署 8.P0 後跑一次,讓既有 cron workflow 不漏掉。
|
||||
@@ -241,16 +364,23 @@ webhooksNamedRouter.get('/webhooks/named', async (c) => {
|
||||
const prefix = `${apiKey}:wf:`;
|
||||
const list = await c.env.WEBHOOKS.list({ prefix });
|
||||
|
||||
const workflows = list.keys.map(k => {
|
||||
const name = k.name.slice(prefix.length);
|
||||
return { name };
|
||||
});
|
||||
|
||||
// workflow-discovery 方向①:list 回完整欄位(description/created_at),讓 MCP u6u_list_workflows
|
||||
// 改讀本端點時欄位齊(取代舊的讀 workflow_metadata record)。需 get 每個 record 取 description。
|
||||
const baseUrl = new URL(c.req.url).origin;
|
||||
const result = workflows.map(w => ({
|
||||
name: w.name,
|
||||
webhook_url: `${baseUrl}/webhooks/named/${w.name}/trigger`,
|
||||
}));
|
||||
const result = await Promise.all(
|
||||
list.keys.map(async (k) => {
|
||||
const name = k.name.slice(prefix.length);
|
||||
const raw = await c.env.WEBHOOKS.get(k.name, 'text');
|
||||
const rec = raw ? (JSON.parse(raw) as NamedWorkflowRecord) : null;
|
||||
return {
|
||||
name,
|
||||
description: rec?.description ?? '',
|
||||
created_at: rec?.created_at ?? '',
|
||||
cron_expr: rec?.cron_expr,
|
||||
webhook_url: `${baseUrl}/webhooks/named/${name}/trigger`,
|
||||
};
|
||||
}),
|
||||
);
|
||||
|
||||
return c.json({ workflows: result, total: result.length });
|
||||
});
|
||||
|
||||
@@ -57,6 +57,7 @@ export interface ListEntriesFilter {
|
||||
owner_id?: string;
|
||||
parent_id?: string;
|
||||
page_name?: string; // exact-match lookup (e.g. skill-/example- idempotency key)
|
||||
source?: string; // filter by metadata_json.$.source (ingest envelope source.uri). issue #5.1
|
||||
limit?: number;
|
||||
offset?: number;
|
||||
}
|
||||
@@ -68,6 +69,9 @@ export async function listEntries(db: D1Database, f: ListEntriesFilter = {}): Pr
|
||||
if (f.owner_id) { conds.push('owner_id = ?'); params.push(f.owner_id); }
|
||||
if (f.parent_id) { conds.push('parent_id = ?'); params.push(f.parent_id); }
|
||||
if (f.page_name) { conds.push('page_name = ?'); params.push(f.page_name); }
|
||||
// source is queryable via SQLite json_extract on the existing metadata_json TEXT column —
|
||||
// no new column / no migration (表不變鐵律). Per issue #5.1 (頂層化 source 成可查 filter).
|
||||
if (f.source) { conds.push("json_extract(metadata_json, '$.source') = ?"); params.push(f.source); }
|
||||
const where = conds.length ? `WHERE ${conds.join(' AND ')}` : '';
|
||||
const limit = Math.min(f.limit ?? 100, 1000);
|
||||
const offset = f.offset ?? 0;
|
||||
@@ -107,10 +111,18 @@ export async function deleteEntry(db: D1Database, id: string): Promise<void> {
|
||||
}
|
||||
|
||||
// D1 LIKE keyword search (base; semantic search is the optional embed module).
|
||||
export async function searchEntries(db: D1Database, q: string, owner_id?: string, limit = 50): Promise<Entry[]> {
|
||||
// entry_type: optional base filter (generic — caller passes any type, base stays type-agnostic).
|
||||
export async function searchEntries(
|
||||
db: D1Database,
|
||||
q: string,
|
||||
owner_id?: string,
|
||||
entry_type?: string,
|
||||
limit = 50,
|
||||
): Promise<Entry[]> {
|
||||
const conds = ['content LIKE ?'];
|
||||
const params: unknown[] = [`%${q}%`];
|
||||
if (owner_id) { conds.push('owner_id = ?'); params.push(owner_id); }
|
||||
if (entry_type) { conds.push('entry_type = ?'); params.push(entry_type); }
|
||||
const res = await db
|
||||
.prepare(`SELECT * FROM entries WHERE ${conds.join(' AND ')} ORDER BY updated_at DESC LIMIT ?`)
|
||||
.bind(...params, Math.min(limit, 200))
|
||||
|
||||
@@ -88,6 +88,52 @@ export async function createRecord(db: D1Database, input: CreateRecordInput): Pr
|
||||
return { record_id: recordId, template_id: tpl.id, values: input.values };
|
||||
}
|
||||
|
||||
/**
 * Update slot values of an existing record (mira-dissolve T2.1, issue #6).
 *
 * Table structure is untouched: a slot is changed by rewriting the `content` of the entry
 * linked to it.
 * - Slot already on the record → UPDATE the linked entry's content.
 * - Slot allowed by the template but not yet on the record → create a `value` entry and
 *   link it through a new `entry_values` row.
 * - Slot not allowed → reject. All slots are validated before anything is written, so a
 *   rejected call leaves the record exactly as it was.
 *
 * When the record's template cannot be loaded, the slots already present on the record are
 * used as the allowed set.
 *
 * @param db D1 database handle.
 * @param recordId Id of the record to update.
 * @param values Map of slot name → new content.
 * @returns The record as re-read by `getRecord` after the update, or `null` when no
 *   `entry_values` row exists for `recordId`.
 * @throws Error `slot not in template: <slot>` for the first slot that is not allowed.
 */
export async function updateRecord(
  db: D1Database,
  recordId: string,
  values: Record<string, string>,
): Promise<RecordResult | null> {
  // Existing slot → entry_id + template_id for this record.
  const evRes = await db
    .prepare(`SELECT slot_name, entry_id, template_id FROM entry_values WHERE record_id = ?`)
    .bind(recordId)
    .all<{ slot_name: string; entry_id: string; template_id: string }>();
  const evRows = evRes.results ?? [];
  if (evRows.length === 0) return null; // record does not exist

  const templateId = evRows[0].template_id;
  const slotToEntry = new Map(evRows.map((r) => [r.slot_name, r.entry_id]));

  const tpl = await getTemplate(db, templateId);
  // assumes slots_json is a JSON array of slot names — TODO confirm against the template writer
  const allowed: string[] = tpl ? JSON.parse(tpl.slots_json) : [...slotToEntry.keys()];

  const updates = Object.entries(values);

  // Validate everything first: rejecting midway through the write loop would leave the
  // record half-updated.
  const rejected = updates.find(([slot]) => !allowed.includes(slot));
  if (rejected) {
    throw new Error(`slot not in template: ${rejected[0]}`);
  }

  for (const [slot, content] of updates) {
    const entryId = slotToEntry.get(slot);
    if (entryId) {
      // Flip the slot value: update the linked entry's content.
      await db.prepare(`UPDATE entries SET content = ?, updated_at = unixepoch() WHERE id = ?`).bind(content, entryId).run();
    } else {
      // Valid template slot not yet on this record → grow it (create entry + link).
      const entry = await createEntry(db, { content, entry_type: 'value' });
      await db
        .prepare(`INSERT INTO entry_values (id, record_id, template_id, slot_name, entry_id) VALUES (?, ?, ?, ?, ?)`)
        .bind(uid('ev'), recordId, templateId, slot, entry.id)
        .run();
    }
  }
  return getRecord(db, recordId);
}
|
||||
|
||||
export async function getRecord(db: D1Database, recordId: string): Promise<RecordResult | null> {
|
||||
const res = await db
|
||||
.prepare(
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
// KBDB optional embed module (issue #7 / mira-dissolve SDD T2.4).
|
||||
//
|
||||
// 鐵律對齊:
|
||||
// - embedding 屬 **base 的 optional 模組**(非 graph/ingest)。CF 內建(Vectorize+AI),程式薄。
|
||||
// - **不拆 repo,binding 開/關**:有 env.VECTORIZE + env.AI 才啟用;沒有 → base 維持 LIKE keyword,API 不變。
|
||||
// - 不動三表結構(只標既有 entries.is_embedded / content_hash bookkeeping 欄;那些 base 從不讀,embed 才寫)。
|
||||
// - 不對每個 block 地毯式 embed(精耕,非 RAG 一股腦灌):只 embed「被標記為 embeddable」的 entry
|
||||
// (wiki 段落 + graph node gloss)。標記方式=寫入時 metadata_json.embed === true(caller 顯式標)。
|
||||
//
|
||||
// 為何用 metadata flag 而非 entry_type 白名單:base 不該寫死「哪些 entry_type 該 embed」(那是上游語意,
|
||||
// 會讓 base 知道 wiki/graph 概念,破壞解耦)。改由 caller(wiki/gloss 寫入端)顯式標 embed:true,
|
||||
// base 只認這個通用旗標 → base 維持對內容語意無知。
|
||||
|
||||
import type { Bindings, Entry } from './types';
|
||||
|
||||
const EMBED_MODEL = '@cf/baai/bge-base-en-v1.5'; // 768-dim,與 Vectorize index dimensions=768 對齊
|
||||
|
||||
/**
 * Whether the optional embed module is switched on: true only when both the VECTORIZE and
 * the AI bindings are present on `env`.
 */
export function embedEnabled(env: Bindings): boolean {
  const { VECTORIZE, AI } = env;
  return Boolean(VECTORIZE) && Boolean(AI);
}
|
||||
|
||||
/**
 * Turn a piece of text into an embedding vector by running EMBED_MODEL on the AI binding.
 *
 * @returns The first vector of the model output, or `null` when the text is blank, the AI
 *   binding is missing, or the model output contains no vector.
 */
async function embedText(env: Bindings, text: string): Promise<number[] | null> {
  const trimmed = (text ?? '').trim();
  if (trimmed === '' || !env.AI) {
    return null;
  }
  const output = (await env.AI.run(EMBED_MODEL, { text: [trimmed] })) as { data: number[][] };
  const first = output?.data?.[0];
  return first ?? null;
}
|
||||
|
||||
/**
 * Embed-on-write: embed a freshly written entry when, and only when, it asks for it.
 *
 * Nothing happens (returns `false`) when the embed module is off, when the entry's
 * metadata does not carry `embed: true`, or when no vector could be produced for its
 * content. Otherwise the vector is upserted into Vectorize under the entry id, with
 * `owner_id`, `entry_type` and `source` stored as vector metadata, and the entry row is
 * flagged `is_embedded = 1`.
 *
 * Errors from the AI, Vectorize or DB calls are not caught here; a caller that wants
 * fire-and-forget behaviour has to handle the rejection.
 *
 * @returns `true` when the entry was embedded and flagged, `false` when it was skipped.
 */
export async function embedOnWrite(env: Bindings, entry: Entry): Promise<boolean> {
  if (!embedEnabled(env) || !isEmbeddable(entry)) {
    return false;
  }

  const vector = await embedText(env, entry.content ?? '');
  if (!vector) {
    return false;
  }

  // Metadata stored next to the vector; semantic search filters on these fields.
  const metadata = {
    owner_id: entry.owner_id ?? '',
    entry_type: entry.entry_type,
    source: readSource(entry) ?? '',
  };
  await env.VECTORIZE!.upsert([{ id: entry.id, values: vector, metadata }]);

  // Bookkeeping on the existing column only; no schema change.
  const markEmbedded = env.DB.prepare('UPDATE entries SET is_embedded = 1 WHERE id = ?').bind(entry.id);
  await markEmbedded.run();
  return true;
}
|
||||
|
||||
/**
 * An entry is embeddable only when its `metadata_json` parses to an object whose `embed`
 * property is exactly `true` (explicit opt-in by the writer).
 */
function isEmbeddable(entry: Entry): boolean {
  return parseMeta(entry.metadata_json)?.embed === true;
}
|
||||
|
||||
/**
 * Read the `source` property from an entry's `metadata_json`.
 *
 * @returns The value when it is a string, otherwise `null` (missing metadata, unparseable
 *   JSON, or a non-string `source`).
 */
function readSource(entry: Entry): string | null {
  const source = parseMeta(entry.metadata_json)?.source;
  if (typeof source !== 'string') {
    return null;
  }
  return source;
}
|
||||
|
||||
/**
 * Parse a `metadata_json` string into a plain object.
 *
 * @param json Raw JSON text, or `null` when the entry has no metadata.
 * @returns The parsed object, or `null` when the input is empty, is not valid JSON, or
 *   parses to anything other than a non-array object (primitives, `null`, arrays).
 */
function parseMeta(json: string | null): Record<string, unknown> | null {
  if (!json) return null;
  try {
    const parsed: unknown = JSON.parse(json);
    // `typeof [] === 'object'`, so arrays have to be excluded explicitly: they are not a
    // key/value metadata object.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      return null;
    }
    return parsed as Record<string, unknown>;
  } catch {
    return null;
  }
}
|
||||
|
||||
/** One match returned by `semanticSearch`. */
export interface SemanticHit {
  /** Id of the matched vector. */
  id: string;
  /** Similarity score reported for the match. */
  score: number;
  /** `owner_id` from the match metadata, when present. */
  owner_id?: string;
  /** `entry_type` from the match metadata, when present. */
  entry_type?: string;
  /** `source` from the match metadata, when present. */
  source?: string;
}
|
||||
|
||||
/**
 * Semantic search (`mode: 'semantic'`).
 *
 * The query text is embedded and sent to Vectorize. Any of `owner_id`, `source` and
 * `entry_type` that are set are passed as a metadata filter; `entry_type` is a generic
 * filter, so the caller may pass any type. `topK` defaults to 20 and is capped at 100.
 *
 * @returns `null` when the embed module is off (the caller is expected to fall back to
 *   keyword search), an empty array when the query produced no vector, otherwise the
 *   matches with their id, score and filterable metadata.
 */
export async function semanticSearch(
  env: Bindings,
  q: string,
  opts: { owner_id?: string; source?: string; entry_type?: string; topK?: number } = {},
): Promise<SemanticHit[] | null> {
  if (!embedEnabled(env)) {
    return null;
  }

  const queryVector = await embedText(env, q);
  if (!queryVector) {
    return [];
  }

  // Only non-empty options become filter keys.
  const filter: Record<string, string> = {};
  const candidates = [
    ['owner_id', opts.owner_id],
    ['source', opts.source],
    ['entry_type', opts.entry_type],
  ] as const;
  for (const [key, value] of candidates) {
    if (value) filter[key] = value;
  }
  const hasFilter = Object.keys(filter).length > 0;

  const res = await env.VECTORIZE!.query(queryVector, {
    topK: Math.min(opts.topK ?? 20, 100),
    returnMetadata: 'indexed',
    ...(hasFilter ? { filter } : {}),
  });

  const matches = res.matches ?? [];
  return matches.map(({ id, score, metadata }) => ({
    id,
    score,
    owner_id: metadata?.owner_id as string | undefined,
    entry_type: metadata?.entry_type as string | undefined,
    source: metadata?.source as string | undefined,
  }));
}
|
||||
@@ -1,4 +1,4 @@
|
||||
// Entries route — atomic data + tree (project/workflow). Base, no embed/triplet.
|
||||
// Entries route — atomic data + tree (project/workflow). Base; embed is OPTIONAL (issue #7).
|
||||
import { Hono } from 'hono';
|
||||
import type { Bindings } from '../types';
|
||||
import {
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
deleteEntry,
|
||||
searchEntries,
|
||||
} from '../actions/entry-crud';
|
||||
import { embedEnabled, embedOnWrite, semanticSearch } from '../embed';
|
||||
|
||||
export const entryRoutes = new Hono<{ Bindings: Bindings }>();
|
||||
|
||||
@@ -17,29 +18,63 @@ entryRoutes.post('/', async (c) => {
|
||||
const body = await c.req.json().catch(() => null);
|
||||
if (!body || !body.entry_type) return c.json({ success: false, error: 'entry_type required' }, 400);
|
||||
const entry = await createEntry(c.env.DB, body);
|
||||
// embed-on-write (#7 / #5 第4點):模組開 + entry 標 embed:true 才做;fire-and-forget,不阻塞回應、失敗不致命。
|
||||
if (embedEnabled(c.env)) c.executionCtx.waitUntil(embedOnWrite(c.env, entry).catch(() => {}));
|
||||
return c.json({ success: true, entry });
|
||||
});
|
||||
|
||||
// GET /entries — list with filters (entry_type, owner_id, parent_id, page_name, source)
//   e.g. list workflows under a project: ?parent_id=PROJECT&entry_type=workflow
//   e.g. get one by idempotency key:     ?page_name=skill-rag_with_arcrun
//   e.g. filter by ingest source:        ?source=logseq://vault/foo.md  (issue #5.1)
// `limit` and `offset` are honoured only when they are non-negative integers; anything else
// (non-numeric, fractional, negative) is ignored and listEntries applies its defaults.
entryRoutes.get('/', async (c) => {
  // Number('abc') is NaN and Number('-1') is -1. A Math.min(limit, max) cap rejects neither,
  // so such values would presumably reach the SQL LIMIT/OFFSET — and SQLite treats a
  // negative LIMIT as "no upper bound".
  const nonNegativeInt = (raw: string | undefined): number | undefined => {
    if (!raw) return undefined;
    const n = Number(raw);
    return Number.isInteger(n) && n >= 0 ? n : undefined;
  };

  const entries = await listEntries(c.env.DB, {
    entry_type: c.req.query('entry_type') || undefined,
    owner_id: c.req.query('owner_id') || undefined,
    parent_id: c.req.query('parent_id') || undefined,
    page_name: c.req.query('page_name') || undefined,
    source: c.req.query('source') || undefined,
    limit: nonNegativeInt(c.req.query('limit')),
    offset: nonNegativeInt(c.req.query('offset')),
  });
  return c.json({ success: true, entries, count: entries.length });
});
|
||||
|
||||
// GET /entries/search?q=...&owner_id=... — D1 LIKE keyword search (base)
|
||||
// GET /entries/search?q=...&owner_id=...&source=...&entry_type=...&mode=keyword|semantic
|
||||
// - mode=keyword (default): D1 LIKE search (base, always available).
|
||||
// - mode=semantic: requires the embed module to be enabled (Vectorize + AI bindings). When it is not, the request is downgraded to keyword search and a capability_hint reports the missing capability (#7 discovery loop).
|
||||
// - entry_type: generic base filter (the caller may pass any type, e.g. workflow; base does not hard-code its meaning, workflow-discovery Q4).
|
||||
entryRoutes.get('/search', async (c) => {
|
||||
const q = c.req.query('q');
|
||||
// A missing or empty q is rejected with 400.
if (!q) return c.json({ success: false, error: 'q required' }, 400);
|
||||
// NOTE(review): the next line looks like the removed (old-side) line of this diff hunk; it is
// superseded by the mode-aware lookups below and would redeclare `entries` — confirm it is gone
// from the real file.
const entries = await searchEntries(c.env.DB, q, c.req.query('owner_id') || undefined);
|
||||
const owner_id = c.req.query('owner_id') || undefined;
|
||||
const source = c.req.query('source') || undefined;
|
||||
const entry_type = c.req.query('entry_type') || undefined;
|
||||
// Any mode value other than the exact string 'semantic' is treated as 'keyword'.
const mode = c.req.query('mode') === 'semantic' ? 'semantic' : 'keyword';
|
||||
|
||||
if (mode === 'semantic') {
|
||||
const hits = await semanticSearch(c.env, q, { owner_id, source, entry_type });
|
||||
if (hits === null) {
|
||||
// Module not enabled: honestly fall back to keyword search and tell the caller to ask CC to enable vectorize (do not pretend semantic search happened).
|
||||
// NOTE(review): `source` is forwarded to semanticSearch only; the searchEntries calls in this
// handler receive q, owner_id and entry_type, so ?source= appears to have no effect on the
// keyword path — confirm whether that is intended.
const entries = await searchEntries(c.env.DB, q, owner_id, entry_type);
|
||||
return c.json({
|
||||
success: true,
|
||||
entries,
|
||||
count: entries.length,
|
||||
// `mode` reports what actually ran; `requested_mode` echoes what the caller asked for.
mode: 'keyword',
|
||||
requested_mode: 'semantic',
|
||||
capability_hint:
|
||||
'語義查詢需先開 vectorize(embed 模組)。叫 CC「幫我開語義查詢」即可(設 kbdb_embed:true + redeploy)。本次已降級關鍵字搜尋。',
|
||||
});
|
||||
}
|
||||
// Hydrate vector hits into full entries (keeps the response shape identical to keyword mode).
// One getEntry lookup per hit, run in parallel; ids for which getEntry returns null are dropped.
|
||||
const entries = (await Promise.all(hits.map((h) => getEntry(c.env.DB, h.id)))).filter(
|
||||
(e): e is NonNullable<typeof e> => e !== null,
|
||||
);
|
||||
return c.json({ success: true, entries, count: entries.length, mode: 'semantic' });
|
||||
}
|
||||
|
||||
// Plain keyword path (mode omitted or anything other than 'semantic').
const entries = await searchEntries(c.env.DB, q, owner_id, entry_type);
|
||||
return c.json({ success: true, entries, count: entries.length, mode: 'keyword' });
|
||||
});
|
||||
|
||||
@@ -55,11 +90,19 @@ entryRoutes.patch('/:id', async (c) => {
|
||||
const body = await c.req.json().catch(() => ({}));
|
||||
const entry = await updateEntry(c.env.DB, c.req.param('id'), body);
|
||||
if (!entry) return c.json({ success: false, error: 'not found' }, 404);
|
||||
// 內容改了 → 重 embed(保持向量新鮮)。embedOnWrite 內部自會檢查模組開 + entry 是否 embeddable。
|
||||
if (embedEnabled(c.env) && body.content !== undefined) {
|
||||
c.executionCtx.waitUntil(embedOnWrite(c.env, entry).catch(() => {}));
|
||||
}
|
||||
return c.json({ success: true, entry });
|
||||
});
|
||||
|
||||
// DELETE /entries/:id
|
||||
// DELETE /entries/:id handler: schedules removal of the entry's vector (when the embed module
// is enabled), deletes the entry via deleteEntry, and always answers { success: true }.
entryRoutes.delete('/:id', async (c) => {
|
||||
// Embed module enabled → also delete the matching vector (avoids orphaned vectors). Failure is non-fatal.
|
||||
if (embedEnabled(c.env)) {
|
||||
// Scheduled through waitUntil so it does not block the response; any rejection is swallowed.
// NOTE(review): the `!` assumes embedEnabled() already verified the VECTORIZE binding — confirm.
// NOTE(review): the vector delete is scheduled before deleteEntry runs, so a failing deleteEntry
// could leave an entry without its vector — confirm that is acceptable.
c.executionCtx.waitUntil(c.env.VECTORIZE!.deleteByIds([c.req.param('id')]).then(() => {}).catch(() => {}));
|
||||
}
|
||||
await deleteEntry(c.env.DB, c.req.param('id'));
|
||||
return c.json({ success: true });
|
||||
});
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// Records route — structured records (entry_values composed by a template).
|
||||
import { Hono } from 'hono';
|
||||
import type { Bindings } from '../types';
|
||||
import { createRecord, getRecord, searchByTemplate } from '../actions/record-crud';
|
||||
import { createRecord, getRecord, searchByTemplate, updateRecord } from '../actions/record-crud';
|
||||
|
||||
export const recordRoutes = new Hono<{ Bindings: Bindings }>();
|
||||
|
||||
@@ -31,3 +31,19 @@ recordRoutes.get('/:recordId', async (c) => {
|
||||
if (!rec) return c.json({ success: false, error: 'not found' }, 404);
|
||||
return c.json({ success: true, record: rec });
|
||||
});
|
||||
|
||||
// PATCH /records/:recordId — { values:{slot:content} } update existing record slot values
|
||||
// (mira-dissolve T2.1 / issue #6; deprecate = flip a slot value, append-only tables untouched).
// Responses: 400 when the body is unparsable or lacks an object `values`, 404 when updateRecord
// returns a falsy result, 400 with the error message when updateRecord throws, otherwise the record.
|
||||
recordRoutes.patch('/:recordId', async (c) => {
|
||||
// A body that fails JSON parsing becomes null and is rejected by the guard below.
const body = await c.req.json().catch(() => null);
|
||||
// NOTE(review): arrays also satisfy `typeof === 'object'`, so `values: []` passes this guard —
// confirm updateRecord rejects or tolerates it.
if (!body || !body.values || typeof body.values !== 'object') {
|
||||
return c.json({ success: false, error: 'values required' }, 400);
|
||||
}
|
||||
try {
|
||||
const rec = await updateRecord(c.env.DB, c.req.param('recordId'), body.values);
|
||||
if (!rec) return c.json({ success: false, error: 'not found' }, 404);
|
||||
return c.json({ success: true, record: rec });
|
||||
} catch (e) {
|
||||
// Every exception from updateRecord is reported as a client error (400) carrying its message.
return c.json({ success: false, error: e instanceof Error ? e.message : String(e) }, 400);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -4,6 +4,11 @@
|
||||
// Environment bindings type for the KBDB worker: two required members plus the two optional
// embed-module bindings described below.
export type Bindings = {
|
||||
// D1 database binding (required).
DB: D1Database;
|
||||
// Environment name string (required).
ENVIRONMENT: string;
|
||||
// Optional embed module (issue #7 / SDD T2.4). Present ONLY when the self-host opened
|
||||
// semantic search (kbdb_embed:true → deploy injects [[vectorize]] + [ai]). Base never
|
||||
// requires them; code checks `if (env.VECTORIZE && env.AI)` before touching embed.
|
||||
// Both are optional, so any direct access must narrow or guard first.
VECTORIZE?: VectorizeIndex;
|
||||
AI?: Ai;
|
||||
};
|
||||
|
||||
export type EntryType =
|
||||
|
||||
@@ -14,3 +14,16 @@ database_id = "0c580910-e00b-4f8e-9c57-ac54ea52242f" # 官方 prod D1(arcrun-
|
||||
|
||||
[vars]
|
||||
ENVIRONMENT = "production"
|
||||
|
||||
# ── Optional embed module (issue #7 / SDD T2.4) ────────────────────────────────
|
||||
# Base 預設不開(free-tier 友善)。self-host 開語義查詢時,deploy.ts 偵測 config kbdb_embed:true
|
||||
# → 取消下面兩段註解(注入 active binding)並 `wrangler vectorize create arcrun-kbdb-embed
|
||||
# --dimensions=768 --metric=cosine`(bge-base-en-v1.5 = 768 維)。官方帳號同理由 deploy 注入。
|
||||
# 沒有這兩個 binding 時,kbdb/src/embed.ts 的 embedEnabled() 回 false → 維持 LIKE keyword、API 不變。
|
||||
#
|
||||
# [[vectorize]]
|
||||
# binding = "VECTORIZE"
|
||||
# index_name = "arcrun-kbdb-embed"
|
||||
#
|
||||
# [ai]
|
||||
# binding = "AI"
|
||||
|
||||
@@ -170,23 +170,39 @@ export function registerQuery(server: McpServer, env: Env) {
|
||||
);
|
||||
}
|
||||
|
||||
/** kbdb_search — 對 entries 做 D1 LIKE 關鍵字搜尋(基本盤,非語義)。 */
|
||||
/**
|
||||
* kbdb_search — 對 entries 做搜尋。mode=keyword(D1 LIKE,基本盤永遠可用)或 semantic(向量,需開 embed 模組)。
|
||||
* 語義/關鍵字都在同一 KBDB MCP(用戶資料 RAG),不分散(issue #7 / D17 邊界)。
|
||||
* mode=semantic 但沒開 vectorize → base 自動降級 keyword + 回 capability_hint(發現閉環,叫 CC 幫開)。
|
||||
*/
|
||||
export function registerSearch(server: McpServer, env: Env) {
|
||||
server.tool(
|
||||
"kbdb_search",
|
||||
"對 KBDB 內容做關鍵字搜尋(D1 LIKE,基本盤層;語義搜尋是另外的 embed 模組,基本盤沒有)。" +
|
||||
"回命中的 entries。要按 template 取整批結構化資料用 kbdb_query。",
|
||||
"搜尋 KBDB 內容。mode='keyword'(預設,D1 LIKE 關鍵字,基本盤永遠可用)或 'semantic'(AI 向量語義搜尋," +
|
||||
"需先開 embed 模組)。語義沒開時會自動降級關鍵字並告訴你怎麼開。要按 template 取整批結構化資料用 kbdb_query。",
|
||||
{
|
||||
q: z.string().min(1).describe("搜尋關鍵字"),
|
||||
q: z.string().min(1).describe("搜尋關鍵字 / 語義查詢句"),
|
||||
owner_id: z.string().optional().describe("限定某歸屬範圍內搜(選填)"),
|
||||
source: z.string().optional().describe("只搜某來源(ingest source.uri,選填)"),
|
||||
mode: z.enum(["keyword", "semantic"]).optional().describe("keyword(預設)或 semantic(需開 vectorize)"),
|
||||
},
|
||||
async ({ q, owner_id }) => {
|
||||
async ({ q, owner_id, source, mode }) => {
|
||||
try {
|
||||
const path = `/entries/search?q=${encodeURIComponent(q)}` + (owner_id ? `&owner_id=${encodeURIComponent(owner_id)}` : "");
|
||||
const res = await kbdbFetch(env, path);
|
||||
const qs = new URLSearchParams({ q });
|
||||
if (owner_id) qs.set("owner_id", owner_id);
|
||||
if (source) qs.set("source", source);
|
||||
if (mode) qs.set("mode", mode);
|
||||
const res = await kbdbFetch(env, `/entries/search?${qs.toString()}`);
|
||||
if (!res.ok) return errorResponse("search_failed", `搜尋失敗`, ["稍後重試"], await res.text().catch(() => ""));
|
||||
const data = await res.json();
|
||||
return successResponse(data, ["mode:keyword = D1 LIKE(基本盤)", "找不到時換個關鍵字,或用 kbdb_query 按 template 列出"]);
|
||||
const data = (await res.json()) as { mode?: string; capability_hint?: string };
|
||||
// base 回 capability_hint → 語義沒開、已降級 keyword。把它當 next-step 傳給 AI(發現閉環)。
|
||||
const hints =
|
||||
data.capability_hint
|
||||
? [data.capability_hint, "要開:跟用戶確認後,CC 可代開(寫 config kbdb_embed:true + acr update)"]
|
||||
: data.mode === "semantic"
|
||||
? ["mode:semantic = AI 向量語義搜尋"]
|
||||
: ["mode:keyword = D1 LIKE(基本盤)", "想要語義搜尋:mode='semantic'(需先開 vectorize)"];
|
||||
return successResponse(data, hints);
|
||||
} catch (e) {
|
||||
return errorResponse("internal_error", e instanceof Error ? e.message : String(e), ["稍後重試"]);
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import { registerDeployWorkflow } from "./u6u_deploy_workflow.js";
|
||||
import { registerPublishComponent } from "./u6u_publish_component.js";
|
||||
import { registerListWorkflows } from "./u6u_list_workflows.js";
|
||||
import { registerGetWorkflow } from "./u6u_get_workflow.js";
|
||||
import { registerSearchWorkflows } from "./u6u_search_workflows.js";
|
||||
import { registerListComponents } from "./u6u_list_components.js";
|
||||
import { registerGetComponent } from "./u6u_get_component.js";
|
||||
import { registerGetComponentGuide } from "./u6u_get_component_guide.js";
|
||||
@@ -30,6 +31,7 @@ export function registerAllTools(server: McpServer, env: Env, orgNamespace: stri
|
||||
registerPublishComponent(server, env, orgNamespace);
|
||||
registerListWorkflows(server, env, orgNamespace);
|
||||
registerGetWorkflow(server, env, orgNamespace);
|
||||
registerSearchWorkflows(server, env, orgNamespace, partnerToken); // workflow-discovery R2
|
||||
registerListComponents(server, env, orgNamespace);
|
||||
registerGetComponent(server, env, orgNamespace);
|
||||
registerGetComponentGuide(server, env, orgNamespace);
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { z } from "zod";
|
||||
import { Env } from "../types.js";
|
||||
|
||||
/**
|
||||
* u6u_search_workflows — find existing workflows with natural language (workflow-discovery R2)
|
||||
*
|
||||
* North-star entry point: the AI first checks whether an existing workflow can already do the job; if one is found, run it instead of rebuilding.
|
||||
* Calls cypher GET /workflows/search → forwarded to KBDB /entries/search (entry_type=workflow + this tenant).
|
||||
* Semantic search is preferred; when KBDB has Vectorize disabled it automatically falls back to keyword search and returns a capability_hint (no pretending to be semantic).
|
||||
*
|
||||
* Thin shell (rule 07): only parameter conversion + call + formatting, zero business logic. Shape mirrors u6u_search_components.
|
||||
* Flag safety: the AI calls this once when it receives an intent; no polling / scheduling.
|
||||
*
* NOTE(review): `orgNamespace` is accepted but never referenced in this function; tenant scoping
* presumably derives from `partnerToken`, which is sent as the X-Arcrun-API-Key header — confirm.
*/
|
||||
export function registerSearchWorkflows(
|
||||
server: McpServer,
|
||||
env: Env,
|
||||
orgNamespace: string,
|
||||
partnerToken: string,
|
||||
) {
|
||||
server.tool(
|
||||
"u6u_search_workflows",
|
||||
"用自然語言找現成的工作流(先查有沒有現成的能做這件事,找到就用,別重造)。例如:「把資料寫進 Google Sheets」、「每天抓 RSS 發通知」、「webhook 轉發到別的 API」。回傳本帳號下符合的工作流清單。",
|
||||
{
|
||||
// NOTE(review): no minimum length is enforced, so an empty query is sent upstream unchanged.
query: z.string().describe("自然語言描述要找的工作流,如「把資料寫進 Google Sheets」"),
|
||||
},
|
||||
async ({ query }) => {
|
||||
try {
|
||||
// Without the service binding there is nothing to call: report a tool error.
if (!env.CYPHER_EXECUTOR) {
|
||||
return {
|
||||
content: [{ type: "text", text: "Error: CYPHER_EXECUTOR service binding is not configured." }],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
// Request goes through the CYPHER_EXECUTOR binding's fetch; the query is URL-encoded.
const response = await env.CYPHER_EXECUTOR.fetch(
|
||||
`http://cypher-executor/workflows/search?q=${encodeURIComponent(query)}`,
|
||||
{ method: "GET", headers: { "X-Arcrun-API-Key": partnerToken } },
|
||||
);
|
||||
|
||||
// Non-2xx: surface the upstream response body verbatim as a tool error.
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
return {
|
||||
content: [{ type: "text", text: `Search failed: ${errorText}` }],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
// The cast is a compile-time assertion only; the payload is not validated at runtime.
const result = await response.json() as {
|
||||
entries?: Array<{ page_name?: string; content?: string }>;
|
||||
count?: number;
|
||||
mode?: string;
|
||||
capability_hint?: string;
|
||||
};
|
||||
const entries = result.entries ?? [];
|
||||
// Falls back to the array length when the response omits `count`.
const count = result.count ?? entries.length;
|
||||
|
||||
// No matches: suggest deploying a new workflow, appending the capability hint when present.
if (count === 0) {
|
||||
const hint = result.capability_hint ? `\n\n(${result.capability_hint})` : "";
|
||||
return {
|
||||
content: [{
|
||||
type: "text",
|
||||
text: `找不到符合「${query}」的現成工作流。可以用 u6u_deploy_workflow 部署一個新的。${hint}`,
|
||||
}],
|
||||
};
|
||||
}
|
||||
|
||||
// Pass capability_hint through to the AI: when semantic search is off, the AI sees it and can proactively ask the user whether to enable Vectorize (R2.3 loop).
|
||||
const hintLine = result.capability_hint
|
||||
? `\n\n⚠️ ${result.capability_hint}`
|
||||
: "";
|
||||
return {
|
||||
content: [{
|
||||
type: "text",
|
||||
// The reported mode defaults to "keyword" when the response carries none.
text: `找到 ${count} 個工作流(mode: ${result.mode ?? "keyword"}):\n${JSON.stringify(entries, null, 2)}${hintLine}`,
|
||||
}],
|
||||
};
|
||||
} catch (error) {
|
||||
// Any thrown error (fetch failure, invalid JSON, ...) is reported as a tool error.
return {
|
||||
content: [{ type: "text", text: `Internal Error: ${error instanceof Error ? error.message : String(error)}` }],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user