From b9bf3ec3d52c047c1e67687bc842a49fb52d427e Mon Sep 17 00:00:00 2001 From: uncle6me-web Date: Sun, 14 Jun 2026 22:12:11 +0800 Subject: [PATCH] =?UTF-8?q?fix(mcp,kbdb):=20LI=20M3=20skills/examples=20?= =?UTF-8?q?=E6=94=B9=E6=89=93=E5=9F=BA=E6=9C=AC=E7=9B=A4=20/entries?= =?UTF-8?q?=EF=BC=88=E4=BF=AE=E6=AD=BB=20route=20=E5=81=87=E7=B6=A0?= =?UTF-8?q?=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skills/examples 整條從舊 v3 /blocks /search 改打 KBDB 基本盤 /entries (entry_type 對應)。5 個已上線 MCP 工具原本對死 route 回 404(假綠), 現修正;sync-registry-to-kbdb.py 改打 /entries idempotent upsert。 誠實降級:基本盤無語義 search → LIKE 關鍵字(embed 模組上線再換回語義)。 順手 gitignore scripts/__pycache__/。 對應 kbdb-base tasks 9.4 / llm-interface M3.2/M3.4。mcp + kbdb tsc exit 0。 Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitignore | 1 + mcp/src/tools/arcrun_skills_examples.ts | 116 ++++++++------- scripts/sync-registry-to-kbdb.py | 185 ++++++++++++------------ 3 files changed, 159 insertions(+), 143 deletions(-) diff --git a/.gitignore b/.gitignore index c8283c4..f68c125 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ CONTRIBUTING.md ruvector.db **/.swarm/ **/ruvector.db +scripts/__pycache__/ diff --git a/mcp/src/tools/arcrun_skills_examples.ts b/mcp/src/tools/arcrun_skills_examples.ts index eb6edcc..943ea92 100644 --- a/mcp/src/tools/arcrun_skills_examples.ts +++ b/mcp/src/tools/arcrun_skills_examples.ts @@ -1,18 +1,27 @@ /** - * Skills + Examples lookup MCP tools — LI SDD M3.2 + * Skills + Examples lookup MCP tools — LI SDD M3.2 / M3.4 * - * 對應 .agents/specs/llm-interface/ Milestone 3.2 + 3.4。 + * 對應 docs/3-specs/llm-interface/ Milestone 3.2 + 3.4。 * - * - arcrun_list_skills — 列 KBDB type=agent-skill 全部 + * - arcrun_list_skills — 列 KBDB entry_type=agent-skill 全部 * - arcrun_get_skill — 用 slug 拿 skill markdown 全文 - * - arcrun_list_examples — 列 KBDB type=workflow-example 全部 + * - arcrun_list_examples — 列 KBDB entry_type=workflow-example 全部 * - arcrun_get_example — 用 slug 拿 example yaml + description + tags - * - arcrun_search_examples — 自然語言 use case → 命中相關 example + * - arcrun_search_examples — use case 關鍵字 → 命中相關 example * * Skills / examples 由 arcrun/scripts/sync-registry-to-kbdb.py 從 * arcrun/registry/{skills,examples} 同步進 KBDB。 * * 直接走 KBDB service binding(既有 pattern),不經 cypher-executor。 + * + * 2026-06-14 重寫:KBDB 降基本盤後(三表 entries/templates/records,無 v3 blocks 表、 + * 無語義 search),原打 /blocks /search 的舊路徑全失效。改打基本盤 /entries: + * - entry_type 取代 blocks 的 type 欄(entries 表原生有 entry_type/page_name/tags_json/metadata_json) + * - GET /blocks?type=X → GET /entries?entry_type=X + * - GET /blocks?page_name=Y → GET /entries?page_name=Y(base listEntries 加了 page_name 過濾) + * - POST /search(語義) → GET /entries/search?q=(D1 LIKE 關鍵字,基本盤無語義; + * 誠實降級:search_examples 現在是「關鍵字」非「語義」。embed 模組(kbdb-base Phase 1) + * 上線後只換內部、工具簽名不變。 */ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; @@ -21,29 +30,30 @@ import type { Env } from "../types.js"; import { kbdbFetch } from "../lib/kbdb-client.js"; import { errorResponse, successResponse } from "../lib/cypher-client.js"; +// 基本盤 entries row(與舊 v3 block 欄位 1:1,差別只在 type→entry_type) interface KbdbBlock { id: string; page_name?: string | null; content?: string | null; - type?: string; + entry_type?: string; tags_json?: string; metadata_json?: string | null; source?: string | null; updated_at?: number; } -async function kbdbList(env: Env, type: string, limit = 100): Promise { - const resp = await kbdbFetch(env, `/blocks?type=${type}&limit=${limit}`); - if (!resp.ok) throw new Error(`KBDB list type=${type} HTTP ${resp.status}`); - const data = await resp.json<{ blocks?: KbdbBlock[] }>(); - return data.blocks ?? []; +async function kbdbList(env: Env, entryType: string, limit = 100): Promise { + const resp = await kbdbFetch(env, `/entries?entry_type=${encodeURIComponent(entryType)}&limit=${limit}`); + if (!resp.ok) throw new Error(`KBDB list entry_type=${entryType} HTTP ${resp.status}`); + const data = await resp.json<{ entries?: KbdbBlock[] }>(); + return data.entries ?? []; } async function kbdbGetByPageName(env: Env, pageName: string): Promise { - const resp = await kbdbFetch(env, `/blocks?page_name=${encodeURIComponent(pageName)}&limit=1`); + const resp = await kbdbFetch(env, `/entries?page_name=${encodeURIComponent(pageName)}&limit=1`); if (!resp.ok) return null; - const data = await resp.json<{ blocks?: KbdbBlock[] }>(); - return data.blocks?.[0] ?? null; + const data = await resp.json<{ entries?: KbdbBlock[] }>(); + return data.entries?.[0] ?? null; } function parseTags(tagsJson?: string): string[] { @@ -234,54 +244,55 @@ export function registerGetExample(server: McpServer, env: Env) { export function registerSearchExamples(server: McpServer, env: Env) { server.tool( "arcrun_search_examples", - "用自然語言 use case 搜 workflow examples。回最相關 N 個。內部走 KBDB semantic search(embedding 比對)+ tag 過濾。", + "用 use case 關鍵字搜 workflow examples,回最相關 N 個。" + + "注意:基本盤目前是 D1 LIKE 關鍵字搜尋(非語義 embedding;語義是 kbdb-base Phase 1 的 embed 模組,尚未上)。" + + "→ 用具體詞('email'、'cron'、'rag')比整句自然語言命中率高。也會比對 slug/tag。", { - query: z.string().min(3).describe("用 use case 描述,例如 '每天早上發 email 摘要' / 'RAG 從文件回答問題'"), + query: z.string().min(2).describe("use case 關鍵字,例如 'email 摘要' / 'cron 排程' / 'rag'。基本盤是關鍵字非語義,用詞要具體"), top_k: z.number().int().min(1).max(20).optional().describe("回幾個結果(預設 5)"), }, async ({ query, top_k }) => { try { const k = top_k ?? 5; - // KBDB /search 是 unified semantic search(既有),filter type=workflow-example - const resp = await kbdbFetch(env, `/search`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - query, - topK: k * 3, // overfetch 後 filter type - }), - }); + const q = query.trim(); - if (!resp.ok) { - return errorResponse( - "fetch_failed", - `KBDB search HTTP ${resp.status}`, - ["稍後重試", "改用 arcrun_list_examples(tag=...) 過濾"], - await resp.text().catch(() => ""), - ); - } + // 基本盤無語義 search:撈全部 workflow-example,用 query 對 content/slug/tag 做關鍵字比對排序。 + // (examples 只有 ~10 筆,client 端過濾零負擔;embed 模組上線後可改打語義 search) + const blocks = await kbdbList(env, "workflow-example", 200); + const ql = q.toLowerCase(); + const terms = ql.split(/\s+/).filter(Boolean); - const data = await resp.json<{ results?: Array<{ block?: KbdbBlock; score?: number }> }>(); - const all = data.results ?? []; - const examples = all - .filter((r) => r.block?.type === "workflow-example") - .slice(0, k) - .map((r) => { - const b = r.block!; - return { - slug: b.page_name?.replace(/^example-/, "") ?? "", - page_name: b.page_name, - score: r.score, - tags: parseTags(b.tags_json), - preview: (b.content ?? "").slice(0, 200), - }; - }); + const scored = blocks + .map((b) => { + const slug = b.page_name?.replace(/^example-/, "") ?? ""; + const tags = parseTags(b.tags_json); + const hay = `${slug} ${tags.join(" ")} ${(b.content ?? "")}`.toLowerCase(); + // 每個 term 命中 +1;slug/tag 命中額外加權 + let score = 0; + for (const t of terms) { + if (hay.includes(t)) score += 1; + if (slug.toLowerCase().includes(t)) score += 2; + if (tags.some((tag) => tag.toLowerCase().includes(t))) score += 2; + } + return { b, slug, tags, score }; + }) + .filter((r) => r.score > 0) + .sort((a, b) => b.score - a.score) + .slice(0, k); + + const examples = scored.map((r) => ({ + slug: r.slug, + page_name: r.b.page_name, + score: r.score, + tags: r.tags, + preview: (r.b.content ?? "").slice(0, 200), + })); if (examples.length === 0) { return successResponse( - { count: 0, examples: [], query }, + { count: 0, examples: [], query: q }, [ - "沒命中。可能 KBDB /search 還在等 embedding 建好(剛 sync 完要 1-5 分鐘)", + "關鍵字沒命中(基本盤是 LIKE 非語義,換更具體/不同的詞再試)", "改用 arcrun_list_examples(tag='...') 走 tag 過濾", "或 arcrun_list_examples() 看全部清單自己挑", ], @@ -289,10 +300,11 @@ export function registerSearchExamples(server: McpServer, env: Env) { } return successResponse( - { count: examples.length, examples, query }, + { count: examples.length, examples, query: q, search_mode: "keyword" }, [ "call arcrun_get_example(slug) 拿完整 YAML", - "score 高 = 跟你 query 更相關", + "score 高 = 關鍵字命中越多(slug/tag 命中加權)", + "search_mode:keyword — 基本盤無語義,命中靠字面;換具體詞可改善", ], ); } catch (e) { diff --git a/scripts/sync-registry-to-kbdb.py b/scripts/sync-registry-to-kbdb.py index 44fa196..454da02 100644 --- a/scripts/sync-registry-to-kbdb.py +++ b/scripts/sync-registry-to-kbdb.py @@ -4,27 +4,31 @@ 對應 LI SDD M3.4。examples / skills 在 git 是 source of truth, KBDB 是「給 AI 搜尋 / get」的 query-friendly mirror。 -對 KBDB block: -- examples → type=workflow-example - content = workflow.yaml 全文 - metadata_json = { description, tags } - tags_json = [...tags.json] - page_name = example-{slug} (idempotency key,重複 sync 走 upsert) +2026-06-14 重寫:KBDB 降基本盤後(三表 entries/templates/records,無 v3 blocks 表、 +無 kbdb-upsert-block 零件 worker),原打 https://kbdb-upsert-block.arcrun.dev/ 全失效。 +改打基本盤 KBDB Worker 的 /entries: +- examples → entry_type=workflow-example + content = workflow.yaml 全文 + metadata_json = { slug, description_md, tags } + tags_json = ["workflow-example", "example:{slug}", *tags] + page_name = example-{slug} (idempotency key) +- skills → entry_type=agent-skill + content = {slug}.md 全文 + metadata_json = { slug, title } + tags_json = ["agent-skill", "skill:{slug}"] + page_name = skill-{slug} (idempotency key) -- skills → type=agent-skill - content = {slug}.md 全文 - page_name = skill-{slug} (idempotency key) - tags_json = ["agent-skill", "skill:{slug}"] +基本盤無 upsert 端點 → 本腳本自己做 idempotency(GET ?page_name= 找到則 PATCH /entries/:id, +否則 POST /entries)。這是 ops 同步腳本(非 CLI/MCP 薄殼),自行編排不違反 rule 07 薄殼原則。 執行: cd matrix/arcrun - python3 scripts/sync-registry-to-kbdb.py # 上傳所有 - python3 scripts/sync-registry-to-kbdb.py --dry-run # 只 list 不寫 + KBDB_BASE_URL=https://arcrun-kbdb..workers.dev python3 scripts/sync-registry-to-kbdb.py + python3 scripts/sync-registry-to-kbdb.py --dry-run # 只 list 不寫 -需求: -- mira tools/_kbdb_client.py 風格 (urllib + ak_) -- ARCRUN_API_KEY 從 .env 或 env var -- 走 kbdb-*.arcrun.dev 零件 worker endpoints (符合 mira CLAUDE.md §1.7) +設定: +- KBDB_BASE_URL KBDB 基本盤 Worker 的 base URL(必填,無預設——避免誤打到別的環境) +- KBDB_OWNER_ID 資料歸屬標記(選填,預設 'registry';基本盤多租戶用 owner_id) """ import argparse @@ -39,42 +43,30 @@ ARCRUN_ROOT = Path(__file__).resolve().parent.parent EXAMPLES_DIR = ARCRUN_ROOT / "registry" / "examples" SKILLS_DIR = ARCRUN_ROOT / "registry" / "skills" -KBDB_UPSERT_URL = "https://kbdb-upsert-block.arcrun.dev/" -USER_AGENT = "arcrun-registry-sync/1.0" -USER_ID = "inkstone_platform_registry" # 需符合 KBDB partner namespace prefix(inkstone_*) +USER_AGENT = "arcrun-registry-sync/2.0" +OWNER_ID = os.environ.get("KBDB_OWNER_ID", "registry") SOURCE = "registry-git-sync" -def get_api_key() -> str: - """從 env var 或 polaris/mira/.env 取 ARCRUN_API_KEY。""" - key = os.environ.get("ARCRUN_API_KEY", "") - if key: - return key - # fallback:找 polaris/mira/.env(leo 既有約定位置) - mira_env = ARCRUN_ROOT.parent.parent / "polaris" / "mira" / ".env" - if mira_env.exists(): - for line in mira_env.read_text(encoding="utf-8").splitlines(): - line = line.strip() - if line.startswith("ARCRUN_API_KEY="): - return line.split("=", 1)[1].strip() +def get_base_url() -> str: + """KBDB 基本盤 Worker base URL。無預設(避免誤打環境)。""" + url = os.environ.get("KBDB_BASE_URL", "").rstrip("/") + if url: + return url raise SystemExit( - "ARCRUN_API_KEY 未設定。export ARCRUN_API_KEY=ak_... 或加到 polaris/mira/.env" + "KBDB_BASE_URL 未設定。\n" + " export KBDB_BASE_URL=https://arcrun-kbdb..workers.dev\n" + " (self-hosted 用自己部署的 KBDB Worker URL)" ) -def kbdb_upsert(api_key: str, payload: dict, dry_run: bool) -> dict: - """POST kbdb-upsert-block.arcrun.dev — page_name 當 idempotency key""" - if dry_run: - return {"dry_run": True, "would_upsert": payload.get("page_name")} - data = json.dumps(payload, ensure_ascii=False).encode("utf-8") +def _req(method: str, url: str, payload: dict | None = None) -> dict: + data = json.dumps(payload, ensure_ascii=False).encode("utf-8") if payload is not None else None req = urllib.request.Request( - KBDB_UPSERT_URL, + url, data=data, - headers={ - "Content-Type": "application/json", - "User-Agent": USER_AGENT, - }, - method="POST", + headers={"Content-Type": "application/json", "User-Agent": USER_AGENT}, + method=method, ) try: with urllib.request.urlopen(req, timeout=30) as resp: @@ -82,10 +74,43 @@ def kbdb_upsert(api_key: str, payload: dict, dry_run: bool) -> dict: except urllib.error.HTTPError as e: body = e.read().decode("utf-8", errors="replace") return {"error": f"HTTP {e.code}: {body[:200]}"} + except urllib.error.URLError as e: + return {"error": f"URL error: {e}"} -def sync_examples(api_key: str, dry_run: bool) -> tuple[int, int]: - """同步 registry/examples/{slug}/ 進 KBDB""" +def find_entry_id_by_page_name(base_url: str, page_name: str) -> str | None: + """GET /entries?page_name= → 回既有 entry id(idempotency 用),無則 None。""" + from urllib.parse import quote + res = _req("GET", f"{base_url}/entries?page_name={quote(page_name)}&limit=1") + if "error" in res: + return None + entries = res.get("entries") or [] + return entries[0].get("id") if entries else None + + +def upsert_entry(base_url: str, payload: dict, dry_run: bool) -> dict: + """page_name 當 idempotency key:找到則 PATCH /entries/:id,否則 POST /entries。""" + page_name = payload.get("page_name") + if dry_run: + existing = None if base_url == "DRY" else find_entry_id_by_page_name(base_url, page_name) + return {"dry_run": True, "would": "patch" if existing else "post", "page_name": page_name} + + existing_id = find_entry_id_by_page_name(base_url, page_name) + if existing_id: + # PATCH 只送可變欄位(entry_type/page_name 不變) + patch = {k: payload[k] for k in ("content", "tags_json", "metadata_json") if k in payload} + res = _req("PATCH", f"{base_url}/entries/{existing_id}", patch) + if "error" not in res: + res.setdefault("action", "update") + return res + res = _req("POST", f"{base_url}/entries", payload) + if "error" not in res: + res.setdefault("action", "create") + return res + + +def sync_examples(base_url: str, dry_run: bool) -> tuple[int, int]: + """同步 registry/examples/{slug}/ 進 KBDB(entry_type=workflow-example)""" if not EXAMPLES_DIR.exists(): print(f"⚠️ {EXAMPLES_DIR} 不存在,跳過 examples 同步") return 0, 0 @@ -104,50 +129,36 @@ def sync_examples(api_key: str, dry_run: bool) -> tuple[int, int]: continue yaml_content = workflow_yaml.read_text(encoding="utf-8") - description = ( - description_md.read_text(encoding="utf-8") if description_md.exists() else "" - ) - tags = ( - json.loads(tags_json.read_text(encoding="utf-8")) if tags_json.exists() else [] - ) + description = description_md.read_text(encoding="utf-8") if description_md.exists() else "" + tags = json.loads(tags_json.read_text(encoding="utf-8")) if tags_json.exists() else [] - # content = workflow YAML(讓 AI semantic search 命中 YAML 內容) - # metadata_json = description + tags 結構化 payload = { - "api_key": api_key, - "type": "workflow-example", + "entry_type": "workflow-example", "page_name": f"example-{slug}", - "source": SOURCE, - "user_id": USER_ID, + "owner_id": OWNER_ID, "content": yaml_content, "metadata_json": json.dumps( - { - "slug": slug, - "description_md": description, - "tags": tags, - }, + {"slug": slug, "description_md": description, "tags": tags, "source": SOURCE}, ensure_ascii=False, ), "tags_json": json.dumps( - ["workflow-example", f"example:{slug}", *tags], - ensure_ascii=False, + ["workflow-example", f"example:{slug}", *tags], ensure_ascii=False ), } - result = kbdb_upsert(api_key, payload, dry_run) + result = upsert_entry(base_url, payload, dry_run) if "error" in result: print(f" ❌ {slug}: {result['error']}") fail += 1 else: - action = result.get("data", {}).get("action", "?") if isinstance(result.get("data"), dict) else "?" - print(f" ✅ {slug} → {action}") + print(f" ✅ {slug} → {result.get('action', 'dry-run:' + result.get('would', '?'))}") ok += 1 return ok, fail -def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]: - """同步 registry/skills/*.md 進 KBDB""" +def sync_skills(base_url: str, dry_run: bool) -> tuple[int, int]: + """同步 registry/skills/*.md 進 KBDB(entry_type=agent-skill)""" if not SKILLS_DIR.exists(): print(f"⚠️ {SKILLS_DIR} 不存在,跳過 skills 同步") return 0, 0 @@ -159,7 +170,6 @@ def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]: slug = md_file.stem content = md_file.read_text(encoding="utf-8") - # 簡單抓首行 # X 當 title title = slug for line in content.splitlines(): line = line.strip() @@ -168,44 +178,37 @@ def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]: break payload = { - "api_key": api_key, - "type": "agent-skill", + "entry_type": "agent-skill", "page_name": f"skill-{slug}", - "source": SOURCE, - "user_id": USER_ID, + "owner_id": OWNER_ID, "content": content, "metadata_json": json.dumps( - {"slug": slug, "title": title}, - ensure_ascii=False, - ), - "tags_json": json.dumps( - ["agent-skill", f"skill:{slug}"], - ensure_ascii=False, + {"slug": slug, "title": title, "source": SOURCE}, ensure_ascii=False ), + "tags_json": json.dumps(["agent-skill", f"skill:{slug}"], ensure_ascii=False), } - result = kbdb_upsert(api_key, payload, dry_run) + result = upsert_entry(base_url, payload, dry_run) if "error" in result: print(f" ❌ {slug}: {result['error']}") fail += 1 else: - action = result.get("data", {}).get("action", "?") if isinstance(result.get("data"), dict) else "?" - print(f" ✅ {slug} → {action}") + print(f" ✅ {slug} → {result.get('action', 'dry-run:' + result.get('would', '?'))}") ok += 1 return ok, fail def main(): - p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB") + p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB base (/entries)") p.add_argument("--dry-run", action="store_true", help="只 list 不寫") p.add_argument("--examples-only", action="store_true") p.add_argument("--skills-only", action="store_true") args = p.parse_args() - api_key = get_api_key() - print(f"🔑 api_key: {api_key[:12]}... (len={len(api_key)})") - print(f"📂 root: {ARCRUN_ROOT}") + base_url = "DRY" if args.dry_run and not os.environ.get("KBDB_BASE_URL") else get_base_url() + print(f"🌐 KBDB base: {base_url}") + print(f"📂 root: {ARCRUN_ROOT} (owner_id={OWNER_ID})") if args.dry_run: print("(dry-run,不實際寫 KBDB)") print() @@ -214,13 +217,13 @@ def main(): skills_ok = skills_fail = 0 if not args.skills_only: - print("📋 Syncing examples → type=workflow-example ...") - examples_ok, examples_fail = sync_examples(api_key, args.dry_run) + print("📋 Syncing examples → entry_type=workflow-example ...") + examples_ok, examples_fail = sync_examples(base_url, args.dry_run) print(f" examples: {examples_ok} ok / {examples_fail} fail\n") if not args.examples_only: - print("📋 Syncing skills → type=agent-skill ...") - skills_ok, skills_fail = sync_skills(api_key, args.dry_run) + print("📋 Syncing skills → entry_type=agent-skill ...") + skills_ok, skills_fail = sync_skills(base_url, args.dry_run) print(f" skills: {skills_ok} ok / {skills_fail} fail\n") total_fail = examples_fail + skills_fail