fix(mcp,kbdb): LI M3 skills/examples 改打基本盤 /entries（修死 route 假綠）

skills/examples 整條從舊 v3 /blocks /search 改打 KBDB 基本盤 /entries（entry_type 對應）。5 個已上線 MCP 工具原本對死 route 回 404（假綠），現修正；sync-registry-to-kbdb.py 改打 /entries idempotent upsert。誠實降級：基本盤無語義 search → LIKE 關鍵字（embed 模組上線再換回語義）。順手 gitignore scripts/__pycache__/。對應 kbdb-base tasks 9.4 / llm-interface M3.2/M3.4。mcp + kbdb tsc exit 0。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 22:12:11 +08:00
parent ef1f789525
commit b9bf3ec3d5
3 changed files with 159 additions and 143 deletions
@@ -47,3 +47,4 @@ CONTRIBUTING.md
 ruvector.db
 **/.swarm/
 **/ruvector.db
+scripts/__pycache__/
@@ -1,18 +1,27 @@
 /**
- * Skills + Examples lookup MCP tools — LI SDD M3.2
+ * Skills + Examples lookup MCP tools — LI SDD M3.2 / M3.4
 *
- * 對應 .agents/specs/llm-interface/ Milestone 3.2 + 3.4。
+ * 對應 docs/3-specs/llm-interface/ Milestone 3.2 + 3.4。
 *
- * - arcrun_list_skills      — 列 KBDB type=agent-skill 全部
+ * - arcrun_list_skills      — 列 KBDB entry_type=agent-skill 全部
 * - arcrun_get_skill        — 用 slug 拿 skill markdown 全文
- * - arcrun_list_examples    — 列 KBDB type=workflow-example 全部
+ * - arcrun_list_examples    — 列 KBDB entry_type=workflow-example 全部
 * - arcrun_get_example      — 用 slug 拿 example yaml + description + tags
- * - arcrun_search_examples  — 自然語言 use case → 命中相關 example
+ * - arcrun_search_examples  — use case 關鍵字 → 命中相關 example
 *
 * Skills / examples 由 arcrun/scripts/sync-registry-to-kbdb.py 從
 * arcrun/registry/{skills,examples} 同步進 KBDB。
 *
 * 直接走 KBDB service binding（既有 pattern），不經 cypher-executor。
+ *
+ * 2026-06-14 重寫：KBDB 降基本盤後（三表 entries/templates/records，無 v3 blocks 表、
+ * 無語義 search），原打 /blocks /search 的舊路徑全失效。改打基本盤 /entries：
+ *   - entry_type 取代 blocks 的 type 欄（entries 表原生有 entry_type/page_name/tags_json/metadata_json）
+ *   - GET /blocks?type=X       → GET /entries?entry_type=X
+ *   - GET /blocks?page_name=Y  → GET /entries?page_name=Y（base listEntries 加了 page_name 過濾）
+ *   - POST /search（語義）      → GET /entries/search?q=（D1 LIKE 關鍵字，基本盤無語義）；
+ *       誠實降級：search_examples 現在是「關鍵字」非「語義」（本檔實作：kbdbList 撈全部 workflow-example 後在 client 端做子字串比對）。
+ *       embed 模組（kbdb-base Phase 1）上線後只換內部、工具簽名不變。
 */

 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
@@ -21,29 +30,30 @@ import type { Env } from "../types.js";
 import { kbdbFetch } from "../lib/kbdb-client.js";
 import { errorResponse, successResponse } from "../lib/cypher-client.js";

+// 基本盤 entries row（與舊 v3 block 欄位 1:1，差別只在 type→entry_type）
 interface KbdbBlock {
  id: string;
  page_name?: string | null;
  content?: string | null;
-  type?: string;
+  entry_type?: string;
  tags_json?: string;
  metadata_json?: string | null;
  source?: string | null;
  updated_at?: number;
 }

-async function kbdbList(env: Env, type: string, limit = 100): Promise<KbdbBlock[]> {
-  const resp = await kbdbFetch(env, `/blocks?type=${type}&limit=${limit}`);
-  if (!resp.ok) throw new Error(`KBDB list type=${type} HTTP ${resp.status}`);
-  const data = await resp.json<{ blocks?: KbdbBlock[] }>();
-  return data.blocks ?? [];
+async function kbdbList(env: Env, entryType: string, limit = 100): Promise<KbdbBlock[]> {
+  const resp = await kbdbFetch(env, `/entries?entry_type=${encodeURIComponent(entryType)}&limit=${limit}`);
+  if (!resp.ok) throw new Error(`KBDB list entry_type=${entryType} HTTP ${resp.status}`);
+  const data = await resp.json<{ entries?: KbdbBlock[] }>();
+  return data.entries ?? [];
 }

 async function kbdbGetByPageName(env: Env, pageName: string): Promise<KbdbBlock | null> {
-  const resp = await kbdbFetch(env, `/blocks?page_name=${encodeURIComponent(pageName)}&limit=1`);
+  const resp = await kbdbFetch(env, `/entries?page_name=${encodeURIComponent(pageName)}&limit=1`);
  if (!resp.ok) return null;
-  const data = await resp.json<{ blocks?: KbdbBlock[] }>();
-  return data.blocks?.[0] ?? null;
+  const data = await resp.json<{ entries?: KbdbBlock[] }>();
+  return data.entries?.[0] ?? null;
 }

 function parseTags(tagsJson?: string): string[] {
@@ -234,54 +244,55 @@ export function registerGetExample(server: McpServer, env: Env) {
 export function registerSearchExamples(server: McpServer, env: Env) {
  server.tool(
    "arcrun_search_examples",
-    "用自然語言 use case 搜 workflow examples。回最相關 N 個。內部走 KBDB semantic search（embedding 比對）+ tag 過濾。",
+    "用 use case 關鍵字搜 workflow examples，回最相關 N 個。" +
+      "注意：基本盤目前是 D1 LIKE 關鍵字搜尋（非語義 embedding；語義是 kbdb-base Phase 1 的 embed 模組，尚未上）。" +
+      "→ 用具體詞（'email'、'cron'、'rag'）比整句自然語言命中率高。也會比對 slug/tag。",
    {
-      query: z.string().min(3).describe("用 use case 描述，例如 '每天早上發 email 摘要' / 'RAG 從文件回答問題'"),
+      query: z.string().min(2).describe("use case 關鍵字，例如 'email 摘要' / 'cron 排程' / 'rag'。基本盤是關鍵字非語義，用詞要具體"),
      top_k: z.number().int().min(1).max(20).optional().describe("回幾個結果（預設 5）"),
    },
    async ({ query, top_k }) => {
      try {
        const k = top_k ?? 5;
-        // KBDB /search 是 unified semantic search（既有），filter type=workflow-example
-        const resp = await kbdbFetch(env, `/search`, {
-          method: "POST",
-          headers: { "Content-Type": "application/json" },
-          body: JSON.stringify({
-            query,
-            topK: k * 3,  // overfetch 後 filter type
-          }),
-        });
+        const q = query.trim();

-        if (!resp.ok) {
-          return errorResponse(
-            "fetch_failed",
-            `KBDB search HTTP ${resp.status}`,
-            ["稍後重試", "改用 arcrun_list_examples(tag=...) 過濾"],
-            await resp.text().catch(() => ""),
-          );
+        // 基本盤無語義 search：撈全部 workflow-example，用 query 對 content/slug/tag 做關鍵字比對排序。
+        // （examples 只有 ~10 筆，client 端過濾零負擔；embed 模組上線後可改打語義 search）
+        const blocks = await kbdbList(env, "workflow-example", 200);
+        const ql = q.toLowerCase();
+        const terms = ql.split(/\s+/).filter(Boolean);
+
+        const scored = blocks
+          .map((b) => {
+            const slug = b.page_name?.replace(/^example-/, "") ?? "";
+            const tags = parseTags(b.tags_json);
+            const hay = `${slug} ${tags.join(" ")} ${(b.content ?? "")}`.toLowerCase();
+            // 每個 term 命中 +1；slug/tag 命中額外加權
+            let score = 0;
+            for (const t of terms) {
+              if (hay.includes(t)) score += 1;
+              if (slug.toLowerCase().includes(t)) score += 2;
+              if (tags.some((tag) => tag.toLowerCase().includes(t))) score += 2;
            }
+            return { b, slug, tags, score };
+          })
+          .filter((r) => r.score > 0)
+          .sort((a, b) => b.score - a.score)
+          .slice(0, k);

-        const data = await resp.json<{ results?: Array<{ block?: KbdbBlock; score?: number }> }>();
-        const all = data.results ?? [];
-        const examples = all
-          .filter((r) => r.block?.type === "workflow-example")
-          .slice(0, k)
-          .map((r) => {
-            const b = r.block!;
-            return {
-              slug: b.page_name?.replace(/^example-/, "") ?? "",
-              page_name: b.page_name,
+        const examples = scored.map((r) => ({
+          slug: r.slug,
+          page_name: r.b.page_name,
          score: r.score,
-              tags: parseTags(b.tags_json),
-              preview: (b.content ?? "").slice(0, 200),
-            };
-          });
+          tags: r.tags,
+          preview: (r.b.content ?? "").slice(0, 200),
+        }));

        if (examples.length === 0) {
          return successResponse(
-            { count: 0, examples: [], query },
+            { count: 0, examples: [], query: q },
            [
-              "沒命中。可能 KBDB /search 還在等 embedding 建好（剛 sync 完要 1-5 分鐘）",
+              "關鍵字沒命中（基本盤是 LIKE 非語義，換更具體/不同的詞再試）",
              "改用 arcrun_list_examples(tag='...') 走 tag 過濾",
              "或 arcrun_list_examples() 看全部清單自己挑",
            ],
@@ -289,10 +300,11 @@ export function registerSearchExamples(server: McpServer, env: Env) {
        }

        return successResponse(
-          { count: examples.length, examples, query },
+          { count: examples.length, examples, query: q, search_mode: "keyword" },
          [
            "call arcrun_get_example(slug) 拿完整 YAML",
-            "score 高 = 跟你 query 更相關",
+            "score 高 = 關鍵字命中越多（slug/tag 命中加權）",
+            "search_mode:keyword — 基本盤無語義，命中靠字面；換具體詞可改善",
          ],
        );
      } catch (e) {
@@ -4,27 +4,31 @@
 對應 LI SDD M3.4。examples / skills 在 git 是 source of truth，
 KBDB 是「給 AI 搜尋 / get」的 query-friendly mirror。

-對 KBDB block：
- examples → type=workflow-example
+2026-06-14 重寫：KBDB 降基本盤後（三表 entries/templates/records，無 v3 blocks 表、
+無 kbdb-upsert-block 零件 worker），原打 https://kbdb-upsert-block.arcrun.dev/ 全失效。
+改打基本盤 KBDB Worker 的 /entries：
+- examples → entry_type=workflow-example
    content = workflow.yaml 全文
-  metadata_json = { description, tags }
-  tags_json = [...tags.json]
-  page_name = example-{slug}    (idempotency key，重複 sync 走 upsert)
-
- skills → type=agent-skill
+    metadata_json = { slug, description_md, tags }
+    tags_json = ["workflow-example", "example:{slug}", *tags]
+    page_name = example-{slug}    (idempotency key)
+- skills → entry_type=agent-skill
    content = {slug}.md 全文
-  page_name = skill-{slug}     (idempotency key)
+    metadata_json = { slug, title }
    tags_json = ["agent-skill", "skill:{slug}"]
+    page_name = skill-{slug}      (idempotency key)
+
+基本盤無 upsert 端點 → 本腳本自己做 idempotency（GET ?page_name= 找到則 PATCH /entries/:id，
+否則 POST /entries）。這是 ops 同步腳本（非 CLI/MCP 薄殼），自行編排不違反 rule 07 薄殼原則。

 執行：
    cd matrix/arcrun
-    python3 scripts/sync-registry-to-kbdb.py            # 上傳所有
+    KBDB_BASE_URL=https://arcrun-kbdb.<subdomain>.workers.dev python3 scripts/sync-registry-to-kbdb.py
    python3 scripts/sync-registry-to-kbdb.py --dry-run    # 只 list 不寫

-需求：
- mira tools/_kbdb_client.py 風格 (urllib + ak_)
- ARCRUN_API_KEY 從 .env 或 env var
- 走 kbdb-*.arcrun.dev 零件 worker endpoints (符合 mira CLAUDE.md §1.7)
+設定：
+- KBDB_BASE_URL  KBDB 基本盤 Worker 的 base URL（必填，無預設——避免誤打到別的環境；僅 --dry-run 可省略，此時不連線查詢既有 entry）
+- KBDB_OWNER_ID  資料歸屬標記（選填，預設 'registry'；基本盤多租戶用 owner_id）
 """

 import argparse
@@ -39,42 +43,30 @@ ARCRUN_ROOT = Path(__file__).resolve().parent.parent
 EXAMPLES_DIR = ARCRUN_ROOT / "registry" / "examples"
 SKILLS_DIR = ARCRUN_ROOT / "registry" / "skills"

-KBDB_UPSERT_URL = "https://kbdb-upsert-block.arcrun.dev/"
-USER_AGENT = "arcrun-registry-sync/1.0"
-USER_ID = "inkstone_platform_registry"  # 需符合 KBDB partner namespace prefix（inkstone_*）
+USER_AGENT = "arcrun-registry-sync/2.0"
+OWNER_ID = os.environ.get("KBDB_OWNER_ID", "registry")
 SOURCE = "registry-git-sync"


-def get_api_key() -> str:
-    """從 env var 或 polaris/mira/.env 取 ARCRUN_API_KEY。"""
-    key = os.environ.get("ARCRUN_API_KEY", "")
-    if key:
-        return key
-    # fallback：找 polaris/mira/.env（leo 既有約定位置）
-    mira_env = ARCRUN_ROOT.parent.parent / "polaris" / "mira" / ".env"
-    if mira_env.exists():
-        for line in mira_env.read_text(encoding="utf-8").splitlines():
-            line = line.strip()
-            if line.startswith("ARCRUN_API_KEY="):
-                return line.split("=", 1)[1].strip()
+def get_base_url() -> str:
+    """KBDB 基本盤 Worker base URL。無預設（避免誤打環境）。"""
+    url = os.environ.get("KBDB_BASE_URL", "").rstrip("/")
+    if url:
+        return url
    raise SystemExit(
-        "ARCRUN_API_KEY 未設定。export ARCRUN_API_KEY=ak_... 或加到 polaris/mira/.env"
+        "KBDB_BASE_URL 未設定。\n"
+        "  export KBDB_BASE_URL=https://arcrun-kbdb.<subdomain>.workers.dev\n"
+        "  （self-hosted 用自己部署的 KBDB Worker URL）"
    )


-def kbdb_upsert(api_key: str, payload: dict, dry_run: bool) -> dict:
-    """POST kbdb-upsert-block.arcrun.dev — page_name 當 idempotency key"""
-    if dry_run:
-        return {"dry_run": True, "would_upsert": payload.get("page_name")}
-    data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
+def _req(method: str, url: str, payload: dict | None = None) -> dict:
+    data = json.dumps(payload, ensure_ascii=False).encode("utf-8") if payload is not None else None
    req = urllib.request.Request(
-        KBDB_UPSERT_URL,
+        url,
        data=data,
-        headers={
-            "Content-Type": "application/json",
-            "User-Agent": USER_AGENT,
-        },
-        method="POST",
+        headers={"Content-Type": "application/json", "User-Agent": USER_AGENT},
+        method=method,
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
@@ -82,10 +74,43 @@ def kbdb_upsert(api_key: str, payload: dict, dry_run: bool) -> dict:
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")
        return {"error": f"HTTP {e.code}: {body[:200]}"}
+    except urllib.error.URLError as e:
+        return {"error": f"URL error: {e}"}


-def sync_examples(api_key: str, dry_run: bool) -> tuple[int, int]:
-    """同步 registry/examples/{slug}/ 進 KBDB"""
+def find_entry_id_by_page_name(base_url: str, page_name: str) -> str | None:
+    """GET /entries?page_name= → 回既有 entry id（idempotency 用），無則 None；注意：查詢失敗（HTTP/URL error）也回 None，呼叫端會當成不存在。"""
+    from urllib.parse import quote
+    res = _req("GET", f"{base_url}/entries?page_name={quote(page_name)}&limit=1")
+    if "error" in res:
+        return None
+    entries = res.get("entries") or []
+    return entries[0].get("id") if entries else None
+
+
+def upsert_entry(base_url: str, payload: dict, dry_run: bool) -> dict:
+    """page_name 當 idempotency key：找到則 PATCH /entries/:id，否則 POST /entries。"""
+    page_name = payload.get("page_name")
+    if dry_run:
+        existing = None if base_url == "DRY" else find_entry_id_by_page_name(base_url, page_name)
+        return {"dry_run": True, "would": "patch" if existing else "post", "page_name": page_name}
+
+    existing_id = find_entry_id_by_page_name(base_url, page_name)
+    if existing_id:
+        # PATCH 只送可變欄位（entry_type/page_name 不變）
+        patch = {k: payload[k] for k in ("content", "tags_json", "metadata_json") if k in payload}
+        res = _req("PATCH", f"{base_url}/entries/{existing_id}", patch)
+        if "error" not in res:
+            res.setdefault("action", "update")
+        return res
+    res = _req("POST", f"{base_url}/entries", payload)
+    if "error" not in res:
+        res.setdefault("action", "create")
+    return res
+
+
+def sync_examples(base_url: str, dry_run: bool) -> tuple[int, int]:
+    """同步 registry/examples/{slug}/ 進 KBDB（entry_type=workflow-example）"""
    if not EXAMPLES_DIR.exists():
        print(f"⚠️  {EXAMPLES_DIR} 不存在，跳過 examples 同步")
        return 0, 0
@@ -104,50 +129,36 @@ def sync_examples(api_key: str, dry_run: bool) -> tuple[int, int]:
            continue

        yaml_content = workflow_yaml.read_text(encoding="utf-8")
-        description = (
-            description_md.read_text(encoding="utf-8") if description_md.exists() else ""
-        )
-        tags = (
-            json.loads(tags_json.read_text(encoding="utf-8")) if tags_json.exists() else []
-        )
+        description = description_md.read_text(encoding="utf-8") if description_md.exists() else ""
+        tags = json.loads(tags_json.read_text(encoding="utf-8")) if tags_json.exists() else []

-        # content = workflow YAML（讓 AI semantic search 命中 YAML 內容）
-        # metadata_json = description + tags 結構化
        payload = {
-            "api_key": api_key,
-            "type": "workflow-example",
+            "entry_type": "workflow-example",
            "page_name": f"example-{slug}",
-            "source": SOURCE,
-            "user_id": USER_ID,
+            "owner_id": OWNER_ID,
            "content": yaml_content,
            "metadata_json": json.dumps(
-                {
-                    "slug": slug,
-                    "description_md": description,
-                    "tags": tags,
-                },
+                {"slug": slug, "description_md": description, "tags": tags, "source": SOURCE},
                ensure_ascii=False,
            ),
            "tags_json": json.dumps(
-                ["workflow-example", f"example:{slug}", *tags],
-                ensure_ascii=False,
+                ["workflow-example", f"example:{slug}", *tags], ensure_ascii=False
            ),
        }

-        result = kbdb_upsert(api_key, payload, dry_run)
+        result = upsert_entry(base_url, payload, dry_run)
        if "error" in result:
            print(f"  ❌ {slug}: {result['error']}")
            fail += 1
        else:
-            action = result.get("data", {}).get("action", "?") if isinstance(result.get("data"), dict) else "?"
-            print(f"  ✅ {slug} → {action}")
+            print(f"  ✅ {slug} → {result.get('action', 'dry-run:' + result.get('would', '?'))}")
            ok += 1

    return ok, fail


-def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]:
-    """同步 registry/skills/*.md 進 KBDB"""
+def sync_skills(base_url: str, dry_run: bool) -> tuple[int, int]:
+    """同步 registry/skills/*.md 進 KBDB（entry_type=agent-skill）"""
    if not SKILLS_DIR.exists():
        print(f"⚠️  {SKILLS_DIR} 不存在，跳過 skills 同步")
        return 0, 0
@@ -159,7 +170,6 @@ def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]:
        slug = md_file.stem
        content = md_file.read_text(encoding="utf-8")

-        # 簡單抓首行 # X 當 title
        title = slug
        for line in content.splitlines():
            line = line.strip()
@@ -168,44 +178,37 @@ def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]:
                break

        payload = {
-            "api_key": api_key,
-            "type": "agent-skill",
+            "entry_type": "agent-skill",
            "page_name": f"skill-{slug}",
-            "source": SOURCE,
-            "user_id": USER_ID,
+            "owner_id": OWNER_ID,
            "content": content,
            "metadata_json": json.dumps(
-                {"slug": slug, "title": title},
-                ensure_ascii=False,
-            ),
-            "tags_json": json.dumps(
-                ["agent-skill", f"skill:{slug}"],
-                ensure_ascii=False,
+                {"slug": slug, "title": title, "source": SOURCE}, ensure_ascii=False
            ),
+            "tags_json": json.dumps(["agent-skill", f"skill:{slug}"], ensure_ascii=False),
        }

-        result = kbdb_upsert(api_key, payload, dry_run)
+        result = upsert_entry(base_url, payload, dry_run)
        if "error" in result:
            print(f"  ❌ {slug}: {result['error']}")
            fail += 1
        else:
-            action = result.get("data", {}).get("action", "?") if isinstance(result.get("data"), dict) else "?"
-            print(f"  ✅ {slug} → {action}")
+            print(f"  ✅ {slug} → {result.get('action', 'dry-run:' + result.get('would', '?'))}")
            ok += 1

    return ok, fail


 def main():
-    p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB")
+    p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB base (/entries)")
    p.add_argument("--dry-run", action="store_true", help="只 list 不寫")
    p.add_argument("--examples-only", action="store_true")
    p.add_argument("--skills-only", action="store_true")
    args = p.parse_args()

-    api_key = get_api_key()
-    print(f"🔑 api_key: {api_key[:12]}... (len={len(api_key)})")
-    print(f"📂 root: {ARCRUN_ROOT}")
+    base_url = "DRY" if args.dry_run and not os.environ.get("KBDB_BASE_URL") else get_base_url()
+    print(f"🌐 KBDB base: {base_url}")
+    print(f"📂 root: {ARCRUN_ROOT}  (owner_id={OWNER_ID})")
    if args.dry_run:
        print("(dry-run，不實際寫 KBDB)")
    print()
@@ -214,13 +217,13 @@ def main():
    skills_ok = skills_fail = 0

    if not args.skills_only:
-        print("📋 Syncing examples → type=workflow-example ...")
-        examples_ok, examples_fail = sync_examples(api_key, args.dry_run)
+        print("📋 Syncing examples → entry_type=workflow-example ...")
+        examples_ok, examples_fail = sync_examples(base_url, args.dry_run)
        print(f"   examples: {examples_ok} ok / {examples_fail} fail\n")

    if not args.examples_only:
-        print("📋 Syncing skills → type=agent-skill ...")
-        skills_ok, skills_fail = sync_skills(api_key, args.dry_run)
+        print("📋 Syncing skills → entry_type=agent-skill ...")
+        skills_ok, skills_fail = sync_skills(base_url, args.dry_run)
        print(f"   skills: {skills_ok} ok / {skills_fail} fail\n")

    total_fail = examples_fail + skills_fail