fix(mcp,kbdb): LI M3 skills/examples 改打基本盤 /entries(修死 route 假綠)

skills/examples 整條從舊 v3 /blocks /search 改打 KBDB 基本盤 /entries
(entry_type 對應)。5 個已上線 MCP 工具原本對死 route 回 404(假綠),
現修正;sync-registry-to-kbdb.py 改打 /entries idempotent upsert。
誠實降級:基本盤無語義 search → LIKE 關鍵字(embed 模組上線再換回語義)。
順手 gitignore scripts/__pycache__/。

對應 kbdb-base tasks 9.4 / llm-interface M3.2/M3.4。mcp + kbdb tsc exit 0。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
uncle6me-web
2026-06-14 22:12:11 +08:00
parent ef1f789525
commit b9bf3ec3d5
3 changed files with 159 additions and 143 deletions
+1
View File
@@ -47,3 +47,4 @@ CONTRIBUTING.md
ruvector.db
**/.swarm/
**/ruvector.db
scripts/__pycache__/
+62 -50
View File
@@ -1,18 +1,27 @@
/**
* Skills + Examples lookup MCP tools — LI SDD M3.2
* Skills + Examples lookup MCP tools — LI SDD M3.2 / M3.4
*
* 對應 .agents/specs/llm-interface/ Milestone 3.2 + 3.4。
* 對應 docs/3-specs/llm-interface/ Milestone 3.2 + 3.4。
*
* - arcrun_list_skills — 列 KBDB type=agent-skill 全部
* - arcrun_list_skills — 列 KBDB entry_type=agent-skill 全部
* - arcrun_get_skill — 用 slug 拿 skill markdown 全文
* - arcrun_list_examples — 列 KBDB type=workflow-example 全部
* - arcrun_list_examples — 列 KBDB entry_type=workflow-example 全部
* - arcrun_get_example — 用 slug 拿 example yaml + description + tags
* - arcrun_search_examples — 自然語言 use case → 命中相關 example
* - arcrun_search_examples — use case 關鍵字 → 命中相關 example
*
* Skills / examples 由 arcrun/scripts/sync-registry-to-kbdb.py 從
* arcrun/registry/{skills,examples} 同步進 KBDB。
*
* 直接走 KBDB service binding(既有 pattern),不經 cypher-executor。
*
* 2026-06-14 重寫:KBDB 降基本盤後(三表 entries/templates/records,無 v3 blocks 表、
* 無語義 search),原打 /blocks /search 的舊路徑全失效。改打基本盤 /entries:
* - entry_type 取代 blocks 的 type 欄(entries 表原生有 entry_type/page_name/tags_json/metadata_json)
* - GET /blocks?type=X → GET /entries?entry_type=X
* - GET /blocks?page_name=Y → GET /entries?page_name=Y(base listEntries 加了 page_name 過濾)
* - POST /search(語義) → GET /entries/search?q=(D1 LIKE 關鍵字,基本盤無語義;
* 誠實降級:search_examples 現在是「關鍵字」非「語義」)。embed 模組(kbdb-base Phase 1)
* 上線後只換內部、工具簽名不變。
*/
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
@@ -21,29 +30,30 @@ import type { Env } from "../types.js";
import { kbdbFetch } from "../lib/kbdb-client.js";
import { errorResponse, successResponse } from "../lib/cypher-client.js";
// 基本盤 entries row(與舊 v3 block 欄位 1:1,差別只在 type→entry_type)
interface KbdbBlock {
id: string;
page_name?: string | null;
content?: string | null;
type?: string;
entry_type?: string;
tags_json?: string;
metadata_json?: string | null;
source?: string | null;
updated_at?: number;
}
async function kbdbList(env: Env, type: string, limit = 100): Promise<KbdbBlock[]> {
const resp = await kbdbFetch(env, `/blocks?type=${type}&limit=${limit}`);
if (!resp.ok) throw new Error(`KBDB list type=${type} HTTP ${resp.status}`);
const data = await resp.json<{ blocks?: KbdbBlock[] }>();
return data.blocks ?? [];
/**
 * List KBDB entries of a single entry_type via `GET /entries`.
 *
 * @param env - Worker environment, passed straight through to `kbdbFetch`.
 * @param entryType - Value for the `entry_type` query parameter (URL-encoded here).
 * @param limit - Value for the `limit` query parameter; defaults to 100.
 * @returns The `entries` array from the JSON response, or `[]` when that field is absent.
 * @throws Error when the response status is not ok; the message carries the
 *   entry_type and the HTTP status.
 */
async function kbdbList(env: Env, entryType: string, limit = 100): Promise<KbdbBlock[]> {
// entryType is encoded so it cannot alter the query string; limit is a number and is interpolated as-is.
const resp = await kbdbFetch(env, `/entries?entry_type=${encodeURIComponent(entryType)}&limit=${limit}`);
// Unlike a "not found" lookup, a failed list is surfaced to the caller instead of being mapped to [].
if (!resp.ok) throw new Error(`KBDB list entry_type=${entryType} HTTP ${resp.status}`);
// NOTE(review): the response shape is asserted via the generic, not validated at runtime.
const data = await resp.json<{ entries?: KbdbBlock[] }>();
return data.entries ?? [];
}
async function kbdbGetByPageName(env: Env, pageName: string): Promise<KbdbBlock | null> {
const resp = await kbdbFetch(env, `/blocks?page_name=${encodeURIComponent(pageName)}&limit=1`);
const resp = await kbdbFetch(env, `/entries?page_name=${encodeURIComponent(pageName)}&limit=1`);
if (!resp.ok) return null;
const data = await resp.json<{ blocks?: KbdbBlock[] }>();
return data.blocks?.[0] ?? null;
const data = await resp.json<{ entries?: KbdbBlock[] }>();
return data.entries?.[0] ?? null;
}
function parseTags(tagsJson?: string): string[] {
@@ -234,54 +244,55 @@ export function registerGetExample(server: McpServer, env: Env) {
export function registerSearchExamples(server: McpServer, env: Env) {
server.tool(
"arcrun_search_examples",
"用自然語言 use case 搜 workflow examples回最相關 N 個。內部走 KBDB semantic searchembedding 比對)+ tag 過濾。",
"用 use case 關鍵字搜 workflow examples回最相關 N 個。" +
"注意:基本盤目前是 D1 LIKE 關鍵字搜尋(非語義 embedding;語義是 kbdb-base Phase 1 的 embed 模組,尚未上)。" +
"→ 用具體詞('email'、'cron'、'rag')比整句自然語言命中率高。也會比對 slug/tag。",
{
query: z.string().min(3).describe("use case 描述,例如 '每天早上發 email 摘要' / 'RAG 從文件回答問題'"),
query: z.string().min(2).describe("use case 關鍵字,例如 'email 摘要' / 'cron 排程' / 'rag'。基本盤是關鍵字非語義,用詞要具體"),
top_k: z.number().int().min(1).max(20).optional().describe("回幾個結果(預設 5"),
},
async ({ query, top_k }) => {
try {
const k = top_k ?? 5;
// KBDB /search 是 unified semantic search(既有),filter type=workflow-example
const resp = await kbdbFetch(env, `/search`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
query,
topK: k * 3, // overfetch 後 filter type
}),
});
const q = query.trim();
if (!resp.ok) {
return errorResponse(
"fetch_failed",
`KBDB search HTTP ${resp.status}`,
["稍後重試", "改用 arcrun_list_examples(tag=...) 過濾"],
await resp.text().catch(() => ""),
);
// 基本盤無語義 search:撈全部 workflow-example,用 query 對 content/slug/tag 做關鍵字比對排序。
// (examples 只有 ~10 筆,client 端過濾零負擔;embed 模組上線後可改打語義 search)
const blocks = await kbdbList(env, "workflow-example", 200);
const ql = q.toLowerCase();
const terms = ql.split(/\s+/).filter(Boolean);
const scored = blocks
.map((b) => {
const slug = b.page_name?.replace(/^example-/, "") ?? "";
const tags = parseTags(b.tags_json);
const hay = `${slug} ${tags.join(" ")} ${(b.content ?? "")}`.toLowerCase();
// 每個 term 命中 +1;slug/tag 命中額外加權
let score = 0;
for (const t of terms) {
if (hay.includes(t)) score += 1;
if (slug.toLowerCase().includes(t)) score += 2;
if (tags.some((tag) => tag.toLowerCase().includes(t))) score += 2;
}
return { b, slug, tags, score };
})
.filter((r) => r.score > 0)
.sort((a, b) => b.score - a.score)
.slice(0, k);
const data = await resp.json<{ results?: Array<{ block?: KbdbBlock; score?: number }> }>();
const all = data.results ?? [];
const examples = all
.filter((r) => r.block?.type === "workflow-example")
.slice(0, k)
.map((r) => {
const b = r.block!;
return {
slug: b.page_name?.replace(/^example-/, "") ?? "",
page_name: b.page_name,
const examples = scored.map((r) => ({
slug: r.slug,
page_name: r.b.page_name,
score: r.score,
tags: parseTags(b.tags_json),
preview: (b.content ?? "").slice(0, 200),
};
});
tags: r.tags,
preview: (r.b.content ?? "").slice(0, 200),
}));
if (examples.length === 0) {
return successResponse(
{ count: 0, examples: [], query },
{ count: 0, examples: [], query: q },
[
"沒命中。可能 KBDB /search 還在等 embedding 建好(剛 sync 完要 1-5 分鐘",
"關鍵字沒命中(基本盤是 LIKE 非語義,換更具體/不同的詞再試",
"改用 arcrun_list_examples(tag='...') 走 tag 過濾",
"或 arcrun_list_examples() 看全部清單自己挑",
],
@@ -289,10 +300,11 @@ export function registerSearchExamples(server: McpServer, env: Env) {
}
return successResponse(
{ count: examples.length, examples, query },
{ count: examples.length, examples, query: q, search_mode: "keyword" },
[
"call arcrun_get_example(slug) 拿完整 YAML",
"score 高 = 跟你 query 更相關",
"score 高 = 關鍵字命中越多(slug/tag 命中加權)",
"search_mode:keyword — 基本盤無語義,命中靠字面;換具體詞可改善",
],
);
} catch (e) {
+91 -88
View File
@@ -4,27 +4,31 @@
對應 LI SDD M3.4。examples / skills 在 git 是 source of truth
KBDB 是「給 AI 搜尋 / get」的 query-friendly mirror。
對 KBDB block
- examples → type=workflow-example
2026-06-14 重寫:KBDB 降基本盤後(三表 entries/templates/records,無 v3 blocks 表、
無 kbdb-upsert-block 零件 worker),原打 https://kbdb-upsert-block.arcrun.dev/ 全失效。
改打基本盤 KBDB Worker 的 /entries
- examples → entry_type=workflow-example
content = workflow.yaml 全文
metadata_json = { description, tags }
tags_json = [...tags.json]
page_name = example-{slug} (idempotency key,重複 sync 走 upsert)
- skills → type=agent-skill
metadata_json = { slug, description_md, tags }
tags_json = ["workflow-example", "example:{slug}", *tags]
page_name = example-{slug} (idempotency key)
- skills → entry_type=agent-skill
content = {slug}.md 全文
page_name = skill-{slug} (idempotency key)
metadata_json = { slug, title }
tags_json = ["agent-skill", "skill:{slug}"]
page_name = skill-{slug} (idempotency key)
基本盤無 upsert 端點 → 本腳本自己做 idempotency(GET ?page_name= 找到則 PATCH /entries/:id,
否則 POST /entries)。這是 ops 同步腳本(非 CLI/MCP 薄殼),自行編排不違反 rule 07 薄殼原則。
執行:
cd matrix/arcrun
python3 scripts/sync-registry-to-kbdb.py # 上傳所有
KBDB_BASE_URL=https://arcrun-kbdb.<subdomain>.workers.dev python3 scripts/sync-registry-to-kbdb.py
python3 scripts/sync-registry-to-kbdb.py --dry-run # 只 list 不寫
需求
- mira tools/_kbdb_client.py 風格 (urllib + ak_)
- ARCRUN_API_KEY 從 .env 或 env var
- 走 kbdb-*.arcrun.dev 零件 worker endpoints (符合 mira CLAUDE.md §1.7)
設定
- KBDB_BASE_URL KBDB 基本盤 Worker 的 base URL(必填,無預設——避免誤打到別的環境)
- KBDB_OWNER_ID 資料歸屬標記(選填,預設 'registry';基本盤多租戶用 owner_id)
"""
import argparse
@@ -39,42 +43,30 @@ ARCRUN_ROOT = Path(__file__).resolve().parent.parent
EXAMPLES_DIR = ARCRUN_ROOT / "registry" / "examples"
SKILLS_DIR = ARCRUN_ROOT / "registry" / "skills"
KBDB_UPSERT_URL = "https://kbdb-upsert-block.arcrun.dev/"
USER_AGENT = "arcrun-registry-sync/1.0"
USER_ID = "inkstone_platform_registry" # 需符合 KBDB partner namespace prefixinkstone_*
USER_AGENT = "arcrun-registry-sync/2.0"
OWNER_ID = os.environ.get("KBDB_OWNER_ID", "registry")
SOURCE = "registry-git-sync"
def get_api_key() -> str:
"""從 env var 或 polaris/mira/.env 取 ARCRUN_API_KEY"""
key = os.environ.get("ARCRUN_API_KEY", "")
if key:
return key
# fallback:找 polaris/mira/.envleo 既有約定位置)
mira_env = ARCRUN_ROOT.parent.parent / "polaris" / "mira" / ".env"
if mira_env.exists():
for line in mira_env.read_text(encoding="utf-8").splitlines():
line = line.strip()
if line.startswith("ARCRUN_API_KEY="):
return line.split("=", 1)[1].strip()
def get_base_url() -> str:
"""KBDB 基本盤 Worker base URL。無預設(避免誤打環境)"""
url = os.environ.get("KBDB_BASE_URL", "").rstrip("/")
if url:
return url
raise SystemExit(
"ARCRUN_API_KEY 未設定。export ARCRUN_API_KEY=ak_... 或加到 polaris/mira/.env"
"KBDB_BASE_URL 未設定。\n"
" export KBDB_BASE_URL=https://arcrun-kbdb.<subdomain>.workers.dev\n"
" self-hosted 用自己部署的 KBDB Worker URL"
)
def kbdb_upsert(api_key: str, payload: dict, dry_run: bool) -> dict:
"""POST kbdb-upsert-block.arcrun.dev — page_name 當 idempotency key"""
if dry_run:
return {"dry_run": True, "would_upsert": payload.get("page_name")}
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
def _req(method: str, url: str, payload: dict | None = None) -> dict:
data = json.dumps(payload, ensure_ascii=False).encode("utf-8") if payload is not None else None
req = urllib.request.Request(
KBDB_UPSERT_URL,
url,
data=data,
headers={
"Content-Type": "application/json",
"User-Agent": USER_AGENT,
},
method="POST",
headers={"Content-Type": "application/json", "User-Agent": USER_AGENT},
method=method,
)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
@@ -82,10 +74,43 @@ def kbdb_upsert(api_key: str, payload: dict, dry_run: bool) -> dict:
except urllib.error.HTTPError as e:
body = e.read().decode("utf-8", errors="replace")
return {"error": f"HTTP {e.code}: {body[:200]}"}
except urllib.error.URLError as e:
return {"error": f"URL error: {e}"}
def sync_examples(api_key: str, dry_run: bool) -> tuple[int, int]:
"""同步 registry/examples/{slug}/ 進 KBDB"""
def find_entry_id_by_page_name(base_url: str, page_name: str) -> str | None:
"""GET /entries?page_name= → 回既有 entry ididempotency 用),無則 None。"""
from urllib.parse import quote
res = _req("GET", f"{base_url}/entries?page_name={quote(page_name)}&limit=1")
if "error" in res:
return None
entries = res.get("entries") or []
return entries[0].get("id") if entries else None
def upsert_entry(base_url: str, payload: dict, dry_run: bool) -> dict:
"""page_name 當 idempotency key:找到則 PATCH /entries/:id,否則 POST /entries。"""
page_name = payload.get("page_name")
if dry_run:
existing = None if base_url == "DRY" else find_entry_id_by_page_name(base_url, page_name)
return {"dry_run": True, "would": "patch" if existing else "post", "page_name": page_name}
existing_id = find_entry_id_by_page_name(base_url, page_name)
if existing_id:
# PATCH 只送可變欄位(entry_type/page_name 不變)
patch = {k: payload[k] for k in ("content", "tags_json", "metadata_json") if k in payload}
res = _req("PATCH", f"{base_url}/entries/{existing_id}", patch)
if "error" not in res:
res.setdefault("action", "update")
return res
res = _req("POST", f"{base_url}/entries", payload)
if "error" not in res:
res.setdefault("action", "create")
return res
def sync_examples(base_url: str, dry_run: bool) -> tuple[int, int]:
"""同步 registry/examples/{slug}/ 進 KBDB(entry_type=workflow-example)"""
if not EXAMPLES_DIR.exists():
print(f"⚠️ {EXAMPLES_DIR} 不存在,跳過 examples 同步")
return 0, 0
@@ -104,50 +129,36 @@ def sync_examples(api_key: str, dry_run: bool) -> tuple[int, int]:
continue
yaml_content = workflow_yaml.read_text(encoding="utf-8")
description = (
description_md.read_text(encoding="utf-8") if description_md.exists() else ""
)
tags = (
json.loads(tags_json.read_text(encoding="utf-8")) if tags_json.exists() else []
)
description = description_md.read_text(encoding="utf-8") if description_md.exists() else ""
tags = json.loads(tags_json.read_text(encoding="utf-8")) if tags_json.exists() else []
# content = workflow YAML(讓 AI semantic search 命中 YAML 內容)
# metadata_json = description + tags 結構化
payload = {
"api_key": api_key,
"type": "workflow-example",
"entry_type": "workflow-example",
"page_name": f"example-{slug}",
"source": SOURCE,
"user_id": USER_ID,
"owner_id": OWNER_ID,
"content": yaml_content,
"metadata_json": json.dumps(
{
"slug": slug,
"description_md": description,
"tags": tags,
},
{"slug": slug, "description_md": description, "tags": tags, "source": SOURCE},
ensure_ascii=False,
),
"tags_json": json.dumps(
["workflow-example", f"example:{slug}", *tags],
ensure_ascii=False,
["workflow-example", f"example:{slug}", *tags], ensure_ascii=False
),
}
result = kbdb_upsert(api_key, payload, dry_run)
result = upsert_entry(base_url, payload, dry_run)
if "error" in result:
print(f"{slug}: {result['error']}")
fail += 1
else:
action = result.get("data", {}).get("action", "?") if isinstance(result.get("data"), dict) else "?"
print(f"{slug}{action}")
print(f"{slug}{result.get('action', 'dry-run:' + result.get('would', '?'))}")
ok += 1
return ok, fail
def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]:
"""同步 registry/skills/*.md 進 KBDB"""
def sync_skills(base_url: str, dry_run: bool) -> tuple[int, int]:
"""同步 registry/skills/*.md 進 KBDB(entry_type=agent-skill)"""
if not SKILLS_DIR.exists():
print(f"⚠️ {SKILLS_DIR} 不存在,跳過 skills 同步")
return 0, 0
@@ -159,7 +170,6 @@ def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]:
slug = md_file.stem
content = md_file.read_text(encoding="utf-8")
# 簡單抓首行 # X 當 title
title = slug
for line in content.splitlines():
line = line.strip()
@@ -168,44 +178,37 @@ def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]:
break
payload = {
"api_key": api_key,
"type": "agent-skill",
"entry_type": "agent-skill",
"page_name": f"skill-{slug}",
"source": SOURCE,
"user_id": USER_ID,
"owner_id": OWNER_ID,
"content": content,
"metadata_json": json.dumps(
{"slug": slug, "title": title},
ensure_ascii=False,
),
"tags_json": json.dumps(
["agent-skill", f"skill:{slug}"],
ensure_ascii=False,
{"slug": slug, "title": title, "source": SOURCE}, ensure_ascii=False
),
"tags_json": json.dumps(["agent-skill", f"skill:{slug}"], ensure_ascii=False),
}
result = kbdb_upsert(api_key, payload, dry_run)
result = upsert_entry(base_url, payload, dry_run)
if "error" in result:
print(f"{slug}: {result['error']}")
fail += 1
else:
action = result.get("data", {}).get("action", "?") if isinstance(result.get("data"), dict) else "?"
print(f"{slug}{action}")
print(f"{slug}{result.get('action', 'dry-run:' + result.get('would', '?'))}")
ok += 1
return ok, fail
def main():
p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB")
p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB base (/entries)")
p.add_argument("--dry-run", action="store_true", help="只 list 不寫")
p.add_argument("--examples-only", action="store_true")
p.add_argument("--skills-only", action="store_true")
args = p.parse_args()
api_key = get_api_key()
print(f"🔑 api_key: {api_key[:12]}... (len={len(api_key)})")
print(f"📂 root: {ARCRUN_ROOT}")
base_url = "DRY" if args.dry_run and not os.environ.get("KBDB_BASE_URL") else get_base_url()
print(f"🌐 KBDB base: {base_url}")
print(f"📂 root: {ARCRUN_ROOT} (owner_id={OWNER_ID})")
if args.dry_run:
print("(dry-run,不實際寫 KBDB)")
print()
@@ -214,13 +217,13 @@ def main():
skills_ok = skills_fail = 0
if not args.skills_only:
print("📋 Syncing examples → type=workflow-example ...")
examples_ok, examples_fail = sync_examples(api_key, args.dry_run)
print("📋 Syncing examples → entry_type=workflow-example ...")
examples_ok, examples_fail = sync_examples(base_url, args.dry_run)
print(f" examples: {examples_ok} ok / {examples_fail} fail\n")
if not args.examples_only:
print("📋 Syncing skills → type=agent-skill ...")
skills_ok, skills_fail = sync_skills(api_key, args.dry_run)
print("📋 Syncing skills → entry_type=agent-skill ...")
skills_ok, skills_fail = sync_skills(base_url, args.dry_run)
print(f" skills: {skills_ok} ok / {skills_fail} fail\n")
total_fail = examples_fail + skills_fail