Files
Arcrun/scripts/sync-registry-to-kbdb.py
uncle6me-web b9bf3ec3d5 fix(mcp,kbdb): LI M3 skills/examples 改打基本盤 /entries(修死 route 假綠)
skills/examples 整條從舊 v3 /blocks /search 改打 KBDB 基本盤 /entries
(entry_type 對應)。5 個已上線 MCP 工具原本對死 route 回 404(假綠),
現修正;sync-registry-to-kbdb.py 改打 /entries idempotent upsert。
誠實降級:基本盤無語義 search → LIKE 關鍵字(embed 模組上線再換回語義)。
順手 gitignore scripts/__pycache__/。

對應 kbdb-base tasks 9.4 / llm-interface M3.2/M3.4。mcp + kbdb tsc exit 0。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 22:12:11 +08:00

238 lines
9.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""sync-registry-to-kbdb.py — 把 registry/examples + registry/skills 同步進 KBDB
對應 LI SDD M3.4。examples / skills 在 git 是 source of truth
KBDB 是「給 AI 搜尋 / get」的 query-friendly mirror。
2026-06-14 重寫:KBDB 降基本盤後(三表 entries/templates/records,無 v3 blocks 表、
無 kbdb-upsert-block 零件 worker),原打 https://kbdb-upsert-block.arcrun.dev/ 全失效。
改打基本盤 KBDB Worker 的 /entries
- examples → entry_type=workflow-example
content = workflow.yaml 全文
metadata_json = { slug, description_md, tags }
tags_json = ["workflow-example", "example:{slug}", *tags]
page_name = example-{slug} (idempotency key)
- skills → entry_type=agent-skill
content = {slug}.md 全文
metadata_json = { slug, title }
tags_json = ["agent-skill", "skill:{slug}"]
page_name = skill-{slug} (idempotency key)
基本盤無 upsert 端點 → 本腳本自己做 idempotency(GET ?page_name= 找到則 PATCH /entries/:id,
否則 POST /entries)。這是 ops 同步腳本(非 CLI/MCP 薄殼),自行編排不違反 rule 07 薄殼原則。
執行:
cd matrix/arcrun
KBDB_BASE_URL=https://arcrun-kbdb.<subdomain>.workers.dev python3 scripts/sync-registry-to-kbdb.py
python3 scripts/sync-registry-to-kbdb.py --dry-run # 只 list 不寫
設定:
- KBDB_BASE_URL KBDB 基本盤 Worker 的 base URL(必填,無預設——避免誤打到別的環境)
- KBDB_OWNER_ID 資料歸屬標記(選填,預設 'registry';基本盤多租戶用 owner_id)
"""
import argparse
import json
import os
import sys
import urllib.request
import urllib.error
from pathlib import Path
# Two levels above this file: the directory that contains the script's own folder.
ARCRUN_ROOT = Path(__file__).resolve().parent.parent
# Git-tracked directories that are mirrored into KBDB by this script.
EXAMPLES_DIR = ARCRUN_ROOT / "registry" / "examples"
SKILLS_DIR = ARCRUN_ROOT / "registry" / "skills"
# Sent as the User-Agent header on every HTTP request.
USER_AGENT = "arcrun-registry-sync/2.0"
# Written to each entry's owner_id; overridable through the KBDB_OWNER_ID env var.
OWNER_ID = os.environ.get("KBDB_OWNER_ID", "registry")
# Stored under the "source" key inside each entry's metadata_json.
SOURCE = "registry-git-sync"
def get_base_url() -> str:
    """Return the KBDB base Worker URL read from ``KBDB_BASE_URL``.

    Trailing slashes are removed. No default is provided on purpose, so the
    script can never silently target the wrong environment.

    Raises:
        SystemExit: If ``KBDB_BASE_URL`` is unset or empty once the trailing
            slashes are stripped; the exit message explains how to set it.
    """
    configured = os.environ.get("KBDB_BASE_URL", "").rstrip("/")
    if not configured:
        raise SystemExit(
            "KBDB_BASE_URL 未設定。\n"
            " export KBDB_BASE_URL=https://arcrun-kbdb.<subdomain>.workers.dev\n"
            " self-hosted 用自己部署的 KBDB Worker URL"
        )
    return configured
def _req(method: str, url: str, payload: dict | None = None) -> dict:
data = json.dumps(payload, ensure_ascii=False).encode("utf-8") if payload is not None else None
req = urllib.request.Request(
url,
data=data,
headers={"Content-Type": "application/json", "User-Agent": USER_AGENT},
method=method,
)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode("utf-8"))
except urllib.error.HTTPError as e:
body = e.read().decode("utf-8", errors="replace")
return {"error": f"HTTP {e.code}: {body[:200]}"}
except urllib.error.URLError as e:
return {"error": f"URL error: {e}"}
def find_entry_id_by_page_name(base_url: str, page_name: str) -> str | None:
"""GET /entries?page_name= → 回既有 entry ididempotency 用),無則 None。"""
from urllib.parse import quote
res = _req("GET", f"{base_url}/entries?page_name={quote(page_name)}&limit=1")
if "error" in res:
return None
entries = res.get("entries") or []
return entries[0].get("id") if entries else None
def upsert_entry(base_url: str, payload: dict, dry_run: bool) -> dict:
"""page_name 當 idempotency key:找到則 PATCH /entries/:id,否則 POST /entries。"""
page_name = payload.get("page_name")
if dry_run:
existing = None if base_url == "DRY" else find_entry_id_by_page_name(base_url, page_name)
return {"dry_run": True, "would": "patch" if existing else "post", "page_name": page_name}
existing_id = find_entry_id_by_page_name(base_url, page_name)
if existing_id:
# PATCH 只送可變欄位(entry_type/page_name 不變)
patch = {k: payload[k] for k in ("content", "tags_json", "metadata_json") if k in payload}
res = _req("PATCH", f"{base_url}/entries/{existing_id}", patch)
if "error" not in res:
res.setdefault("action", "update")
return res
res = _req("POST", f"{base_url}/entries", payload)
if "error" not in res:
res.setdefault("action", "create")
return res
def sync_examples(base_url: str, dry_run: bool) -> tuple[int, int]:
    """Sync every ``registry/examples/{slug}/`` directory into KBDB.

    Each example becomes an entry with ``entry_type=workflow-example`` and
    ``page_name=example-{slug}``. ``workflow.yaml`` is required (directories
    without it are skipped and not counted); ``description.md`` and
    ``tags.json`` are optional.

    An unreadable file or a ``tags.json`` that is not a valid JSON array is
    counted as a failure for that example only; the remaining examples are
    still processed.

    Args:
        base_url: KBDB base URL, forwarded to ``upsert_entry``.
        dry_run: When true, nothing is written to KBDB.

    Returns:
        ``(ok, fail)`` counts of examples synced and failed.
    """
    if not EXAMPLES_DIR.exists():
        print(f"⚠️ {EXAMPLES_DIR} 不存在,跳過 examples 同步")
        return 0, 0

    ok, fail = 0, 0
    for slug_dir in sorted(EXAMPLES_DIR.iterdir()):
        if not slug_dir.is_dir():
            continue
        slug = slug_dir.name
        workflow_yaml = slug_dir / "workflow.yaml"
        description_md = slug_dir / "description.md"
        tags_json = slug_dir / "tags.json"
        if not workflow_yaml.exists():
            print(f" ⚠️ {slug}: 缺 workflow.yaml,跳過")
            continue

        try:
            yaml_content = workflow_yaml.read_text(encoding="utf-8")
            description = description_md.read_text(encoding="utf-8") if description_md.exists() else ""
            tags = json.loads(tags_json.read_text(encoding="utf-8")) if tags_json.exists() else []
            if not isinstance(tags, list):
                # A dict or string would be silently splatted into tags_json below.
                raise ValueError("tags.json must contain a JSON array")
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f" ❌ {slug}: {exc}")
            fail += 1
            continue

        payload = {
            "entry_type": "workflow-example",
            "page_name": f"example-{slug}",
            "owner_id": OWNER_ID,
            "content": yaml_content,
            "metadata_json": json.dumps(
                {"slug": slug, "description_md": description, "tags": tags, "source": SOURCE},
                ensure_ascii=False,
            ),
            "tags_json": json.dumps(
                ["workflow-example", f"example:{slug}", *tags], ensure_ascii=False
            ),
        }
        result = upsert_entry(base_url, payload, dry_run)
        if "error" in result:
            print(f" ❌ {slug}: {result['error']}")
            fail += 1
        else:
            # Real runs carry "action"; dry-runs only carry "would".
            action = result.get("action", "dry-run:" + result.get("would", "?"))
            print(f" ✅ {slug} ({action})")
            ok += 1
    return ok, fail
def sync_skills(base_url: str, dry_run: bool) -> tuple[int, int]:
    """Sync every ``registry/skills/*.md`` file (except README.md) into KBDB.

    Each skill becomes an entry with ``entry_type=agent-skill`` and
    ``page_name=skill-{slug}``, where the slug is the file stem. The title is
    the text of the first line that starts with ``"# "``, falling back to the
    slug when there is none.

    A file that cannot be read or decoded as UTF-8 is counted as a failure for
    that skill only; the remaining skills are still processed.

    Args:
        base_url: KBDB base URL, forwarded to ``upsert_entry``.
        dry_run: When true, nothing is written to KBDB.

    Returns:
        ``(ok, fail)`` counts of skills synced and failed.
    """
    if not SKILLS_DIR.exists():
        print(f"⚠️ {SKILLS_DIR} 不存在,跳過 skills 同步")
        return 0, 0

    ok, fail = 0, 0
    for md_file in sorted(SKILLS_DIR.glob("*.md")):
        if md_file.name == "README.md":
            continue
        slug = md_file.stem

        try:
            content = md_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            print(f" ❌ {slug}: {exc}")
            fail += 1
            continue

        # First level-1 markdown heading wins; otherwise the slug is the title.
        title = next(
            (
                stripped[2:].strip()
                for stripped in (line.strip() for line in content.splitlines())
                if stripped.startswith("# ")
            ),
            slug,
        )

        payload = {
            "entry_type": "agent-skill",
            "page_name": f"skill-{slug}",
            "owner_id": OWNER_ID,
            "content": content,
            "metadata_json": json.dumps(
                {"slug": slug, "title": title, "source": SOURCE}, ensure_ascii=False
            ),
            "tags_json": json.dumps(["agent-skill", f"skill:{slug}"], ensure_ascii=False),
        }
        result = upsert_entry(base_url, payload, dry_run)
        if "error" in result:
            print(f" ❌ {slug}: {result['error']}")
            fail += 1
        else:
            # Real runs carry "action"; dry-runs only carry "would".
            action = result.get("action", "dry-run:" + result.get("would", "?"))
            print(f" ✅ {slug} ({action})")
            ok += 1
    return ok, fail
def main():
    """CLI entry point: sync registry examples and/or skills into KBDB.

    Flags:
        --dry-run        List what would happen without writing to KBDB. When
                         KBDB_BASE_URL is also unset, no server is contacted.
        --examples-only  Sync only registry/examples.
        --skills-only    Sync only registry/skills.

    ``--examples-only`` and ``--skills-only`` are mutually exclusive; passing
    both used to sync nothing while still reporting success.

    Exits with status 1 when any item failed, and with argparse's usage error
    (status 2) on invalid flags.
    """
    p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB base (/entries)")
    p.add_argument("--dry-run", action="store_true", help="只 list 不寫")
    only = p.add_mutually_exclusive_group()
    only.add_argument("--examples-only", action="store_true")
    only.add_argument("--skills-only", action="store_true")
    args = p.parse_args()

    # A dry-run without a configured server uses the "DRY" sentinel so that
    # no lookup request is attempted.
    base_url = "DRY" if args.dry_run and not os.environ.get("KBDB_BASE_URL") else get_base_url()
    print(f"🌐 KBDB base: {base_url}")
    print(f"📂 root: {ARCRUN_ROOT} (owner_id={OWNER_ID})")
    if args.dry_run:
        print("(dry-run,不實際寫 KBDB)")
    print()

    examples_ok = examples_fail = 0
    skills_ok = skills_fail = 0
    if not args.skills_only:
        print("📋 Syncing examples → entry_type=workflow-example ...")
        examples_ok, examples_fail = sync_examples(base_url, args.dry_run)
        print(f" examples: {examples_ok} ok / {examples_fail} fail\n")
    if not args.examples_only:
        print("📋 Syncing skills → entry_type=agent-skill ...")
        skills_ok, skills_fail = sync_skills(base_url, args.dry_run)
        print(f" skills: {skills_ok} ok / {skills_fail} fail\n")

    total_fail = examples_fail + skills_fail
    if total_fail > 0:
        print(f"⚠️ 共 {total_fail} 個項目失敗")
        sys.exit(1)
    print(f"✅ Done. examples={examples_ok}, skills={skills_ok}")


if __name__ == "__main__":
    main()