b9bf3ec3d5
skills/examples 整條從舊 v3 /blocks /search 改打 KBDB 基本盤 /entries (entry_type 對應)。5 個已上線 MCP 工具原本對死 route 回 404(假綠), 現修正;sync-registry-to-kbdb.py 改打 /entries idempotent upsert。 誠實降級:基本盤無語義 search → LIKE 關鍵字(embed 模組上線再換回語義)。 順手 gitignore scripts/__pycache__/。 對應 kbdb-base tasks 9.4 / llm-interface M3.2/M3.4。mcp + kbdb tsc exit 0。 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
238 lines
9.0 KiB
Python
238 lines
9.0 KiB
Python
#!/usr/bin/env python3
|
||
"""sync-registry-to-kbdb.py — 把 registry/examples + registry/skills 同步進 KBDB
|
||
|
||
對應 LI SDD M3.4。examples / skills 在 git 是 source of truth,
|
||
KBDB 是「給 AI 搜尋 / get」的 query-friendly mirror。
|
||
|
||
2026-06-14 重寫:KBDB 降基本盤後(三表 entries/templates/records,無 v3 blocks 表、
|
||
無 kbdb-upsert-block 零件 worker),原打 https://kbdb-upsert-block.arcrun.dev/ 全失效。
|
||
改打基本盤 KBDB Worker 的 /entries:
|
||
- examples → entry_type=workflow-example
|
||
content = workflow.yaml 全文
|
||
metadata_json = { slug, description_md, tags }
|
||
tags_json = ["workflow-example", "example:{slug}", *tags]
|
||
page_name = example-{slug} (idempotency key)
|
||
- skills → entry_type=agent-skill
|
||
content = {slug}.md 全文
|
||
metadata_json = { slug, title }
|
||
tags_json = ["agent-skill", "skill:{slug}"]
|
||
page_name = skill-{slug} (idempotency key)
|
||
|
||
基本盤無 upsert 端點 → 本腳本自己做 idempotency(GET ?page_name= 找到則 PATCH /entries/:id,
|
||
否則 POST /entries)。這是 ops 同步腳本(非 CLI/MCP 薄殼),自行編排不違反 rule 07 薄殼原則。
|
||
|
||
執行:
|
||
cd matrix/arcrun
|
||
KBDB_BASE_URL=https://arcrun-kbdb.<subdomain>.workers.dev python3 scripts/sync-registry-to-kbdb.py
|
||
python3 scripts/sync-registry-to-kbdb.py --dry-run # 只 list 不寫
|
||
|
||
設定:
|
||
- KBDB_BASE_URL KBDB 基本盤 Worker 的 base URL(必填,無預設——避免誤打到別的環境)
|
||
- KBDB_OWNER_ID 資料歸屬標記(選填,預設 'registry';基本盤多租戶用 owner_id)
|
||
"""
|
||
|
||
import argparse
|
||
import json
|
||
import os
|
||
import sys
|
||
import urllib.request
|
||
import urllib.error
|
||
from pathlib import Path
|
||
|
||
# Root of the arcrun tree: two directory levels above this file.
ARCRUN_ROOT = Path(__file__).resolve().parents[1]
# Registry folders that are mirrored into KBDB.
EXAMPLES_DIR = ARCRUN_ROOT.joinpath("registry", "examples")
SKILLS_DIR = ARCRUN_ROOT.joinpath("registry", "skills")

# Sent as the User-Agent header on every request.
USER_AGENT = "arcrun-registry-sync/2.0"
# owner_id written on every entry; overridable through the environment.
OWNER_ID = os.getenv("KBDB_OWNER_ID", "registry")
# Stored in each entry's metadata under the "source" key.
SOURCE = "registry-git-sync"
|
||
|
||
|
||
def get_base_url() -> str:
    """Return the KBDB Worker base URL read from ``KBDB_BASE_URL``.

    Trailing slashes are removed. There is deliberately no default value, so
    a run cannot hit an unintended environment by accident.

    Raises:
        SystemExit: if ``KBDB_BASE_URL`` is unset or empty after stripping.
    """
    base = os.environ.get("KBDB_BASE_URL", "").rstrip("/")
    if not base:
        raise SystemExit(
            "KBDB_BASE_URL 未設定。\n"
            " export KBDB_BASE_URL=https://arcrun-kbdb.<subdomain>.workers.dev\n"
            " (self-hosted 用自己部署的 KBDB Worker URL)"
        )
    return base
|
||
|
||
|
||
def _req(method: str, url: str, payload: dict | None = None) -> dict:
    """Send one JSON request and return the decoded response object.

    Transport and decoding problems never raise: each failure is reported as
    ``{"error": "<message>"}`` so the caller can count it as one failed item
    and carry on with the rest of the sync.

    Args:
        method: HTTP verb, e.g. ``"GET"``, ``"POST"`` or ``"PATCH"``.
        url: Absolute URL to call.
        payload: JSON-serialisable body, or ``None`` to send no body.

    Returns:
        The decoded JSON object on success, ``{}`` when the server answers
        with an empty body, or ``{"error": ...}`` on any failure.
    """
    data = None if payload is None else json.dumps(payload, ensure_ascii=False).encode("utf-8")
    try:
        req = urllib.request.Request(
            url,
            data=data,
            headers={"Content-Type": "application/json", "User-Agent": USER_AGENT},
            method=method,
        )
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")
        return {"error": f"HTTP {e.code}: {body[:200]}"}
    except urllib.error.URLError as e:
        return {"error": f"URL error: {e}"}
    except OSError as e:
        # Read timeouts and connection resets while reading the body are
        # plain OSErrors, not URLErrors.
        return {"error": f"Network error: {e}"}
    except ValueError as e:
        # Request() rejects URLs without a usable scheme.
        return {"error": f"Invalid URL: {e}"}

    text = raw.decode("utf-8", errors="replace").strip()
    if not text:
        # e.g. a 204 No Content reply: success with nothing to decode.
        return {}
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError as e:
        return {"error": f"Invalid JSON response ({e}): {text[:200]}"}
    if not isinstance(parsed, dict):
        # Callers use dict methods on the result, so reject arrays/scalars here.
        return {"error": f"Unexpected JSON response (expected an object): {text[:200]}"}
    return parsed
|
||
|
||
|
||
def find_entry_id_by_page_name(base_url: str, page_name: str) -> str | None:
|
||
"""GET /entries?page_name= → 回既有 entry id(idempotency 用),無則 None。"""
|
||
from urllib.parse import quote
|
||
res = _req("GET", f"{base_url}/entries?page_name={quote(page_name)}&limit=1")
|
||
if "error" in res:
|
||
return None
|
||
entries = res.get("entries") or []
|
||
return entries[0].get("id") if entries else None
|
||
|
||
|
||
def upsert_entry(base_url: str, payload: dict, dry_run: bool) -> dict:
|
||
"""page_name 當 idempotency key:找到則 PATCH /entries/:id,否則 POST /entries。"""
|
||
page_name = payload.get("page_name")
|
||
if dry_run:
|
||
existing = None if base_url == "DRY" else find_entry_id_by_page_name(base_url, page_name)
|
||
return {"dry_run": True, "would": "patch" if existing else "post", "page_name": page_name}
|
||
|
||
existing_id = find_entry_id_by_page_name(base_url, page_name)
|
||
if existing_id:
|
||
# PATCH 只送可變欄位(entry_type/page_name 不變)
|
||
patch = {k: payload[k] for k in ("content", "tags_json", "metadata_json") if k in payload}
|
||
res = _req("PATCH", f"{base_url}/entries/{existing_id}", patch)
|
||
if "error" not in res:
|
||
res.setdefault("action", "update")
|
||
return res
|
||
res = _req("POST", f"{base_url}/entries", payload)
|
||
if "error" not in res:
|
||
res.setdefault("action", "create")
|
||
return res
|
||
|
||
|
||
def sync_examples(base_url: str, dry_run: bool) -> tuple[int, int]:
    """Sync each ``registry/examples/{slug}/`` folder into KBDB.

    Every sub-directory that contains ``workflow.yaml`` becomes one entry with
    ``entry_type=workflow-example`` and ``page_name=example-{slug}``. The
    optional ``description.md`` and ``tags.json`` (a JSON array) are folded
    into the entry's metadata and tags.

    A folder whose files cannot be read or parsed is reported and counted as
    one failure; it does not abort the remaining folders.

    Args:
        base_url: KBDB base URL (or the ``"DRY"`` sentinel), passed through to
            ``upsert_entry``.
        dry_run: When true, nothing is written.

    Returns:
        ``(ok, fail)`` counts. Folders without ``workflow.yaml`` are skipped
        and counted in neither.
    """
    if not EXAMPLES_DIR.exists():
        print(f"⚠️ {EXAMPLES_DIR} 不存在,跳過 examples 同步")
        return 0, 0

    ok, fail = 0, 0
    for slug_dir in sorted(EXAMPLES_DIR.iterdir()):
        if not slug_dir.is_dir():
            continue
        slug = slug_dir.name
        workflow_yaml = slug_dir / "workflow.yaml"
        description_md = slug_dir / "description.md"
        tags_json = slug_dir / "tags.json"

        if not workflow_yaml.exists():
            print(f" ⚠️ {slug}: 缺 workflow.yaml,跳過")
            continue

        try:
            yaml_content = workflow_yaml.read_text(encoding="utf-8")
            description = description_md.read_text(encoding="utf-8") if description_md.exists() else ""
            tags = json.loads(tags_json.read_text(encoding="utf-8")) if tags_json.exists() else []
        except (OSError, ValueError) as exc:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            print(f" ❌ {slug}: 讀取檔案失敗: {exc}")
            fail += 1
            continue

        if not isinstance(tags, list):
            # Splatting a dict or string into tags_json would write garbage tags.
            print(f" ❌ {slug}: tags.json 必須是 JSON 陣列")
            fail += 1
            continue

        payload = {
            "entry_type": "workflow-example",
            "page_name": f"example-{slug}",
            "owner_id": OWNER_ID,
            "content": yaml_content,
            "metadata_json": json.dumps(
                {"slug": slug, "description_md": description, "tags": tags, "source": SOURCE},
                ensure_ascii=False,
            ),
            "tags_json": json.dumps(
                ["workflow-example", f"example:{slug}", *tags], ensure_ascii=False
            ),
        }

        result = upsert_entry(base_url, payload, dry_run)
        if "error" in result:
            print(f" ❌ {slug}: {result['error']}")
            fail += 1
        else:
            print(f" ✅ {slug} → {result.get('action', 'dry-run:' + result.get('would', '?'))}")
            ok += 1

    return ok, fail
|
||
|
||
|
||
def sync_skills(base_url: str, dry_run: bool) -> tuple[int, int]:
    """Mirror every ``registry/skills/*.md`` file into KBDB.

    Each Markdown file except ``README.md`` becomes one entry with
    ``entry_type=agent-skill`` and ``page_name=skill-{slug}``, where the slug
    is the file name without its extension. The title is the text of the
    first line that starts with ``"# "`` after stripping; the slug is used
    when no such line exists.

    Returns:
        ``(ok, fail)`` counts of upserts that succeeded and failed.
    """
    if not SKILLS_DIR.exists():
        print(f"⚠️ {SKILLS_DIR} 不存在,跳過 skills 同步")
        return 0, 0

    succeeded = failed = 0
    skill_files = (f for f in sorted(SKILLS_DIR.glob("*.md")) if f.name != "README.md")
    for skill_path in skill_files:
        slug = skill_path.stem
        content = skill_path.read_text(encoding="utf-8")

        # First level-1 heading wins; fall back to the slug.
        stripped_lines = (raw.strip() for raw in content.splitlines())
        title = next((ln[2:].strip() for ln in stripped_lines if ln.startswith("# ")), slug)

        metadata = {"slug": slug, "title": title, "source": SOURCE}
        tags = ["agent-skill", f"skill:{slug}"]
        payload = {
            "entry_type": "agent-skill",
            "page_name": f"skill-{slug}",
            "owner_id": OWNER_ID,
            "content": content,
            "metadata_json": json.dumps(metadata, ensure_ascii=False),
            "tags_json": json.dumps(tags, ensure_ascii=False),
        }

        result = upsert_entry(base_url, payload, dry_run)
        if "error" not in result:
            print(f" ✅ {slug} → {result.get('action', 'dry-run:' + result.get('would', '?'))}")
            succeeded += 1
            continue
        print(f" ❌ {slug}: {result['error']}")
        failed += 1

    return succeeded, failed
|
||
|
||
|
||
def main():
    """Command-line entry point: sync examples and/or skills into KBDB.

    Flags:
        --dry-run        List what would be done without writing. When
                         ``KBDB_BASE_URL`` is also unset, no network access
                         happens at all.
        --examples-only  Sync only registry/examples.
        --skills-only    Sync only registry/skills.

    ``--examples-only`` and ``--skills-only`` are mutually exclusive: given
    together they would select nothing, and the run would still report
    success. argparse now rejects that combination with exit status 2.

    Exits with status 1 when at least one item failed.
    """
    p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB base (/entries)")
    p.add_argument("--dry-run", action="store_true", help="只 list 不寫")
    scope = p.add_mutually_exclusive_group()
    scope.add_argument("--examples-only", action="store_true")
    scope.add_argument("--skills-only", action="store_true")
    args = p.parse_args()

    # An offline dry run needs no real URL; "DRY" is the sentinel upsert_entry checks for.
    base_url = "DRY" if args.dry_run and not os.environ.get("KBDB_BASE_URL") else get_base_url()
    print(f"🌐 KBDB base: {base_url}")
    print(f"📂 root: {ARCRUN_ROOT} (owner_id={OWNER_ID})")
    if args.dry_run:
        print("(dry-run,不實際寫 KBDB)")
    print()

    examples_ok = examples_fail = 0
    skills_ok = skills_fail = 0

    if not args.skills_only:
        print("📋 Syncing examples → entry_type=workflow-example ...")
        examples_ok, examples_fail = sync_examples(base_url, args.dry_run)
        print(f" examples: {examples_ok} ok / {examples_fail} fail\n")

    if not args.examples_only:
        print("📋 Syncing skills → entry_type=agent-skill ...")
        skills_ok, skills_fail = sync_skills(base_url, args.dry_run)
        print(f" skills: {skills_ok} ok / {skills_fail} fail\n")

    total_fail = examples_fail + skills_fail
    if total_fail > 0:
        print(f"⚠️ 共 {total_fail} 個項目失敗")
        sys.exit(1)
    print(f"✅ Done. examples={examples_ok}, skills={skills_ok}")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|