#!/usr/bin/env python3 """sync-registry-to-kbdb.py — 把 registry/examples + registry/skills 同步進 KBDB 對應 LI SDD M3.4。examples / skills 在 git 是 source of truth, KBDB 是「給 AI 搜尋 / get」的 query-friendly mirror。 2026-06-14 重寫:KBDB 降基本盤後(三表 entries/templates/records,無 v3 blocks 表、 無 kbdb-upsert-block 零件 worker),原打 https://kbdb-upsert-block.arcrun.dev/ 全失效。 改打基本盤 KBDB Worker 的 /entries: - examples → entry_type=workflow-example content = workflow.yaml 全文 metadata_json = { slug, description_md, tags } tags_json = ["workflow-example", "example:{slug}", *tags] page_name = example-{slug} (idempotency key) - skills → entry_type=agent-skill content = {slug}.md 全文 metadata_json = { slug, title } tags_json = ["agent-skill", "skill:{slug}"] page_name = skill-{slug} (idempotency key) 基本盤無 upsert 端點 → 本腳本自己做 idempotency(GET ?page_name= 找到則 PATCH /entries/:id, 否則 POST /entries)。這是 ops 同步腳本(非 CLI/MCP 薄殼),自行編排不違反 rule 07 薄殼原則。 執行: cd matrix/arcrun KBDB_BASE_URL=https://arcrun-kbdb..workers.dev python3 scripts/sync-registry-to-kbdb.py python3 scripts/sync-registry-to-kbdb.py --dry-run # 只 list 不寫 設定: - KBDB_BASE_URL KBDB 基本盤 Worker 的 base URL(必填,無預設——避免誤打到別的環境) - KBDB_OWNER_ID 資料歸屬標記(選填,預設 'registry';基本盤多租戶用 owner_id) """ import argparse import json import os import sys import urllib.request import urllib.error from pathlib import Path ARCRUN_ROOT = Path(__file__).resolve().parent.parent EXAMPLES_DIR = ARCRUN_ROOT / "registry" / "examples" SKILLS_DIR = ARCRUN_ROOT / "registry" / "skills" USER_AGENT = "arcrun-registry-sync/2.0" OWNER_ID = os.environ.get("KBDB_OWNER_ID", "registry") SOURCE = "registry-git-sync" def get_base_url() -> str: """KBDB 基本盤 Worker base URL。無預設(避免誤打環境)。""" url = os.environ.get("KBDB_BASE_URL", "").rstrip("/") if url: return url raise SystemExit( "KBDB_BASE_URL 未設定。\n" " export KBDB_BASE_URL=https://arcrun-kbdb..workers.dev\n" " (self-hosted 用自己部署的 KBDB Worker URL)" ) def _req(method: str, url: str, payload: dict | None = None) -> dict: data = json.dumps(payload, ensure_ascii=False).encode("utf-8") if payload is not None else None req = urllib.request.Request( url, data=data, headers={"Content-Type": "application/json", "User-Agent": USER_AGENT}, method=method, ) try: with urllib.request.urlopen(req, timeout=30) as resp: return json.loads(resp.read().decode("utf-8")) except urllib.error.HTTPError as e: body = e.read().decode("utf-8", errors="replace") return {"error": f"HTTP {e.code}: {body[:200]}"} except urllib.error.URLError as e: return {"error": f"URL error: {e}"} def find_entry_id_by_page_name(base_url: str, page_name: str) -> str | None: """GET /entries?page_name= → 回既有 entry id(idempotency 用),無則 None。""" from urllib.parse import quote res = _req("GET", f"{base_url}/entries?page_name={quote(page_name)}&limit=1") if "error" in res: return None entries = res.get("entries") or [] return entries[0].get("id") if entries else None def upsert_entry(base_url: str, payload: dict, dry_run: bool) -> dict: """page_name 當 idempotency key:找到則 PATCH /entries/:id,否則 POST /entries。""" page_name = payload.get("page_name") if dry_run: existing = None if base_url == "DRY" else find_entry_id_by_page_name(base_url, page_name) return {"dry_run": True, "would": "patch" if existing else "post", "page_name": page_name} existing_id = find_entry_id_by_page_name(base_url, page_name) if existing_id: # PATCH 只送可變欄位(entry_type/page_name 不變) patch = {k: payload[k] for k in ("content", "tags_json", "metadata_json") if k in payload} res = _req("PATCH", f"{base_url}/entries/{existing_id}", patch) if "error" not in res: res.setdefault("action", "update") return res res = _req("POST", f"{base_url}/entries", payload) if "error" not in res: res.setdefault("action", "create") return res def sync_examples(base_url: str, dry_run: bool) -> tuple[int, int]: """同步 registry/examples/{slug}/ 進 KBDB(entry_type=workflow-example)""" if not EXAMPLES_DIR.exists(): print(f"⚠️ {EXAMPLES_DIR} 不存在,跳過 examples 同步") return 0, 0 ok, fail = 0, 0 for slug_dir in sorted(EXAMPLES_DIR.iterdir()): if not slug_dir.is_dir(): continue slug = slug_dir.name workflow_yaml = slug_dir / "workflow.yaml" description_md = slug_dir / "description.md" tags_json = slug_dir / "tags.json" if not workflow_yaml.exists(): print(f" ⚠️ {slug}: 缺 workflow.yaml,跳過") continue yaml_content = workflow_yaml.read_text(encoding="utf-8") description = description_md.read_text(encoding="utf-8") if description_md.exists() else "" tags = json.loads(tags_json.read_text(encoding="utf-8")) if tags_json.exists() else [] payload = { "entry_type": "workflow-example", "page_name": f"example-{slug}", "owner_id": OWNER_ID, "content": yaml_content, "metadata_json": json.dumps( {"slug": slug, "description_md": description, "tags": tags, "source": SOURCE}, ensure_ascii=False, ), "tags_json": json.dumps( ["workflow-example", f"example:{slug}", *tags], ensure_ascii=False ), } result = upsert_entry(base_url, payload, dry_run) if "error" in result: print(f" ❌ {slug}: {result['error']}") fail += 1 else: print(f" ✅ {slug} → {result.get('action', 'dry-run:' + result.get('would', '?'))}") ok += 1 return ok, fail def sync_skills(base_url: str, dry_run: bool) -> tuple[int, int]: """同步 registry/skills/*.md 進 KBDB(entry_type=agent-skill)""" if not SKILLS_DIR.exists(): print(f"⚠️ {SKILLS_DIR} 不存在,跳過 skills 同步") return 0, 0 ok, fail = 0, 0 for md_file in sorted(SKILLS_DIR.glob("*.md")): if md_file.name == "README.md": continue slug = md_file.stem content = md_file.read_text(encoding="utf-8") title = slug for line in content.splitlines(): line = line.strip() if line.startswith("# "): title = line[2:].strip() break payload = { "entry_type": "agent-skill", "page_name": f"skill-{slug}", "owner_id": OWNER_ID, "content": content, "metadata_json": json.dumps( {"slug": slug, "title": title, "source": SOURCE}, ensure_ascii=False ), "tags_json": json.dumps(["agent-skill", f"skill:{slug}"], ensure_ascii=False), } result = upsert_entry(base_url, payload, dry_run) if "error" in result: print(f" ❌ {slug}: {result['error']}") fail += 1 else: print(f" ✅ {slug} → {result.get('action', 'dry-run:' + result.get('would', '?'))}") ok += 1 return ok, fail def main(): p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB base (/entries)") p.add_argument("--dry-run", action="store_true", help="只 list 不寫") p.add_argument("--examples-only", action="store_true") p.add_argument("--skills-only", action="store_true") args = p.parse_args() base_url = "DRY" if args.dry_run and not os.environ.get("KBDB_BASE_URL") else get_base_url() print(f"🌐 KBDB base: {base_url}") print(f"📂 root: {ARCRUN_ROOT} (owner_id={OWNER_ID})") if args.dry_run: print("(dry-run,不實際寫 KBDB)") print() examples_ok = examples_fail = 0 skills_ok = skills_fail = 0 if not args.skills_only: print("📋 Syncing examples → entry_type=workflow-example ...") examples_ok, examples_fail = sync_examples(base_url, args.dry_run) print(f" examples: {examples_ok} ok / {examples_fail} fail\n") if not args.examples_only: print("📋 Syncing skills → entry_type=agent-skill ...") skills_ok, skills_fail = sync_skills(base_url, args.dry_run) print(f" skills: {skills_ok} ok / {skills_fail} fail\n") total_fail = examples_fail + skills_fail if total_fail > 0: print(f"⚠️ 共 {total_fail} 個項目失敗") sys.exit(1) print(f"✅ Done. examples={examples_ok}, skills={skills_ok}") if __name__ == "__main__": main()