Files
Arcrun/scripts/sync-registry-to-kbdb.py
uncle6me-web 922a57fe34 arcrun — AI workflow execution engine (clean history)
Self-hosted 開源:WASM 零件 + recipe + cypher-executor,跑在你自己的 Cloudflare。

此為重建的乾淨歷史起點(移除曾誤 commit 的 GCP SA 金鑰,舊歷史保留在
richblack/arcrun 與本地 backup 分支)。含:
- acr init --self-hosted installer(建 KV/R2 + codeload 拉預編譯 wasm + wrangler deploy + seed recipe)
- recipe push 把關(資料外流提醒 + 打通檢查)
- 19 個正當零件預編譯 wasm(claude_api/km_writer/kbdb_upsert_block 排除:違反 DECISIONS §1)
- CLI / cypher-executor / registry / 完整 SDD

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 15:52:38 +08:00

235 lines
7.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""sync-registry-to-kbdb.py — 把 registry/examples + registry/skills 同步進 KBDB
對應 LI SDD M3.4。examples / skills 在 git 是 source of truth,
KBDB 是「給 AI 搜尋 / get」的 query-friendly mirror。
對應的 KBDB block:
- examples → type=workflow-example
content = workflow.yaml 全文
metadata_json = { description, tags }
tags_json = [...tags.json]
page_name = example-{slug} (idempotency key,重複 sync 走 upsert)
- skills → type=agent-skill
content = {slug}.md 全文
page_name = skill-{slug} (idempotency key)
tags_json = ["agent-skill", "skill:{slug}"]
執行:
cd matrix/arcrun
python3 scripts/sync-registry-to-kbdb.py # 上傳所有
python3 scripts/sync-registry-to-kbdb.py --dry-run # 只 list 不寫
需求:
- mira tools/_kbdb_client.py 風格 (urllib + ak_)
- ARCRUN_API_KEY 從 .env 或 env var
- 走 kbdb-*.arcrun.dev 零件 worker endpoints (符合 mira CLAUDE.md §1.7)
"""
import argparse
import json
import os
import sys
import urllib.request
import urllib.error
from pathlib import Path
# Repository root: two directory levels above this script file.
ARCRUN_ROOT = Path(__file__).resolve().parent.parent

# Git-tracked directories that get mirrored into KBDB.
EXAMPLES_DIR = ARCRUN_ROOT.joinpath("registry", "examples")
SKILLS_DIR = ARCRUN_ROOT.joinpath("registry", "skills")

# Upsert worker endpoint and the User-Agent header sent with every request.
KBDB_UPSERT_URL = "https://kbdb-upsert-block.arcrun.dev/"
USER_AGENT = "arcrun-registry-sync/1.0"

# Owner recorded on each synced block; must match the KBDB partner
# namespace prefix (inkstone_*).
USER_ID = "inkstone_platform_registry"

# Value written to each block's "source" field.
SOURCE = "registry-git-sync"
def get_api_key() -> str:
    """Return the arcrun API key used for KBDB writes.

    Lookup order:

    1. The ``ARCRUN_API_KEY`` environment variable, if set and non-empty.
    2. An ``ARCRUN_API_KEY=...`` line in ``polaris/mira/.env``, located
       relative to the directory two levels above ``ARCRUN_ROOT``.

    A value read from the ``.env`` file has one pair of matching surrounding
    quotes removed. An empty assignment is treated as "not set", the same way
    an empty environment variable is, and scanning continues with later lines.

    Returns:
        The first non-empty key found.

    Raises:
        SystemExit: If neither location provides a non-empty key.
    """
    key = os.environ.get("ARCRUN_API_KEY", "")
    if key:
        return key

    # Fallback: the conventional location of the mira .env file.
    mira_env = ARCRUN_ROOT.parent.parent / "polaris" / "mira" / ".env"
    if mira_env.is_file():
        for raw_line in mira_env.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if not line.startswith("ARCRUN_API_KEY="):
                continue
            value = line.split("=", 1)[1].strip()
            # dotenv files commonly quote values; drop one matching pair so
            # the quotes are not sent as part of the key.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1].strip()
            if value:
                return value

    raise SystemExit(
        "ARCRUN_API_KEY 未設定。export ARCRUN_API_KEY=ak_... 或加到 polaris/mira/.env"
    )
def kbdb_upsert(api_key: str, payload: dict, dry_run: bool) -> dict:
    """POST one block to the KBDB upsert worker and return its JSON reply.

    The payload's ``page_name`` serves as the idempotency key, so repeated
    syncs of the same page go through upsert.

    Every failure mode is reported as ``{"error": "<message>"}`` rather than
    raised, so one bad item cannot abort a whole sync run: HTTP error
    statuses, transport failures (unreachable host, refused connection,
    timeout), and response bodies that are not a JSON object.

    Args:
        api_key: Not read by this function; callers embed the key in
            ``payload``. Kept so the call signature stays unchanged.
        payload: JSON-serialisable request body.
        dry_run: When true, send nothing and only report which page would
            have been upserted.

    Returns:
        The decoded JSON object from the worker, a ``{"dry_run": True,
        "would_upsert": ...}`` marker, or an ``{"error": ...}`` dict.
    """
    if dry_run:
        return {"dry_run": True, "would_upsert": payload.get("page_name")}

    req = urllib.request.Request(
        KBDB_UPSERT_URL,
        data=json.dumps(payload, ensure_ascii=False).encode("utf-8"),
        headers={
            "Content-Type": "application/json",
            "User-Agent": USER_AGENT,
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # Must precede the OSError handler: HTTPError is an OSError subclass.
        body = e.read().decode("utf-8", errors="replace")
        return {"error": f"HTTP {e.code}: {body[:200]}"}
    except OSError as e:
        # Covers URLError (DNS failure, refused connection) and socket
        # timeouts / resets raised while connecting or reading.
        return {"error": f"request failed: {e}"}

    try:
        result = json.loads(raw.decode("utf-8"))
    except ValueError as e:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        return {"error": f"invalid JSON response: {e}"}
    if not isinstance(result, dict):
        # Callers use dict operations on the result; reject other JSON types.
        return {"error": f"unexpected response type: {type(result).__name__}"}
    return result
def sync_examples(api_key: str, dry_run: bool) -> tuple[int, int]:
    """Mirror each ``registry/examples/{slug}/`` directory into KBDB.

    For every sub-directory of ``EXAMPLES_DIR`` (in sorted order) that
    contains ``workflow.yaml``, one ``workflow-example`` block is upserted:

    * ``content`` is the full text of ``workflow.yaml``;
    * ``metadata_json`` holds the slug, the text of ``description.md``
      (empty when the file is absent) and the tags;
    * ``tags_json`` is ``["workflow-example", "example:{slug}", *tags]``;
    * ``page_name`` is ``example-{slug}``.

    Directories without ``workflow.yaml`` are skipped with a warning and are
    not counted. A ``tags.json`` that is not valid JSON, or is not a JSON
    array, is counted as a failure for that example and the run continues.

    Args:
        api_key: Key embedded in each payload and passed to ``kbdb_upsert``.
        dry_run: Forwarded to ``kbdb_upsert``.

    Returns:
        ``(ok, fail)`` counts of examples processed.
    """
    if not EXAMPLES_DIR.exists():
        print(f"⚠️ {EXAMPLES_DIR} 不存在,跳過 examples 同步")
        return 0, 0

    ok, fail = 0, 0
    for slug_dir in sorted(EXAMPLES_DIR.iterdir()):
        if not slug_dir.is_dir():
            continue
        slug = slug_dir.name
        workflow_yaml = slug_dir / "workflow.yaml"
        description_md = slug_dir / "description.md"
        tags_json = slug_dir / "tags.json"

        if not workflow_yaml.exists():
            print(f" ⚠️ {slug}: 缺 workflow.yaml,跳過")
            continue

        yaml_content = workflow_yaml.read_text(encoding="utf-8")
        description = (
            description_md.read_text(encoding="utf-8") if description_md.exists() else ""
        )

        tags = []
        if tags_json.exists():
            try:
                tags = json.loads(tags_json.read_text(encoding="utf-8"))
            except ValueError as e:
                print(f" ❌ {slug}: tags.json 無法解析: {e}")
                fail += 1
                continue
            # A dict or string would be silently spread into keys/characters
            # by the ``*tags`` unpacking below, so insist on an array.
            if not isinstance(tags, list):
                print(f" ❌ {slug}: tags.json 必須是 JSON 陣列")
                fail += 1
                continue

        # content carries the workflow YAML so semantic search can match it;
        # metadata_json carries the structured description and tags.
        payload = {
            "api_key": api_key,
            "type": "workflow-example",
            "page_name": f"example-{slug}",
            "source": SOURCE,
            "user_id": USER_ID,
            "content": yaml_content,
            "metadata_json": json.dumps(
                {
                    "slug": slug,
                    "description_md": description,
                    "tags": tags,
                },
                ensure_ascii=False,
            ),
            "tags_json": json.dumps(
                ["workflow-example", f"example:{slug}", *tags],
                ensure_ascii=False,
            ),
        }

        result = kbdb_upsert(api_key, payload, dry_run)
        if "error" in result:
            print(f" ❌ {slug}: {result['error']}")
            fail += 1
        else:
            data = result.get("data")
            action = data.get("action", "?") if isinstance(data, dict) else "?"
            # Separate slug and action so the status line stays readable.
            print(f" ✅ {slug} ({action})")
            ok += 1
    return ok, fail
def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]:
    """Mirror each ``registry/skills/*.md`` file into KBDB.

    Every Markdown file directly inside ``SKILLS_DIR`` except ``README.md``
    is upserted (in sorted order) as one ``agent-skill`` block:

    * ``content`` is the full file text;
    * ``metadata_json`` holds the slug (file stem) and a title, taken from
      the first line that starts with ``"# "`` or falling back to the slug;
    * ``tags_json`` is ``["agent-skill", "skill:{slug}"]``;
    * ``page_name`` is ``skill-{slug}``.

    Args:
        api_key: Key embedded in each payload and passed to ``kbdb_upsert``.
        dry_run: Forwarded to ``kbdb_upsert``.

    Returns:
        ``(ok, fail)`` counts of skills processed.
    """
    if not SKILLS_DIR.exists():
        print(f"⚠️ {SKILLS_DIR} 不存在,跳過 skills 同步")
        return 0, 0

    ok, fail = 0, 0
    for md_file in sorted(SKILLS_DIR.glob("*.md")):
        if md_file.name == "README.md":
            continue
        slug = md_file.stem
        content = md_file.read_text(encoding="utf-8")

        # Title: text of the first "# ..." line, otherwise the slug.
        title = next(
            (
                stripped[2:].strip()
                for stripped in map(str.strip, content.splitlines())
                if stripped.startswith("# ")
            ),
            slug,
        )

        payload = {
            "api_key": api_key,
            "type": "agent-skill",
            "page_name": f"skill-{slug}",
            "source": SOURCE,
            "user_id": USER_ID,
            "content": content,
            "metadata_json": json.dumps(
                {"slug": slug, "title": title},
                ensure_ascii=False,
            ),
            "tags_json": json.dumps(
                ["agent-skill", f"skill:{slug}"],
                ensure_ascii=False,
            ),
        }

        result = kbdb_upsert(api_key, payload, dry_run)
        if "error" in result:
            print(f" ❌ {slug}: {result['error']}")
            fail += 1
        else:
            data = result.get("data")
            action = data.get("action", "?") if isinstance(data, dict) else "?"
            # Separate slug and action so the status line stays readable.
            print(f" ✅ {slug} ({action})")
            ok += 1
    return ok, fail
def main():
    """Command-line entry point: sync registry examples and skills to KBDB.

    Flags:
        --dry-run        List what would be written without calling KBDB.
        --examples-only  Sync only registry/examples.
        --skills-only    Sync only registry/skills.

    ``--examples-only`` and ``--skills-only`` are mutually exclusive. When
    both were accepted together, each disabled the other half, so nothing was
    synced and the run still reported success.

    Exits with status 1 when at least one item failed to sync.
    """
    p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB")
    p.add_argument("--dry-run", action="store_true", help="只 list 不寫")
    scope = p.add_mutually_exclusive_group()
    scope.add_argument("--examples-only", action="store_true")
    scope.add_argument("--skills-only", action="store_true")
    args = p.parse_args()

    api_key = get_api_key()
    print(f"🔑 api_key: {api_key[:12]}... (len={len(api_key)})")
    print(f"📂 root: {ARCRUN_ROOT}")
    if args.dry_run:
        print("(dry-run,不實際寫 KBDB)")
    print()

    examples_ok = examples_fail = 0
    skills_ok = skills_fail = 0

    if not args.skills_only:
        print("📋 Syncing examples → type=workflow-example ...")
        examples_ok, examples_fail = sync_examples(api_key, args.dry_run)
        print(f" examples: {examples_ok} ok / {examples_fail} fail\n")

    if not args.examples_only:
        print("📋 Syncing skills → type=agent-skill ...")
        skills_ok, skills_fail = sync_skills(api_key, args.dry_run)
        print(f" skills: {skills_ok} ok / {skills_fail} fail\n")

    total_fail = examples_fail + skills_fail
    if total_fail > 0:
        print(f"⚠️ 共 {total_fail} 個項目失敗")
        sys.exit(1)
    print(f"✅ Done. examples={examples_ok}, skills={skills_ok}")


if __name__ == "__main__":
    main()