922a57fe34
Self-hosted 開源:WASM 零件 + recipe + cypher-executor,跑在你自己的 Cloudflare。 此為重建的乾淨歷史起點(移除曾誤 commit 的 GCP SA 金鑰,舊歷史保留在 richblack/arcrun 與本地 backup 分支)。含: - acr init --self-hosted installer(建 KV/R2 + codeload 拉預編譯 wasm + wrangler deploy + seed recipe) - recipe push 把關(資料外流提醒 + 打通檢查) - 19 個正當零件預編譯 wasm(claude_api/km_writer/kbdb_upsert_block 排除:違反 DECISIONS §1) - CLI / cypher-executor / registry / 完整 SDD Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
235 lines
7.8 KiB
Python
235 lines
7.8 KiB
Python
#!/usr/bin/env python3
"""sync-registry-to-kbdb.py — sync registry/examples and registry/skills into KBDB.

Implements LI SDD M3.4. The examples and skills stored in git are the source
of truth; KBDB is a query-friendly mirror that AI agents can search / get.

KBDB block mapping:
  - examples → type=workflow-example
      content       = full text of workflow.yaml
      metadata_json = { slug, description_md, tags }
      tags_json     = ["workflow-example", "example:{slug}", ...tags.json]
      page_name     = example-{slug}  (idempotency key; re-running the sync upserts)

  - skills → type=agent-skill
      content       = full text of {slug}.md
      metadata_json = { slug, title }
      page_name     = skill-{slug}  (idempotency key)
      tags_json     = ["agent-skill", "skill:{slug}"]

Usage:
  cd matrix/arcrun
  python3 scripts/sync-registry-to-kbdb.py            # upload everything
  python3 scripts/sync-registry-to-kbdb.py --dry-run  # list only, write nothing

Requirements:
  - Follows the style of mira tools/_kbdb_client.py (urllib + ak_ key)
  - ARCRUN_API_KEY taken from the environment or from polaris/mira/.env
  - Goes through the kbdb-*.arcrun.dev component worker endpoints
    (per mira CLAUDE.md §1.7)
"""
|
||
|
||
import argparse
|
||
import json
|
||
import os
|
||
import sys
|
||
import urllib.request
|
||
import urllib.error
|
||
from pathlib import Path
|
||
|
||
# Two directory levels above this script file.
ARCRUN_ROOT = Path(__file__).resolve().parent.parent
# Git-tracked directories that get mirrored into KBDB.
EXAMPLES_DIR = ARCRUN_ROOT.joinpath("registry", "examples")
SKILLS_DIR = ARCRUN_ROOT.joinpath("registry", "skills")

# Endpoint every block is POSTed to, and the User-Agent sent with each request.
KBDB_UPSERT_URL = "https://kbdb-upsert-block.arcrun.dev/"
USER_AGENT = "arcrun-registry-sync/1.0"
# Must match the KBDB partner namespace prefix (inkstone_*).
USER_ID = "inkstone_platform_registry"
# Value written to the "source" field of every synced block.
SOURCE = "registry-git-sync"
|
||
|
||
|
||
def get_api_key() -> str:
    """Return the ARCRUN API key from the environment or the mira ``.env`` file.

    Lookup order:

    1. The ``ARCRUN_API_KEY`` environment variable, when set and non-empty
       (returned unchanged).
    2. The first ``ARCRUN_API_KEY=...`` line with a non-empty value in
       ``polaris/mira/.env``, located two levels above ``ARCRUN_ROOT``.
       A leading ``export`` is accepted, surrounding whitespace is dropped
       and one pair of matching quotes around the value is removed, so
       ``export ARCRUN_API_KEY="ak_..."`` yields ``ak_...``.

    Raises:
        SystemExit: if neither source provides a non-empty key.
    """
    key = os.environ.get("ARCRUN_API_KEY", "")
    if key:
        return key

    # Fallback: the conventional location of the shared .env file.
    mira_env = ARCRUN_ROOT.parent.parent / "polaris" / "mira" / ".env"
    if mira_env.is_file():
        for raw_line in mira_env.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if line.startswith("export "):
                line = line[len("export "):].lstrip()
            name, sep, value = line.partition("=")
            if not sep or name.strip() != "ARCRUN_API_KEY":
                continue
            value = value.strip()
            # Unwrap "value" / 'value' so the quotes are not sent as part of the key.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
                value = value[1:-1]
            # A blank assignment is not a usable key; keep looking instead of
            # returning an empty string that would only fail later at the API.
            if value:
                return value

    raise SystemExit(
        "ARCRUN_API_KEY 未設定。export ARCRUN_API_KEY=ak_... 或加到 polaris/mira/.env"
    )
|
||
|
||
|
||
def kbdb_upsert(api_key: str, payload: dict, dry_run: bool) -> dict:
    """POST one block to the KBDB upsert endpoint (``KBDB_UPSERT_URL``).

    ``payload["page_name"]`` is the idempotency key of the upsert.

    Args:
        api_key: Not read by this function; the key is expected to travel
            inside ``payload``. Kept so existing callers keep working.
        payload: JSON-serialisable description of the block.
        dry_run: When true, no request is made and the return value only
            names the ``page_name`` that would have been written.

    Returns:
        The decoded JSON object on success, or ``{"error": "..."}`` when the
        request fails: HTTP error status, network failure / timeout, or a
        response body that is not a JSON object. Those failures never raise,
        so a single bad item cannot abort the whole sync.
    """
    if dry_run:
        return {"dry_run": True, "would_upsert": payload.get("page_name")}

    data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    req = urllib.request.Request(
        KBDB_UPSERT_URL,
        data=data,
        headers={
            "Content-Type": "application/json",
            "User-Agent": USER_AGENT,
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")
        return {"error": f"HTTP {e.code}: {body[:200]}"}
    except OSError as e:
        # URLError, socket timeouts and connection resets are all OSError
        # subclasses; HTTPError is handled above because it is one as well.
        return {"error": f"network error: {e}"}

    try:
        result = json.loads(raw.decode("utf-8"))
    except ValueError as e:
        # JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        return {"error": f"invalid JSON response: {e}"}
    if not isinstance(result, dict):
        # Callers use dict methods on the result; report other shapes as errors.
        return {"error": f"unexpected response type: {type(result).__name__}"}
    return result
|
||
|
||
|
||
def sync_examples(api_key: str, dry_run: bool) -> tuple[int, int]:
    """Mirror every ``EXAMPLES_DIR/{slug}/`` directory into KBDB.

    For each sub-directory (processed in sorted order):

    * ``workflow.yaml`` is required and becomes the block ``content``;
      directories without it are skipped with a warning and not counted.
    * ``description.md`` is optional (defaults to an empty string).
    * ``tags.json`` is optional (defaults to ``[]``) and must hold a JSON
      array. A malformed or non-array file counts that example as failed
      and the sync moves on to the next one instead of crashing.

    Args:
        api_key: Key placed in each payload and passed to ``kbdb_upsert``.
        dry_run: Forwarded to ``kbdb_upsert``.

    Returns:
        ``(ok, fail)`` counts. ``(0, 0)`` if ``EXAMPLES_DIR`` does not exist.
    """
    if not EXAMPLES_DIR.exists():
        print(f"⚠️ {EXAMPLES_DIR} 不存在,跳過 examples 同步")
        return 0, 0

    ok, fail = 0, 0
    for slug_dir in sorted(EXAMPLES_DIR.iterdir()):
        if not slug_dir.is_dir():
            continue
        slug = slug_dir.name
        workflow_yaml = slug_dir / "workflow.yaml"
        description_md = slug_dir / "description.md"
        tags_json = slug_dir / "tags.json"

        if not workflow_yaml.exists():
            print(f" ⚠️ {slug}: 缺 workflow.yaml,跳過")
            continue

        yaml_content = workflow_yaml.read_text(encoding="utf-8")
        description = (
            description_md.read_text(encoding="utf-8") if description_md.exists() else ""
        )

        tags = []
        if tags_json.exists():
            try:
                tags = json.loads(tags_json.read_text(encoding="utf-8"))
            except ValueError as e:
                # Covers json.JSONDecodeError and UnicodeDecodeError.
                print(f" ❌ {slug}: invalid tags.json: {e}")
                fail += 1
                continue
            if not isinstance(tags, list):
                # A dict or string would otherwise be unpacked into tags_json below.
                print(f" ❌ {slug}: tags.json must contain a JSON array")
                fail += 1
                continue

        # content carries the workflow YAML itself;
        # metadata_json carries the structured description + tags.
        payload = {
            "api_key": api_key,
            "type": "workflow-example",
            "page_name": f"example-{slug}",
            "source": SOURCE,
            "user_id": USER_ID,
            "content": yaml_content,
            "metadata_json": json.dumps(
                {
                    "slug": slug,
                    "description_md": description,
                    "tags": tags,
                },
                ensure_ascii=False,
            ),
            "tags_json": json.dumps(
                ["workflow-example", f"example:{slug}", *tags],
                ensure_ascii=False,
            ),
        }

        result = kbdb_upsert(api_key, payload, dry_run)
        if "error" in result:
            print(f" ❌ {slug}: {result['error']}")
            fail += 1
        else:
            data = result.get("data")
            action = data.get("action", "?") if isinstance(data, dict) else "?"
            print(f" ✅ {slug} → {action}")
            ok += 1

    return ok, fail
|
||
|
||
|
||
def sync_skills(api_key: str, dry_run: bool) -> tuple[int, int]:
    """Mirror every ``SKILLS_DIR/*.md`` file (except ``README.md``) into KBDB.

    The file stem is the slug and the whole file is the block ``content``.
    The title stored in ``metadata_json`` is the text of the first line that,
    once stripped, starts with ``"# "``; without such a line the slug is used.

    Args:
        api_key: Key placed in each payload and passed to ``kbdb_upsert``.
        dry_run: Forwarded to ``kbdb_upsert``.

    Returns:
        ``(ok, fail)`` counts. ``(0, 0)`` if ``SKILLS_DIR`` does not exist.
    """
    if not SKILLS_DIR.exists():
        print(f"⚠️ {SKILLS_DIR} 不存在,跳過 skills 同步")
        return 0, 0

    succeeded = 0
    failed = 0
    skill_paths = (
        path for path in sorted(SKILLS_DIR.glob("*.md")) if path.name != "README.md"
    )
    for skill_path in skill_paths:
        slug = skill_path.stem
        markdown = skill_path.read_text(encoding="utf-8")

        # First "# heading" line wins; otherwise fall back to the slug.
        stripped_lines = (text.strip() for text in markdown.splitlines())
        title = next(
            (text[2:].strip() for text in stripped_lines if text.startswith("# ")),
            slug,
        )

        metadata = json.dumps({"slug": slug, "title": title}, ensure_ascii=False)
        tag_list = json.dumps(["agent-skill", f"skill:{slug}"], ensure_ascii=False)
        payload = {
            "api_key": api_key,
            "type": "agent-skill",
            "page_name": f"skill-{slug}",
            "source": SOURCE,
            "user_id": USER_ID,
            "content": markdown,
            "metadata_json": metadata,
            "tags_json": tag_list,
        }

        response = kbdb_upsert(api_key, payload, dry_run)
        if "error" not in response:
            data = response.get("data")
            action = data.get("action", "?") if isinstance(data, dict) else "?"
            print(f" ✅ {slug} → {action}")
            succeeded += 1
            continue

        print(f" ❌ {slug}: {response['error']}")
        failed += 1

    return succeeded, failed
|
||
|
||
|
||
def main():
    """Command-line entry point: sync examples and/or skills into KBDB.

    Flags:
        --dry-run        list what would be written without calling KBDB.
        --examples-only  sync only the examples.
        --skills-only    sync only the skills.

    ``--examples-only`` and ``--skills-only`` are mutually exclusive. Passing
    both would skip both phases and still report success, so argparse rejects
    the combination with a usage error (exit status 2).

    Exits with status 1 when at least one item failed to sync.
    """
    p = argparse.ArgumentParser(description="Sync registry/examples + skills → KBDB")
    p.add_argument("--dry-run", action="store_true", help="只 list 不寫")
    scope = p.add_mutually_exclusive_group()
    scope.add_argument("--examples-only", action="store_true")
    scope.add_argument("--skills-only", action="store_true")
    args = p.parse_args()

    api_key = get_api_key()
    # Only a short prefix and the length are shown, never the full key.
    print(f"🔑 api_key: {api_key[:12]}... (len={len(api_key)})")
    print(f"📂 root: {ARCRUN_ROOT}")
    if args.dry_run:
        print("(dry-run,不實際寫 KBDB)")
    print()

    examples_ok = examples_fail = 0
    skills_ok = skills_fail = 0

    if not args.skills_only:
        print("📋 Syncing examples → type=workflow-example ...")
        examples_ok, examples_fail = sync_examples(api_key, args.dry_run)
        print(f" examples: {examples_ok} ok / {examples_fail} fail\n")

    if not args.examples_only:
        print("📋 Syncing skills → type=agent-skill ...")
        skills_ok, skills_fail = sync_skills(api_key, args.dry_run)
        print(f" skills: {skills_ok} ok / {skills_fail} fail\n")

    total_fail = examples_fail + skills_fail
    if total_fail > 0:
        print(f"⚠️ 共 {total_fail} 個項目失敗")
        sys.exit(1)
    print(f"✅ Done. examples={examples_ok}, skills={skills_ok}")


if __name__ == "__main__":
    main()
|