#!/usr/bin/env node
// Thin ops CLI (T5.2) — a human manually triggers re-extraction. No query MCP is
// bundled (nobody "asks" an ambient feeder anything).
//
// Two modes:
//   ingest refresh URI              re-extract a single source via the deployed Worker's /refresh
//   ingest pull OWNER/REPO [root]   local dry-run: pull + list the envelopes that would be sent (no POST)
//
// Configuration comes from env:
//   KBDB_INGEST_URL   base URL of the deployed ingest Worker (used by refresh)
//   GRAPH_BASE_URL    graph write endpoint (documented for `pull --post`;
//                     NOTE(review): no --post handling is visible in this file)
//   GITHUB_TOKEN      for pulling private repos (may be empty for public ones)
//
// Iron rule: the CLI never touches storage; refresh goes through the Worker and
// pull --post goes through the graph write endpoint. Triggering is manual (no schedule).

import process from 'node:process';

const [, , cmd, arg, arg2] = process.argv;

/**
 * Hex-encoded SHA-256 digest of a string (encoded as UTF-8 before hashing).
 * @param {string} text
 * @returns {Promise<string>} lowercase hex digest
 */
async function sha256hex(text) {
  const bytes = new TextEncoder().encode(text);
  const digest = await crypto.subtle.digest('SHA-256', bytes);
  return Array.from(new Uint8Array(digest), (b) => b.toString(16).padStart(2, '0')).join('');
}

/**
 * Request headers for the GitHub REST API.
 * Adds a Bearer token only when GITHUB_TOKEN is set in the environment.
 * @returns {Record<string, string>}
 */
function ghHeaders() {
  const headers = { Accept: 'application/vnd.github+json', 'User-Agent': 'kbdb-ingest-cli' };
  if (process.env.GITHUB_TOKEN) headers.Authorization = `Bearer ${process.env.GITHUB_TOKEN}`;
  return headers;
}

/**
 * Fetch one file through the GitHub contents API.
 * @param {string} owner
 * @param {string} repo
 * @param {string} path repository-relative path, `/`-separated
 * @returns {Promise<{ text: string, commit: string }>} decoded text plus the `sha` field of the response
 * @throws {Error} when GitHub answers with a non-2xx status
 */
async function ghGetFile(owner, repo, path) {
  // Encode each segment so spaces, '#', '?' or '%' in a file name stay part of
  // the path instead of being parsed as URL syntax; '/' separators are kept.
  const encodedPath = path
    .split('/')
    .map((segment) => encodeURIComponent(segment))
    .join('/');
  const url = `https://api.github.com/repos/${owner}/${repo}/contents/${encodedPath}`;
  const res = await fetch(url, { headers: ghHeaders() });
  if (!res.ok) throw new Error(`github ${owner}/${repo}@${path}: ${res.status}`);
  const body = await res.json();
  const text = body.encoding === 'base64'
    ? Buffer.from(body.content, 'base64').toString('utf-8')
    : body.content;
  // NOTE(review): `body.sha` is presumably the file's blob SHA rather than a
  // commit SHA — confirm what consumers of `commit` expect.
  return { text, commit: body.sha };
}

/**
 * List every `.md` blob reachable from HEAD, optionally restricted to `root`.
 * @param {string} owner
 * @param {string} repo
 * @param {string} [root=''] directory (or exact file path); leading/trailing slashes are ignored
 * @returns {Promise<string[]>} repository-relative paths
 * @throws {Error} when GitHub answers with a non-2xx status
 */
async function ghListMarkdown(owner, repo, root = '') {
  const res = await fetch(
    `https://api.github.com/repos/${owner}/${repo}/git/trees/HEAD?recursive=1`,
    { headers: ghHeaders() },
  );
  if (!res.ok) throw new Error(`github list ${owner}/${repo}: ${res.status}`);
  const body = await res.json();
  const prefix = root.replace(/^\/+|\/+$/g, '');
  const isUnderRoot = (p) => (prefix ? p === prefix || p.startsWith(prefix + '/') : true);
  return (body.tree || [])
    .filter((entry) => entry.type === 'blob' && entry.path.endsWith('.md'))
    .map((entry) => entry.path)
    .filter(isUnderRoot);
}

// Entity bullet: `- **Name** (optional aside) — optional description`.
// Group 1 = name, group 2 = aside inside ASCII or fullwidth parentheses,
// group 3 = text after the dash (used as the gloss).
const ENTITY_LINE = /^-\s*\*\*(.+?)\*\*\s*(?:[(\uFF08](.+?)[)\uFF09])?\s*(?:[—-]\s*(.+))?$/;

// Relation line: `subject >> predicate >> object` (a leading "- " is stripped first).
const TRIPLET_LINE = /^(.+?)\s*>>\s*(.+?)\s*>>\s*(.+?)$/;

/** Strip wiki-link brackets and bold markers from a subject/object. */
function stripMarkup(s) {
  return s.replace(/\[\[|\]\]|\*\*/g, '').trim();
}

/**
 * Minimal harvest (intended to mirror src/lib/harvest.ts; used by the CLI
 * dry-run so the TypeScript source is not imported).
 *
 * Reads optional front matter (`gloss:`), the first `# ` heading as the title
 * node, bullets under the `## 實體` section as nodes, and `a >> b >> c` lines
 * under the `## 關聯` section as triplets.
 *
 * @param {string} md markdown source
 * @returns {{ nodes: Array<{ name: string, gloss: (string|undefined), embed: boolean }>,
 *             triplets: Array<{ subject: string, predicate: string, object: string, predicate_embed: boolean }> }}
 */
function harvest(md) {
  // `\r?` lets front matter written with CRLF line endings match as well.
  const frontMatter = /^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/.exec(md);
  const body = frontMatter ? frontMatter[2] : md;
  const gloss = frontMatter
    ? /^gloss:\s*(.+)$/m.exec(frontMatter[1])?.[1].trim()
    : undefined;
  const title = /^#\s+(.+)$/m.exec(body)?.[1]?.trim();

  // Text of a level-2 section: from the line after its heading up to the next
  // `## ` heading or the end of input. The end-of-input test is `(?![\s\S])`
  // rather than `$`, because with the `m` flag (needed for `^`) `$` matches at
  // every line end and would cut the section off after its first line.
  const sec = (heading) =>
    new RegExp(`^##\\s+${heading}[^\\n]*\\n([\\s\\S]*?)(?=\\n##\\s|(?![\\s\\S]))`, 'm')
      .exec(body)?.[1] || '';

  const nodes = [];
  if (title) nodes.push({ name: title, gloss, embed: true });
  for (const line of sec('實體').split('\n')) {
    const m = ENTITY_LINE.exec(line.trim());
    if (m) nodes.push({ name: m[1].trim(), gloss: m[3]?.trim() || undefined, embed: true });
  }

  const triplets = [];
  for (const line of sec('關聯').split('\n')) {
    const m = TRIPLET_LINE.exec(line.replace(/^-\s*/, '').trim());
    if (!m) continue;
    triplets.push({
      subject: stripMarkup(m[1]),
      predicate: m[2].trim(),
      object: stripMarkup(m[3]),
      predicate_embed: true,
    });
  }

  return { nodes, triplets };
}

/**
 * Ask the deployed ingest Worker to re-extract one source and print its JSON reply.
 * @param {string} uri source URI forwarded as `{ uri }` to `/refresh`
 * @throws {Error} when KBDB_INGEST_URL is unset or the Worker answers with a non-2xx status
 */
async function doRefresh(uri) {
  const base = process.env.KBDB_INGEST_URL;
  if (!base) throw new Error('KBDB_INGEST_URL 未設(指向已部署的 ingest Worker)');
  const res = await fetch(base.replace(/\/$/, '') + '/refresh', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ uri }),
  });
  if (!res.ok) {
    // Report the failure with a non-zero exit (like the GitHub helpers do)
    // instead of printing an error body and exiting 0. The body may not be
    // JSON, so it is read as text and truncated.
    const detail = (await res.text()).trim().slice(0, 500);
    throw new Error(`refresh ${uri}: ${res.status}${detail ? ` ${detail}` : ''}`);
  }
  console.log(JSON.stringify(await res.json(), null, 2));
}

/**
 * Local dry-run: list markdown files, harvest each one, and print the envelopes
 * that would be sent. Nothing is POSTed. Progress goes to stderr, the envelope
 * JSON to stdout.
 * @param {string} ownerRepo `owner/repo`
 * @param {string} [root] optional path prefix to restrict the listing
 * @throws {Error} when `ownerRepo` is malformed or a GitHub request fails
 */
async function doPull(ownerRepo, root) {
  const [owner, repo] = ownerRepo.split('/');
  if (!owner || !repo) throw new Error('用法:ingest pull OWNER/REPO [root]');
  const paths = await ghListMarkdown(owner, repo, root || '');
  console.error(`[ingest] ${owner}/${repo}: ${paths.length} 個 MD`);

  const envelopes = [];
  // Files are fetched one at a time, in listing order.
  for (const path of paths) {
    const { text, commit } = await ghGetFile(owner, repo, path);
    const { nodes, triplets } = harvest(text);
    // Nothing harvested (not a template card) → skipped in the dry-run
    // (the CLI does not run extraction).
    if (!triplets.length) continue;
    envelopes.push({
      source: { uri: `github:${owner}/${repo}@${path}`, content_hash: await sha256hex(text), commit },
      extractor: { model: 'local-harvest', tier: 'shallow' },
      nodes,
      triplets,
    });
  }

  const tripletTotal = envelopes.reduce((n, e) => n + e.triplets.length, 0);
  console.error(`[ingest] 採取出 ${envelopes.length} 個 envelope(共 ${tripletTotal} 三元組)`);
  console.log(JSON.stringify(envelopes, null, 2));
}

// Entry point: dispatch on the sub-command. Exit code 2 = bad usage, 1 = runtime failure.
try {
  if (cmd === 'refresh' && arg) await doRefresh(arg);
  else if (cmd === 'pull' && arg) await doPull(arg, arg2);
  else {
    console.error('用法:\n ingest refresh URI\n ingest pull OWNER/REPO [root]');
    process.exit(2);
  }
} catch (e) {
  console.error('[ingest] 錯誤:', e.message);
  process.exit(1);
}