feat(ingest): T0.5–T5 純餵食器管線實作(issue #2)

ingest 全管線(採取優先、extract fallback、跨庫織網、POST envelope):
- T0.5 骨架:Hono + zod-openapi,無 D1/Vectorize/AI 綁定(不碰儲存鐵律)
- T1 SourceAdapter:GitHub runtime API 拉 + per-file sha256 content-hash + /refresh 受理端
- T2 採取(路徑 A 優先):harvest template 1.8.0+ 卡(gloss/實體/typed-edge)
- T3 extract(路徑 B fallback):LlmCaller 可選模型 + JSON-fail 升級閘 + 端點對齊硬自檢護欄;第一版不 embed(只打標)
- T4 跨庫織網(主職):匯總多 repo → 偵測跨庫橋/異見,不算 bridge_score(graph 領域)
- T5 輸出:buildEnvelope strict + 顯式禁送欄位自檢;graph-client 純 POST(cherry-pick _kbdb_client.py 改不碰 base);薄 ops CLI(不帶查詢 MCP)

envelope 對齊 full contract(embed/id/aliases/predicate_embed);同步 contract 向量化欄位升格。

gate:vitest 28 passed / tsc clean / wrangler dry-run 乾淨(只 env-var 綁定)。
端到端 ingest→graph:graph receiver 已補對齊 → 待 ingest 部署 + GRAPH_BASE_URL → 待部署驗,未假綠。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-26 20:40:53 +08:00
parent dffefdcdc2
commit 16ad1cb208
24 changed files with 4003 additions and 28 deletions
+45
View File
@@ -0,0 +1,45 @@
import { describe, it, expect } from 'vitest';
import { weave, flattenForPost, type RepoEnvelopes } from '../src/lib/weave';
import type { Envelope } from '../src/types';
/**
 * Test fixture builder: assembles a minimal envelope for one source document.
 *
 * @param uri      Source URI; it is reused verbatim as the `content_hash`.
 * @param nodes    Node names; each becomes a node entry with `embed: true`.
 * @param triplets `[subject, predicate, object]` tuples, one per triplet entry.
 * @returns An envelope whose extractor is fixed to `local-harvest` / `shallow`.
 */
function env(uri: string, nodes: string[], triplets: Array<[string, string, string]>): Envelope {
  const envelope: Envelope = {
    // The fixture has no real hash, so the URI doubles as the content hash.
    source: { uri, content_hash: uri },
    extractor: { model: 'local-harvest', tier: 'shallow' },
    nodes: nodes.map((name) => ({ name, embed: true })),
    triplets: triplets.map((triple) => {
      const [subject, predicate, object] = triple;
      return { subject, predicate, object };
    }),
  };
  return envelope;
}
// Shared fixture: two repos, each contributing one envelope. Both envelopes
// name the node 'Arcrun', and each carries exactly one triplet.
const repos: RepoEnvelopes[] = [
  {
    repo: 'o/repoA',
    envelopes: [
      env('github:o/repoA@x.md', ['Arcrun', '餵食器'], [['Arcrun', '包含', '餵食器']]),
    ],
  },
  {
    repo: 'o/repoB',
    envelopes: [
      env('github:o/repoB@y.md', ['Arcrun', '圖層'], [['Arcrun', '依賴', '圖層']]),
    ],
  },
];
// Specs for the cross-repo weave step: bridge detection, divergence detection,
// envelope flattening, and the rule that ingest never emits a bridge score.
describe('weave', () => {
  it('偵測跨庫橋(同名節點跨 ≥2 repo)', () => {
    const r = weave(repos);
    // 'Arcrun' is the only node name present in both fixture repos.
    const bridge = r.bridges.find((b) => b.node === 'Arcrun');
    expect(bridge?.repos).toEqual(['o/repoA', 'o/repoB']);
    // One triplet per fixture envelope.
    expect(r.totalTriplets).toBe(2);
  });

  it('偵測跨庫異見(同 s/o 對、不同謂詞跨 repo', () => {
    // Same subject/object pair (X, Y) in both repos, but a different predicate.
    const diverge: RepoEnvelopes[] = [
      { repo: 'o/repoA', envelopes: [env('github:o/repoA@x.md', ['X', 'Y'], [['X', '支持', 'Y']])] },
      { repo: 'o/repoB', envelopes: [env('github:o/repoB@y.md', ['X', 'Y'], [['X', '反對', 'Y']])] },
    ];
    const r = weave(diverge);
    expect(r.divergences.length).toBe(1);
    // Sorted so the assertion does not depend on the order repos are reported in.
    expect(r.divergences[0].predicatesByRepo.map((p) => p.predicate).sort()).toEqual(['反對', '支持']);
  });

  it('flattenForPost 攤平所有 envelope(順序穩定)', () => {
    const flattened = flattenForPost(repos);
    expect(flattened.length).toBe(2);
    // The title promises stable ordering: a second call on the same input
    // must produce an equal result, element for element.
    expect(flattenForPost(repos)).toEqual(flattened);
  });

  it('ingest 不算 bridge_score(橋只標 repos,無分數欄位)', () => {
    const r = weave(repos);
    // Require at least one bridge so the check below cannot run on nothing.
    expect(r.bridges.length).toBeGreaterThan(0);
    // Inspect every bridge, not just the first one.
    for (const bridge of r.bridges) {
      expect(bridge).not.toHaveProperty('bridge_score');
    }
  });
});