feat(ingest): T0.5–T5 純餵食器管線實作(issue #2)

ingest 全管線(採取優先、extract fallback、跨庫織網、POST envelope):
- T0.5 骨架:Hono + zod-openapi,無 D1/Vectorize/AI 綁定(不碰儲存鐵律)
- T1 SourceAdapter:GitHub runtime API 拉 + per-file sha256 content-hash + /refresh 受理端
- T2 採取(路徑 A 優先):harvest template 1.8.0+ 卡(gloss/實體/typed-edge)
- T3 extract(路徑 B fallback):LlmCaller 可選模型 + JSON-fail 升級閘 + 端點對齊硬自檢護欄;第一版不 embed(只打標)
- T4 跨庫織網(主職):匯總多 repo → 偵測跨庫橋/異見,不算 bridge_score(graph 領域)
- T5 輸出:buildEnvelope strict + 顯式禁送欄位自檢;graph-client 純 POST(cherry-pick _kbdb_client.py 改不碰 base);薄 ops CLI(不帶查詢 MCP)

envelope 對齊 full contract(embed/id/aliases/predicate_embed);同步 contract 向量化欄位升格。

gate:vitest 28 passed / tsc clean / wrangler dry-run 乾淨(只 env-var 綁定)。
端到端 ingest→graph:graph receiver 已補對齊 → 待 ingest 部署 + GRAPH_BASE_URL → 待部署驗,未假綠。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-26 20:40:53 +08:00
parent dffefdcdc2
commit 16ad1cb208
24 changed files with 4003 additions and 28 deletions
+47
View File
@@ -0,0 +1,47 @@
import { describe, it, expect } from 'vitest';
import { buildEnvelope } from '../src/lib/envelope';
/**
 * Shared fixture: the envelope input every test case below starts from.
 * Individual cases spread it and then add `nodes` or override `triplets`.
 */
const base = {
  source: {
    uri: 'github:o/r@a.md',
    content_hash: 'abc',
  },
  extractor: {
    model: 'local-harvest',
    // `as const` keeps the literal type 'shallow' instead of widening to string.
    tier: 'shallow' as const,
  },
  // A single triplet; the empty-triplets case below overrides this with [].
  triplets: [
    {
      subject: 'A',
      predicate: 'p',
      object: 'B',
      predicate_embed: true,
    },
  ],
};
/**
 * Contract tests for `buildEnvelope`.
 *
 * Covered behavior:
 *  - a valid input (including the vectorization flags `embed`, `id`, `aliases`,
 *    `predicate_embed`) is built and its fields are readable on the result;
 *  - keys that must not be sent (`bridge_score`, `clusters` on nodes,
 *    `subject_entity_type` on triplets) make `buildEnvelope` throw locally;
 *  - an empty `triplets` array makes `buildEnvelope` throw.
 *
 * Every negative case is built as "known-good control + exactly one forbidden
 * key", so a throw can only be attributed to that key and not to some other
 * field that happens to be missing from the input.
 */
describe('buildEnvelope', () => {
  it('組合法 envelope(含向量化打標欄位)', () => {
    const env = buildEnvelope({
      ...base,
      nodes: [{ name: 'A', gloss: 'a', aliases: ['a2'], embed: true, id: 'A' }],
    });

    expect(env.source.uri).toBe('github:o/r@a.md');
    // Optional-chain the element access so a missing element surfaces as a
    // failed expectation instead of a TypeError.
    expect(env.nodes?.[0]?.embed).toBe(true);
    expect(env.nodes?.[0]?.id).toBe('A');
    expect(env.triplets[0]?.predicate_embed).toBe(true);
  });

  it('node 帶禁送欄位(bridge_score)→ strict throw(本地提早攔,不等 graph 422)', () => {
    const validNode = { name: 'A', embed: true };

    // Control: the node on its own is accepted.
    expect(() => buildEnvelope({ ...base, nodes: [validNode] })).not.toThrow();

    // Same node plus the forbidden key; `as any` is deliberate, the input is
    // intentionally outside the declared input type.
    expect(() =>
      buildEnvelope({ ...base, nodes: [{ ...validNode, bridge_score: 0.5 } as any] }),
    ).toThrow();
  });

  it('node 帶 graph 領域 record id(非去重 id)以外的禁送鍵 → strict throw', () => {
    // The contract allows nodes[].id (the dedup key), but `clusters` belongs to
    // the graph domain and must be rejected by the strict check.
    const validNode = { name: 'A', id: 'A', embed: true };

    expect(() => buildEnvelope({ ...base, nodes: [validNode] })).not.toThrow();
    expect(() =>
      buildEnvelope({ ...base, nodes: [{ ...validNode, clusters: ['c'] } as any] }),
    ).toThrow();
  });

  it('禁送邊上 entity_type → strict throw', () => {
    const validTriplet = { subject: 'A', predicate: 'p', object: 'B', predicate_embed: true };

    // Control: the triplet without the forbidden key is accepted.
    expect(() => buildEnvelope({ ...base, triplets: [validTriplet] })).not.toThrow();

    expect(() =>
      buildEnvelope({
        ...base,
        triplets: [{ ...validTriplet, subject_entity_type: 'person' } as any],
      }),
    ).toThrow();
  });

  it('無 triplets → throw(契約 min 1)', () => {
    expect(() => buildEnvelope({ ...base, triplets: [] })).toThrow();
  });
});