fix(ingest): receiver Zod 追上 contract — 補向量化打標欄位 (issue #1 補對齊)

契約漂移修補:T3 的 strict Zod 鏡射舊 contract,ingest 照新 contract(ingest#1
升格)送向量化打標欄位會被 .strict() 擋成 422。方向 A:顯式加合法新欄位、保留
strict。

- 同步 contracts/ingest-candidate.json 副本到頂層單一真相源(mira-dissolve)。
- NodeSchema 加 id?/aliases?/embed?;EdgeSchema 加 predicate_embed?。strict() 保留
  → bridge_score/clusters 等 graph 領域禁送欄位仍 422。
- 落地:predicate_embed 透傳進 triplet slot;node 打標(embed/gloss/aliases)存進
  entity slot,供 base/KBDB embed 模組讀標執行(graph 不算向量,鐵律一致)。
- id 作為 node 去重鍵:同一個 id 的 node 被多次帶入(同一張卡被多條邊指到)時,只存一筆 entity。
- persistNodes 拆成獨立 action(triplet-ingest.ts 回到 95 行,守樂高 100 行限制)。
- 測試 +4:帶向量化欄位通過、bridge_score/clusters 仍 422、同 id 去重。
  vitest 23 passed。零 SQL / 無 D1·Vectorize·AI 綁定 / dry-run 乾淨。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-26 20:00:09 +08:00
parent 7a29dee357
commit e7a681a989
6 changed files with 168 additions and 7 deletions
+72
View File
@@ -85,6 +85,78 @@ describe('ingestEnvelope — 污染 envelope 422(契約 strict', () => {
});
});
/**
 * Vectorization-tagging fields introduced by the upgraded contract (ingest#1).
 *
 * Covers three things:
 *  - the newly allowed optional fields (`id`, `aliases`, `embed` on nodes and
 *    `predicate_embed` on triplets) pass schema validation and are ingested;
 *  - nodes sharing one `id` end up as a single entity record;
 *  - fields that are still forbidden (`bridge_score`, `clusters`) keep being
 *    rejected by the schema.
 */
describe('ingestEnvelope — 向量化打標欄位(contract 升格,ingest#1', () => {
  /** Asserts that the envelope schema refuses the given payload. */
  const expectSchemaRejects = (payload: unknown): void => {
    const verdict = IngestEnvelopeSchema.safeParse(payload);
    expect(verdict.success).toBe(false);
  };

  it('帶 nodes[].embed/id/aliases + triplets[].predicate_embed → 通過(非 422', async () => {
    const client = mockClient();
    const envelope: IngestEnvelope = {
      source: { uri: 'github:uncle6me-web/wiki@v.md', content_hash: 'hv' },
      extractor: { model: 'claude-sonnet-4-6', tier: 'deep' },
      nodes: [
        {
          name: 'Graph RAG',
          id: 'graph-rag.md',
          aliases: ['圖譜 RAG'],
          gloss: '關係遍歷檢索',
          embed: true,
        },
        { name: '黃仁勳', id: '黃仁勳', aliases: ['Jensen Huang'], embed: false },
      ],
      triplets: [
        { subject: 'Graph RAG', predicate: '參考', object: '黃仁勳', predicate_embed: true },
      ],
    };

    // Schema gate first: the new, legal fields must not be rejected.
    const parsed = IngestEnvelopeSchema.safeParse(envelope);
    expect(parsed.success).toBe(true);

    // Persistence: exactly one triplet is ingested and can be queried back.
    const outcome = await ingestEnvelope(client, envelope);
    expect(outcome).toEqual({ skipped: false, ingested: 1, deprecated: 0 });
    const queried = await queryTriplets(client, {});
    expect(queried.triplets.length).toBe(1);

    // Node tags land on entity records (gloss / aliases / embed flag are
    // passed through so a downstream reader can pick them up).
    const entityRecords = await client.listRecordsByTemplate('entity');
    const byCanonical = (canonical: string) =>
      entityRecords.find((record) => record.values.canonical === canonical);

    const graphRag = byCanonical('Graph RAG');
    expect(graphRag?.values.node_id).toBe('graph-rag.md');
    expect(graphRag?.values.embed).toBe('true');
    const storedAliases = JSON.parse(graphRag!.values.aliases_json!);
    expect(storedAliases).toEqual(['圖譜 RAG']);
    expect(graphRag?.values.gloss).toBe('關係遍歷檢索');

    // An explicit `false` must be passed through as well, not dropped.
    const jensenEntity = byCanonical('黃仁勳');
    expect(jensenEntity?.values.embed).toBe('false');
  });

  it('同 id 的 node 被多次帶入 → 去重,只存一筆 entity(一卡一 node', async () => {
    const client = mockClient();
    const sharedId = 'graph-rag.md';
    const envelope: IngestEnvelope = {
      source: { uri: 'github:uncle6me-web/wiki@dup.md', content_hash: 'hd' },
      extractor: { model: 'm', tier: 'deep' },
      nodes: [
        { name: 'Graph RAG', id: sharedId },
        // Same id → same card; it must not be created a second time.
        { name: 'Graph RAG(別名)', id: sharedId },
      ],
      triplets: [{ subject: 'Graph RAG', predicate: 'r', object: 'X' }],
    };

    await ingestEnvelope(client, envelope);

    const entityRecords = await client.listRecordsByTemplate('entity');
    const sameCard = entityRecords.filter((record) => record.values.node_id === sharedId);
    expect(sameCard.length).toBe(1);
  });

  it('帶真正 graph 領域禁送欄位(bridge_score)→ 仍 422', () => {
    // A legal new field (`predicate_embed`) sits next to a forbidden one
    // (`bridge_score`) on the same triplet; the payload must still be refused.
    expectSchemaRejects({
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      nodes: [{ name: 'A', embed: true }],
      triplets: [
        { subject: 'A', predicate: 'r', object: 'B', predicate_embed: true, bridge_score: 3 },
      ],
    });
  });

  it('node 帶 graph 領域禁送欄位(clusters)→ 仍 422', () => {
    // Same idea at node level: `embed` is allowed, `clusters` is not.
    expectSchemaRejects({
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      nodes: [{ name: 'A', embed: true, clusters: ['c1'] }],
      triplets: [{ subject: 'A', predicate: 'r', object: 'B' }],
    });
  });
});
describe('ingestEnvelope — rollback(翻回 status', () => {
it('把 deprecated 翻回 active 後,active 查詢重新見到它', async () => {
const c = mockClient();