Files
kbdb-graph-plugin/tests/triplet-ingest.test.ts
uncle6 13db97bb54 fix(ingest): receiver Zod 追上 contract — 補向量化打標欄位 (issue #1 補對齊) (#3)
契約漂移修補:T3 的 strict Zod 鏡射舊 contract,ingest 照新 contract(ingest#1
升格)送向量化打標欄位會被 .strict() 擋成 422。方向 A:顯式加合法新欄位、保留
strict。

- 同步 contracts/ingest-candidate.json 副本到頂層單一真相源(mira-dissolve)。
- NodeSchema 加 id?/aliases?/embed?;EdgeSchema 加 predicate_embed?。strict() 保留
  → bridge_score/clusters 等 graph 領域禁送欄位仍 422。
- 落地:predicate_embed 透傳進 triplet slot;node 打標(embed/gloss/aliases)存進
  entity slot,供 base/KBDB embed 模組讀標執行(graph 不算向量,鐵律一致)。
- id 作 node 去重鍵:同卡多邊指到只存一筆 entity。
- persistNodes 拆成獨立 action(triplet-ingest.ts 回到 95 行,守樂高 100 行限制)。
- 測試 +4:帶向量化欄位通過、bridge_score/clusters 仍 422、同 id 去重。
  vitest 23 passed。零 SQL / 無 D1·Vectorize·AI 綁定 / dry-run 乾淨。

Co-authored-by: richblack <leo21c@gmail.com>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 20:28:13 +08:00

175 lines
7.8 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// ingest 寫入端 — 走 mock KbdbClient(API-as-Wall),零 SQL、不打網路。
// 覆蓋 T3.4 五案:正常 envelope / 同 hash no-op / 新 hash deprecate / 污染 envelope 422 / rollback。
import { describe, it, expect } from 'vitest';
import { ingestEnvelope, IngestEnvelopeSchema, type IngestEnvelope } from '../src/actions/triplet-ingest';
import { queryTriplets } from '../src/actions/triplet-crud';
import { mockClient } from './mock-client';
/**
 * Builds an ingest envelope fixture around a fixed source URI and extractor.
 *
 * @param hash - Value stored in `source.content_hash`.
 * @param triplets - Triplets carried by the envelope; passed through unchanged.
 * @returns An envelope whose only varying parts are the content hash and the triplets.
 */
function envelope(hash: string, triplets: IngestEnvelope['triplets']): IngestEnvelope {
  // Annotated with the envelope's own member types so literals stay contextually typed.
  const source: IngestEnvelope['source'] = {
    uri: 'github:uncle6me-web/wiki@a.md',
    content_hash: hash,
  };
  const extractor: IngestEnvelope['extractor'] = {
    model: 'claude-sonnet-4-6',
    tier: 'deep',
  };
  return { source, extractor, triplets };
}
// Happy path: a fresh envelope is appended in full and tagged with its source.
describe('ingestEnvelope — 正常 envelope', () => {
  it('append 全部 triplet 為 active,記 source_uri/content_hash', async () => {
    const client = mockClient();
    const batch: IngestEnvelope['triplets'] = [
      { subject: 'A', predicate: 'rel', object: 'B' },
      { subject: 'B', predicate: 'rel', object: 'C' },
    ];

    const outcome = await ingestEnvelope(client, envelope('h1', batch));
    expect(outcome).toEqual({ skipped: false, ingested: 2, deprecated: 0 });

    // Both triplets are stored with status "active".
    const stored = (await queryTriplets(client, {})).triplets;
    expect(stored.length).toBe(2);
    const allActive = stored.every((row) => row.status === 'active');
    expect(allActive).toBe(true);

    // The envelope's source uri and content hash are recorded on the triplet.
    expect(stored[0].source_uri).toBe('github:uncle6me-web/wiki@a.md');
    expect(stored[0].content_hash).toBe('h1');
  });
});
// Re-sending the same uri + content hash must be reported as skipped and add nothing.
describe('ingestEnvelope — 同 hash no-op', () => {
  it('同 uri+hash 再送 → skipped,不新增', async () => {
    const client = mockClient();
    // Each call builds a fresh but identical envelope, like two separate deliveries.
    const sameEnvelope = () => envelope('h1', [{ subject: 'A', predicate: 'r', object: 'B' }]);

    await ingestEnvelope(client, sameEnvelope());
    const repeat = await ingestEnvelope(client, sameEnvelope());
    expect(repeat.skipped).toBe(true);

    const { triplets: stored } = await queryTriplets(client, {});
    expect(stored.length).toBe(1); // the repeat did not append a duplicate
  });
});
// Same uri with a new content hash: the earlier batch is deprecated, the new one appended.
describe('ingestEnvelope — 新 hash deprecate-then-append', () => {
  it('同 uri 新 hash → 舊批轉 deprecated、新批 active;查詢 active-only', async () => {
    const client = mockClient();
    const firstBatch: IngestEnvelope['triplets'] = [{ subject: 'A', predicate: 'r', object: 'old' }];
    const secondBatch: IngestEnvelope['triplets'] = [{ subject: 'A', predicate: 'r', object: 'new' }];

    await ingestEnvelope(client, envelope('h1', firstBatch));
    const outcome = await ingestEnvelope(client, envelope('h2', secondBatch));
    expect(outcome).toEqual({ skipped: false, ingested: 1, deprecated: 1 });

    // A query without options returns only the new batch.
    const { triplets: visible } = await queryTriplets(client, {});
    expect(visible.length).toBe(1);
    expect(visible[0].object).toBe('new');

    // The old batch is still stored as deprecated, so it can be inspected or rolled back.
    const { triplets: everything } = await queryTriplets(client, { includeDeprecated: true });
    expect(everything.length).toBe(2);
    const retired = everything.find((row) => row.status === 'deprecated');
    expect(retired?.object).toBe('old');
    expect(retired?.superseded_by).toBe('h2');
  });
});
// Schema-level checks: envelopes carrying unknown fields must fail validation.
describe('ingestEnvelope — 污染 envelope 422(契約 strict', () => {
  it('triplet 帶 graph 領域欄位 bridge_score → schema 拒收', () => {
    // The triplet carries an extra `bridge_score` field.
    const taintedTriplet = { subject: 'A', predicate: 'r', object: 'B', bridge_score: 3 };
    const polluted = {
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      triplets: [taintedTriplet],
    };

    const verdict = IngestEnvelopeSchema.safeParse(polluted);
    expect(verdict.success).toBe(false);
  });

  it('envelope 頂層帶禁止欄位 id → 拒收', () => {
    // An `id` at the envelope's top level is rejected.
    const polluted = {
      id: 'should-not-send',
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      triplets: [{ subject: 'A', predicate: 'r', object: 'B' }],
    };

    const verdict = IngestEnvelopeSchema.safeParse(polluted);
    expect(verdict.success).toBe(false);
  });
});
// Vectorization tag fields (nodes[].id/aliases/embed, triplets[].predicate_embed) are
// accepted, while graph-domain fields (bridge_score, clusters) are still rejected.
describe('ingestEnvelope — 向量化打標欄位(contract 升格,ingest#1', () => {
  it('帶 nodes[].embed/id/aliases + triplets[].predicate_embed → 通過(非 422', async () => {
    const tagged: IngestEnvelope = {
      source: { uri: 'github:uncle6me-web/wiki@v.md', content_hash: 'hv' },
      extractor: { model: 'claude-sonnet-4-6', tier: 'deep' },
      nodes: [
        { name: 'Graph RAG', id: 'graph-rag.md', aliases: ['圖譜 RAG'], gloss: '關係遍歷檢索', embed: true },
        { name: '黃仁勳', id: '黃仁勳', aliases: ['Jensen Huang'], embed: false },
      ],
      triplets: [
        { subject: 'Graph RAG', predicate: '參考', object: '黃仁勳', predicate_embed: true },
      ],
    };

    // Schema level first: the new fields pass validation.
    const verdict = IngestEnvelopeSchema.safeParse(tagged);
    expect(verdict.success).toBe(true);

    // Persistence: the triplet is written.
    const client = mockClient();
    const outcome = await ingestEnvelope(client, tagged);
    expect(outcome).toEqual({ skipped: false, ingested: 1, deprecated: 0 });
    const stored = (await queryTriplets(client, {})).triplets;
    expect(stored.length).toBe(1);

    // Node tags land as entity records (node_id, embed, aliases_json and gloss are checked).
    const entities = await client.listRecordsByTemplate('entity');
    const byCanonical = (canonical: string) =>
      entities.find((entity) => entity.values.canonical === canonical);

    const graphRag = byCanonical('Graph RAG');
    expect(graphRag?.values.node_id).toBe('graph-rag.md');
    expect(graphRag?.values.embed).toBe('true');
    expect(JSON.parse(graphRag!.values.aliases_json!)).toEqual(['圖譜 RAG']);
    expect(graphRag?.values.gloss).toBe('關係遍歷檢索');

    const jensen = byCanonical('黃仁勳');
    expect(jensen?.values.embed).toBe('false'); // an explicit false is stored as 'false'
  });

  it('同 id 的 node 被多次帶入 → 去重,只存一筆 entity(一卡一 node', async () => {
    const duplicated: IngestEnvelope = {
      source: { uri: 'github:uncle6me-web/wiki@dup.md', content_hash: 'hd' },
      extractor: { model: 'm', tier: 'deep' },
      nodes: [
        { name: 'Graph RAG', id: 'graph-rag.md' },
        { name: 'Graph RAG(別名)', id: 'graph-rag.md' }, // same id as the node above
      ],
      triplets: [{ subject: 'Graph RAG', predicate: 'r', object: 'X' }],
    };

    const client = mockClient();
    await ingestEnvelope(client, duplicated);

    // Only one entity record carries the shared node id.
    const entities = await client.listRecordsByTemplate('entity');
    const sameCard = entities.filter((entity) => entity.values.node_id === 'graph-rag.md');
    expect(sameCard.length).toBe(1);
  });

  it('帶真正 graph 領域禁送欄位(bridge_score)→ 仍 422', () => {
    // Accepted tag fields alongside `bridge_score`: the envelope still fails validation.
    const taintedTriplet = {
      subject: 'A',
      predicate: 'r',
      object: 'B',
      predicate_embed: true,
      bridge_score: 3,
    };
    const polluted = {
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      nodes: [{ name: 'A', embed: true }],
      triplets: [taintedTriplet],
    };

    const verdict = IngestEnvelopeSchema.safeParse(polluted);
    expect(verdict.success).toBe(false);
  });

  it('node 帶 graph 領域禁送欄位(clusters)→ 仍 422', () => {
    // A node carrying `clusters` is rejected even though `embed` is allowed.
    const taintedNode = { name: 'A', embed: true, clusters: ['c1'] };
    const polluted = {
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      nodes: [taintedNode],
      triplets: [{ subject: 'A', predicate: 'r', object: 'B' }],
    };

    const verdict = IngestEnvelopeSchema.safeParse(polluted);
    expect(verdict.success).toBe(false);
  });
});
// Rollback: flipping a deprecated record back to active makes it visible to a
// query without options again.
describe('ingestEnvelope — rollback(翻回 status', () => {
  it('把 deprecated 翻回 active 後,active 查詢重新見到它', async () => {
    const c = mockClient();
    await ingestEnvelope(c, envelope('h1', [{ subject: 'A', predicate: 'r', object: 'old' }]));
    await ingestEnvelope(c, envelope('h2', [{ subject: 'A', predicate: 'r', object: 'new' }]));

    // Locate the deprecated record from the first batch.
    const all = await queryTriplets(c, { includeDeprecated: true });
    const old = all.triplets.find((t) => t.status === 'deprecated');
    // Explicit guard instead of a non-null assertion: if deprecation regresses, the
    // test fails with a clear message rather than a TypeError on `old.id`.
    if (old === undefined) {
      throw new Error('expected a deprecated triplet after re-ingesting the same uri with a new hash');
    }

    // Manual rollback: flip status back to active and clear superseded_by.
    await c.updateRecord(old.id, { status: 'active', superseded_by: '' });

    const active = await queryTriplets(c, {});
    expect(active.triplets.map((t) => t.object).sort()).toEqual(['new', 'old']);
  });
});