13db97bb54
契約漂移修補:T3 的 strict Zod 鏡射舊 contract,ingest 照新 contract(ingest#1 升格)送向量化打標欄位會被 .strict() 擋成 422。方向 A:顯式加合法新欄位、保留 strict。 - 同步 contracts/ingest-candidate.json 副本到頂層單一真相源(mira-dissolve)。 - NodeSchema 加 id?/aliases?/embed?;EdgeSchema 加 predicate_embed?。strict() 保留 → bridge_score/clusters 等 graph 領域禁送欄位仍 422。 - 落地:predicate_embed 透傳進 triplet slot;node 打標(embed/gloss/aliases)存進 entity slot,供 base/KBDB embed 模組讀標執行(graph 不算向量,鐵律一致)。 - id 作 node 去重鍵:同卡多邊指到只存一筆 entity。 - persistNodes 拆成獨立 action(triplet-ingest.ts 回到 95 行,守樂高 100 行限制)。 - 測試 +4:帶向量化欄位通過、bridge_score/clusters 仍 422、同 id 去重。 vitest 23 passed。零 SQL / 無 D1·Vectorize·AI 綁定 / dry-run 乾淨。 Co-authored-by: richblack <leo21c@gmail.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
175 lines
7.8 KiB
TypeScript
175 lines
7.8 KiB
TypeScript
// Ingest write path — runs against a mock KbdbClient (API-as-Wall): zero SQL, no network calls.
// Covers the five T3.4 cases: normal envelope / same-hash no-op / new-hash deprecate / polluted envelope 422 / rollback.
|
||
import { describe, it, expect } from 'vitest';
|
||
import { ingestEnvelope, IngestEnvelopeSchema, type IngestEnvelope } from '../src/actions/triplet-ingest';
|
||
import { queryTriplets } from '../src/actions/triplet-crud';
|
||
import { mockClient } from './mock-client';
|
||
|
||
/**
 * Builds a test ingest envelope with a fixed source URI and extractor.
 *
 * Only the content hash and the triplet list vary between test cases; the
 * source URI stays constant so that successive calls with different hashes
 * describe the same source.
 *
 * @param hash - Value placed in `source.content_hash`.
 * @param triplets - Triplet list placed in the envelope as-is (not copied).
 * @returns The assembled envelope.
 */
function envelope(hash: string, triplets: IngestEnvelope['triplets']): IngestEnvelope {
  return {
    source: { uri: 'github:uncle6me-web/wiki@a.md', content_hash: hash },
    extractor: { model: 'claude-sonnet-4-6', tier: 'deep' },
    triplets,
  };
}
|
||
|
||
// Normal envelope: a first-time ingest appends every triplet.
describe('ingestEnvelope — 正常 envelope', () => {
  it('append 全部 triplet 為 active,記 source_uri/content_hash', async () => {
    const c = mockClient();
    const res = await ingestEnvelope(c, envelope('h1', [
      { subject: 'A', predicate: 'rel', object: 'B' },
      { subject: 'B', predicate: 'rel', object: 'C' },
    ]));
    expect(res).toEqual({ skipped: false, ingested: 2, deprecated: 0 });

    // Both triplets are stored as active and carry the envelope's source metadata.
    const { triplets } = await queryTriplets(c, {});
    expect(triplets.length).toBe(2);
    expect(triplets.every((t) => t.status === 'active')).toBe(true);
    expect(triplets[0].source_uri).toBe('github:uncle6me-web/wiki@a.md');
    expect(triplets[0].content_hash).toBe('h1');
  });
});
|
||
|
||
// Same hash: re-sending an identical uri + content_hash is reported as skipped.
describe('ingestEnvelope — 同 hash no-op', () => {
  it('同 uri+hash 再送 → skipped,不新增', async () => {
    const c = mockClient();
    await ingestEnvelope(c, envelope('h1', [{ subject: 'A', predicate: 'r', object: 'B' }]));
    const res = await ingestEnvelope(c, envelope('h1', [{ subject: 'A', predicate: 'r', object: 'B' }]));
    expect(res.skipped).toBe(true);

    const { triplets } = await queryTriplets(c, {});
    expect(triplets.length).toBe(1); // no duplicate append
  });
});
|
||
|
||
// New hash for the same uri: the previous batch is deprecated and the new one appended.
describe('ingestEnvelope — 新 hash deprecate-then-append', () => {
  it('同 uri 新 hash → 舊批轉 deprecated、新批 active;查詢 active-only', async () => {
    const c = mockClient();
    await ingestEnvelope(c, envelope('h1', [{ subject: 'A', predicate: 'r', object: 'old' }]));
    const res = await ingestEnvelope(c, envelope('h2', [{ subject: 'A', predicate: 'r', object: 'new' }]));
    expect(res).toEqual({ skipped: false, ingested: 1, deprecated: 1 });

    // An active-only query sees only the new batch.
    const active = await queryTriplets(c, {});
    expect(active.triplets.length).toBe(1);
    expect(active.triplets[0].object).toBe('new');

    // The old batch is still present (deprecated), available for archaeology / rollback.
    const all = await queryTriplets(c, { includeDeprecated: true });
    expect(all.triplets.length).toBe(2);
    const deprecated = all.triplets.find((t) => t.status === 'deprecated');
    expect(deprecated?.object).toBe('old');
    // The deprecated record points at the hash that replaced it.
    expect(deprecated?.superseded_by).toBe('h2');
  });
});
|
||
|
||
// Polluted envelope: payloads carrying fields outside the contract must fail
// IngestEnvelopeSchema.safeParse. These cases exercise the schema only; no client is involved.
describe('ingestEnvelope — 污染 envelope 422(契約 strict)', () => {
  it('triplet 帶 graph 領域欄位 bridge_score → schema 拒收', () => {
    const polluted = {
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      // `bridge_score` is the extra field under test.
      triplets: [{ subject: 'A', predicate: 'r', object: 'B', bridge_score: 3 }],
    };
    const parsed = IngestEnvelopeSchema.safeParse(polluted);
    expect(parsed.success).toBe(false);
  });

  it('envelope 頂層帶禁止欄位 id → 拒收', () => {
    const polluted = {
      // Top-level `id` is the extra field under test.
      id: 'should-not-send',
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      triplets: [{ subject: 'A', predicate: 'r', object: 'B' }],
    };
    expect(IngestEnvelopeSchema.safeParse(polluted).success).toBe(false);
  });
});
|
||
|
||
// Vectorization tagging fields: nodes[].id / aliases / gloss / embed and
// triplets[].predicate_embed are accepted by the schema, while extra fields such as
// bridge_score and clusters are still rejected.
describe('ingestEnvelope — 向量化打標欄位(contract 升格,ingest#1)', () => {
  it('帶 nodes[].embed/id/aliases + triplets[].predicate_embed → 通過(非 422)', async () => {
    const c = mockClient();
    const env: IngestEnvelope = {
      source: { uri: 'github:uncle6me-web/wiki@v.md', content_hash: 'hv' },
      extractor: { model: 'claude-sonnet-4-6', tier: 'deep' },
      nodes: [
        { name: 'Graph RAG', id: 'graph-rag.md', aliases: ['圖譜 RAG'], gloss: '關係遍歷檢索', embed: true },
        { name: '黃仁勳', id: '黃仁勳', aliases: ['Jensen Huang'], embed: false },
      ],
      triplets: [
        { subject: 'Graph RAG', predicate: '參考', object: '黃仁勳', predicate_embed: true },
      ],
    };
    // Schema-level check first: the legitimate new fields are not blocked by strict parsing.
    expect(IngestEnvelopeSchema.safeParse(env).success).toBe(true);

    // Persistence: the triplet is written and the node tagging is stored as entity records.
    const res = await ingestEnvelope(c, env);
    expect(res).toEqual({ skipped: false, ingested: 1, deprecated: 0 });

    const { triplets } = await queryTriplets(c, {});
    expect(triplets.length).toBe(1);

    // Node tagging lands as an entity record (gloss / aliases / embed flag passed through).
    const entities = await c.listRecordsByTemplate('entity');
    const gr = entities.find((e) => e.values.canonical === 'Graph RAG');
    // Fail with a readable assertion instead of a TypeError if the entity is missing.
    expect(gr).toBeDefined();
    expect(gr?.values.node_id).toBe('graph-rag.md');
    // The embed flag is compared against the string 'true', not a boolean.
    expect(gr?.values.embed).toBe('true');
    // Aliases are read back from a JSON-encoded string field.
    expect(gr?.values.aliases_json).toBeDefined();
    expect(JSON.parse(gr!.values.aliases_json!)).toEqual(['圖譜 RAG']);
    expect(gr?.values.gloss).toBe('關係遍歷檢索');
    const jensen = entities.find((e) => e.values.canonical === '黃仁勳');
    expect(jensen?.values.embed).toBe('false'); // an explicit false is passed through
  });

  it('同 id 的 node 被多次帶入 → 去重,只存一筆 entity(一卡一 node)', async () => {
    const c = mockClient();
    const env: IngestEnvelope = {
      source: { uri: 'github:uncle6me-web/wiki@dup.md', content_hash: 'hd' },
      extractor: { model: 'm', tier: 'deep' },
      nodes: [
        { name: 'Graph RAG', id: 'graph-rag.md' },
        { name: 'Graph RAG(別名)', id: 'graph-rag.md' }, // same id → same card, not re-created
      ],
      triplets: [{ subject: 'Graph RAG', predicate: 'r', object: 'X' }],
    };
    await ingestEnvelope(c, env);
    const entities = await c.listRecordsByTemplate('entity');
    // Exactly one entity record exists for the shared node id.
    expect(entities.filter((e) => e.values.node_id === 'graph-rag.md').length).toBe(1);
  });

  it('帶真正 graph 領域禁送欄位(bridge_score)→ 仍 422', () => {
    const polluted = {
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      nodes: [{ name: 'A', embed: true }],
      // Accepted field (predicate_embed) alongside the extra one (bridge_score).
      triplets: [{ subject: 'A', predicate: 'r', object: 'B', predicate_embed: true, bridge_score: 3 }],
    };
    expect(IngestEnvelopeSchema.safeParse(polluted).success).toBe(false);
  });

  it('node 帶 graph 領域禁送欄位(clusters)→ 仍 422', () => {
    const polluted = {
      source: { uri: 'u', content_hash: 'h' },
      extractor: { model: 'm', tier: 'deep' },
      // Accepted field (embed) alongside the extra one (clusters).
      nodes: [{ name: 'A', embed: true, clusters: ['c1'] }],
      triplets: [{ subject: 'A', predicate: 'r', object: 'B' }],
    };
    expect(IngestEnvelopeSchema.safeParse(polluted).success).toBe(false);
  });
});
|
||
|
||
// Rollback: flipping a deprecated record's status back makes it visible to active queries again.
describe('ingestEnvelope — rollback(翻回 status)', () => {
  it('把 deprecated 翻回 active 後,active 查詢重新見到它', async () => {
    const c = mockClient();
    await ingestEnvelope(c, envelope('h1', [{ subject: 'A', predicate: 'r', object: 'old' }]));
    await ingestEnvelope(c, envelope('h2', [{ subject: 'A', predicate: 'r', object: 'new' }]));

    // Fetch the deprecated old record and roll it back by hand
    // (status back to active, superseded_by cleared).
    const all = await queryTriplets(c, { includeDeprecated: true });
    const old = all.triplets.find((t) => t.status === 'deprecated')!;
    // Fail with a readable assertion instead of a TypeError if nothing was deprecated.
    expect(old).toBeDefined();
    await c.updateRecord(old.id, { status: 'active', superseded_by: '' });

    // Sorted so the assertion does not depend on query ordering.
    const active = await queryTriplets(c, {});
    expect(active.triplets.map((t) => t.object).sort()).toEqual(['new', 'old']);
  });
});
|