Files
kbdb-ingest-plugin/tests/extract.test.ts
T
Leo 16ad1cb208 feat(ingest): T0.5–T5 純餵食器管線實作(issue #2)
ingest 全管線(採取優先、extract fallback、跨庫織網、POST envelope):
- T0.5 骨架:Hono + zod-openapi,無 D1/Vectorize/AI 綁定(不碰儲存鐵律)
- T1 SourceAdapter:GitHub runtime API 拉 + per-file sha256 content-hash + /refresh 受理端
- T2 採取(路徑 A 優先):harvest template 1.8.0+ 卡(gloss/實體/typed-edge)
- T3 extract(路徑 B fallback):LlmCaller 可選模型 + JSON-fail 升級閘 + 端點對齊硬自檢護欄;第一版不 embed(只打標)
- T4 跨庫織網(主職):匯總多 repo → 偵測跨庫橋/異見,不算 bridge_score(graph 領域)
- T5 輸出:buildEnvelope strict + 顯式禁送欄位自檢;graph-client 純 POST(cherry-pick _kbdb_client.py 改不碰 base);薄 ops CLI(不帶查詢 MCP)

envelope 對齊 full contract(embed/id/aliases/predicate_embed);同步 contract 向量化欄位升格。

gate:vitest 28 passed / tsc clean / wrangler dry-run 乾淨(只 env-var 綁定)。
端到端 ingest→graph:graph receiver 已補對齊 → 待 ingest 部署 + GRAPH_BASE_URL → 待部署驗,未假綠。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 20:40:53 +08:00

59 lines
2.2 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { describe, it, expect } from 'vitest';
import { extract, parseExtractJson, type LlmCaller } from '../src/lib/extract';
/**
 * Well-formed model reply used as the happy-path fixture:
 * two glossed nodes connected by a single triplet with confidence 0.9.
 */
const GOOD_JSON = (() => {
  const nodes = [
    { name: '原子筆記', gloss: '一個不可再分論點的記錄單元' },
    { name: '傳統筆記', gloss: '多主題混雜的記錄' },
  ];
  const triplets = [
    { subject: '原子筆記', predicate: '對立於', object: '傳統筆記', confidence: 0.9 },
  ];
  // Key order (nodes, then triplets) is kept so the serialized text is unchanged.
  return JSON.stringify({ nodes, triplets });
})();
/**
 * Builds a stub LlmCaller for the tests below.
 *
 * @param model - Model identifier stored on the returned stub.
 * @param out - Either a fixed reply string (wrapped in an async function that
 *   resolves to it) or an async function that is used as `call` as-is.
 * @returns An object with the given `model` and the resolved `call` function.
 */
function caller(model: string, out: string | (() => Promise<string>)): LlmCaller {
  if (typeof out !== 'string') {
    // A custom async function was supplied; hand it through untouched.
    return { model, call: out };
  }
  const reply = out;
  return { model, call: async () => reply };
}
// parseExtractJson: accepts a markdown-fenced payload and tags the embed flags;
// a payload whose nodes and triplets are both empty must be rejected.
describe('parseExtractJson', () => {
  it('解析 fenced JSON + 打標 embed/predicate_embed', () => {
    // Wrap the fixture in a ```json fence, as a model reply might.
    const fenced = ['```json', GOOD_JSON, '```'].join('\n');
    const graph = parseExtractJson(fenced);

    const firstTriplet = graph.triplets[0];
    const firstNode = graph.nodes[0];

    expect(firstTriplet.predicate_embed).toBe(true);
    expect(firstNode.embed).toBe(true);
    expect(firstTriplet.confidence).toBe(0.9);
  });

  it('無 triplets → throw', () => {
    const emptyGraph = JSON.stringify({ nodes: [], triplets: [] });
    const parseEmpty = () => parseExtractJson(emptyGraph);

    expect(parseEmpty).toThrow();
  });
});
// extract: shallow pass first, escalation to the deep caller when the shallow
// reply is not parseable, and auto-completion of triplet endpoints missing from nodes.
describe('extract', () => {
  it('淺萃成功不升級', async () => {
    const shallow = caller('haiku', GOOD_JSON);

    const { tier, escalated, model } = await extract('原文', shallow);

    expect(tier).toBe('shallow');
    expect(escalated).toBe(false);
    expect(model).toBe('haiku');
  });

  it('淺萃 JSON-fail → 升 deep(升級閘)', async () => {
    const brokenShallow = caller('haiku', 'not json at all');
    const deep = caller('claude', GOOD_JSON);

    const result = await extract('原文', brokenShallow, deep);

    expect(result.escalated).toBe(true);
    expect(result.tier).toBe('deep');
    expect(result.model).toBe('claude');
    expect(result.triplets.length).toBe(1);
  });

  it('淺萃失敗且無 deep caller → throw', async () => {
    const attempt = extract('原文', caller('haiku', 'garbage'));

    await expect(attempt).rejects.toThrow();
  });

  it('端點對齊護欄:模型吐對不齊端點 → 自動補進 nodes', async () => {
    // Triplet object that is deliberately absent from the declared nodes.
    const orphan = 'B(沒在 nodes';
    const skewed = JSON.stringify({
      nodes: [{ name: 'A' }],
      triplets: [{ subject: 'A', predicate: '連到', object: orphan }],
    });

    const result = await extract('原文', caller('haiku', skewed));

    // The missing endpoint is expected to be auto-added as a node, so every endpoint lines up.
    const nodeNames = result.nodes.map((node) => node.name);
    expect(nodeNames.includes(orphan)).toBe(true);
  });
});