feat(ingest): T0.5–T5 純餵食器管線實作(issue #2)
ingest 全管線(採取優先、extract fallback、跨庫織網、POST envelope):

- T0.5 骨架:Hono + zod-openapi,無 D1/Vectorize/AI 綁定(不碰儲存鐵律)
- T1 SourceAdapter:GitHub runtime API 拉 + per-file sha256 content-hash + /refresh 受理端
- T2 採取(路徑 A 優先):harvest template 1.8.0+ 卡(gloss/實體/typed-edge)
- T3 extract(路徑 B fallback):LlmCaller 可選模型 + JSON-fail 升級閘 + 端點對齊硬自檢護欄;第一版不 embed(只打標)
- T4 跨庫織網(主職):匯總多 repo → 偵測跨庫橋/異見,不算 bridge_score(graph 領域)
- T5 輸出:buildEnvelope strict + 顯式禁送欄位自檢;graph-client 純 POST(cherry-pick _kbdb_client.py 改不碰 base);薄 ops CLI(不帶查詢 MCP)

envelope 對齊 full contract(embed/id/aliases/predicate_embed);同步 contract 向量化欄位升格。

gate:vitest 28 passed / tsc clean / wrangler dry-run 乾淨(只 env-var 綁定)。

端到端 ingest→graph:graph receiver 已補對齊 → 待 ingest 部署 + GRAPH_BASE_URL → 待部署驗,未假綠。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { extract, parseExtractJson, type LlmCaller } from '../src/lib/extract';
|
||||
|
||||
/**
 * Well-formed model output shared by the tests in this file: two nodes, each
 * with a `gloss`, and one triplet linking them with a confidence of 0.9.
 * Kept as a serialized string because the stub callers below hand back raw text.
 */
const GOOD_JSON = JSON.stringify({
  nodes: [
    { name: '原子筆記', gloss: '一個不可再分論點的記錄單元' },
    { name: '傳統筆記', gloss: '多主題混雜的記錄' },
  ],
  triplets: [{ subject: '原子筆記', predicate: '對立於', object: '傳統筆記', confidence: 0.9 }],
});
|
||||
|
||||
/**
 * Builds a stub `LlmCaller` for the tests in this file.
 *
 * @param model - Model identifier stored on the stub as `model`.
 * @param out - Either a fixed string the stub resolves with on every call, or
 *   an async function that is used directly as the stub's `call`.
 * @returns An object carrying `model` and the chosen `call` implementation.
 */
function caller(model: string, out: string | (() => Promise<string>)): LlmCaller {
  // A plain string is wrapped so both input forms expose the same async `call`.
  const call = typeof out === 'string' ? async () => out : out;
  return { model, call };
}
|
||||
|
||||
// Unit tests for parseExtractJson, fed raw model text directly.
describe('parseExtractJson', () => {
  it('解析 fenced JSON + 打標 embed/predicate_embed', () => {
    // The payload is wrapped in a Markdown ```json fence before parsing.
    const g = parseExtractJson('```json\n' + GOOD_JSON + '\n```');

    // The fixture carries neither flag, so both must come from the parser.
    expect(g.triplets[0].predicate_embed).toBe(true);
    expect(g.nodes[0].embed).toBe(true);
    // The confidence supplied in the fixture is expected back unchanged.
    expect(g.triplets[0].confidence).toBe(0.9);
  });

  it('無 triplets → throw', () => {
    // Syntactically valid JSON with empty nodes and triplets must be rejected.
    expect(() => parseExtractJson(JSON.stringify({ nodes: [], triplets: [] }))).toThrow();
  });
});
|
||||
|
||||
// Tests for extract driven by stub callers: the shallow pass, escalation to the
// optional second ("deep") caller, and the endpoint-alignment guard.
describe('extract', () => {
  it('淺萃成功不升級', async () => {
    // Only one caller is supplied and it returns valid JSON.
    const r = await extract('原文', caller('haiku', GOOD_JSON));

    expect(r.tier).toBe('shallow');
    expect(r.escalated).toBe(false);
    expect(r.model).toBe('haiku');
  });

  it('淺萃 JSON-fail → 升 deep(升級閘)', async () => {
    // The first caller returns non-JSON text; the second returns the good fixture.
    const r = await extract('原文', caller('haiku', 'not json at all'), caller('claude', GOOD_JSON));

    expect(r.escalated).toBe(true);
    expect(r.tier).toBe('deep');
    expect(r.model).toBe('claude');
    expect(r.triplets).toHaveLength(1);
  });

  it('淺萃失敗且無 deep caller → throw', async () => {
    // With no second caller to fall back on, the failure must surface as a rejection.
    await expect(extract('原文', caller('haiku', 'garbage'))).rejects.toThrow();
  });

  it('端點對齊護欄:模型吐對不齊端點 → 自動補進 nodes', async () => {
    // The triplet's object names an entity that is missing from `nodes`.
    const skewed = JSON.stringify({
      nodes: [{ name: 'A' }],
      triplets: [{ subject: 'A', predicate: '連到', object: 'B(沒在 nodes)' }],
    });

    const r = await extract('原文', caller('haiku', skewed));

    // B is auto-added as a node, so every triplet endpoint is aligned with `nodes`.
    expect(r.nodes.some((n) => n.name === 'B(沒在 nodes)')).toBe(true);
  });
});
|
||||
Reference in New Issue
Block a user