feat(ingest): T0.5–T5 純餵食器管線實作(issue #2)
ingest 全管線(採取優先、extract fallback、跨庫織網、POST envelope):

- T0.5 骨架:Hono + zod-openapi,無 D1/Vectorize/AI 綁定(不碰儲存鐵律)
- T1 SourceAdapter:GitHub runtime API 拉 + per-file sha256 content-hash + /refresh 受理端
- T2 採取(路徑 A 優先):harvest template 1.8.0+ 卡(gloss/實體/typed-edge)
- T3 extract(路徑 B fallback):LlmCaller 可選模型 + JSON-fail 升級閘 + 端點對齊硬自檢護欄;第一版不 embed(只打標)
- T4 跨庫織網(主職):匯總多 repo → 偵測跨庫橋/異見,不算 bridge_score(graph 領域)
- T5 輸出:buildEnvelope strict + 顯式禁送欄位自檢;graph-client 純 POST(cherry-pick _kbdb_client.py 改不碰 base);薄 ops CLI(不帶查詢 MCP)

envelope 對齊 full contract(embed/id/aliases/predicate_embed);同步 contract 向量化欄位升格。

gate:vitest 28 passed / tsc clean / wrangler dry-run 乾淨(只 env-var 綁定)。

端到端 ingest→graph:graph receiver 已補對齊 → 待 ingest 部署 + GRAPH_BASE_URL → 待部署驗,未假綠。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,47 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { buildEnvelope } from '../src/lib/envelope';
|
||||
|
||||
/**
 * Smallest envelope input used as the starting point of every case in this file;
 * each test spreads it and adds or overrides `nodes` / `triplets`.
 */
const base = {
  source: { uri: 'github:o/r@a.md', content_hash: 'abc' },
  // `as const` keeps `tier` as the literal 'shallow' rather than widening it to string.
  extractor: { model: 'local-harvest', tier: 'shallow' as const },
  triplets: [{ subject: 'A', predicate: 'p', object: 'B', predicate_embed: true }],
};
|
||||
|
||||
// Contract checks for buildEnvelope: a well-formed input is echoed back, while the
// extra keys exercised below, or an empty triplet list, are expected to throw.
describe('buildEnvelope', () => {
  it('組合法 envelope(含向量化打標欄位)', () => {
    const { source, nodes, triplets } = buildEnvelope({
      ...base,
      nodes: [{ name: 'A', gloss: 'a', aliases: ['a2'], embed: true, id: 'A' }],
    });

    expect(source.uri).toBe('github:o/r@a.md');
    expect(nodes?.[0].embed).toBe(true);
    expect(nodes?.[0].id).toBe('A');
    expect(triplets[0].predicate_embed).toBe(true);
  });

  it('node 帶禁送欄位(bridge_score)→ strict throw(本地提早攔,不等 graph 422)', () => {
    const buildPlainNode = () => buildEnvelope({ ...base, nodes: [{ name: 'A', embed: true }] });
    // `as any` is deliberate: the extra key must get past the type checker to reach the runtime check.
    const buildScoredNode = () =>
      buildEnvelope({ ...base, nodes: [{ name: 'A', bridge_score: 0.5 } as any] });

    expect(buildPlainNode).not.toThrow();
    expect(buildScoredNode).toThrow();
  });

  it('node 帶 graph 領域 record id(非去重 id)以外的禁送鍵 → strict throw', () => {
    // The contract allows nodes[].id (the dedupe key), but `clusters` belongs to the
    // graph domain, so strict mode rejects it.
    const buildWithId = () =>
      buildEnvelope({ ...base, nodes: [{ name: 'A', id: 'A', embed: true }] });
    const buildWithClusters = () =>
      buildEnvelope({ ...base, nodes: [{ name: 'A', clusters: ['c'] } as any] });

    expect(buildWithId).not.toThrow();
    expect(buildWithClusters).toThrow();
  });

  it('禁送邊上 entity_type → strict throw', () => {
    const buildTypedEdge = () =>
      buildEnvelope({
        ...base,
        triplets: [{ subject: 'A', predicate: 'p', object: 'B', subject_entity_type: 'person' } as any],
      });

    expect(buildTypedEdge).toThrow();
  });

  it('無 triplets → throw(契約 min 1)', () => {
    const buildWithoutTriplets = () => buildEnvelope({ ...base, triplets: [] });

    expect(buildWithoutTriplets).toThrow();
  });
});
|
||||
@@ -0,0 +1,58 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { extract, parseExtractJson, type LlmCaller } from '../src/lib/extract';
|
||||
|
||||
/**
 * Serialized extraction payload with two nodes and one triplet linking them.
 * The tests below feed it to the parser and to stub callers as the "valid output" case.
 */
const GOOD_JSON = JSON.stringify({
  nodes: [
    { name: '原子筆記', gloss: '一個不可再分論點的記錄單元' },
    { name: '傳統筆記', gloss: '多主題混雜的記錄' },
  ],
  triplets: [{ subject: '原子筆記', predicate: '對立於', object: '傳統筆記', confidence: 0.9 }],
});
|
||||
|
||||
/**
 * Builds a stub `LlmCaller` for tests.
 *
 * @param model - Model name reported by the stub.
 * @param out - Either a fixed string the stub resolves with, or a custom async
 *   function used as the stub's `call` as-is.
 * @returns An object with the given `model` and a `call` function.
 */
function caller(model: string, out: string | (() => Promise<string>)): LlmCaller {
  if (typeof out === 'string') {
    // Bind to a const so the closure captures a value already narrowed to string.
    const fixedReply = out;
    return { model, call: async () => fixedReply };
  }
  return { model, call: out };
}
|
||||
|
||||
// parseExtractJson: accepts JSON wrapped in a markdown code fence and is expected to
// flag nodes/triplets for embedding; a payload without triplets is expected to throw.
describe('parseExtractJson', () => {
  it('解析 fenced JSON + 打標 embed/predicate_embed', () => {
    const fenced = ['```json', GOOD_JSON, '```'].join('\n');

    const { nodes, triplets } = parseExtractJson(fenced);

    expect(triplets[0].predicate_embed).toBe(true);
    expect(nodes[0].embed).toBe(true);
    expect(triplets[0].confidence).toBe(0.9);
  });

  it('無 triplets → throw', () => {
    const emptyPayload = JSON.stringify({ nodes: [], triplets: [] });

    expect(() => parseExtractJson(emptyPayload)).toThrow();
  });
});
|
||||
|
||||
// extract: shallow caller first; on unparsable output the optional deep caller is tried.
describe('extract', () => {
  it('淺萃成功不升級', async () => {
    const shallow = caller('haiku', GOOD_JSON);

    const { tier, escalated, model } = await extract('原文', shallow);

    expect(tier).toBe('shallow');
    expect(escalated).toBe(false);
    expect(model).toBe('haiku');
  });

  it('淺萃 JSON-fail → 升 deep(升級閘)', async () => {
    const shallow = caller('haiku', 'not json at all');
    const deep = caller('claude', GOOD_JSON);

    const result = await extract('原文', shallow, deep);

    expect(result.escalated).toBe(true);
    expect(result.tier).toBe('deep');
    expect(result.model).toBe('claude');
    expect(result.triplets.length).toBe(1);
  });

  it('淺萃失敗且無 deep caller → throw', async () => {
    const attempt = extract('原文', caller('haiku', 'garbage'));

    await expect(attempt).rejects.toThrow();
  });

  it('端點對齊護欄:模型吐對不齊端點 → 自動補進 nodes', async () => {
    // The triplet's object is deliberately absent from `nodes`.
    const danglingEndpoint = 'B(沒在 nodes)';
    const skewed = JSON.stringify({
      nodes: [{ name: 'A' }],
      triplets: [{ subject: 'A', predicate: '連到', object: danglingEndpoint }],
    });

    const result = await extract('原文', caller('haiku', skewed));

    // B is auto-added as a node, so every endpoint is aligned.
    const wasBackfilled = result.nodes.some((node) => node.name === danglingEndpoint);
    expect(wasBackfilled).toBe(true);
  });
});
|
||||
@@ -0,0 +1,43 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { makeGraphClient } from '../src/lib/graph-client';
|
||||
import type { Envelope } from '../src/types';
|
||||
|
||||
/** Single-triplet envelope posted by every client test in this file. */
const env: Envelope = {
  source: { uri: 'github:o/r@a.md', content_hash: 'abc' },
  extractor: { model: 'local-harvest', tier: 'shallow' },
  triplets: [{ subject: 'A', predicate: 'p', object: 'B' }],
};
|
||||
|
||||
/**
 * Creates a `fetch` stand-in that ignores its arguments and always answers with
 * the given status and a JSON-encoded body.
 *
 * @param status - HTTP status code of the canned response.
 * @param body - Value serialized with `JSON.stringify` into the response body.
 * @returns A function usable wherever a `fetch` implementation is expected.
 */
function mockFetch(status: number, body: unknown): typeof fetch {
  const respond = async (): Promise<Response> => {
    // Serialize per call, so each invocation gets its own fresh Response.
    const payload = JSON.stringify(body);
    return new Response(payload, {
      status,
      headers: { 'Content-Type': 'application/json' },
    });
  };
  return respond as any;
}
|
||||
|
||||
// makeGraphClient.postEnvelope: covers the unconfigured case plus a 200 and a 422 reply.
describe('makeGraphClient', () => {
  it('GRAPH_BASE_URL 未設 → 誠實回 ok:false,不假綠、不打網路', async () => {
    let networkTouched = false;
    // Spy fetch: flips the flag if the client ever reaches the network.
    const spyFetch = async () => {
      networkTouched = true;
      return new Response('{}');
    };
    const client = makeGraphClient(undefined, undefined, spyFetch as any);

    const result = await client.postEnvelope(env);

    expect(result.ok).toBe(false);
    expect(result.error).toContain('未設');
    expect(networkTouched).toBe(false);
  });

  it('200 → ok + 帶 graph 回的 {skipped,ingested,deprecated}', async () => {
    const okFetch = mockFetch(200, { skipped: false, ingested: 1, deprecated: 0 });
    const client = makeGraphClient('https://graph.example', 'tok', okFetch);

    const result = await client.postEnvelope(env);

    expect(result.ok).toBe(true);
    expect((result.body as any).ingested).toBe(1);
  });

  it('422 → ok:false 帶 issues(供修禁送欄位)', async () => {
    const rejectingFetch = mockFetch(422, {
      error: 'invalid envelope',
      issues: [{ path: ['bridge_score'] }],
    });
    const client = makeGraphClient('https://graph.example', undefined, rejectingFetch);

    const result = await client.postEnvelope(env);

    expect(result.ok).toBe(false);
    expect(result.status).toBe(422);
    expect((result.body as any).issues).toBeDefined();
  });
});
|
||||
@@ -0,0 +1,68 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { harvestCard, parseEntities, parseEdges, parseFrontmatter } from '../src/lib/harvest';
|
||||
|
||||
/**
 * Sample card used by every test in this file: frontmatter with `tags` and `gloss`,
 * a title heading, an entities section with two entries, and a relations section
 * holding one plain edge and one [[card]]-to-[[card]] edge.
 */
const CARD = `---
tags: [掛載架構, 架構設計]
gloss: ingest 在 KBDB 堆疊裡的位置。
---
# 掛載架構

← [[ingest/00-INDEX]]

## 摘要
KBDB 是三層堆疊。

## 實體
- **kbdb-ingest-plugin**(餵食器) — 最薄一層,純 POST 候選。
- **base KBDB**(arcrun/kbdb/基本盤) — 最底儲存層。

## 關聯
### 內文知識關係
- kbdb-ingest-plugin >> 掛載於 >> base KBDB
### 卡片關係
- [[掛載架構]] >> 受約束於 >> [[envelope-契約]]
`;
|
||||
|
||||
// parseFrontmatter: splits the card into frontmatter fields (`fm`) and the remaining body.
describe('parseFrontmatter', () => {
  it('抽出 gloss', () => {
    const parsed = parseFrontmatter(CARD);

    expect(parsed.fm.gloss).toBe('ingest 在 KBDB 堆疊裡的位置。');
    expect(parsed.body).toContain('# 掛載架構');
  });
});
|
||||
|
||||
// parseEntities: reads the entity bullets of the card body into nodes.
describe('parseEntities', () => {
  it('解析正規名 + aliases + gloss', () => {
    const cardBody = parseFrontmatter(CARD).body;

    const nodes = parseEntities(cardBody);
    const names = nodes.map((node) => node.name);

    expect(names).toEqual(['kbdb-ingest-plugin', 'base KBDB']);
    // The second entry's parenthesised text is expected to be split on '/' into aliases.
    expect(nodes[1].aliases).toEqual(['arcrun/kbdb', '基本盤']);
    expect(nodes[0].gloss).toBe('最薄一層,純 POST 候選。');
    expect(nodes[0].embed).toBe(true);
  });
});
|
||||
|
||||
// parseEdges: reads `a >> predicate >> b` bullets; [[wikilink]] endpoints are expected
// to come back without brackets and flagged as cards.
describe('parseEdges', () => {
  it('解析 typed-edge、去 [[ ]]、標記卡對卡', () => {
    const plainEdge = {
      subject: 'kbdb-ingest-plugin',
      predicate: '掛載於',
      object: 'base KBDB',
      predicate_embed: true,
      subjectIsCard: false,
      objectIsCard: false,
    };
    const cardEdge = {
      subject: '掛載架構',
      predicate: '受約束於',
      object: 'envelope-契約',
      predicate_embed: true,
      subjectIsCard: true,
      objectIsCard: true,
    };

    const edges = parseEdges(parseFrontmatter(CARD).body);

    expect(edges).toContainEqual(plainEdge);
    expect(edges).toContainEqual(cardEdge);
  });
});
|
||||
|
||||
// harvestCard: end-to-end harvest of the sample card into nodes and triplets.
describe('harvestCard', () => {
  it('卡標題 node 帶 frontmatter gloss、含內文 node', () => {
    const harvested = harvestCard(CARD);

    const titleNode = harvested.nodes.find((node) => node.name === '掛載架構');
    const hasBaseKbdb = harvested.nodes.some((node) => node.name === 'base KBDB');

    expect(titleNode?.gloss).toBe('ingest 在 KBDB 堆疊裡的位置。');
    expect(hasBaseKbdb).toBe(true);
    expect(harvested.triplets.length).toBe(2);
  });

  it('內文端點對齊(無對不齊)', () => {
    const { unalignedEndpoints } = harvestCard(CARD);

    // kbdb-ingest-plugin and base KBDB are both listed under "## 實體";
    // card-to-card endpoints are not required to be.
    expect(unalignedEndpoints).toEqual([]);
  });
});
|
||||
@@ -0,0 +1,73 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { makeSourceUri, parseSourceUri, contentHash, pullRepoMarkdown, type GitHubFetcher } from '../src/lib/source-adapter';
|
||||
import { processSource } from '../src/lib/pipeline';
|
||||
import type { LlmCaller } from '../src/lib/extract';
|
||||
|
||||
// Source URI helpers and content hashing.
describe('source-adapter uri', () => {
  it('makeSourceUri / parseSourceUri round-trip', () => {
    const parts = {
      owner: 'uncle6me-web',
      repo: 'kbdb-ingest-plugin',
      path: 'system-dev/wiki/cards/ingest/掛載架構.md',
    };

    const uri = makeSourceUri(parts.owner, parts.repo, parts.path);

    expect(uri).toBe('github:uncle6me-web/kbdb-ingest-plugin@system-dev/wiki/cards/ingest/掛載架構.md');
    // Parsing the URI must give back exactly the pieces it was built from.
    expect(parseSourceUri(uri)).toEqual(parts);
  });

  it('content-hash 穩定且隨內容變', async () => {
    const first = await contentHash('hello');
    const repeated = await contentHash('hello');
    const different = await contentHash('world');

    expect(first).toBe(repeated);
    expect(first).not.toBe(different);
  });
});
|
||||
|
||||
/**
 * Small card in harvest format: a frontmatter gloss, a title, two entities (甲, 乙)
 * and one relation between them. Used as the input for the harvest-path test.
 */
const HARVEST_CARD = `---
gloss: 卡標題定義。
---
# 卡A
## 實體
- **甲** — 甲的定義。
- **乙** — 乙的定義。
## 關聯
- 甲 >> 連到 >> 乙
`;
|
||||
|
||||
/**
 * In-memory `GitHubFetcher` backed by a path → content map.
 *
 * @param files - Fixture files keyed by repository path.
 * @returns A fetcher whose `listMarkdown` yields the map's keys and whose `getFile`
 *   serves the matching content with a fixed commit id of 'sha1'.
 * @throws From `getFile` (as a rejected promise) when the requested path is not in
 *   `files`, so a wrong path fails at the lookup rather than as `text: undefined` downstream.
 */
function mockFetcher(files: Record<string, string>): GitHubFetcher {
  return {
    async listMarkdown() {
      return Object.keys(files);
    },
    async getFile(_o, _r, path) {
      // Own-property lookup: inherited keys such as 'toString' must not count as fixtures.
      const text = Object.prototype.hasOwnProperty.call(files, path) ? files[path] : undefined;
      if (text === undefined) {
        throw new Error(`mockFetcher: no fixture for path "${path}"`);
      }
      return { text, commit: 'sha1' };
    },
  };
}
|
||||
|
||||
// Pull → process pipeline: harvest path, skipped path, and the extract fallback.
describe('pullRepoMarkdown + processSource', () => {
  it('採取路徑 A:拉檔 → harvest → envelope(不 extract)', async () => {
    const fetcher = mockFetcher({ 'cards/a.md': HARVEST_CARD });

    const sources = await pullRepoMarkdown(fetcher, 'o', 'r');
    expect(sources.length).toBe(1);

    const { path, envelope } = await processSource(sources[0]);

    expect(path).toBe('harvest');
    expect(envelope?.triplets).toEqual([
      { subject: '甲', predicate: '連到', object: '乙', predicate_embed: true },
    ]);
    expect(envelope?.extractor.model).toBe('local-harvest');
  });

  it('採不到三元組 + 無萃取模型 → skipped(不假萃)', async () => {
    const fetcher = mockFetcher({ 'plain.md': '# 純文字\n沒有三元組。' });
    const sources = await pullRepoMarkdown(fetcher, 'o', 'r');

    // No caller is supplied, so there is nothing to fall back to.
    const outcome = await processSource(sources[0]);

    expect(outcome.path).toBe('skipped');
    expect(outcome.envelope).toBeNull();
  });

  it('採不到 → fallback extract(路徑 B)', async () => {
    const cannedReply = JSON.stringify({
      nodes: [{ name: '甲' }],
      triplets: [{ subject: '甲', predicate: '是', object: '乙' }],
    });
    const shallowCaller: LlmCaller = {
      model: 'haiku',
      call: async () => cannedReply,
    };
    const fetcher = mockFetcher({ 'plain.md': '# 純文字\n甲是乙。' });
    const sources = await pullRepoMarkdown(fetcher, 'o', 'r');

    const outcome = await processSource(sources[0], { shallowCaller });

    expect(outcome.path).toBe('extract');
    expect(outcome.envelope?.extractor.model).toBe('haiku');
  });
});
|
||||
@@ -0,0 +1,45 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { weave, flattenForPost, type RepoEnvelopes } from '../src/lib/weave';
|
||||
import type { Envelope } from '../src/types';
|
||||
|
||||
/**
 * Builds a test envelope from compact inputs.
 *
 * @param uri - Source URI; also reused as the `content_hash` value.
 * @param nodes - Node names; each becomes `{ name, embed: true }`.
 * @param triplets - `[subject, predicate, object]` tuples.
 * @returns An envelope with a fixed 'local-harvest' / 'shallow' extractor.
 */
function env(uri: string, nodes: string[], triplets: Array<[string, string, string]>): Envelope {
  const nodeEntries = nodes.map((name) => ({ name, embed: true }));
  const edgeEntries = triplets.map(([subject, predicate, object]) => ({ subject, predicate, object }));

  return {
    source: { uri, content_hash: uri },
    extractor: { model: 'local-harvest', tier: 'shallow' },
    nodes: nodeEntries,
    triplets: edgeEntries,
  };
}
|
||||
|
||||
/**
 * Two repos with one envelope each. Both mention the node 'Arcrun', which is the
 * overlap the bridge-detection test looks for.
 */
const repos: RepoEnvelopes[] = [
  {
    repo: 'o/repoA',
    envelopes: [env('github:o/repoA@x.md', ['Arcrun', '餵食器'], [['Arcrun', '包含', '餵食器']])],
  },
  {
    repo: 'o/repoB',
    envelopes: [env('github:o/repoB@y.md', ['Arcrun', '圖層'], [['Arcrun', '依賴', '圖層']])],
  },
];
|
||||
|
||||
// weave / flattenForPost: cross-repo bridge and divergence detection over per-repo envelopes.
describe('weave', () => {
  it('偵測跨庫橋(同名節點跨 ≥2 repo)', () => {
    const r = weave(repos);
    const bridge = r.bridges.find((b) => b.node === 'Arcrun');

    expect(bridge?.repos).toEqual(['o/repoA', 'o/repoB']);
    expect(r.totalTriplets).toBe(2);
  });

  it('偵測跨庫異見(同 s/o 對、不同謂詞跨 repo)', () => {
    // Same subject/object pair in both repos, but with different predicates.
    const diverge: RepoEnvelopes[] = [
      { repo: 'o/repoA', envelopes: [env('github:o/repoA@x.md', ['X', 'Y'], [['X', '支持', 'Y']])] },
      { repo: 'o/repoB', envelopes: [env('github:o/repoB@y.md', ['X', 'Y'], [['X', '反對', 'Y']])] },
    ];

    const r = weave(diverge);

    expect(r.divergences.length).toBe(1);
    expect(r.divergences[0].predicatesByRepo.map((p) => p.predicate).sort()).toEqual(['反對', '支持']);
  });

  it('flattenForPost 攤平所有 envelope(順序穩定)', () => {
    // NOTE(review): the title promises a stable order, but only the count is asserted.
    // Add an order assertion once the element shape returned by flattenForPost is confirmed.
    expect(flattenForPost(repos).length).toBe(2);
  });

  it('ingest 不算 bridge_score(橋只標 repos,無分數欄位)', () => {
    const { bridges } = weave(repos);

    // Guard against a vacuous pass: with no bridges there would be nothing to inspect.
    expect(bridges.length).toBeGreaterThan(0);
    // Check every bridge, not just the first, so a score on any of them is caught.
    for (const bridge of bridges) {
      expect(bridge).not.toHaveProperty('bridge_score');
    }
  });
});
|
||||
Reference in New Issue
Block a user