Files
kbdb-ingest-plugin/tests/source-pipeline.test.ts
T
Leo 16ad1cb208 feat(ingest): T0.5–T5 純餵食器管線實作(issue #2)
ingest 全管線(採取優先、extract fallback、跨庫織網、POST envelope):
- T0.5 骨架:Hono + zod-openapi,無 D1/Vectorize/AI 綁定(不碰儲存鐵律)
- T1 SourceAdapter:GitHub runtime API 拉 + per-file sha256 content-hash + /refresh 受理端
- T2 採取(路徑 A 優先):harvest template 1.8.0+ 卡(gloss/實體/typed-edge)
- T3 extract(路徑 B fallback):LlmCaller 可選模型 + JSON-fail 升級閘 + 端點對齊硬自檢護欄;第一版不 embed(只打標)
- T4 跨庫織網(主職):匯總多 repo → 偵測跨庫橋/異見,不算 bridge_score(graph 領域)
- T5 輸出:buildEnvelope strict + 顯式禁送欄位自檢;graph-client 純 POST(cherry-pick _kbdb_client.py 改不碰 base);薄 ops CLI(不帶查詢 MCP)

envelope 對齊 full contract(embed/id/aliases/predicate_embed);同步 contract 向量化欄位升格。

gate:vitest 28 passed / tsc clean / wrangler dry-run 乾淨(只 env-var 綁定)。
端到端 ingest→graph:graph receiver 已補對齊 → 待 ingest 部署 + GRAPH_BASE_URL → 待部署驗,未假綠。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 20:40:53 +08:00

74 lines
2.8 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { describe, it, expect } from 'vitest';
import { makeSourceUri, parseSourceUri, contentHash, pullRepoMarkdown, type GitHubFetcher } from '../src/lib/source-adapter';
import { processSource } from '../src/lib/pipeline';
import type { LlmCaller } from '../src/lib/extract';
// Unit checks for the source-adapter helpers: URI build/parse symmetry and content hashing.
describe('source-adapter uri', () => {
  it('makeSourceUri / parseSourceUri round-trip', () => {
    // Path deliberately contains non-ASCII characters.
    const owner = 'uncle6me-web';
    const repo = 'kbdb-ingest-plugin';
    const path = 'system-dev/wiki/cards/ingest/掛載架構.md';

    const built = makeSourceUri(owner, repo, path);

    expect(built).toBe('github:uncle6me-web/kbdb-ingest-plugin@system-dev/wiki/cards/ingest/掛載架構.md');
    // Parsing the built URI must give back exactly the three parts it was built from.
    expect(parseSourceUri(built)).toEqual({ owner, repo, path });
  });

  it('content-hash 穩定且隨內容變', async () => {
    const helloHash = await contentHash('hello');

    // Same input hashes to the same value.
    const helloAgain = await contentHash('hello');
    expect(helloHash).toBe(helloAgain);

    // Different input hashes to a different value.
    const worldHash = await contentHash('world');
    expect(helloHash).not.toBe(worldHash);
  });
});
// Markdown card fixture for the harvest-path test in this file.
// Layout: a front-matter block with a `gloss` field, a title heading, an
// entities section (## 實體) listing 甲 and 乙, and a relations section (## 關聯)
// holding the single line `甲 >> 連到 >> 乙`.
// The content is runtime data consumed by the pipeline under test; do not reformat it.
const HARVEST_CARD = `---
gloss: 卡標題定義。
---
# 卡A
## 實體
- **甲** — 甲的定義。
- **乙** — 乙的定義。
## 關聯
- 甲 >> 連到 >> 乙
`;
/**
 * Builds an in-memory GitHubFetcher stub backed by a path → content map.
 *
 * - `listMarkdown` resolves to the map's keys, in insertion order.
 * - `getFile` ignores the owner and repo arguments and resolves to the mapped
 *   text plus a fixed commit id of `'sha1'`. A path that is not in the map
 *   resolves with `text: undefined` rather than rejecting.
 *
 * @param files Map of repository-relative path to file content.
 * @returns A fetcher stub that never touches the network.
 */
function mockFetcher(files: Record<string, string>): GitHubFetcher {
  return {
    listMarkdown: async () => Object.keys(files),
    getFile: async (_owner, _repo, filePath) => ({ text: files[filePath], commit: 'sha1' }),
  };
}
// Pipeline checks: pull markdown through a stubbed fetcher, then run the pulled
// source through processSource and inspect which path it took.
describe('pullRepoMarkdown + processSource', () => {
  /**
   * Pulls a one-file repository through the stubbed fetcher and returns its source.
   *
   * Asserts that exactly one source came back, so a regression in the pull step
   * fails here with a clear count mismatch instead of surfacing later as an
   * undefined argument to processSource.
   */
  async function pullSingleSource(path: string, content: string) {
    const sources = await pullRepoMarkdown(mockFetcher({ [path]: content }), 'o', 'r');
    expect(sources.length).toBe(1);
    return sources[0];
  }

  it('採取路徑 A:拉檔 → harvest → envelope(不 extract', async () => {
    const source = await pullSingleSource('cards/a.md', HARVEST_CARD);
    const result = await processSource(source);

    // The card's relation line is harvested directly; no extraction caller is supplied.
    expect(result.path).toBe('harvest');
    expect(result.envelope?.triplets).toEqual([{ subject: '甲', predicate: '連到', object: '乙', predicate_embed: true }]);
    expect(result.envelope?.extractor.model).toBe('local-harvest');
  });

  it('採不到三元組 + 無萃取模型 → skipped(不假萃)', async () => {
    const source = await pullSingleSource('plain.md', '# 純文字\n沒有三元組。');
    const result = await processSource(source);

    // Nothing to harvest and no caller configured: the source is skipped with no envelope.
    expect(result.path).toBe('skipped');
    expect(result.envelope).toBeNull();
  });

  it('採不到 → fallback extract(路徑 B', async () => {
    // Stub caller that always answers with one node and one triplet as JSON text.
    const caller: LlmCaller = {
      model: 'haiku',
      call: async () => JSON.stringify({ nodes: [{ name: '甲' }], triplets: [{ subject: '甲', predicate: '是', object: '乙' }] }),
    };
    const source = await pullSingleSource('plain.md', '# 純文字\n甲是乙。');
    const result = await processSource(source, { shallowCaller: caller });

    // With a caller supplied, the extract path runs and the envelope records the caller's model.
    expect(result.path).toBe('extract');
    expect(result.envelope?.extractor.model).toBe('haiku');
  });
});