16ad1cb208
ingest 全管線(採取優先、extract fallback、跨庫織網、POST envelope): - T0.5 骨架:Hono + zod-openapi,無 D1/Vectorize/AI 綁定(不碰儲存鐵律) - T1 SourceAdapter:GitHub runtime API 拉 + per-file sha256 content-hash + /refresh 受理端 - T2 採取(路徑 A 優先):harvest template 1.8.0+ 卡(gloss/實體/typed-edge) - T3 extract(路徑 B fallback):LlmCaller 可選模型 + JSON-fail 升級閘 + 端點對齊硬自檢護欄;第一版不 embed(只打標) - T4 跨庫織網(主職):匯總多 repo → 偵測跨庫橋/異見,不算 bridge_score(graph 領域) - T5 輸出:buildEnvelope strict + 顯式禁送欄位自檢;graph-client 純 POST(cherry-pick _kbdb_client.py 改不碰 base);薄 ops CLI(不帶查詢 MCP) envelope 對齊 full contract(embed/id/aliases/predicate_embed);同步 contract 向量化欄位升格。 gate:vitest 28 passed / tsc clean / wrangler dry-run 乾淨(只 env-var 綁定)。 端到端 ingest→graph:graph receiver 已補對齊 → 待 ingest 部署 + GRAPH_BASE_URL → 待部署驗,未假綠。 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
74 lines
2.8 KiB
TypeScript
74 lines
2.8 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
||
import { makeSourceUri, parseSourceUri, contentHash, pullRepoMarkdown, type GitHubFetcher } from '../src/lib/source-adapter';
|
||
import { processSource } from '../src/lib/pipeline';
|
||
import type { LlmCaller } from '../src/lib/extract';
|
||
|
||
// Source-adapter helpers: URI construction/parsing and content hashing.
describe('source-adapter uri', () => {
  it('makeSourceUri / parseSourceUri round-trip', () => {
    const owner = 'uncle6me-web';
    const repo = 'kbdb-ingest-plugin';
    const path = 'system-dev/wiki/cards/ingest/掛載架構.md';

    // Build the URI from its parts, then parse it back and expect the same parts.
    const built = makeSourceUri(owner, repo, path);

    expect(built).toBe('github:uncle6me-web/kbdb-ingest-plugin@system-dev/wiki/cards/ingest/掛載架構.md');
    expect(parseSourceUri(built)).toEqual({ owner, repo, path });
  });

  it('content-hash 穩定且隨內容變', async () => {
    // Same input must hash to the same value ...
    const first = await contentHash('hello');
    const repeated = await contentHash('hello');
    expect(first).toBe(repeated);

    // ... and a different input must hash to a different value.
    const different = await contentHash('world');
    expect(first).not.toBe(different);
  });
});
|
||
|
||
/**
 * Markdown card fixture used by the harvest-path test: front matter with a
 * `gloss` field, a title heading, an entity section listing two entities, and
 * a relation section with a single `subject >> predicate >> object` line.
 */
const HARVEST_CARD = `---
gloss: 卡標題定義。
---
# 卡A
## 實體
- **甲** — 甲的定義。
- **乙** — 乙的定義。
## 關聯
- 甲 >> 連到 >> 乙
`;
|
||
|
||
/**
 * Builds an in-memory `GitHubFetcher` backed by a path → content map.
 *
 * @param files File contents keyed by the path string later passed to `getFile`.
 * @returns A fetcher whose `listMarkdown` resolves to every key of `files` and
 *   whose `getFile` resolves to the matching content with the fixed commit id
 *   `'sha1'`. `getFile` rejects with an `Error` when the path has no fixture.
 */
function mockFetcher(files: Record<string, string>): GitHubFetcher {
  return {
    async listMarkdown() {
      return Object.keys(files);
    },
    async getFile(_owner: string, _repo: string, path: string) {
      const text = files[path];
      // Compare against undefined (not truthiness) so an empty-string fixture
      // stays valid, while a path with no fixture fails loudly instead of
      // handing `text: undefined` to the code under test.
      if (text === undefined) {
        throw new Error(`mockFetcher: no fixture for path "${path}"`);
      }
      return { text, commit: 'sha1' };
    },
  };
}
|
||
|
||
// Pulls markdown through an in-memory fetcher and runs each pulled source
// through processSource, checking which path the result reports.
describe('pullRepoMarkdown + processSource', () => {
  it('採取路徑 A:拉檔 → harvest → envelope(不 extract)', async () => {
    const sources = await pullRepoMarkdown(mockFetcher({ 'cards/a.md': HARVEST_CARD }), 'o', 'r');
    expect(sources.length).toBe(1);

    // No caller is supplied: the card's relation line alone must produce the triplet.
    const result = await processSource(sources[0]);
    expect(result.path).toBe('harvest');
    expect(result.envelope?.triplets).toEqual([{ subject: '甲', predicate: '連到', object: '乙', predicate_embed: true }]);
    expect(result.envelope?.extractor.model).toBe('local-harvest');
  });

  it('採不到三元組 + 無萃取模型 → skipped(不假萃)', async () => {
    const sources = await pullRepoMarkdown(mockFetcher({ 'plain.md': '# 純文字\n沒有三元組。' }), 'o', 'r');
    // Guard the index below: an empty pull should fail here, not inside processSource.
    expect(sources.length).toBe(1);

    // Plain markdown and no caller: expect the source to be skipped with no envelope.
    const result = await processSource(sources[0]);
    expect(result.path).toBe('skipped');
    expect(result.envelope).toBeNull();
  });

  it('採不到 → fallback extract(路徑 B)', async () => {
    // Stub caller that always answers with one node and one triplet as JSON text.
    const caller: LlmCaller = {
      model: 'haiku',
      call: async () => JSON.stringify({ nodes: [{ name: '甲' }], triplets: [{ subject: '甲', predicate: '是', object: '乙' }] }),
    };
    const sources = await pullRepoMarkdown(mockFetcher({ 'plain.md': '# 純文字\n甲是乙。' }), 'o', 'r');
    // Guard the index below: an empty pull should fail here, not inside processSource.
    expect(sources.length).toBe(1);

    // With a caller supplied, the result should report the extract path and the caller's model.
    const result = await processSource(sources[0], { shallowCaller: caller });
    expect(result.path).toBe('extract');
    expect(result.envelope?.extractor.model).toBe('haiku');
  });
});
|