From 613071f41dc325314d293ff8914da48386bdea64 Mon Sep 17 00:00:00 2001 From: richblack Date: Fri, 26 Jun 2026 18:24:04 +0800 Subject: [PATCH] =?UTF-8?q?feat(graph):=20get=5Fsource=20+=20refresh=20?= =?UTF-8?q?=E7=AB=AF=E9=BB=9E=20+=20keyword=20=E6=94=B6=E6=96=82=20(T3.6-3?= =?UTF-8?q?.7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 對應 issue #1 T3 C 段(圖工具 HTTP API 備好,MCP 註冊薄殼待 arcrun)。 - get_source (3.7): graph-source.ts + GET /graph/source/:name — 回節點的 active triplet 來源指標(uri/anchor/block_id/content_hash),去重。 連帶加 source_anchor slot,ingest 從 source.anchor 帶入 - refresh (3.6/3.6b): graph-refresh.ts + POST /graph/refresh — 純被動代轉 ingest(KBDB_INGEST_URL),只人發起、無排程/webhook(fan-out 紅線)。 未設 URL → 誠實 forwarded:false,不假綠 - 3.6d: POST /search 移除公開 keyword 模式(重複 KBDB MCP),收斂 suggest-only; keywordSearch helper 留作 suggest 內部建構塊 - 3 新測試(get_source uri+anchor / active-only / refresh 未就緒誠實回報) gates: vitest 19 passed / zero SQL / 無新綁定 / dry-run bundle 乾淨 待接:MCP 註冊薄殼併 arcrun u6u-mcp-server;refresh 端到端待 ingest(T4) 部署 Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/3-specs/ingest-contract/design.md | 8 +++++ docs/3-specs/ingest-contract/tasks.md | 13 ++++--- src/actions/graph-refresh.ts | 34 ++++++++++++++++++ src/actions/graph-source.ts | 35 ++++++++++++++++++ src/actions/triplet-crud.ts | 2 ++ src/actions/triplet-ingest.ts | 1 + src/lib/templates.ts | 4 ++- src/routes/graph.ts | 19 ++++++++++ src/routes/search.ts | 25 +++++-------- src/types.ts | 4 ++- tests/graph-source.test.ts | 49 ++++++++++++++++++++++++++ 11 files changed, 171 insertions(+), 23 deletions(-) create mode 100644 src/actions/graph-refresh.ts create mode 100644 src/actions/graph-source.ts create mode 100644 tests/graph-source.test.ts diff --git a/docs/3-specs/ingest-contract/design.md b/docs/3-specs/ingest-contract/design.md index feb6c03..fe02312 100644 --- a/docs/3-specs/ingest-contract/design.md +++ b/docs/3-specs/ingest-contract/design.md @@ -67,6 +67,14 @@ traverse / search / neighbors 從 records 組鄰接表前,先 `filter(status = - **`refresh` 紅線**(T3.6b):只能人發起的 MCP 調用觸發,**禁掛排程/webhook 自動 refresh**(否則變回 fan-out,踩 flag 紅線)。 - **T3.6d**:整合時移除 `search-query.ts` 代理 base 關鍵字那條(重複,關鍵字歸 KBDB MCP)。 +## 7.5 get_source / refresh 落地(C 段,已實作) + +- **get_source**(`graph-source.ts` + `GET /graph/source/:name`):給節點名 → 回觸及它的 active triplet 的來源指標(`uri` / `anchor` / `block_id` / `content_hash`),按 uri+anchor 去重。為此 triplet template 增 `source_anchor` slot(ingest 從 `source.anchor` 帶入)。 +- **refresh**(`graph-refresh.ts` + `POST /graph/refresh`):純被動代轉 ingest 重抓+萃。graph 自己不抓不萃(ingest 純餵食器職責)。 + - 🚫 紅線:只人發起 MCP 調用觸發,無排程/webhook。 + - ingest 對象 = `KBDB_INGEST_URL`(env,T4 就緒前留空)。未設 → 誠實回 `{forwarded:false}`,不假綠。 +- **search keyword 收斂**(T3.6d):`POST /search` 移除公開 keyword 模式(重複 KBDB MCP `kbdb_search`),收斂為 suggest-only。`keywordSearch` helper 保留為 suggest 內部建構塊。 + ## 8. 不做 / 延後 - **graph CLI**(T3.7b):延後。人少在命令行 traverse、AI 用不到 → 不做(非省工,是不誤導 AI 以為有這條路)。 diff --git a/docs/3-specs/ingest-contract/tasks.md b/docs/3-specs/ingest-contract/tasks.md index 6f920dc..0568796 100644 --- a/docs/3-specs/ingest-contract/tasks.md +++ b/docs/3-specs/ingest-contract/tasks.md @@ -20,11 +20,14 @@ ## C. MCP(⚠️ 跨 repo,需 arcrun 配合 → issue 標清) -- [ ] **3.6** 圖查詢 + `refresh` HTTP API/邏輯備好(graph 端);MCP 註冊薄殼併入 arcrun KBDB MCP(協調後接,**不另起 graph MCP**) -- [ ] **3.6b** `refresh` 紅線:只人發起 MCP 觸發,禁排程/webhook 自動 -- [ ] **3.6d** 移除 `search-query.ts` 代理 base 關鍵字那條(重複,歸 KBDB MCP) -- [ ] **3.7** `get_source` 端點(指回 source.uri + anchor) -- [ ] **3.7b** ~~graph CLI~~ 延後不做(人少用、AI 用不到 → 不誤導) +- [x] **3.6** 圖查詢 + `refresh` **HTTP API/邏輯備好(graph 端)**:`GET /graph/source/:name`、`POST /graph/refresh`、既有 traverse/neighbors/path/relation。**MCP 註冊薄殼仍待 arcrun 配合**(不另起 graph MCP)(2026-06-26) +- [x] **3.6b** `refresh` 紅線:`graph-refresh.ts` 純被動代轉,只人發起調用觸發;無排程/webhook(2026-06-26) +- [x] **3.6d** 移除 graph **公開** keyword 端點(`POST /search` 收斂為 suggest-only;keywordSearch helper 留作 suggest 內部建構塊)(2026-06-26) +- [x] **3.7** `get_source`:`graph-source.ts` + `GET /graph/source/:name`(回 uri+anchor+block_id+content_hash,active-only,去重)。連帶加 `source_anchor` slot(2026-06-26) +- [x] **3.7b** ~~graph CLI~~ 延後不做(人少用、AI 用不到 → 不誤導) + +> **跨 repo 待接(總管協調)**:圖工具(traverse/neighbors/source)+ refresh 的 **MCP 註冊薄殼**併入 arcrun `u6u-mcp-server`(KBDB MCP),待 arcrun #7 落地後兩邊接。graph 端 HTTP API 已就緒。 +> **refresh 待部署**:`KBDB_INGEST_URL` 未設時 `refresh` 誠實回 `forwarded:false`(ingest repo T4 未就緒)。端到端待 ingest 部署驗。 ## 完成準則 diff --git a/src/actions/graph-refresh.ts b/src/actions/graph-refresh.ts new file mode 100644 index 0000000..b95facb --- /dev/null +++ b/src/actions/graph-refresh.ts @@ -0,0 +1,34 @@ +// refresh(T3.6 / T3.6b)— 代轉 ingest 重抓+萃某來源。 +// +// 🚫 紅線:只能【人發起的 MCP 調用】觸發。禁掛排程/webhook 自動 refresh +// (否則變回 fan-out,踩 GitHub flag 紅線)。本端點純被動:收到一次調用 → 代轉一次。 +// graph 自己不抓檔、不萃取(那是 ingest 純餵食器的職責);graph 只把 refresh 意圖 +// 轉給 ingest 的端點,ingest 抓+萃完後再走 POST /triplets/ingest 回灌。 + +export type RefreshRequest = { uri: string; owner_id?: string }; +export type RefreshResult = { forwarded: boolean; ingest_url?: string; note?: string }; + +/** + * 代轉 refresh 給 ingest 服務。ingestUrl 由 env 注入(KBDB_INGEST_URL)。 + * 未設 → 誠實回 {forwarded:false}(ingest repo T4 尚未就緒/未部署),不假裝成功。 + */ +export async function refreshSource( + req: RefreshRequest, + ingestUrl: string | undefined, +): Promise { + if (!ingestUrl) { + return { + forwarded: false, + note: 'KBDB_INGEST_URL 未設:ingest 服務尚未就緒(T4 待部署),refresh 無對象可轉。', + }; + } + const res = await fetch(ingestUrl.replace(/\/$/, '') + '/refresh', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ uri: req.uri, owner_id: req.owner_id }), + }); + if (!res.ok) { + throw new Error(`[ingest] refresh ${req.uri}: ${res.status} ${res.statusText}`); + } + return { forwarded: true, ingest_url: ingestUrl }; +} diff --git a/src/actions/graph-source.ts b/src/actions/graph-source.ts new file mode 100644 index 0000000..c782fb5 --- /dev/null +++ b/src/actions/graph-source.ts @@ -0,0 +1,35 @@ +// get_source(T3.7)— 指回原文:給一個節點名,回它所有 triplet 的來源指標。 +// 鐵律:走 base API、零 SQL。圖在插件層組裝。 +// 用途:圖遍歷找到一筆知識後,回跳產生它的 canonical MD(source.uri + anchor)。 + +import type { KbdbClient } from '../lib/kbdb-client'; +import { getNodeEdges } from './graph-nodes'; + +export type SourceRef = { + uri: string | null; // 來源穩定識別(github:owner/repo@path) + anchor: string | null; // 檔內定位(heading slug / block id) + block_id: string | null; // 向後相容:Logseq block id + content_hash: string | null; // 該批快照 hash + edge: { subject: string; predicate: string; object: string }; +}; + +/** 給節點名,回觸及它的(active)triplet 的來源指標清單,去重同 uri+anchor。 */ +export async function getSource(client: KbdbClient, node: string): Promise { + const edges = await getNodeEdges(client, node); // 已 active-only(經 queryTriplets) + const seen = new Set(); + const refs: SourceRef[] = []; + + for (const t of edges) { + const key = `${t.source_uri ?? ''}#${t.source_anchor ?? ''}`; + if (seen.has(key)) continue; + seen.add(key); + refs.push({ + uri: t.source_uri, + anchor: t.source_anchor, + block_id: t.source_block_id, + content_hash: t.content_hash, + edge: { subject: t.subject, predicate: t.predicate, object: t.object }, + }); + } + return refs; +} diff --git a/src/actions/triplet-crud.ts b/src/actions/triplet-crud.ts index 9fa24f0..61f0fb9 100644 --- a/src/actions/triplet-crud.ts +++ b/src/actions/triplet-crud.ts @@ -20,6 +20,7 @@ export type CreateTripletData = { object_entity_type?: string; source_uri?: string; content_hash?: string; + source_anchor?: string; }; /** 建立三元組 → POST /records(template=triplet)。 */ @@ -46,6 +47,7 @@ export async function createTriplet( if (data.object_entity_type) values.object_entity_type = data.object_entity_type; if (data.source_uri) values.source_uri = data.source_uri; if (data.content_hash) values.content_hash = data.content_hash; + if (data.source_anchor) values.source_anchor = data.source_anchor; const id = await client.createRecord(TPL_TRIPLET, values, data.owner_id); return { id, subject: data.subject, predicate: data.predicate, object: data.object }; diff --git a/src/actions/triplet-ingest.ts b/src/actions/triplet-ingest.ts index 3835bbf..3ce6505 100644 --- a/src/actions/triplet-ingest.ts +++ b/src/actions/triplet-ingest.ts @@ -69,6 +69,7 @@ export async function ingestEnvelope( source_block_id: env.source.block_id, source_uri: env.source.uri, content_hash: env.source.content_hash, + source_anchor: env.source.anchor, owner_id, }); } diff --git a/src/lib/templates.ts b/src/lib/templates.ts index edec1a0..275bb7d 100644 --- a/src/lib/templates.ts +++ b/src/lib/templates.ts @@ -16,7 +16,8 @@ export const TRIPLET_SLOTS = [ 'subject_entity_type', 'object_entity_type', // 取代/快照(T3.2):status=active|deprecated;superseded_by=取代它的新 record id; // source_uri+content_hash 承載 ingest idempotency(按 source_uri 分組 deprecate)。 - 'status', 'superseded_by', 'source_uri', 'content_hash', + // source_anchor 供 get_source 精準回跳原文(T3.7)。 + 'status', 'superseded_by', 'source_uri', 'content_hash', 'source_anchor', ]; // gloss(T3.2b):一句話描述,供「詞+gloss」語義 normalize 的 embedding 對象。 export const ENTITY_SLOTS = ['canonical', 'aliases_json', 'entity_type', 'owner', 'gloss']; @@ -50,6 +51,7 @@ export function recordToTriplet(rec: BaseRecord): Triplet { superseded_by: v.superseded_by || null, source_uri: v.source_uri || null, content_hash: v.content_hash || null, + source_anchor: v.source_anchor || null, created_at: 0, updated_at: 0, }; diff --git a/src/routes/graph.ts b/src/routes/graph.ts index d368eb7..9117aae 100644 --- a/src/routes/graph.ts +++ b/src/routes/graph.ts @@ -6,6 +6,8 @@ import type { Bindings } from '../types'; import { traverseGraph, queryRelation } from '../actions/graph-traverse'; import { getNodeEdges, getNeighbors } from '../actions/graph-nodes'; import { findShortestPath } from '../actions/graph-path'; +import { getSource } from '../actions/graph-source'; +import { refreshSource } from '../actions/graph-refresh'; import { makeKbdbClient } from '../lib/kbdb-client'; const graphRoutes = new Hono<{ Bindings: Bindings }>(); @@ -46,4 +48,21 @@ graphRoutes.get('/path', async (c) => { return c.json({ from, to, ...result }); }); +// GET /graph/source/:name — get_source(T3.7):回節點的原文來源指標(uri+anchor) +graphRoutes.get('/source/:name', async (c) => { + const name = decodeURIComponent(c.req.param('name')); + const sources = await getSource(makeKbdbClient(c.env), name); + return c.json({ node: name, sources, count: sources.length }); +}); + +// POST /graph/refresh — 代轉 ingest 重抓+萃(T3.6/3.6b) +// 🚫 只人發起 MCP 調用觸發,禁排程/webhook 自動(fan-out 紅線)。 +graphRoutes.post('/refresh', async (c) => { + const body = await c.req.json().catch(() => ({})); + const uri = body?.uri; + if (!uri) return c.json({ error: 'uri required' }, 400); + const result = await refreshSource({ uri, owner_id: body?.owner_id }, c.env.KBDB_INGEST_URL); + return c.json(result); +}); + export { graphRoutes }; diff --git a/src/routes/search.ts b/src/routes/search.ts index c90650e..1b4b922 100644 --- a/src/routes/search.ts +++ b/src/routes/search.ts @@ -1,43 +1,36 @@ // 搜尋路由入口 — 僅驗證參數,呼叫 actions // -// 插件層只做基本盤 keyword 搜尋(D1 LIKE,走 GET /entries/search)。 +// T3.6d:移除 graph 的 keyword 搜尋【公開端點】——純關鍵字搜尋重複於 KBDB MCP 的 kbdb_search, +// 歸 KBDB MCP,不由 graph 代理。graph 只保留圖專屬的 `suggest`(跨知識建議)。 +// 注意:keywordSearch helper 仍保留,作為 suggest 的內部建構塊(非對外端點)。 // 語意搜尋 / embedding 屬基本盤 optional embed 模組,不在插件 → 已移除 /search/embed。 import { Hono } from 'hono'; import { z } from 'zod'; import type { Bindings, Variables } from '../types'; -import { keywordSearch } from '../actions/search-query'; import { suggestKnowledge } from '../actions/search-suggest'; import { makeKbdbClient } from '../lib/kbdb-client'; const searchRoutes = new Hono<{ Bindings: Bindings; Variables: Variables }>(); -const UnifiedSearchSchema = z.object({ +const SuggestSchema = z.object({ query: z.string().min(1), - type: z.enum(['keyword', 'suggest']).optional().default('keyword'), topK: z.number().min(1).max(20).optional(), owner_id: z.string().optional(), }); -// 統一搜尋入口:POST /search +// 統一搜尋入口:POST /search — 僅 suggest(keyword 歸 KBDB MCP,T3.6d) searchRoutes.post('/', async (c) => { - const parsed = UnifiedSearchSchema.safeParse(await c.req.json()); + const parsed = SuggestSchema.safeParse(await c.req.json()); if (!parsed.success) return c.json({ error: parsed.error.flatten() }, 400); - const { query, type, topK, owner_id } = parsed.data; + const { query, topK, owner_id } = parsed.data; // Namespace 讀取過濾:partner token 只能搜到自己 namespace 的資料 const namespace = c.get('namespace'); const effectiveOwner = namespace ?? owner_id; - const client = makeKbdbClient(c.env); - - if (type === 'suggest') { - const result = await suggestKnowledge(client, query, topK, effectiveOwner); - return c.json(result); - } - - const matches = await keywordSearch(client, query, { limit: topK, owner_id: effectiveOwner }); - return c.json({ matches, count: matches.length, mode: 'keyword' }); + const result = await suggestKnowledge(makeKbdbClient(c.env), query, topK, effectiveOwner); + return c.json(result); }); export { searchRoutes }; diff --git a/src/types.ts b/src/types.ts index 75634bd..30ac9f0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -4,6 +4,7 @@ export type Bindings = { // 插件不碰 DB/Vectorize/AI — 全走基本盤 API(API-as-Wall)。 // 語意搜尋/embedding 屬基本盤 optional embed 模組,不在插件。 KBDB_BASE_URL?: string; // 基本盤 arcrun/kbdb API 網址(leo: 可設定,先留空) + KBDB_INGEST_URL?: string; // ingest 服務網址(refresh 代轉對象;T4 就緒前留空) KBDB_INTERNAL_TOKEN?: string; ENVIRONMENT: string; API_KEY?: string; @@ -32,8 +33,9 @@ export type Triplet = { object_entity_type: EntityType | null; // 客體 entity 類型 status: TripletStatus; // active(進圖遍歷)| deprecated(被取代,可查/可 rollback) superseded_by: string | null; // 取代它的新 record id(active 時為 null) - source_uri: string | null; // ingest 來源穩定識別(idempotency 分組鍵) + source_uri: string | null; // ingest 來源穩定識別(idempotency 分組鍵 + get_source 指標) content_hash: string | null; // 來源快照 hash(idempotency 比對鍵) + source_anchor: string | null; // 檔內定位(heading slug / block id),get_source 精準回跳 created_at: number; updated_at: number; }; diff --git a/tests/graph-source.test.ts b/tests/graph-source.test.ts new file mode 100644 index 0000000..0685b3a --- /dev/null +++ b/tests/graph-source.test.ts @@ -0,0 +1,49 @@ +// get_source + refresh(C 段)— 走 mock,零 SQL、不打真網路。 +import { describe, it, expect } from 'vitest'; +import { ingestEnvelope } from '../src/actions/triplet-ingest'; +import { getSource } from '../src/actions/graph-source'; +import { refreshSource } from '../src/actions/graph-refresh'; +import { mockClient } from './mock-client'; + +describe('getSource — 回節點的原文來源指標', () => { + it('回觸及節點的 triplet 的 uri + anchor', async () => { + const c = mockClient(); + await ingestEnvelope(c, { + source: { uri: 'github:u/w@a.md', content_hash: 'h1', anchor: '#graph-rag' }, + extractor: { model: 'm', tier: 'deep' }, + triplets: [{ subject: 'GraphRAG', predicate: '是', object: 'RAG 變體' }], + }); + + const refs = await getSource(c, 'GraphRAG'); + expect(refs.length).toBe(1); + expect(refs[0].uri).toBe('github:u/w@a.md'); + expect(refs[0].anchor).toBe('#graph-rag'); + expect(refs[0].edge).toEqual({ subject: 'GraphRAG', predicate: '是', object: 'RAG 變體' }); + }); + + it('deprecated triplet 不出現在 get_source(active-only)', async () => { + const c = mockClient(); + await ingestEnvelope(c, { + source: { uri: 'github:u/w@a.md', content_hash: 'h1', anchor: '#old' }, + extractor: { model: 'm', tier: 'deep' }, + triplets: [{ subject: 'X', predicate: 'r', object: 'old' }], + }); + await ingestEnvelope(c, { + source: { uri: 'github:u/w@a.md', content_hash: 'h2', anchor: '#new' }, + extractor: { model: 'm', tier: 'deep' }, + triplets: [{ subject: 'X', predicate: 'r', object: 'new' }], + }); + + const refs = await getSource(c, 'X'); + expect(refs.length).toBe(1); + expect(refs[0].anchor).toBe('#new'); // 只見 active 批 + }); +}); + +describe('refreshSource — 代轉 ingest(人發起)', () => { + it('KBDB_INGEST_URL 未設 → 誠實回 forwarded:false,不假裝成功', async () => { + const res = await refreshSource({ uri: 'github:u/w@a.md' }, undefined); + expect(res.forwarded).toBe(false); + expect(res.note).toMatch(/未就緒|未設/); + }); +});