feat: KBDB-graph 插件獨立 — 全面改寫成走基本盤 API(API-as-Wall)
按 leo 鐵律(2026-06-14)把插件從「直接 SQL 操作基本盤表」改寫成 「只透過基本盤 arcrun/kbdb HTTP API 讀寫」。零建表、零 migration、零 SQL。 - 新增 src/lib/kbdb-client.ts:唯一對外通道,封裝 entries/templates/records API - 新增 src/lib/templates.ts:triplet/entity template 定義(替代建表) - 改寫 21 個違規 action(triplet/graph/entity/search)→ 走 client,圖在插件層記憶體組裝 - 移除所有 migrations、D1/Vectorize/AI 綁定;embedding/語意搜尋歸基本盤 optional 模組 - index.ts 只掛 triplets/graph/entities/search 路由;基本盤路由歸 arcrun/kbdb - 測試改走 mock client(純 node);裁剪 CLAUDE.md 只留 graph 插件 + 鐵律 - 修正 SDD design.md「讀現狀推翻鐵律」的錯誤判斷(共用 D1 → API-as-Wall) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,65 @@
|
||||
// Entity CRUD — 零 SQL / 零 D1 版。全走基本盤 API(KbdbClient)。
|
||||
// Entity = 基本盤 record(template=entity),填 slot 不建表。Pending alias 拆到 entity-pending.ts。
|
||||
//
|
||||
// base 缺口 [→arcrun]:無 PUT /records/:id → record 無法原地更新;
|
||||
// addAlias 改「重建一筆新 entity record」覆寫(補上 PUT 後可改原地 patch aliases_json)。
|
||||
|
||||
import type { Entity } from '../types';
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
import { TPL_ENTITY, ensurePluginTemplates, recordToEntity } from '../lib/templates';
|
||||
|
||||
/** Normalizes a name for exact-match comparison: lowercased, surrounding whitespace removed. */
const norm = (s: string): string => {
  const lowered = s.toLowerCase();
  return lowered.trim();
};
|
||||
|
||||
// ─── Entity ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Creates an entity identified by its canonical name.
 * The entity is stored as a single record of the entity template through the base API client.
 *
 * @param client - Base API client used for every read/write.
 * @param canonical - Canonical name of the new entity (stored as given).
 * @param owner - Optional owner; written to the `owner` slot (empty string when omitted)
 *   and forwarded to `createRecord`.
 * @returns The new entity: the id returned by `createRecord`, the canonical name, no aliases.
 */
export async function createEntity(client: KbdbClient, canonical: string, owner?: string): Promise<Entity> {
  await ensurePluginTemplates(client);

  const slots = {
    canonical,
    aliases_json: '[]',
    entity_type: '',
    owner: owner ?? '',
  };
  const recordId = await client.createRecord(TPL_ENTITY, slots, owner);

  return { id: recordId, canonical, aliases: [] };
}
|
||||
|
||||
/**
 * Looks up an entity by exact name.
 * The name is compared, after `norm` normalization, against each entity's canonical name
 * and aliases. No semantic/similarity matching happens here.
 *
 * @param client - Base API client.
 * @param name - Name to look for.
 * @param owner - Optional owner forwarded to `listRecordsByTemplate`.
 * @returns The first matching entity in listing order, or `null` when nothing matches.
 */
export async function findEntityByName(client: KbdbClient, name: string, owner?: string): Promise<Entity | null> {
  const wanted = norm(name);
  const records = await client.listRecordsByTemplate(TPL_ENTITY, owner);

  for (const record of records) {
    // Records are converted one at a time so the scan stops at the first hit.
    const entity = recordToEntity(record);
    const knownNames = [entity.canonical, ...entity.aliases];
    if (knownNames.some((candidate) => norm(candidate) === wanted)) return entity;
  }
  return null;
}
|
||||
|
||||
/**
 * Lists entities stored under the entity template.
 * Records whose canonical name is empty are dropped before the limit is applied.
 *
 * @param client - Base API client.
 * @param limit - Maximum number of entities to return (default 100).
 * @param owner - Optional owner forwarded to `listRecordsByTemplate`.
 */
export async function listEntities(client: KbdbClient, limit = 100, owner?: string): Promise<Entity[]> {
  const records = await client.listRecordsByTemplate(TPL_ENTITY, owner);
  const entities = records.map(recordToEntity);
  const named = entities.filter((entity) => entity.canonical);
  return named.slice(0, limit);
}
|
||||
|
||||
/**
 * Adds an alias to an entity.
 *
 * The base API has no `PUT /records/:id`, so the entity cannot be patched in place; instead a
 * new entity record is created that carries the old canonical name, the existing aliases and
 * the new alias. Once the base exposes PUT this should become an in-place patch of `aliases_json`.
 *
 * The call is a no-op when the alias is blank, or when it already matches the canonical name or
 * an existing alias under the same normalization `findEntityByName` uses (`norm`).
 *
 * @param client - Base API client.
 * @param entityId - Record id of the entity to extend.
 * @param alias - Alias to add (stored as given).
 * @param owner - Fallback owner, used only when the existing record has no owner value.
 * @throws Error when no record exists for `entityId`.
 */
export async function addAlias(client: KbdbClient, entityId: string, alias: string, owner?: string): Promise<void> {
  const rec = await client.getRecord(entityId);
  if (!rec) throw new Error(`Entity ${entityId} not found`);

  const ent = recordToEntity(rec);
  const target = norm(alias);
  if (!target) return;

  // Compare normalized, otherwise "Foo" and " foo " would each trigger a rebuild even though
  // lookups treat them as the same name.
  const alreadyKnown = norm(ent.canonical) === target || ent.aliases.some((a) => norm(a) === target);
  if (alreadyKnown) return;

  const aliases = [...ent.aliases, alias];

  // `||` rather than `??`: createEntity stores '' when no owner was given, and an empty string
  // must fall through to the caller-supplied owner.
  const effectiveOwner: string | undefined = rec.values.owner || owner;

  await ensurePluginTemplates(client);
  await client.createRecord(
    TPL_ENTITY,
    {
      canonical: ent.canonical,
      aliases_json: JSON.stringify(aliases),
      entity_type: rec.values.entity_type ?? '',
      owner: effectiveOwner ?? '',
    },
    // Write under the same owner that is stored in the slot so the rebuilt record stays
    // visible to owner-scoped listings of the original entity.
    effectiveOwner,
  );
}
|
||||
@@ -0,0 +1,23 @@
|
||||
// Entity Graph Embedding — [→arcrun embed 模組]
|
||||
//
|
||||
// 處置決定(2026-06-14):整檔原本是 embedding 邏輯(聚合 entity 所有 triplet 的
|
||||
// bge-m3 向量、加權平均成認知向量、upsert 到 Vectorize namespace 'entity-graph')。
|
||||
// 依鐵律 4:embedding / 語意搜尋【不是插件職責】,屬基本盤 optional embed 模組。
|
||||
// 插件不綁 AI/Vectorize,故移除全部 D1 + Vectorize 實作,僅留薄殼標記去向。
|
||||
//
|
||||
// 若日後需要 entity graph embedding:在基本盤 embed 模組實作(讀 triplet record 走 API +
|
||||
// base 內部 AI/Vectorize),插件這層不碰。此函式維持簽名相容,永遠回 updated:false。
|
||||
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
|
||||
/**
 * No-op shell kept for signature compatibility.
 * Graph embedding is no longer done by this plugin, so callers need no change but nothing is
 * recalculated: both arguments are ignored.
 *
 * @returns Always `{ updated: false, triplet_count: 0 }`.
 */
export async function recalcEntityGraph(
  _client: KbdbClient,
  _entityName: string,
): Promise<{ updated: boolean; triplet_count: number }> {
  // The plugin performs no embedding work.
  const untouched = { updated: false, triplet_count: 0 };
  return untouched;
}
|
||||
@@ -0,0 +1,33 @@
|
||||
// Entity 正規化 — 純 exact match 版(零 AI / 零 Vectorize)。
|
||||
//
|
||||
// 鐵律:embedding / 語意相似度合併【不是插件職責】,屬基本盤 optional embed 模組。
|
||||
// 插件只做 exact match(小寫去空白比對 canonical / aliases):
|
||||
// 命中 → 回 canonical;未命中 → 建新 entity 後回 rawName。
|
||||
// 原本的 cosine 門檻(0.92 merge / 0.75 pending)與 pending 流程,
|
||||
// 待基本盤 embed 模組上線後在 base 層處理;插件不綁 AI/Vectorize。
|
||||
|
||||
import { findEntityByName, createEntity } from './entity-crud';
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
|
||||
/**
 * Normalizes a raw entity name using exact matching only.
 *
 *  1. Blank input is returned unchanged without touching the API (no empty entity is created).
 *  2. If an entity matches by canonical name or alias, its canonical name is returned.
 *  3. Otherwise a new entity is created for `rawName` and `rawName` is returned.
 *
 * Any error is logged and degrades to returning `rawName`, so normalization never blocks a write.
 *
 * @param client - Base API client.
 * @param rawName - Name as supplied by the caller.
 * @param owner - Optional owner forwarded to lookup and creation.
 */
export async function normalizeEntity(
  client: KbdbClient,
  rawName: string,
  owner?: string,
): Promise<string> {
  try {
    // A blank name would otherwise be stored as an entity with an empty canonical name.
    if (!rawName.trim()) return rawName;

    const exact = await findEntityByName(client, rawName, owner);
    if (exact) return exact.canonical;

    await createEntity(client, rawName, owner);
    return rawName;
  } catch (err) {
    console.error('[normalizeEntity] error, fallback to rawName:', err);
    return rawName;
  }
}
|
||||
@@ -0,0 +1,70 @@
|
||||
// Pending Alias — 零 SQL / 零 D1。全走基本盤 API(template=entity_pending record)。
|
||||
//
|
||||
// base 缺口 [→arcrun]:無 DELETE /records/:id → pending record 無法硬刪。
|
||||
// confirm/reject 採 soft:執行動作但不刪 pending(待 base 補 DELETE)。
|
||||
// 故 getPendingAliases 須由 caller 自行過濾已處理者,或待 DELETE 補上後在此硬刪。
|
||||
|
||||
import type { Entity, PendingAlias } from '../types';
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
import { TPL_ENTITY_PENDING, ensurePluginTemplates } from '../lib/templates';
|
||||
import { createEntity, addAlias } from './entity-crud';
|
||||
|
||||
/**
 * Records a pending alias as one record of the pending-alias template.
 *
 * @param client - Base API client.
 * @param rawName - The name awaiting a decision.
 * @param candidateEntityId - Record id of the entity it may be an alias of.
 * @param candidateCanonical - Canonical name of that candidate entity.
 * @param similarity - Similarity score; stored as a string slot.
 * @param owner - Optional owner forwarded to `createRecord`.
 * @returns The pending alias, with `created_at` set to the current time in whole seconds.
 */
export async function createPendingAlias(
  client: KbdbClient,
  rawName: string,
  candidateEntityId: string,
  candidateCanonical: string,
  similarity: number,
  owner?: string,
): Promise<PendingAlias> {
  await ensurePluginTemplates(client);

  const slots = {
    raw_name: rawName,
    candidate_entity_id: candidateEntityId,
    candidate_canonical: candidateCanonical,
    similarity: String(similarity),
  };
  const id = await client.createRecord(TPL_ENTITY_PENDING, slots, owner);

  const pending: PendingAlias = {
    id,
    raw_name: rawName,
    candidate_entity_id: candidateEntityId,
    candidate_canonical: candidateCanonical,
    similarity,
    created_at: Math.floor(Date.now() / 1000),
  };
  return pending;
}
|
||||
|
||||
/**
 * Lists pending aliases stored under the pending-alias template.
 * Records without a `raw_name` are skipped. `created_at` is always 0 here because this
 * function does not read a timestamp from the record.
 *
 * @param client - Base API client.
 * @param limit - Maximum number of items to return (default 100).
 * @param owner - Optional owner forwarded to `listRecordsByTemplate`.
 * @returns Pending aliases; a missing, empty or unparsable `similarity` slot is reported as 0
 *   rather than NaN.
 */
export async function getPendingAliases(client: KbdbClient, limit = 100, owner?: string): Promise<PendingAlias[]> {
  const records = await client.listRecordsByTemplate(TPL_ENTITY_PENDING, owner);
  return records
    .filter((r) => r.values.raw_name)
    // Cut to the requested size first so only the returned items are converted.
    .slice(0, limit)
    .map((r) => {
      const similarity = parseFloat(r.values.similarity ?? '0');
      return {
        id: r.record_id,
        raw_name: r.values.raw_name,
        candidate_entity_id: r.values.candidate_entity_id ?? '',
        candidate_canonical: r.values.candidate_canonical ?? '',
        // parseFloat('') and parseFloat('abc') are NaN, which would serialize as null.
        similarity: Number.isFinite(similarity) ? similarity : 0,
        created_at: 0,
      };
    });
}
|
||||
|
||||
/**
 * Confirms a pending alias: its raw name is added as an alias of the candidate entity.
 * The pending record itself is kept (this function does not delete it).
 *
 * @param client - Base API client.
 * @param pendingId - Record id of the pending alias.
 * @param owner - Optional owner forwarded to `addAlias`.
 * @throws Error when the pending record is missing, has no `raw_name`, or names no candidate entity.
 */
export async function confirmPendingAlias(client: KbdbClient, pendingId: string, owner?: string): Promise<void> {
  const rec = await client.getRecord(pendingId);
  if (!rec || !rec.values.raw_name) throw new Error(`Pending alias ${pendingId} not found`);

  const candidateId = rec.values.candidate_entity_id;
  // Fail here with a precise message instead of looking up an entity with an undefined id.
  if (!candidateId) throw new Error(`Pending alias ${pendingId} has no candidate entity`);

  await addAlias(client, candidateId, rec.values.raw_name, owner);
}
|
||||
|
||||
/**
 * Rejects a pending alias: a new entity is created from its raw name.
 * The pending record itself is kept (this function does not delete it).
 *
 * @param client - Base API client.
 * @param pendingId - Record id of the pending alias.
 * @param owner - Optional owner forwarded to `createEntity`.
 * @returns The newly created entity.
 * @throws Error when the pending record is missing or has no `raw_name`.
 */
export async function rejectPendingAlias(client: KbdbClient, pendingId: string, owner?: string): Promise<Entity> {
  const pending = await client.getRecord(pendingId);
  const rawName = pending?.values.raw_name;
  if (!rawName) throw new Error(`Pending alias ${pendingId} not found`);
  return createEntity(client, rawName, owner);
}
|
||||
@@ -0,0 +1,56 @@
|
||||
// 圖節點操作 — 零 SQL、零 D1,全走基本盤 API
|
||||
// 取全部 triplet 一次(queryTriplets),在插件層記憶體 group/filter。
|
||||
|
||||
import type { Triplet } from '../types';
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
import { queryTriplets } from './triplet-crud';
|
||||
|
||||
/** Fetches triplets with a single `queryTriplets` call, requesting a limit of 2000. */
async function loadAllTriplets(client: KbdbClient): Promise<Triplet[]> {
  const result = await queryTriplets(client, { limit: 2000 });
  return result.triplets;
}
|
||||
|
||||
/**
 * Lists graph nodes (every distinct subject or object) with their edge counts,
 * ordered by edge count, highest first.
 *
 * @param client - Base API client.
 * @param options.search - Optional case-insensitive substring filter on the node name.
 * @param options.limit - Maximum number of nodes (default 100, capped at 500).
 */
export async function listNodes(
  client: KbdbClient,
  options: { search?: string; limit?: number },
): Promise<Array<{ node: string; edge_count: number }>> {
  const cap = Math.min(options.limit ?? 100, 500);
  const allTriplets = await loadAllTriplets(client);

  // Each triplet contributes one edge to its subject and one to its object.
  const degree = new Map<string, number>();
  const bump = (name: string): void => {
    degree.set(name, (degree.get(name) ?? 0) + 1);
  };
  allTriplets.forEach((t) => {
    bump(t.subject);
    bump(t.object);
  });

  const needle = options.search ? options.search.toLowerCase() : null;
  const ranked = Array.from(degree, ([node, edge_count]) => ({ node, edge_count }))
    .filter((entry) => needle === null || entry.node.toLowerCase().includes(needle))
    .sort((a, b) => b.edge_count - a.edge_count);

  return ranked.slice(0, cap);
}
|
||||
|
||||
/**
 * Returns every loaded triplet in which `name` appears as subject or object (exact match).
 *
 * @param client - Base API client.
 * @param name - Node name.
 */
export async function getNodeEdges(
  client: KbdbClient,
  name: string,
): Promise<Triplet[]> {
  const allTriplets = await loadAllTriplets(client);
  const touchesNode = (t: Triplet): boolean => [t.subject, t.object].includes(name);
  return allTriplets.filter(touchesNode);
}
|
||||
|
||||
/**
 * Returns the distinct names directly connected to `name`, in first-seen order.
 * The node itself is never included.
 *
 * @param client - Base API client.
 * @param name - Node name.
 */
export async function getNeighbors(
  client: KbdbClient,
  name: string,
): Promise<string[]> {
  const edges = await getNodeEdges(client, name);
  const otherEnds = edges
    .flatMap((t) => [t.subject, t.object])
    .filter((endpoint) => endpoint !== name);
  return [...new Set(otherEnds)];
}
|
||||
@@ -0,0 +1,74 @@
|
||||
// 最短路徑(BFS)— 零 SQL、零 D1。
|
||||
// 取全部 triplet 一次建無向鄰接表,記憶體 BFS 求最短路。
|
||||
|
||||
import type { Triplet } from '../types';
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
import { queryTriplets } from './triplet-crud';
|
||||
|
||||
/** Outcome of a shortest-path search. */
type PathResult = {
  /** Node names from start to end, or `null` when no path was found. */
  path: string[] | null;
  /** Triplets walked along the path, in path order. */
  edges: Triplet[];
  /** Number of hops on the path; -1 when no path was found. */
  hops: number;
};
|
||||
|
||||
/**
 * Builds an undirected adjacency map: node name → every triplet touching that node.
 * A triplet whose subject equals its object is listed once for that node.
 */
function buildAdjacency(triplets: Triplet[]): Map<string, Triplet[]> {
  const adjacency = new Map<string, Triplet[]>();

  for (const edge of triplets) {
    const endpoints = edge.object === edge.subject ? [edge.subject] : [edge.subject, edge.object];
    for (const endpoint of endpoints) {
      let bucket = adjacency.get(endpoint);
      if (!bucket) {
        bucket = [];
        adjacency.set(endpoint, bucket);
      }
      bucket.push(edge);
    }
  }

  return adjacency;
}
|
||||
|
||||
/**
 * Finds a shortest path between two nodes with a breadth-first search over the
 * undirected graph built from the loaded triplets. Nodes at depth 6 are not expanded.
 *
 * @param client - Base API client.
 * @param from - Start node name.
 * @param to - Target node name.
 * @returns The path, its edges and hop count; `{ path: null, edges: [], hops: -1 }` when the
 *   target was not reached; a zero-hop path when `from === to`.
 */
export async function findShortestPath(
  client: KbdbClient,
  from: string,
  to: string,
): Promise<PathResult> {
  if (from === to) return { path: [from], edges: [], hops: 0 };

  const MAX_HOPS = 6;
  const { triplets } = await queryTriplets(client, { limit: 2000 });
  const adjacency = buildAdjacency(triplets);

  // depthOf doubles as the visited set; cameFrom records how each node was first reached.
  const depthOf = new Map<string, number>([[from, 0]]);
  const cameFrom = new Map<string, { node: string; edge: Triplet }>();
  const frontier: string[] = [from];
  let reached = false;

  // The frontier grows while it is being iterated; array iteration picks up appended items.
  search: for (const here of frontier) {
    const depth = depthOf.get(here) ?? 0;
    if (depth >= MAX_HOPS) continue;

    for (const edge of adjacency.get(here) ?? []) {
      const there = edge.subject === here ? edge.object : edge.subject;
      if (depthOf.has(there)) continue;

      depthOf.set(there, depth + 1);
      cameFrom.set(there, { node: here, edge });
      frontier.push(there);
      if (there === to) {
        reached = true;
        break search;
      }
    }
  }

  if (!reached) return { path: null, edges: [], hops: -1 };

  // Walk back from the target, then flip both lists into start → end order.
  const nodes: string[] = [to];
  const walked: Triplet[] = [];
  for (let step = cameFrom.get(to); step; step = cameFrom.get(step.node)) {
    nodes.push(step.node);
    walked.push(step.edge);
  }
  nodes.reverse();
  walked.reverse();

  return { path: nodes, edges: walked, hops: nodes.length - 1 };
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// 圖遍歷 + 關係查詢 — 零 SQL、零 D1。
|
||||
// 取全部 triplet 一次建鄰接表,BFS / filter 在記憶體跑。
|
||||
|
||||
import type { Triplet, GraphNode } from '../types';
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
import { queryTriplets } from './triplet-crud';
|
||||
|
||||
/**
 * Builds an undirected adjacency map: node name → every triplet touching that node.
 * A self-referencing triplet (subject equals object) is stored once for its node.
 */
function buildAdjacency(triplets: Triplet[]): Map<string, Triplet[]> {
  const byNode = new Map<string, Triplet[]>();

  triplets.forEach((triplet) => {
    // The Set collapses subject/object into one endpoint when they are equal.
    for (const node of new Set([triplet.subject, triplet.object])) {
      const existing = byNode.get(node);
      if (existing) {
        existing.push(triplet);
      } else {
        byNode.set(node, [triplet]);
      }
    }
  });

  return byNode;
}
|
||||
|
||||
/**
 * Breadth-first traversal from `start` over the undirected graph built from the loaded triplets.
 *
 * @param client - Base API client.
 * @param start - Node to start from; it is reported at level 0 even when it has no edges.
 * @param maxDepth - Deepest level to report. Capped at 5; a NaN or non-numeric value is treated
 *   as the cap, so malformed input can never lift the depth limit.
 * @returns Visited nodes in BFS order, each with its level and its adjacent triplets.
 */
export async function traverseGraph(
  client: KbdbClient,
  start: string,
  maxDepth: number,
): Promise<GraphNode[]> {
  const DEPTH_CAP = 5;
  // Math.min(NaN, 5) is NaN and `level > NaN` is always false, which would disable the cap.
  const depth =
    typeof maxDepth === 'number' && !Number.isNaN(maxDepth) ? Math.min(maxDepth, DEPTH_CAP) : DEPTH_CAP;

  const { triplets } = await queryTriplets(client, { limit: 2000 });
  const adj = buildAdjacency(triplets);

  const visited = new Set<string>();
  const queue: Array<{ node: string; level: number }> = [{ node: start, level: 0 }];
  const results: GraphNode[] = [];

  // The queue grows while it is iterated; this avoids the O(n) cost of Array.prototype.shift.
  for (const { node, level } of queue) {
    if (visited.has(node) || level > depth) continue;
    visited.add(node);

    const edges = adj.get(node) ?? [];
    results.push({ node, level, edges });

    // Neighbours of a node on the last level would be discarded anyway; skip enqueueing them.
    if (level >= depth) continue;

    for (const t of edges) {
      const next = t.subject === node ? t.object : t.subject;
      if (!visited.has(next)) queue.push({ node: next, level: level + 1 });
    }
  }

  return results;
}
|
||||
|
||||
/**
 * Returns the loaded triplets that directly connect two nodes, in either direction.
 *
 * @param client - Base API client.
 * @param from - One endpoint.
 * @param to - The other endpoint.
 */
export async function queryRelation(
  client: KbdbClient,
  from: string,
  to: string,
): Promise<Triplet[]> {
  const { triplets } = await queryTriplets(client, { limit: 2000 });
  const links = (t: Triplet, subject: string, object: string): boolean =>
    t.subject === subject && t.object === object;
  return triplets.filter((t) => links(t, from, to) || links(t, to, from));
}
|
||||
@@ -0,0 +1,27 @@
|
||||
// Predicate 正規化(插件層):純字串正規化,零 embedding。
|
||||
//
|
||||
// 鐵律:插件零 SQL / 零 D1 / 零 Vectorize。
|
||||
// [→arcrun embed 模組] 原本用 bge-m3 embedding cosine 比對近義 predicate(>0.90 use_existing /
|
||||
// 0.85~0.90 pending / <0.85 new)。語意比對屬基本盤 embed 模組,不是 graph 插件職責。
|
||||
// 此處降級為純字串正規化(trim + 小寫 + 收斂空白),任何近義收斂交給基本盤 embed 模組。
|
||||
// 任何錯誤靜默降級,不擋寫入。
|
||||
|
||||
/** Result of predicate normalization, discriminated by `action`. */
export type PredicateNormalizeResult =
  /** An existing canonical predicate should be used. */
  | { action: 'use_existing'; canonical: string; score: number }
  /** Similar predicates exist and a decision is pending. */
  | { action: 'pending'; similar: Array<{ canonical: string; score: number }> }
  /** The predicate is treated as new. */
  | { action: 'new'; predicate: string };
|
||||
|
||||
/**
 * Normalizes a predicate by string cleanup only: surrounding whitespace is trimmed and every
 * run of whitespace is collapsed to a single space. No vector or similarity matching is done.
 *
 * @param predicate - Predicate as supplied by the caller.
 * @returns Always `{ action: 'new', predicate }` carrying the cleaned predicate; if cleanup
 *   throws, the error is logged and the raw predicate is returned instead.
 */
export function normalizePredicateOnWrite(predicate: string): PredicateNormalizeResult {
  let cleaned = predicate;
  try {
    cleaned = predicate.trim().replace(/\s+/g, ' ');
  } catch (err) {
    console.error('[normalizePredicateOnWrite] error, fallback to raw:', err);
  }
  return { action: 'new', predicate: cleaned };
}
|
||||
@@ -0,0 +1,12 @@
|
||||
// [→arcrun embed 模組] 手動 embedding 管理(embedAndStore / getVector / deleteVector)
|
||||
// 已移出插件:embedding / Vectorize 屬基本盤的 optional embed 模組,不是 graph 插件職責。
|
||||
//
|
||||
// 鐵律:插件零 SQL / 零 D1 / 零 Vectorize。此檔不再持有任何向量邏輯。
|
||||
// 保留薄殼 stub 以維持匯出相容;呼叫端(route)應改打基本盤 embed 模組,或停用此端點。
|
||||
|
||||
/** Error message used when an embedding operation is requested from this plugin. */
const NOT_IN_PLUGIN = 'embedding 屬基本盤 optional embed 模組([→arcrun embed 模組]),不在 graph 插件實作';

/**
 * Stub kept for export compatibility: embedding is not implemented in this plugin.
 *
 * @throws Error always, with the `NOT_IN_PLUGIN` message.
 */
export function embedNotSupported(): never {
  const failure = new Error(NOT_IN_PLUGIN);
  throw failure;
}
|
||||
@@ -0,0 +1,43 @@
|
||||
// 搜尋(插件層):只做基本盤 keyword 搜尋(D1 LIKE,走 GET /entries/search)。
|
||||
//
|
||||
// 鐵律:插件零 SQL / 零 D1 / 零 Vectorize。讀寫只透過 KbdbClient。
|
||||
// [→arcrun embed 模組] 語意搜尋 / embedding 是基本盤的 optional embed 模組,不是插件職責。
|
||||
// 插件不綁 AI/Vectorize;「語意搜尋」在此降級為 keyword 搜尋。
|
||||
|
||||
import type { KbdbClient, BaseEntry } from '../lib/kbdb-client';
|
||||
|
||||
/** One search hit, shaped so block and triplet results share a single type. */
export type SearchMatch = {
  /** Relevance score. */
  score: number;
  /** Extra data attached to the hit. */
  metadata: Record<string, unknown>;
  /** Which of `block` / `triplet` this hit represents. */
  type: 'block' | 'triplet';
  /** Triplet payload, or `null`. */
  triplet: Record<string, unknown> | null;
  /** Block payload, or `null`. */
  block: Record<string, unknown> | null;
};
|
||||
|
||||
/** Options accepted by `keywordSearch`. */
export type KeywordSearchOptions = {
  /** Maximum number of matches to return. */
  limit?: number;
  /** Owner passed through to the entry search. */
  owner_id?: string;
};
|
||||
|
||||
/**
 * Keyword search through the base API client.
 * Every entry returned by `client.searchEntries` is wrapped as a block-type `SearchMatch`
 * with score 0 (keyword search yields no similarity score) and empty metadata.
 *
 * @param client - Base API client.
 * @param query - Search text.
 * @param options - Optional `limit` (default 10) and `owner_id`.
 * @returns At most `limit` matches, in the order the client returned them.
 */
export async function keywordSearch(
  client: KbdbClient,
  query: string,
  options: KeywordSearchOptions = {},
): Promise<SearchMatch[]> {
  const cap = options.limit === undefined ? 10 : options.limit;
  const entries = await client.searchEntries(query, options.owner_id);

  const toMatch = (entry: BaseEntry): SearchMatch => ({
    score: 0,
    metadata: {},
    type: 'block',
    triplet: null,
    block: entry as unknown as Record<string, unknown>,
  });

  return entries.map((entry: BaseEntry) => toMatch(entry)).slice(0, cap);
}
|
||||
@@ -0,0 +1,68 @@
|
||||
// 搜尋推薦(插件層):用基本盤 keyword 搜尋結果組模板。
|
||||
//
|
||||
// 鐵律:插件零 SQL / 零 D1 / 零 Vectorize。讀寫只透過 KbdbClient。
|
||||
// [→arcrun embed 模組] 語意分數 / 相似度排序屬基本盤 embed 模組;此處只做 keyword 搜尋 + 零幻覺模板。
|
||||
// 零幻覺:只用知識庫資料組模板,不經 LLM。
|
||||
|
||||
import { keywordSearch } from './search-query';
|
||||
import type { SearchMatch } from './search-query';
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
|
||||
/** Result of `suggestKnowledge`. */
type SuggestResult = {
  /** Human-readable summary sentence; empty when nothing useful was found. */
  suggestion: string;
  /** The matches the suggestion is based on. */
  matches: SearchMatch[];
  /** Number of entries in `matches`. */
  count: number;
};
|
||||
|
||||
/**
 * Runs a keyword search and turns the useful hits into a templated suggestion sentence.
 * Hits are dropped when they carry neither a triplet nor a block, or when they are blocks whose
 * content is only a `((id))` reference or a `{{embed ((id))}}` embed.
 *
 * @param client - Base API client.
 * @param query - Search text.
 * @param limit - Maximum number of hits requested from the search (default 10).
 * @param owner_id - Optional owner passed to the search.
 * @returns The suggestion with its matches; an empty suggestion and no matches when nothing is left.
 */
export async function suggestKnowledge(
  client: KbdbClient,
  query: string,
  limit: number = 10,
  owner_id?: string,
): Promise<SuggestResult> {
  const found = await keywordSearch(client, query, { limit, owner_id });

  const refOnly = /^\s*\(\([a-f0-9-]+\)\)\s*$/;
  const embedOnly = /^\s*\{\{embed\s+\(\([a-f0-9-]+\)\)\}\}\s*$/;

  const isUseful = (m: SearchMatch): boolean => {
    if (m.triplet === null && m.block === null) return false;
    if (m.type !== 'block' || !m.block) return true;
    // Pure reference/embed blocks mean nothing to the user on their own.
    const content = (m.block as Record<string, string>).content || '';
    return !refOnly.test(content) && !embedOnly.test(content);
  };

  const kept = found.filter(isUseful);
  if (kept.length === 0) {
    return { suggestion: '', matches: [], count: 0 };
  }

  return { suggestion: buildFallback(kept), matches: kept, count: kept.length };
}
|
||||
|
||||
/**
 * Builds a friendly, conversational summary sentence from the matches.
 * Only data already present in the matches is quoted; no text is generated by a model.
 * The wording depends on whether the matches are all blocks, all triplets, or a mix.
 */
function buildFallback(matches: SearchMatch[]): string {
  const blocks = matches.filter((m) => m.type === 'block');
  const triplets = matches.filter((m) => m.type === 'triplet');
  const blockCount = blocks.length;
  const tripletCount = triplets.length;

  // Blocks only.
  if (blockCount > 0 && tripletCount === 0) {
    if (blockCount > 1) {
      return `嘿,你之前寫過 ${blockCount} 筆相關筆記,幫你撈出來了~`;
    }
    const page = blocks[0].block as Record<string, string> | null;
    const pageName = page?.page_name || '筆記';
    return `你之前在「${pageName}」寫過相關的內容,幫你找出來了~`;
  }

  // Triplets only.
  if (tripletCount > 0 && blockCount === 0) {
    const lead = triplets[0].triplet as Record<string, string> | null;
    if (!lead) return `欸,找到 ${matches.length} 筆你之前寫過的東西,看看有沒有幫助?`;
    return tripletCount === 1
      ? `你之前有寫到「${lead.subject} ${lead.predicate} ${lead.object}」,是不是跟這個有關?`
      : `嘿,你之前寫過 ${tripletCount} 筆跟「${lead.subject}」相關的筆記,幫你撈出來了~`;
  }

  // Mixed (or neither).
  return `嘿,找到 ${matches.length} 筆相關資料(${blockCount} 筆筆記、${tripletCount} 筆知識關聯),幫你整理好了~`;
}
|
||||
@@ -0,0 +1,16 @@
|
||||
// Triplet 知識分群 — 純函式
|
||||
//
|
||||
// 原版用 Workers AI 自動判斷 cluster。插件已不綁 AI(embedding/LLM 屬基本盤 optional 模組)。
|
||||
// 分群改為「由呼叫端(基本盤 embed 模組 / 上游 ingest)算好後傳入」;這裡只做正規化與 bridge_score 計算。
|
||||
|
||||
/**
 * Normalizes cluster labels: non-string and blank entries are dropped, the rest are trimmed and
 * lowercased, duplicates are removed, and at most the first 3 are kept.
 *
 * @param clusters - Labels computed by the caller; `undefined` is treated as no labels.
 * @returns The normalized labels and `bridgeScore`, which is the label count minus one (never below 0).
 */
export function classifyClusters(clusters: string[] | undefined): { clusters: string[]; bridgeScore: number } {
  const unique = new Set<string>();
  for (const raw of clusters ?? []) {
    if (typeof raw !== 'string') continue;
    const label = raw.trim();
    if (!label) continue;
    unique.add(label.toLowerCase());
  }

  const kept = [...unique].slice(0, 3);
  return { clusters: kept, bridgeScore: Math.max(0, kept.length - 1) };
}
|
||||
@@ -0,0 +1,90 @@
|
||||
// 三元組 CRUD — 走基本盤 API(API-as-Wall,零 SQL)
|
||||
// 寫 triplet = 確保 template='triplet' + POST /records 填 slot。
|
||||
// 查 triplet = GET /records/by-template/triplet → 插件層 filter/組裝。
|
||||
|
||||
import type { Triplet } from '../types';
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
import { TPL_TRIPLET, ensurePluginTemplates, recordToTriplet } from '../lib/templates';
|
||||
import { classifyClusters } from './triplet-cluster';
|
||||
|
||||
/** Input accepted by `createTriplet`. */
export type CreateTripletData = {
  subject: string;
  predicate: string;
  object: string;
  /** Id of the block the triplet came from, if any. */
  source_block_id?: string;
  /** Confidence of the triplet. */
  confidence?: number;
  /** Owner the record is created for. */
  owner_id?: string;
  /** Cluster labels, already computed by the caller. */
  clusters?: string[];
  /** Explicit bridge score. */
  bridge_score?: number;
  /** Entity type of the subject. */
  subject_entity_type?: string;
  /** Entity type of the object. */
  object_entity_type?: string;
};
|
||||
|
||||
/**
 * Creates a triplet as one record of the triplet template.
 *
 * All slot values are stored as strings: `confidence` defaults to 1, `clusters` is stored as
 * JSON, and `bridge_score` defaults to the cluster count minus one (never below 0). The optional
 * `source_block_id` and entity-type slots are written only when non-empty.
 *
 * @param client - Base API client.
 * @param data - Triplet fields; `owner_id` is forwarded to `createRecord`.
 * @returns The new record id together with the subject, predicate and object as given.
 */
export async function createTriplet(
  client: KbdbClient,
  data: CreateTripletData,
): Promise<{ id: string; subject: string; predicate: string; object: string }> {
  await ensurePluginTemplates(client);

  const { subject, predicate, object } = data;
  const clusters = data.clusters ?? [];
  const bridgeScore = data.bridge_score ?? Math.max(0, clusters.length - 1);

  const values: Record<string, string> = {
    subject,
    predicate,
    object,
    confidence: String(data.confidence ?? 1.0),
    clusters_json: JSON.stringify(clusters),
    bridge_score: String(bridgeScore),
  };

  const optionalSlots: Array<[string, string | undefined]> = [
    ['source_block_id', data.source_block_id],
    ['subject_entity_type', data.subject_entity_type],
    ['object_entity_type', data.object_entity_type],
  ];
  for (const [slot, value] of optionalSlots) {
    if (value) values[slot] = value;
  }

  const id = await client.createRecord(TPL_TRIPLET, values, data.owner_id);
  return { id, subject, predicate, object };
}
|
||||
|
||||
/** Filters and paging accepted by `queryTriplets`. */
export type TripletFilters = {
  /** Exact subject to match. */
  subject?: string;
  /** Exact predicate to match. */
  predicate?: string;
  /** Exact object to match. */
  object?: string;
  /** Page size. */
  limit?: number;
  /** Number of matching triplets to skip. */
  offset?: number;
  /** Owner passed to the record listing. */
  owner_id?: string;
  /** Matches triplets whose subject or object entity type equals this value. */
  entity_type?: string;
};
|
||||
|
||||
/**
 * Queries triplets: all records of the triplet template are listed through the client and
 * filtered here, because the filtering is not delegated to the base API.
 *
 * `subject`, `predicate` and `object` are exact matches; `entity_type` matches either the
 * subject's or the object's entity type. Empty filter values are ignored.
 *
 * Paging is sanitized: `offset` below 0 (or NaN) becomes 0, `limit` is clamped to 0–2000
 * (default 50, also used for NaN). Without this a negative value would reach `Array.slice` as a
 * negative index and return an unrelated part of the result set.
 *
 * @param client - Base API client.
 * @param filters - Filters and paging; `owner_id` is forwarded to the listing.
 * @returns The requested page and `count`, the number of triplets in that page.
 */
export async function queryTriplets(
  client: KbdbClient,
  filters: TripletFilters,
): Promise<{ triplets: Triplet[]; count: number }> {
  const records = await client.listRecordsByTemplate(TPL_TRIPLET, filters.owner_id);
  let triplets = records.map(recordToTriplet);

  if (filters.subject) triplets = triplets.filter((t) => t.subject === filters.subject);
  if (filters.predicate) triplets = triplets.filter((t) => t.predicate === filters.predicate);
  if (filters.object) triplets = triplets.filter((t) => t.object === filters.object);
  if (filters.entity_type) {
    triplets = triplets.filter(
      (t) => t.subject_entity_type === filters.entity_type || t.object_entity_type === filters.entity_type,
    );
  }

  const requestedOffset = filters.offset ?? 0;
  const requestedLimit = filters.limit ?? 50;
  const offset = Number.isNaN(requestedOffset) ? 0 : Math.max(0, requestedOffset);
  const limit = Number.isNaN(requestedLimit) ? 50 : Math.min(Math.max(0, requestedLimit), 2000);

  const page = triplets.slice(offset, offset + limit);
  return { triplets: page, count: page.length };
}
|
||||
|
||||
/**
 * Fetches a single triplet by record id.
 *
 * @param client - Base API client.
 * @param id - Record id.
 * @returns The triplet, or `null` when the client returns no record.
 */
export async function getTriplet(client: KbdbClient, id: string): Promise<Triplet | null> {
  const record = await client.getRecord(id);
  // The template name is set explicitly before conversion.
  return record ? recordToTriplet({ ...record, template: TPL_TRIPLET }) : null;
}
|
||||
|
||||
// re-export clusters helper(AI 分群,純函式 + 走 client 無關)
|
||||
export { classifyClusters };
|
||||
@@ -0,0 +1,10 @@
|
||||
// 三元組 embedding — [→arcrun embed]
|
||||
//
|
||||
// 鐵律:embedding / Vectorize 屬基本盤/上游(arcrun embed 模組),插件不綁 Vectorize、不碰向量索引。
|
||||
// 本檔精簡為薄殼:只保留純字串組裝 helper(tripletToText,供別處組 query 文字用)。
|
||||
// 真正的向量生成/upsert/delete 由基本盤負責;插件層不再持有 Ai 或向量索引繫結。
|
||||
|
||||
/** Joins subject, predicate and object with single spaces into one line of text (pure function). */
export function tripletToText(subject: string, predicate: string, object: string): string {
  return [subject, predicate, object].join(' ');
}
|
||||
@@ -0,0 +1,57 @@
|
||||
// 列出所有唯一實體(subject ∪ object)— 零 SQL / 零 D1
|
||||
// 取全部 triplet record(走基本盤 API),在記憶體統計 as_subject/as_object/total。
|
||||
// GET /entities
|
||||
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
import { queryTriplets } from './triplet-crud';
|
||||
|
||||
/** Usage counts for one entity name across the loaded triplets. */
export interface EntityStat {
  /** Entity name. */
  name: string;
  /** Times the name appears as a subject. */
  as_subject: number;
  /** Times the name appears as an object. */
  as_object: number;
  /** Sum of both roles. */
  total: number;
}
|
||||
|
||||
/** One page of entity statistics. */
export interface EntityListResult {
  /** Entities in this page. */
  entities: EntityStat[];
  /** Number of entities matching the query before paging. */
  total: number;
  /** Page size actually applied. */
  limit: number;
  /** Offset actually applied. */
  offset: number;
}
|
||||
|
||||
/**
 * Lists every distinct entity name (subjects and objects) found in the loaded triplets, with
 * counts per role, ordered by total count, highest first. Empty names are ignored.
 *
 * @param client - Base API client.
 * @param params.limit - Page size (default 200, capped at 500).
 * @param params.offset - Number of entities to skip (default 0).
 * @param params.q - Optional case-insensitive substring filter on the name.
 * @returns The page plus `total`, the number of entities matching before paging.
 */
export async function listTripletEntities(
  client: KbdbClient,
  { limit = 200, offset = 0, q }: { limit?: number; offset?: number; q?: string },
): Promise<EntityListResult> {
  const safeLimit = Math.min(limit, 500);
  const { triplets } = await queryTriplets(client, { limit: 2000 });

  const byName = new Map<string, EntityStat>();
  const tally = (name: string, role: 'as_subject' | 'as_object'): void => {
    if (!name) return;
    const stat = byName.get(name) ?? { name, as_subject: 0, as_object: 0, total: 0 };
    stat[role] += 1;
    stat.total += 1;
    byName.set(name, stat);
  };
  triplets.forEach((t) => {
    tally(t.subject, 'as_subject');
    tally(t.object, 'as_object');
  });

  const needle = q ? q.toLowerCase() : null;
  const ranked = Array.from(byName.values())
    .filter((entity) => needle === null || entity.name.toLowerCase().includes(needle))
    .sort((a, b) => b.total - a.total);

  return {
    entities: ranked.slice(offset, offset + safeLimit),
    total: ranked.length,
    limit: safeLimit,
    offset,
  };
}
|
||||
@@ -0,0 +1,77 @@
|
||||
// triplet-extract.ts — LLM 三元組萃取 + 寫入工具函數
|
||||
// 萃取=純 LLM(不碰 DB);寫入=走基本盤 API(零 SQL / 零 D1)。
|
||||
// 供 block-ingest.ts 和 block-process.ts 共用
|
||||
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
import { createTriplet, queryTriplets } from './triplet-crud';
|
||||
|
||||
/** System prompt for triplet extraction: what to extract, what to avoid, and the JSON array output format. */
const EXTRACT_PROMPT = [
  '你是知識萃取助手,從文章中萃取知識三元組。',
  '只萃取:性格特質、關鍵經歷精華、核心觀點信念、說話方式與風格。',
  '禁止:具體年份日期、純事實統計、流水帳年表。',
  '格式:[{"subject":"...","predicate":"2-6字","object":"15-50字","confidence":0.8}]',
  '直接輸出 JSON Array,第一字元 [,最後字元 ]。不要其他文字。',
].join('\n');
|
||||
|
||||
/** A triplet extracted by the language model. */
export interface LLMTriplet {
  subject: string;
  predicate: string;
  object: string;
  /** Confidence reported for the triplet. */
  confidence: number;
}
|
||||
|
||||
/**
 * Extracts triplets from text chunks with the AI binding.
 * Each non-blank chunk is sent in its own request; a chunk that fails (request error, no JSON
 * array in the reply, invalid JSON) is logged and skipped without aborting the others.
 *
 * @param ai - AI binding exposing `run(model, input)`.
 * @param chunks - Text chunks to process, in order.
 * @returns All triplets whose subject, predicate and object are non-empty after trimming.
 *   `confidence` is the model's value clamped to 0–1, or 0.8 when missing or not numeric.
 */
export async function extractTripletsViaLLM(ai: Ai, chunks: string[]): Promise<LLMTriplet[]> {
  const results: LLMTriplet[] = [];

  for (const chunk of chunks) {
    if (!chunk.trim()) continue;
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const raw = await (ai as any).run('@cf/meta/llama-3.3-70b-instruct-fp8-fast', {
        messages: [
          { role: 'system', content: EXTRACT_PROMPT },
          { role: 'user', content: `萃取三元組:\n${chunk}` },
        ],
        max_tokens: 512,
        temperature: 0.1,
      });

      const reply: unknown = typeof raw === 'string' ? raw : raw?.response;
      // A non-text reply cannot contain a JSON array; skip it explicitly instead of throwing.
      if (typeof reply !== 'string') continue;

      // Drop any <think>…</think> reasoning section before looking for the array.
      const text = reply.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
      const m = text.match(/\[[\s\S]*\]/);
      if (!m) continue;

      const parsed = JSON.parse(m[0]);
      if (!Array.isArray(parsed)) continue;

      for (const t of parsed) {
        const s = String(t?.subject ?? '').trim();
        const p = String(t?.predicate ?? '').trim();
        const o = String(t?.object ?? '').trim();
        if (s && p && o) {
          results.push({ subject: s, predicate: p, object: o, confidence: parseConfidence(t?.confidence) });
        }
      }
    } catch (err) {
      // Best effort per chunk: record the failure and move on to the next chunk.
      console.error('[extractTripletsViaLLM] chunk skipped:', err);
    }
  }

  return results;
}

/**
 * Converts a model-supplied confidence into a number in 0–1.
 * Missing, blank or non-numeric values fall back to 0.8; a genuine 0 is kept.
 */
function parseConfidence(value: unknown): number {
  const fallback = 0.8;
  if (typeof value === 'string' && !value.trim()) return fallback;
  if (typeof value !== 'number' && typeof value !== 'string') return fallback;
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return fallback;
  return Math.min(1, Math.max(0, parsed));
}
|
||||
|
||||
/**
 * Writes one triplet through the base API unless an identical one already exists.
 *
 * The duplicate check matches subject, predicate and object exactly, and is made with the same
 * owner that the write uses, so both operate on the same owner scope.
 *
 * @param client - Base API client.
 * @param t - Triplet to write; `confidence` defaults to 0.8.
 * @param owner - Owner of the triplet, or `null` for none.
 * @returns `false` when a matching triplet already exists, `true` when a new one was written.
 */
export async function writeTripletToDb(
  client: KbdbClient,
  t: { subject: string; predicate: string; object: string; confidence?: number },
  owner: string | null,
): Promise<boolean> {
  const ownerId = owner ?? undefined;

  const { count } = await queryTriplets(client, {
    subject: t.subject,
    predicate: t.predicate,
    object: t.object,
    limit: 1,
    owner_id: ownerId,
  });
  if (count > 0) return false;

  await createTriplet(client, {
    subject: t.subject,
    predicate: t.predicate,
    object: t.object,
    confidence: t.confidence ?? 0.8,
    owner_id: ownerId,
  });
  return true;
}
|
||||
@@ -0,0 +1,41 @@
|
||||
// Triplet 統計 action — 知識圖譜規模統計
|
||||
// 零 SQL / 零 D1:取全部 triplet record(走基本盤 API),在記憶體 reduce 出統計。
|
||||
// GET /triplets/stats
|
||||
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
import { queryTriplets } from './triplet-crud';
|
||||
|
||||
/** Size statistics of the knowledge graph. */
export interface TripletStats {
  /** Number of triplets counted. */
  total: number;
  /**
   * Triplet count per owner id (formerly `by_user_id`, renamed to match the base `owner_id` field).
   * Per the original note, base records return neither owner id nor timestamps, so this and
   * `recent` cannot be filled at the plugin layer until the base includes them in record responses.
   */
  by_owner_id: Record<string, number>;
  /** Recently created triplets: today and this week. */
  recent: { today: number; this_week: number };
  /** Most frequent subjects with their counts. */
  top_subjects: { subject: string; count: number }[];
  /** Most frequent predicates with their counts. */
  top_predicates: { predicate: string; count: number }[];
}
|
||||
|
||||
/**
 * Computes graph statistics in memory from the loaded triplets.
 *
 * @param client - Base API client.
 * @returns `total` (number of loaded triplets) and the ten most frequent subjects and predicates.
 *   `by_owner_id` is always empty and `recent` always zero: this function does not read owner
 *   or creation-time information.
 */
export async function getTripletStats(client: KbdbClient): Promise<TripletStats> {
  const { triplets } = await queryTriplets(client, { limit: 2000 });

  const bySubject = new Map<string, number>();
  const byPredicate = new Map<string, number>();
  const byOwner: Record<string, number> = {};

  const tally = (counts: Map<string, number>, key: string): void => {
    counts.set(key, (counts.get(key) ?? 0) + 1);
  };
  triplets.forEach((t) => {
    tally(bySubject, t.subject);
    tally(byPredicate, t.predicate);
  });

  const topTen = (counts: Map<string, number>): Array<[string, number]> =>
    Array.from(counts.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10);

  return {
    total: triplets.length,
    by_owner_id: byOwner,
    recent: { today: 0, this_week: 0 },
    top_subjects: topTen(bySubject).map(([subject, count]) => ({ subject, count })),
    top_predicates: topTen(byPredicate).map(([predicate, count]) => ({ predicate, count })),
  };
}
|
||||
@@ -0,0 +1,28 @@
|
||||
// 三元組 >> 語法解析器
|
||||
// 掃描 block content 裡的「A >> B >> C」格式,回傳解析結果
|
||||
|
||||
/** A triplet parsed from one `A >> B >> C` line. */
export type ParsedTriplet = {
  subject: string;
  predicate: string;
  object: string;
};
|
||||
|
||||
/**
 * Parses `A >> B >> C` lines out of block content.
 *
 * The content is read line by line, so one block may hold several triplets. A line yields a
 * triplet only when splitting on `>>` gives exactly three parts that are all non-empty after
 * trimming; every other line is ignored.
 *
 * @example
 * parseTripletSyntax('書僮採集 >> 指向北極星 >> 異見三元組累積')
 * // → [{ subject: '書僮採集', predicate: '指向北極星', object: '異見三元組累積' }]
 */
export function parseTripletSyntax(content: string): ParsedTriplet[] {
  return content.split('\n').flatMap((line): ParsedTriplet[] => {
    const segments = line.split('>>').map((segment) => segment.trim());
    if (segments.length !== 3 || segments.some((segment) => segment.length === 0)) return [];
    const [subject, predicate, object] = segments;
    return [{ subject, predicate, object }];
  });
}
|
||||
@@ -0,0 +1,23 @@
|
||||
// 三元組 partial update action
|
||||
//
|
||||
// [→arcrun] base 缺 PUT /records/:id(也無 DELETE):基本盤目前只支援建/查 record,
|
||||
// 不支援就地更新一筆 record 的 slot 值。插件不准用 SQL 直改表,故更新暫不支援。
|
||||
//
|
||||
// 暫時策略:回 not-supported(null),由 route 轉成 4xx/501。
|
||||
// 待上游補 PUT /records/:id 後,這裡改成 client.req('PUT', ...) 或薄殼呼叫。
|
||||
// 不得以 SQL/D1 繞過。
|
||||
|
||||
import type { Triplet } from '../types';
|
||||
import type { KbdbClient } from '../lib/kbdb-client';
|
||||
|
||||
/**
 * Partial update of a triplet — currently not supported.
 * All arguments are ignored and nothing is read or written; the caller is expected to turn the
 * `null` result into a "not supported" response.
 *
 * @returns Always `null`.
 */
export async function patchTriplet(
  _client: KbdbClient,
  _id: string,
  _data: {
    subject?: string;
    predicate?: string;
    object?: string;
    confidence?: number;
    subject_alias?: string;
    predicate_alias?: string;
    object_alias?: string;
  },
): Promise<{ ok: true; triplet: Triplet } | null> {
  // No in-place record update is available, so there is nothing to do here.
  const notSupported = null;
  return notSupported;
}
|
||||
Reference in New Issue
Block a user