feat: KBDB-graph 插件獨立 — 全面改寫成走基本盤 API(API-as-Wall)

按 leo 鐵律(2026-06-14)把插件從「直接 SQL 操作基本盤表」改寫成
「只透過基本盤 arcrun/kbdb HTTP API 讀寫」。零建表、零 migration、零 SQL。

- 新增 src/lib/kbdb-client.ts:唯一對外通道,封裝 entries/templates/records API
- 新增 src/lib/templates.ts:triplet/entity template 定義(替代建表)
- 改寫 21 個違規 action(triplet/graph/entity/search)→ 走 client,圖在插件層記憶體組裝
- 移除所有 migrations、D1/Vectorize/AI 綁定;embedding/語意搜尋歸基本盤 optional 模組
- index.ts 只掛 triplets/graph/entities/search 路由;基本盤路由歸 arcrun/kbdb
- 測試改走 mock client(純 node);裁剪 CLAUDE.md 只留 graph 插件 + 鐵律
- 修正 SDD design.md「讀現狀推翻鐵律」的錯誤判斷(共用 D1 → API-as-Wall)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-14 20:59:41 +08:00
commit efe8e165cf
62 changed files with 7671 additions and 0 deletions
+65
View File
@@ -0,0 +1,65 @@
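// Entity CRUD — zero-SQL / zero-D1 version. Everything goes through the base platform API (KbdbClient).
// Entity = a base record (template=entity): slots are filled, no table is created. Pending aliases live in entity-pending.ts.
//
// Base gap [→arcrun]: there is no PUT /records/:id, so a record cannot be updated in place;
// addAlias therefore "rebuilds a new entity record" as the overwrite (once PUT exists it can patch aliases_json in place).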
import type { Entity } from '../types';
import type { KbdbClient } from '../lib/kbdb-client';
import { TPL_ENTITY, ensurePluginTemplates, recordToEntity } from '../lib/templates';
/** Comparison key for entity names: lower-cased, with surrounding whitespace removed. */
const norm = (s: string): string => {
  const lowered = s.toLowerCase();
  return lowered.trim();
};
// ─── Entity ──────────────────────────────────────────────────────────────────
/**
 * Creates an entity under the given canonical name.
 *
 * The entity is stored as one record of the entity template, created with an
 * empty alias list and an empty entity type.
 *
 * @param client - Client through which all records are written.
 * @param canonical - Canonical name of the new entity.
 * @param owner - Optional owner; written to the `owner` slot (empty string when
 *   omitted) and also forwarded to `createRecord`.
 * @returns The new entity, carrying the id returned by `createRecord` and no aliases.
 */
export async function createEntity(client: KbdbClient, canonical: string, owner?: string): Promise<Entity> {
  await ensurePluginTemplates(client);

  const slots = {
    canonical,
    aliases_json: '[]',
    entity_type: '',
    owner: owner ?? '',
  };
  const recordId = await client.createRecord(TPL_ENTITY, slots, owner);

  return { id: recordId, canonical, aliases: [] };
}
/**
 * Looks up an entity by exact name.
 *
 * The name is compared, after lower-casing and trimming, against each entity's
 * canonical name and aliases. Semantic similarity belongs to the base
 * platform's embed module and is not done here.
 *
 * @param client - Client used to list the entity records.
 * @param name - Name to look for.
 * @param owner - Optional owner forwarded to `listRecordsByTemplate`.
 * @returns The first matching entity in listing order, or `null` when none matches.
 */
export async function findEntityByName(client: KbdbClient, name: string, owner?: string): Promise<Entity | null> {
  const wanted = norm(name);
  const isMatch = (candidate: Entity): boolean =>
    norm(candidate.canonical) === wanted || candidate.aliases.some((alias) => norm(alias) === wanted);

  const records = await client.listRecordsByTemplate(TPL_ENTITY, owner);
  for (const record of records) {
    // Records are converted one at a time so the scan stops at the first hit.
    const candidate = recordToEntity(record);
    if (isMatch(candidate)) return candidate;
  }
  return null;
}
/**
 * Lists entities.
 *
 * @param client - Client used to list the entity records.
 * @param limit - Maximum number of entities returned (default 100).
 * @param owner - Optional owner forwarded to `listRecordsByTemplate`.
 * @returns Entities with a non-empty canonical name, in listing order, cut to `limit`.
 */
export async function listEntities(client: KbdbClient, limit = 100, owner?: string): Promise<Entity[]> {
  const records = await client.listRecordsByTemplate(TPL_ENTITY, owner);

  const named: Entity[] = [];
  for (const entity of records.map(recordToEntity)) {
    if (entity.canonical) named.push(entity);
  }
  return named.slice(0, limit);
}
/**
 * Adds an alias to an existing entity.
 *
 * The base platform has no `PUT /records/:id`, so the record cannot be patched
 * in place. Instead a new entity record is written that carries the old
 * canonical name, the existing aliases and the new alias.
 * [→arcrun] once the base offers `PUT /records/:id`, patch `aliases_json` in
 * place and save the extra record.
 *
 * The call is a no-op when the alias is blank, or when it already resolves to
 * this entity under the same lower-case/trim comparison that
 * `findEntityByName` applies. Every rewrite leaves another record behind, so
 * redundant ones are worth avoiding.
 *
 * @param client - Client used to read the entity and write the replacement record.
 * @param entityId - Record id of the entity to extend.
 * @param alias - Alias to add; stored exactly as given.
 * @param owner - Optional owner; forwarded to `createRecord` and used for the
 *   `owner` slot when the existing record has none.
 * @throws Error when no record with `entityId` exists.
 */
export async function addAlias(client: KbdbClient, entityId: string, alias: string, owner?: string): Promise<void> {
  const rec = await client.getRecord(entityId);
  if (!rec) throw new Error(`Entity ${entityId} not found`);

  const ent = recordToEntity(rec);
  const key = norm(alias);
  // A blank alias would make every blank lookup resolve to this entity.
  if (key === '') return;
  // Lookup is normalised, so an alias differing only in case or spacing adds nothing.
  const alreadyResolves = norm(ent.canonical) === key || ent.aliases.some((known) => norm(known) === key);
  if (alreadyResolves) return;

  const aliases = [...ent.aliases, alias];
  await ensurePluginTemplates(client);
  await client.createRecord(
    TPL_ENTITY,
    {
      canonical: ent.canonical,
      aliases_json: JSON.stringify(aliases),
      entity_type: rec.values.entity_type ?? '',
      owner: rec.values.owner ?? owner ?? '',
    },
    owner,
  );
}
+23
View File
@@ -0,0 +1,23 @@
// Entity Graph Embedding — [→arcrun embed 模組]
//
// 處置決定(2026-06-14):整檔原本是 embedding 邏輯(聚合 entity 所有 triplet 的
// bge-m3 向量、加權平均成認知向量、upsert 到 Vectorize namespace 'entity-graph')。
// 依鐵律 4embedding / 語意搜尋【不是插件職責】,屬基本盤 optional embed 模組。
// 插件不綁 AI/Vectorize,故移除全部 D1 + Vectorize 實作,僅留薄殼標記去向。
//
// 若日後需要 entity graph embedding:在基本盤 embed 模組實作(讀 triplet record 走 API +
// base 內部 AI/Vectorize),插件這層不碰。此函式維持簽名相容,永遠回 updated:false。
import type { KbdbClient } from '../lib/kbdb-client';
/**
 * [→arcrun embed module] No-op shell.
 *
 * Graph embedding is no longer a plugin responsibility. The signature is kept
 * so existing callers need no change.
 *
 * @param _client - Unused.
 * @param _entityName - Unused.
 * @returns Always `{ updated: false, triplet_count: 0 }`.
 */
export async function recalcEntityGraph(
  _client: KbdbClient,
  _entityName: string,
): Promise<{ updated: boolean; triplet_count: number }> {
  // The plugin does no embedding; the implementation belongs to the base platform's optional embed module.
  const nothingDone = { updated: false, triplet_count: 0 };
  return nothingDone;
}
+33
View File
@@ -0,0 +1,33 @@
// Entity 正規化 — 純 exact match 版(零 AI / 零 Vectorize)。
//
// 鐵律:embedding / 語意相似度合併【不是插件職責】,屬基本盤 optional embed 模組。
// 插件只做 exact match(小寫去空白比對 canonical / aliases):
// 命中 → 回 canonical;未命中 → 建新 entity 後回 rawName。
// 原本的 cosine 門檻(0.92 merge / 0.75 pending)與 pending 流程,
// 待基本盤 embed 模組上線後在 base 層處理;插件不綁 AI/Vectorize。
import { findEntityByName, createEntity } from './entity-crud';
import type { KbdbClient } from '../lib/kbdb-client';
/**
 * Normalises a raw entity name by exact match only.
 *
 * 1. When `findEntityByName` finds an entity, its canonical name is returned.
 * 2. Otherwise a new entity is created from `rawName`, and `rawName` is returned.
 *
 * Any error is logged and degrades silently to returning `rawName`.
 *
 * @param client - Client used for lookup and creation.
 * @param rawName - Name as it appeared in the input.
 * @param owner - Optional owner forwarded to lookup and creation.
 * @returns The canonical name on a hit, otherwise `rawName`.
 */
export async function normalizeEntity(
  client: KbdbClient,
  rawName: string,
  owner?: string,
): Promise<string> {
  let resolved = rawName;
  try {
    const known = await findEntityByName(client, rawName, owner);
    if (known) {
      resolved = known.canonical;
    } else {
      await createEntity(client, rawName, owner);
    }
  } catch (err) {
    console.error('[normalizeEntity] error, fallback to rawName:', err);
  }
  return resolved;
}
+70
View File
@@ -0,0 +1,70 @@
// Pending Alias — 零 SQL / 零 D1。全走基本盤 APItemplate=entity_pending record)。
//
// base 缺口 [→arcrun]:無 DELETE /records/:id → pending record 無法硬刪。
// confirm/reject 採 soft:執行動作但不刪 pending(待 base 補 DELETE)。
// 故 getPendingAliases 須由 caller 自行過濾已處理者,或待 DELETE 補上後在此硬刪。
import type { Entity, PendingAlias } from '../types';
import type { KbdbClient } from '../lib/kbdb-client';
import { TPL_ENTITY_PENDING, ensurePluginTemplates } from '../lib/templates';
import { createEntity, addAlias } from './entity-crud';
/**
 * Records a pending alias as one `entity_pending` record.
 *
 * @param client - Client used to write the record.
 * @param rawName - Name awaiting a decision.
 * @param candidateEntityId - Id of the entity the name might belong to.
 * @param candidateCanonical - Canonical name of that candidate entity.
 * @param similarity - Similarity score; stored as a string slot.
 * @param owner - Optional owner forwarded to `createRecord`.
 * @returns The pending alias, with `created_at` set to the current Unix time in seconds.
 */
export async function createPendingAlias(
  client: KbdbClient,
  rawName: string,
  candidateEntityId: string,
  candidateCanonical: string,
  similarity: number,
  owner?: string,
): Promise<PendingAlias> {
  await ensurePluginTemplates(client);

  const slots = {
    raw_name: rawName,
    candidate_entity_id: candidateEntityId,
    candidate_canonical: candidateCanonical,
    similarity: String(similarity),
  };
  const id = await client.createRecord(TPL_ENTITY_PENDING, slots, owner);

  const nowSeconds = Math.floor(Date.now() / 1000);
  return {
    id,
    raw_name: rawName,
    candidate_entity_id: candidateEntityId,
    candidate_canonical: candidateCanonical,
    similarity,
    created_at: nowSeconds,
  };
}
/**
 * Lists pending aliases.
 *
 * Records without a `raw_name` are skipped. `created_at` is always 0 in the
 * result, since it is not read from the record.
 *
 * @param client - Client used to list the `entity_pending` records.
 * @param limit - Maximum number of entries returned (default 100).
 * @param owner - Optional owner forwarded to `listRecordsByTemplate`.
 */
export async function getPendingAliases(client: KbdbClient, limit = 100, owner?: string): Promise<PendingAlias[]> {
  const records = await client.listRecordsByTemplate(TPL_ENTITY_PENDING, owner);

  const pending: PendingAlias[] = [];
  for (const record of records) {
    const slots = record.values;
    if (!slots.raw_name) continue;
    pending.push({
      id: record.record_id,
      raw_name: slots.raw_name,
      candidate_entity_id: slots.candidate_entity_id ?? '',
      candidate_canonical: slots.candidate_canonical ?? '',
      similarity: parseFloat(slots.similarity ?? '0'),
      created_at: 0,
    });
  }
  return pending.slice(0, limit);
}
/**
 * Confirms a pending alias by adding its raw name as an alias of the candidate entity.
 *
 * The pending record itself is kept (soft handling): [→arcrun] the base
 * platform has no DELETE for records.
 *
 * @throws Error when the pending record is missing or has no `raw_name`.
 */
export async function confirmPendingAlias(client: KbdbClient, pendingId: string, owner?: string): Promise<void> {
  const pending = await client.getRecord(pendingId);
  if (pending && pending.values.raw_name) {
    const { candidate_entity_id: entityId, raw_name: rawName } = pending.values;
    await addAlias(client, entityId, rawName, owner);
    return;
  }
  throw new Error(`Pending alias ${pendingId} not found`);
}
/**
 * Rejects a pending alias by creating a new entity from its raw name.
 *
 * The pending record itself is kept (soft handling): [→arcrun] the base
 * platform has no DELETE for records.
 *
 * @returns The newly created entity.
 * @throws Error when the pending record is missing or has no `raw_name`.
 */
export async function rejectPendingAlias(client: KbdbClient, pendingId: string, owner?: string): Promise<Entity> {
  const pending = await client.getRecord(pendingId);
  const usable = Boolean(pending && pending.values.raw_name);
  if (!pending || !usable) {
    throw new Error(`Pending alias ${pendingId} not found`);
  }
  return createEntity(client, pending.values.raw_name, owner);
}
+56
View File
@@ -0,0 +1,56 @@
// 圖節點操作 — 零 SQL、零 D1,全走基本盤 API
// 取全部 triplet 一次(queryTriplets),在插件層記憶體 group/filter。
import type { Triplet } from '../types';
import type { KbdbClient } from '../lib/kbdb-client';
import { queryTriplets } from './triplet-crud';
/** Fetches triplets through a single `queryTriplets` call, asking for the maximum of 2000. */
async function loadAllTriplets(client: KbdbClient): Promise<Triplet[]> {
  const result = await queryTriplets(client, { limit: 2000 });
  return result.triplets;
}
/**
 * Lists graph nodes with their edge counts, busiest first.
 *
 * A node is any name that appears as a triplet subject or object. Each
 * appearance counts once, so a triplet whose subject equals its object counts
 * twice for that node.
 *
 * @param client - Client used to load the triplets.
 * @param options - `search`: case-insensitive substring filter on the node name;
 *   `limit`: page size (default 100, capped at 500).
 */
export async function listNodes(
  client: KbdbClient,
  options: { search?: string; limit?: number },
): Promise<Array<{ node: string; edge_count: number }>> {
  const cap = Math.min(options.limit ?? 100, 500);
  const triplets = await loadAllTriplets(client);

  // Tally subject and object appearances in memory.
  const tally = new Map<string, number>();
  const bump = (name: string): void => {
    tally.set(name, (tally.get(name) ?? 0) + 1);
  };
  triplets.forEach((t) => {
    bump(t.subject);
    bump(t.object);
  });

  const needle = options.search ? options.search.toLowerCase() : null;
  const nodes: Array<{ node: string; edge_count: number }> = [];
  for (const [node, edge_count] of tally) {
    if (needle === null || node.toLowerCase().includes(needle)) {
      nodes.push({ node, edge_count });
    }
  }

  nodes.sort((left, right) => right.edge_count - left.edge_count);
  return nodes.slice(0, cap);
}
/** Returns every loaded triplet whose subject or object is exactly `name`. */
export async function getNodeEdges(
  client: KbdbClient,
  name: string,
): Promise<Triplet[]> {
  const all = await loadAllTriplets(client);
  const touching: Triplet[] = [];
  for (const t of all) {
    if (t.subject === name || t.object === name) touching.push(t);
  }
  return touching;
}
/**
 * Returns the distinct names connected to `name` by at least one triplet,
 * in first-seen order. `name` itself is never included.
 */
export async function getNeighbors(
  client: KbdbClient,
  name: string,
): Promise<string[]> {
  const edges = await getNodeEdges(client, name);
  const seen = new Set<string>();
  edges.forEach(({ subject, object }) => {
    for (const endpoint of [subject, object]) {
      if (endpoint !== name) seen.add(endpoint);
    }
  });
  return Array.from(seen);
}
+74
View File
@@ -0,0 +1,74 @@
// 最短路徑(BFS)— 零 SQL、零 D1。
// 取全部 triplet 一次建無向鄰接表,記憶體 BFS 求最短路。
import type { Triplet } from '../types';
import type { KbdbClient } from '../lib/kbdb-client';
import { queryTriplets } from './triplet-crud';
/** Outcome of a shortest-path search between two node names. */
type PathResult = {
// Node names from the start node to the target, inclusive; null when no path was found.
path: string[] | null;
// Triplets walked along the path, in path order; empty when start equals target or no path exists.
edges: Triplet[];
// Number of edges on the path (path.length - 1): 0 when start equals target, -1 when no path was found.
hops: number;
};
/**
 * Builds an undirected adjacency table: node name → every triplet touching it.
 *
 * Each triplet is listed under its subject and its object. A triplet whose
 * subject equals its object is listed once.
 */
function buildAdjacency(triplets: Triplet[]): Map<string, Triplet[]> {
  const adjacency = new Map<string, Triplet[]>();

  for (const edge of triplets) {
    const endpoints = edge.object === edge.subject ? [edge.subject] : [edge.subject, edge.object];
    for (const node of endpoints) {
      let bucket = adjacency.get(node);
      if (bucket === undefined) {
        bucket = [];
        adjacency.set(node, bucket);
      }
      bucket.push(edge);
    }
  }

  return adjacency;
}
/**
 * Finds a shortest path between two nodes with an in-memory breadth-first search.
 *
 * Triplets are treated as undirected edges. Nodes already six hops from `from`
 * are not expanded, so a longer path is reported as not found.
 *
 * @param client - Client used to load the triplets (one `queryTriplets` call, limit 2000).
 * @param from - Start node name.
 * @param to - Target node name.
 * @returns `{ path: [from], edges: [], hops: 0 }` when `from === to`;
 *   `{ path: null, edges: [], hops: -1 }` when no path is found; otherwise the
 *   node names, the triplets walked, and the hop count.
 */
export async function findShortestPath(
  client: KbdbClient,
  from: string,
  to: string,
): Promise<PathResult> {
  if (from === to) return { path: [from], edges: [], hops: 0 };

  const depthLimit = 6;
  const { triplets } = await queryTriplets(client, { limit: 2000 });
  const adjacency = buildAdjacency(triplets);

  const seen = new Set<string>([from]);
  const cameFrom = new Map<string, { node: string; edge: Triplet }>();
  // The frontier only grows; `head` walks it in FIFO order.
  const frontier: Array<{ node: string; depth: number }> = [{ node: from, depth: 0 }];
  let reached = false;

  for (let head = 0; head < frontier.length && !reached; head++) {
    const { node: here, depth } = frontier[head]!;
    if (depth >= depthLimit) continue;

    for (const edge of adjacency.get(here) ?? []) {
      const there = edge.subject === here ? edge.object : edge.subject;
      if (seen.has(there)) continue;

      seen.add(there);
      cameFrom.set(there, { node: here, edge });
      frontier.push({ node: there, depth: depth + 1 });
      if (there === to) {
        reached = true;
        break;
      }
    }
  }

  if (!reached) return { path: null, edges: [], hops: -1 };

  // Walk the parent links back from the target, then flip into start → target order.
  const nodesBackward: string[] = [to];
  const edgesBackward: Triplet[] = [];
  for (let step = cameFrom.get(to); step !== undefined; step = cameFrom.get(step.node)) {
    nodesBackward.push(step.node);
    edgesBackward.push(step.edge);
  }
  const path = nodesBackward.reverse();
  const pathEdges = edgesBackward.reverse();

  return { path, edges: pathEdges, hops: path.length - 1 };
}
+64
View File
@@ -0,0 +1,64 @@
// 圖遍歷 + 關係查詢 — 零 SQL、零 D1。
// 取全部 triplet 一次建鄰接表,BFS / filter 在記憶體跑。
import type { Triplet, GraphNode } from '../types';
import type { KbdbClient } from '../lib/kbdb-client';
import { queryTriplets } from './triplet-crud';
/**
 * Builds an undirected adjacency table: node name → every triplet touching it.
 *
 * Each triplet is filed under its subject and, when different, under its object.
 */
function buildAdjacency(triplets: Triplet[]): Map<string, Triplet[]> {
  const table = new Map<string, Triplet[]>();

  const file = (node: string, edge: Triplet): void => {
    const existing = table.get(node);
    if (existing === undefined) {
      table.set(node, [edge]);
    } else {
      existing.push(edge);
    }
  };

  triplets.forEach((edge) => {
    file(edge.subject, edge);
    // A self-loop has only one endpoint, so it is filed once.
    if (edge.object !== edge.subject) file(edge.object, edge);
  });

  return table;
}
/**
 * Breadth-first traversal from `start` over the in-memory triplet graph.
 *
 * Triplets are treated as undirected edges. The start node is level 0. The
 * depth is capped at 5 regardless of `maxDepth`.
 *
 * @param client - Client used to load the triplets (one `queryTriplets` call, limit 2000).
 * @param start - Name of the node to start from; it is reported even when it has no edges.
 * @param maxDepth - Deepest level to include. Values above 5 are capped. A
 *   non-numeric value (NaN) falls back to the cap instead of disabling it. A
 *   negative value yields an empty result.
 * @returns One entry per reached node, in visiting order, with its level and its triplets.
 */
export async function traverseGraph(
  client: KbdbClient,
  start: string,
  maxDepth: number,
): Promise<GraphNode[]> {
  const HARD_DEPTH_CAP = 5;
  // Math.min propagates NaN, and `level > NaN` is always false, which would
  // silently turn the traversal into an unbounded walk of the whole graph.
  const capped = Math.min(maxDepth, HARD_DEPTH_CAP);
  const depth = Number.isNaN(capped) ? HARD_DEPTH_CAP : capped;

  const { triplets } = await queryTriplets(client, { limit: 2000 });
  const adj = buildAdjacency(triplets);

  const visited = new Set<string>();
  const queue: Array<{ node: string; level: number }> = [{ node: start, level: 0 }];
  const results: GraphNode[] = [];

  // The queue only grows; `head` walks it in FIFO order without shifting.
  for (let head = 0; head < queue.length; head++) {
    const { node, level } = queue[head]!;
    if (visited.has(node) || level > depth) continue;
    visited.add(node);

    const edges = adj.get(node) ?? [];
    results.push({ node, level, edges });

    // Neighbours of a node on the last level would be dropped anyway.
    if (level >= depth) continue;
    for (const t of edges) {
      const next = t.subject === node ? t.object : t.subject;
      if (!visited.has(next)) queue.push({ node: next, level: level + 1 });
    }
  }

  return results;
}
/**
 * Returns every loaded triplet that directly links `from` and `to`, in either direction.
 */
export async function queryRelation(
  client: KbdbClient,
  from: string,
  to: string,
): Promise<Triplet[]> {
  const { triplets } = await queryTriplets(client, { limit: 2000 });

  const links = (t: Triplet): boolean => {
    const forward = t.subject === from && t.object === to;
    const backward = t.subject === to && t.object === from;
    return forward || backward;
  };

  return triplets.filter((t) => links(t));
}
+27
View File
@@ -0,0 +1,27 @@
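// Predicate normalisation (plugin layer): pure string normalisation, zero embedding.
//
// Iron rule: the plugin uses zero SQL / zero D1 / zero Vectorize.
// [→arcrun embed module] The original used bge-m3 embedding cosine similarity to match near-synonym predicates (>0.90 use_existing /
// 0.85~0.90 pending / <0.85 new). Semantic matching belongs to the base platform's embed module, not to the graph plugin.
// Here it is downgraded to pure string normalisation (trim + collapse whitespace runs; letter case is left untouched); any near-synonym merging is left to the base embed module.
// Any error degrades silently and never blocks the write.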
/**
 * Outcome of predicate normalisation.
 *
 * `normalizePredicateOnWrite` only ever produces the `'new'` variant. The
 * other variants are kept for the embedding-based matching that now belongs to
 * the base platform's embed module.
 */
export type PredicateNormalizeResult =
  | { action: 'use_existing'; canonical: string; score: number }
  | { action: 'pending'; similar: Array<{ canonical: string; score: number }> }
  | { action: 'new'; predicate: string };

/**
 * Normalises a predicate as a plain string: trims it and collapses each run of
 * whitespace into a single space. Letter case is left untouched.
 *
 * [→arcrun embed module] Near-synonym merging (`use_existing` / `pending`) is
 * left to the base platform's embed module; the plugin does no vector matching.
 *
 * @param predicate - Predicate as written.
 * @returns Always `{ action: 'new', predicate }`. If normalisation throws, the
 *   error is logged and the original value is returned unchanged.
 */
export function normalizePredicateOnWrite(predicate: string): PredicateNormalizeResult {
  let normalized = predicate;
  try {
    normalized = predicate.trim().replace(/\s+/g, ' ');
  } catch (err) {
    console.error('[normalizePredicateOnWrite] error, fallback to raw:', err);
  }
  return { action: 'new', predicate: normalized };
}
+12
View File
@@ -0,0 +1,12 @@
// [→arcrun embed 模組] 手動 embedding 管理(embedAndStore / getVector / deleteVector
// 已移出插件:embedding / Vectorize 屬基本盤的 optional embed 模組,不是 graph 插件職責。
//
// 鐵律:插件零 SQL / 零 D1 / 零 Vectorize。此檔不再持有任何向量邏輯。
// 保留薄殼 stub 以維持匯出相容;呼叫端(route)應改打基本盤 embed 模組,或停用此端點。
/** Message carried by the error that `embedNotSupported` throws. */
const NOT_IN_PLUGIN = 'embedding 屬基本盤 optional embed 模組([→arcrun embed 模組]),不在 graph 插件實作';

/**
 * Stub kept for export compatibility: embedding is not implemented in the graph plugin.
 *
 * @throws Error always, with the `NOT_IN_PLUGIN` message.
 */
export function embedNotSupported(): never {
  const failure = new Error(NOT_IN_PLUGIN);
  throw failure;
}
+43
View File
@@ -0,0 +1,43 @@
// 搜尋(插件層):只做基本盤 keyword 搜尋(D1 LIKE,走 GET /entries/search)。
//
// 鐵律:插件零 SQL / 零 D1 / 零 Vectorize。讀寫只透過 KbdbClient。
// [→arcrun embed 模組] 語意搜尋 / embedding 是基本盤的 optional embed 模組,不是插件職責。
// 插件不綁 AI/Vectorize;「語意搜尋」在此降級為 keyword 搜尋。
import type { KbdbClient, BaseEntry } from '../lib/kbdb-client';
/** One search hit, in the shape returned by keywordSearch. */
export type SearchMatch = {
// Relevance score; keywordSearch always sets 0 because keyword matching yields no similarity score.
score: number;
// Extra data attached to the hit; keywordSearch always sets an empty object.
metadata: Record<string, unknown>;
// Kind of hit; keywordSearch only ever produces 'block'.
type: 'block' | 'triplet';
// Triplet payload; keywordSearch always sets null.
triplet: Record<string, unknown> | null;
// Block payload; keywordSearch stores the matched base entry here.
block: Record<string, unknown> | null;
};
/** Options accepted by keywordSearch. */
export type KeywordSearchOptions = {
// Maximum number of matches returned (keywordSearch defaults to 10).
limit?: number;
// Passed through to client.searchEntries as its second argument.
owner_id?: string;
};
/**
 * Keyword search against the base platform.
 *
 * Calls `client.searchEntries` and wraps each returned entry as a
 * `SearchMatch` of type `'block'`, with score 0, empty metadata and no triplet.
 * [→arcrun embed module] Real semantic search, if ever needed, is to be
 * provided by the base platform's embed module, not implemented here.
 *
 * @param client - Client used to run the search.
 * @param query - Search text, passed through unchanged.
 * @param options - `limit` (default 10) and `owner_id`.
 * @returns At most `limit` matches, in the order the client returned them.
 */
export async function keywordSearch(
  client: KbdbClient,
  query: string,
  options: KeywordSearchOptions = {},
): Promise<SearchMatch[]> {
  const cap = options.limit === undefined ? 10 : options.limit;
  const entries = await client.searchEntries(query, options.owner_id);

  const wrap = (entry: BaseEntry): SearchMatch => ({
    score: 0, // keyword search has no similarity score; semantic scores await the embed module
    metadata: {},
    type: 'block' as const,
    triplet: null,
    block: entry as unknown as Record<string, unknown>,
  });

  const wrapped: SearchMatch[] = entries.map((entry: BaseEntry) => wrap(entry));
  return wrapped.slice(0, cap);
}
+68
View File
@@ -0,0 +1,68 @@
// 搜尋推薦(插件層):用基本盤 keyword 搜尋結果組模板。
//
// 鐵律:插件零 SQL / 零 D1 / 零 Vectorize。讀寫只透過 KbdbClient。
// [→arcrun embed 模組] 語意分數 / 相似度排序屬基本盤 embed 模組;此處只做 keyword 搜尋 + 零幻覺模板。
// 零幻覺:只用知識庫資料組模板,不經 LLM。
import { keywordSearch } from './search-query';
import type { SearchMatch } from './search-query';
import type { KbdbClient } from '../lib/kbdb-client';
/** Result returned by suggestKnowledge. */
type SuggestResult = {
// Templated message built from the matches; empty string when nothing usable was found.
suggestion: string;
// Matches kept after filtering.
matches: SearchMatch[];
// Number of entries in matches.
count: number;
};
/**
 * Runs a keyword search and turns the usable hits into a templated suggestion.
 *
 * Hits with neither a triplet nor a block are dropped. So are blocks whose
 * whole content is a bare reference (`((uuid))`) or a bare embed
 * (`{{embed ((uuid))}}`), which mean nothing to a reader. The message comes
 * from a fixed template, not from an LLM.
 *
 * @param client - Client used for the search.
 * @param query - Search text.
 * @param limit - Maximum number of hits requested from the search (default 10);
 *   filtering happens afterwards, so fewer may be returned.
 * @param owner_id - Optional owner forwarded to the search.
 * @returns The suggestion text, the kept matches, and their count.
 */
export async function suggestKnowledge(
  client: KbdbClient,
  query: string,
  limit: number = 10,
  owner_id?: string,
): Promise<SuggestResult> {
  const found = await keywordSearch(client, query, { limit, owner_id });

  const isBareReference = (m: SearchMatch): boolean => {
    if (m.type !== 'block' || !m.block) return false;
    const content = (m.block as Record<string, string>).content || '';
    return (
      /^\s*\(\([a-f0-9-]+\)\)\s*$/.test(content) ||
      /^\s*\{\{embed\s+\(\([a-f0-9-]+\)\)\}\}\s*$/.test(content)
    );
  };

  const usable = found.filter(
    (m) => (m.triplet !== null || m.block !== null) && !isBareReference(m),
  );

  if (usable.length === 0) {
    return { suggestion: '', matches: [], count: 0 };
  }
  return { suggestion: buildFallback(usable), matches: usable, count: usable.length };
}
/**
 * Builds a friendly, conversational message from the matches.
 *
 * The text comes from fixed templates and only quotes data found in the
 * matches themselves. There are three cases: block hits only, triplet hits
 * only, and everything else (a mix, or no hits of either kind).
 */
function buildFallback(matches: SearchMatch[]): string {
  const blocks: SearchMatch[] = [];
  const triplets: SearchMatch[] = [];
  for (const m of matches) {
    if (m.type === 'block') blocks.push(m);
    else if (m.type === 'triplet') triplets.push(m);
  }

  const onlyBlocks = blocks.length > 0 && triplets.length === 0;
  const onlyTriplets = triplets.length > 0 && blocks.length === 0;

  if (onlyBlocks) {
    if (blocks.length !== 1) {
      return `嘿,你之前寫過 ${blocks.length} 筆相關筆記,幫你撈出來了~`;
    }
    const firstBlock = blocks[0].block as Record<string, string> | null;
    const pageName = firstBlock?.page_name || '筆記';
    return `你之前在「${pageName}」寫過相關的內容,幫你找出來了~`;
  }

  if (onlyTriplets) {
    const first = triplets[0].triplet as Record<string, string> | null;
    if (!first) {
      return `欸,找到 ${matches.length} 筆你之前寫過的東西,看看有沒有幫助?`;
    }
    return triplets.length === 1
      ? `你之前有寫到「${first.subject} ${first.predicate} ${first.object}」,是不是跟這個有關?`
      : `嘿,你之前寫過 ${triplets.length} 筆跟「${first.subject}」相關的筆記,幫你撈出來了~`;
  }

  return `嘿,找到 ${matches.length} 筆相關資料(${blocks.length} 筆筆記、${triplets.length} 筆知識關聯),幫你整理好了~`;
}
+16
View File
@@ -0,0 +1,16 @@
// Triplet 知識分群 — 純函式
//
// 原版用 Workers AI 自動判斷 cluster。插件已不綁 AIembedding/LLM 屬基本盤 optional 模組)。
// 分群改為「由呼叫端(基本盤 embed 模組 / 上游 ingest)算好後傳入」;這裡只做正規化與 bridge_score 計算。
/**
 * Normalises cluster labels and derives a bridge score.
 *
 * Non-string and blank entries are dropped. The rest are trimmed, lower-cased
 * and de-duplicated in first-seen order, and at most three are kept.
 *
 * @param clusters - Labels supplied by the caller; `undefined` is treated as none.
 * @returns The kept labels, and `bridgeScore` = number of kept labels minus one, never below 0.
 */
export function classifyClusters(clusters: string[] | undefined): { clusters: string[]; bridgeScore: number } {
  const unique = new Set<string>();
  for (const raw of clusters ?? []) {
    if (typeof raw !== 'string') continue;
    const label = raw.trim();
    if (label) unique.add(label.toLowerCase());
  }

  const kept = Array.from(unique).slice(0, 3);
  const bridgeScore = Math.max(0, kept.length - 1);
  return { clusters: kept, bridgeScore };
}
+90
View File
@@ -0,0 +1,90 @@
// 三元組 CRUD — 走基本盤 APIAPI-as-Wall,零 SQL
// 寫 triplet = 確保 template='triplet' + POST /records 填 slot。
// 查 triplet = GET /records/by-template/triplet → 插件層 filter/組裝。
import type { Triplet } from '../types';
import type { KbdbClient } from '../lib/kbdb-client';
import { TPL_TRIPLET, ensurePluginTemplates, recordToTriplet } from '../lib/templates';
import { classifyClusters } from './triplet-cluster';
/** Input accepted by createTriplet. */
export type CreateTripletData = {
// The three parts of the statement; stored as given.
subject: string;
predicate: string;
object: string;
// Stored only when non-empty.
source_block_id?: string;
// Stored as a string; createTriplet defaults it to 1.0.
confidence?: number;
// Passed to client.createRecord as the owner argument.
owner_id?: string;
// Stored as JSON; createTriplet defaults it to an empty list.
clusters?: string[];
// createTriplet defaults it to max(0, clusters.length - 1).
bridge_score?: number;
// Each stored only when non-empty.
subject_entity_type?: string;
object_entity_type?: string;
};
/**
 * Creates a triplet as one record of the triplet template (a record create
 * through the client; no table is involved).
 *
 * Every slot is stored as a string. `confidence` defaults to 1.0, `clusters`
 * to an empty list, and `bridge_score` to `max(0, clusters.length - 1)`.
 * `source_block_id` and the two entity-type slots are written only when non-empty.
 *
 * @param client - Client used to write the record.
 * @param data - Triplet content and optional metadata.
 * @returns The new record id together with the subject, predicate and object as given.
 */
export async function createTriplet(
  client: KbdbClient,
  data: CreateTripletData,
): Promise<{ id: string; subject: string; predicate: string; object: string }> {
  await ensurePluginTemplates(client);

  const { subject, predicate, object } = data;
  const clusters = data.clusters ?? [];
  const bridgeScore = data.bridge_score ?? Math.max(0, clusters.length - 1);

  const values: Record<string, string> = {
    subject,
    predicate,
    object,
    confidence: String(data.confidence ?? 1.0),
    clusters_json: JSON.stringify(clusters),
    bridge_score: String(bridgeScore),
  };

  const optionalSlots: Array<[string, string | undefined]> = [
    ['source_block_id', data.source_block_id],
    ['subject_entity_type', data.subject_entity_type],
    ['object_entity_type', data.object_entity_type],
  ];
  for (const [slot, value] of optionalSlots) {
    if (value) values[slot] = value;
  }

  const id = await client.createRecord(TPL_TRIPLET, values, data.owner_id);
  return { id, subject, predicate, object };
}
/** Filters and paging accepted by queryTriplets. */
export type TripletFilters = {
// Each of these, when set, keeps only triplets whose field equals it exactly.
subject?: string;
predicate?: string;
object?: string;
// Page size; queryTriplets defaults it to 50 and caps it at 2000.
limit?: number;
// Number of matching triplets to skip; defaults to 0.
offset?: number;
// Passed to client.listRecordsByTemplate as the owner argument.
owner_id?: string;
// When set, keeps triplets whose subject_entity_type or object_entity_type equals it.
entity_type?: string;
};
/**
 * Queries triplets.
 *
 * All records of the triplet template are fetched, then filtered and paged in
 * the plugin layer, because the base platform has no compound slot query.
 *
 * @param client - Client used to list the records.
 * @param filters - Exact-match filters on subject / predicate / object, an
 *   `entity_type` filter matching either side, plus `limit` (default 50, capped
 *   at 2000), `offset` (default 0) and `owner_id`. A negative `limit` or
 *   `offset` is treated as 0; a NaN value falls back to the default.
 * @returns The requested page. `count` is the number of triplets in that page,
 *   not the total number of matches.
 */
export async function queryTriplets(
  client: KbdbClient,
  filters: TripletFilters,
): Promise<{ triplets: Triplet[]; count: number }> {
  const records = await client.listRecordsByTemplate(TPL_TRIPLET, filters.owner_id);
  let triplets = records.map(recordToTriplet);

  if (filters.subject) triplets = triplets.filter((t) => t.subject === filters.subject);
  if (filters.predicate) triplets = triplets.filter((t) => t.predicate === filters.predicate);
  if (filters.object) triplets = triplets.filter((t) => t.object === filters.object);
  if (filters.entity_type) {
    triplets = triplets.filter(
      (t) => t.subject_entity_type === filters.entity_type || t.object_entity_type === filters.entity_type,
    );
  }

  // Array.prototype.slice reads negative arguments as offsets from the end, so
  // an unchecked `limit: -1` would return everything except the last triplet.
  const nonNegative = (value: number | undefined, fallback: number): number =>
    value == null || Number.isNaN(value) ? fallback : Math.max(0, value);

  const offset = nonNegative(filters.offset, 0);
  const limit = Math.min(nonNegative(filters.limit, 50), 2000);
  const page = triplets.slice(offset, offset + limit);
  return { triplets: page, count: page.length };
}
/**
 * Fetches a single triplet by record id.
 *
 * The fetched record is converted with its template forced to the triplet template.
 * NOTE(review): nothing here checks that the record really is a triplet —
 * confirm callers only pass triplet ids.
 *
 * @returns The triplet, or `null` when the client returns no record.
 */
export async function getTriplet(client: KbdbClient, id: string): Promise<Triplet | null> {
  const record = await client.getRecord(id);
  return record ? recordToTriplet({ ...record, template: TPL_TRIPLET }) : null;
}
// re-export clusters helperAI 分群,純函式 + 走 client 無關)
export { classifyClusters };
+10
View File
@@ -0,0 +1,10 @@
// 三元組 embedding — [→arcrun embed]
//
// 鐵律:embedding / Vectorize 屬基本盤/上游(arcrun embed 模組),插件不綁 Vectorize、不碰向量索引。
// 本檔精簡為薄殼:只保留純字串組裝 helper(tripletToText,供別處組 query 文字用)。
// 真正的向量生成/upsert/delete 由基本盤負責;插件層不再持有 Ai 或向量索引繫結。
/**
 * Joins subject, predicate and object with single spaces into one piece of
 * text suitable for feeding an embedding. Pure function: no DB, no Vectorize.
 */
export function tripletToText(subject: string, predicate: string, object: string): string {
  const head = `${subject} ${predicate}`;
  return `${head} ${object}`;
}
+57
View File
@@ -0,0 +1,57 @@
// 列出所有唯一實體(subject object)— 零 SQL / 零 D1
// 取全部 triplet record(走基本盤 API),在記憶體統計 as_subject/as_object/total。
// GET /entities
import type { KbdbClient } from '../lib/kbdb-client';
import { queryTriplets } from './triplet-crud';
/** Usage counts for one name appearing in the loaded triplets. */
export interface EntityStat {
// The subject/object text itself.
name: string;
// Number of triplets in which the name is the subject.
as_subject: number;
// Number of triplets in which the name is the object.
as_object: number;
// as_subject + as_object.
total: number;
}
/** One page of entity statistics, as returned by listTripletEntities. */
export interface EntityListResult {
// The requested page, sorted by total, highest first.
entities: EntityStat[];
// Number of entities after the optional name filter, before paging.
total: number;
// Page size actually applied (the requested limit capped at 500).
limit: number;
// Offset that was applied.
offset: number;
}
/**
 * Lists every distinct name used as a triplet subject or object, with counts.
 *
 * Triplets are loaded once (limit 2000) and tallied in memory. Empty names are
 * ignored. Results are sorted by total appearances, highest first.
 *
 * @param client - Client used to load the triplets.
 * @param limit - Page size (default 200, capped at 500).
 * @param offset - Number of entities to skip (default 0).
 * @param q - Optional case-insensitive substring filter on the name.
 */
export async function listTripletEntities(
  client: KbdbClient,
  { limit = 200, offset = 0, q }: { limit?: number; offset?: number; q?: string },
): Promise<EntityListResult> {
  const safeLimit = Math.min(limit, 500);
  const { triplets } = await queryTriplets(client, { limit: 2000 });

  const byName = new Map<string, EntityStat>();
  const statFor = (name: string): EntityStat => {
    const existing = byName.get(name);
    if (existing) return existing;
    const fresh: EntityStat = { name, as_subject: 0, as_object: 0, total: 0 };
    byName.set(name, fresh);
    return fresh;
  };

  for (const { subject, object } of triplets) {
    if (subject) {
      const stat = statFor(subject);
      stat.as_subject += 1;
      stat.total += 1;
    }
    if (object) {
      const stat = statFor(object);
      stat.as_object += 1;
      stat.total += 1;
    }
  }

  const needle = q ? q.toLowerCase() : undefined;
  const ranked = Array.from(byName.values())
    .filter((e) => needle === undefined || e.name.toLowerCase().includes(needle))
    .sort((left, right) => right.total - left.total);

  return {
    entities: ranked.slice(offset, offset + safeLimit),
    total: ranked.length,
    limit: safeLimit,
    offset,
  };
}
+77
View File
@@ -0,0 +1,77 @@
// triplet-extract.ts — LLM 三元組萃取 + 寫入工具函數
// 萃取=純 LLM(不碰 DB);寫入=走基本盤 API(零 SQL / 零 D1)。
// 供 block-ingest.ts 和 block-process.ts 共用
import type { KbdbClient } from '../lib/kbdb-client';
import { createTriplet, queryTriplets } from './triplet-crud';
// System prompt sent with every extraction request (written in Chinese).
// It asks the model to extract knowledge triplets covering personality traits, key experiences,
// core beliefs and speaking style; forbids specific dates, pure statistics and chronological
// listings; and requires the reply to be a bare JSON array of
// {subject, predicate, object, confidence} objects with no surrounding text.
const EXTRACT_PROMPT = `你是知識萃取助手,從文章中萃取知識三元組。
只萃取:性格特質、關鍵經歷精華、核心觀點信念、說話方式與風格。
禁止:具體年份日期、純事實統計、流水帳年表。
格式:[{"subject":"...","predicate":"2-6字","object":"15-50字","confidence":0.8}]
直接輸出 JSON Array,第一字元 [,最後字元 ]。不要其他文字。`;
/** One triplet as extracted from model output by extractTripletsViaLLM. */
export interface LLMTriplet {
// Trimmed, non-empty text for each part.
subject: string;
predicate: string;
object: string;
// Confidence reported by the model; 0.8 when the model's value is missing, zero or not numeric.
confidence: number;
}
/**
 * Extracts triplets from text chunks with Workers AI.
 *
 * Each non-blank chunk is sent in its own request. A chunk whose request or
 * parsing fails is logged and skipped, so one bad chunk never aborts the rest.
 *
 * Model output is untrusted and is validated before it is returned:
 * - `subject` / `predicate` / `object` must be non-blank strings (numbers are
 *   accepted and stringified); anything else drops the item instead of being
 *   coerced to text such as "[object Object]".
 * - `confidence` falls back to 0.8 unless it is a finite number above 0, and is
 *   capped at 1. NOTE(review): assumes the 0–1 scale shown in the prompt example.
 *
 * @param ai - Workers AI binding; only its `run` method is used.
 * @param chunks - Text chunks to extract from; blank chunks are skipped without a request.
 * @returns All valid triplets, in chunk order.
 */
export async function extractTripletsViaLLM(ai: Ai, chunks: string[]): Promise<LLMTriplet[]> {
  const DEFAULT_CONFIDENCE = 0.8;

  // Accept only primitive text; objects and arrays must not be stringified into knowledge.
  const asText = (value: unknown): string => {
    if (typeof value === 'string') return value.trim();
    if (typeof value === 'number' && Number.isFinite(value)) return String(value);
    return '';
  };
  const asConfidence = (value: unknown): number => {
    const n = Number(value);
    return Number.isFinite(n) && n > 0 ? Math.min(n, 1) : DEFAULT_CONFIDENCE;
  };

  const results: LLMTriplet[] = [];
  for (let index = 0; index < chunks.length; index++) {
    const chunk = chunks[index];
    if (!chunk || !chunk.trim()) continue;
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the binding's run() result is untyped here
      const raw = await (ai as any).run('@cf/meta/llama-3.3-70b-instruct-fp8-fast', {
        messages: [
          { role: 'system', content: EXTRACT_PROMPT },
          { role: 'user', content: `萃取三元組:\n${chunk}` },
        ],
        max_tokens: 512,
        temperature: 0.1,
      });
      // Strip any <think>…</think> reasoning the model emits before the JSON.
      const text = (typeof raw === 'string' ? raw : (raw?.response ?? ''))
        .replace(/<think>[\s\S]*?<\/think>/g, '').trim();
      // Take the outermost [...] span, in case the model wrapped the array in prose.
      const m = text.match(/\[[\s\S]*\]/);
      if (!m) continue;
      const parsed: unknown = JSON.parse(m[0]);
      if (!Array.isArray(parsed)) continue;
      for (const t of parsed) {
        const s = asText(t?.subject);
        const p = asText(t?.predicate);
        const o = asText(t?.object);
        if (!s || !p || !o) continue;
        results.push({ subject: s, predicate: p, object: o, confidence: asConfidence(t?.confidence) });
      }
    } catch (err) {
      // Best effort: a failing chunk is skipped, but leaves a trace.
      console.warn(`[extractTripletsViaLLM] chunk ${index} skipped:`, err);
    }
  }
  return results;
}
/**
 * Writes one triplet through the base platform API unless it already exists.
 *
 * Both the duplicate check and the write go through the client via the
 * triplet CRUD helpers (zero SQL / zero D1). The check matches subject,
 * predicate and object exactly, and is scoped to the same owner as the write.
 * Without that scope, one owner's triplet could suppress another owner's write,
 * or an owner's own records could be missed.
 * NOTE(review): assumes `listRecordsByTemplate` restricts its result to the given owner — confirm.
 *
 * The check and the write are two separate calls, so concurrent writers can
 * still produce a duplicate.
 *
 * @param client - Client used for the check and the write.
 * @param t - Triplet to store; `confidence` defaults to 0.8.
 * @param owner - Owner of the triplet, or `null` for none.
 * @returns `false` when an identical triplet already exists, `true` when a new one was written.
 */
export async function writeTripletToDb(
  client: KbdbClient,
  t: { subject: string; predicate: string; object: string; confidence?: number },
  owner: string | null,
): Promise<boolean> {
  const owner_id = owner ?? undefined;

  // Duplicate check: exact S-P-O match within the owner's own triplets.
  const { count } = await queryTriplets(client, {
    subject: t.subject,
    predicate: t.predicate,
    object: t.object,
    limit: 1,
    owner_id,
  });
  if (count > 0) return false;

  await createTriplet(client, {
    subject: t.subject,
    predicate: t.predicate,
    object: t.object,
    confidence: t.confidence ?? 0.8,
    owner_id,
  });
  return true;
}
+41
View File
@@ -0,0 +1,41 @@
// Triplet 統計 action — 知識圖譜規模統計
// 零 SQL / 零 D1:取全部 triplet record(走基本盤 API),在記憶體 reduce 出統計。
// GET /triplets/stats
import type { KbdbClient } from '../lib/kbdb-client';
import { queryTriplets } from './triplet-crud';
/** Aggregate statistics over the loaded triplets, as returned by getTripletStats. */
export interface TripletStats {
// Number of triplets the statistics were computed from.
total: number;
// by_owner_id is kept (renamed from the former by_user_id to line up with the base platform's owner_id field).
// Note: base records return neither owner_id nor timestamps, so by_owner_id / recent cannot be filled
// in the plugin layer; to be completed once [→arcrun] base includes owner_id + created_at in record responses.
by_owner_id: Record<string, number>;
recent: { today: number; this_week: number };
// Up to ten most frequent subjects, most frequent first.
top_subjects: { subject: string; count: number }[];
// Up to ten most frequent predicates, most frequent first.
top_predicates: { predicate: string; count: number }[];
}
/**
 * Computes knowledge-graph size statistics in memory.
 *
 * Triplets are loaded once (limit 2000). `by_owner_id` is always empty and
 * `recent` always zero, because base records currently expose neither an owner
 * nor a creation time.
 *
 * @param client - Client used to load the triplets.
 * @returns The triplet count and the ten most frequent subjects and predicates.
 */
export async function getTripletStats(client: KbdbClient): Promise<TripletStats> {
  const { triplets } = await queryTriplets(client, { limit: 2000 });

  const subjectCounts = new Map<string, number>();
  const predicateCounts = new Map<string, number>();
  const increment = (counts: Map<string, number>, key: string): void => {
    counts.set(key, (counts.get(key) ?? 0) + 1);
  };
  triplets.forEach((t) => {
    increment(subjectCounts, t.subject);
    increment(predicateCounts, t.predicate);
  });

  const topTen = (counts: Map<string, number>): Array<[string, number]> =>
    Array.from(counts.entries())
      .sort(([, left], [, right]) => right - left)
      .slice(0, 10);

  const by_owner_id: Record<string, number> = {}; // base records carry no owner_id yet; awaiting upstream
  const recent = { today: 0, this_week: 0 }; // base records carry no created_at yet; awaiting upstream

  return {
    total: triplets.length,
    by_owner_id,
    recent,
    top_subjects: topTen(subjectCounts).map(([subject, count]) => ({ subject, count })),
    top_predicates: topTen(predicateCounts).map(([predicate, count]) => ({ predicate, count })),
  };
}
+28
View File
@@ -0,0 +1,28 @@
// 三元組 >> 語法解析器
// 掃描 block content 裡的「A >> B >> C」格式,回傳解析結果
/** One triplet parsed from an `A >> B >> C` line. */
export type ParsedTriplet = {
  subject: string;
  predicate: string;
  object: string;
};

/**
 * Scans `content` for lines of the form `word >> word >> word`.
 *
 * A block may hold several such lines. Each part is trimmed. A line is ignored
 * unless it splits into exactly three non-empty parts.
 *
 * @example
 * parseTripletSyntax('書僮採集 >> 指向北極星 >> 異見三元組累積')
 * // → [{ subject: '書僮採集', predicate: '指向北極星', object: '異見三元組累積' }]
 */
export function parseTripletSyntax(content: string): ParsedTriplet[] {
  const parsed: ParsedTriplet[] = [];

  content.split('\n').forEach((line) => {
    const segments = line.split('>>');
    if (segments.length !== 3) return;

    const [subject, predicate, object] = segments.map((segment) => segment.trim());
    if (subject && predicate && object) {
      parsed.push({ subject, predicate, object });
    }
  });

  return parsed;
}
+23
View File
@@ -0,0 +1,23 @@
// 三元組 partial update action
//
// [→arcrun] base 缺 PUT /records/:id(也無 DELETE):基本盤目前只支援建/查 record,
// 不支援就地更新一筆 record 的 slot 值。插件不准用 SQL 直改表,故更新暫不支援。
//
// 暫時策略:回 not-supportednull),由 route 轉成 4xx/501。
// 待上游補 PUT /records/:id 後,這裡改成 client.req('PUT', ...) 或薄殼呼叫。
// 不得以 SQL/D1 繞過。
import type { Triplet } from '../types';
import type { KbdbClient } from '../lib/kbdb-client';
/**
 * Partial update of a triplet — currently not supported.
 *
 * The base platform has no `PUT /records/:id`, so a record cannot be updated
 * in place, and the plugin must not work around that with SQL/D1.
 *
 * @param _client - Unused.
 * @param _id - Unused.
 * @param _data - Unused.
 * @returns Always `null`, which signals "not supported" to the caller.
 */
export async function patchTriplet(
  _client: KbdbClient,
  _id: string,
  _data: {
    subject?: string; predicate?: string; object?: string; confidence?: number;
    subject_alias?: string; predicate_alias?: string; object_alias?: string;
  },
): Promise<{ ok: true; triplet: Triplet } | null> {
  // No PUT /records/:id on the base platform, so there is nothing to update in place.
  const notSupported = null;
  return notSupported;
}