From 8b54ebb68a9625f7c74841dbf68c0dff34f4a5cf Mon Sep 17 00:00:00 2001 From: richblack Date: Sat, 16 May 2026 21:47:15 +0800 Subject: [PATCH] =?UTF-8?q?feat(cypher-executor):=20step-level=20telemetry?= =?UTF-8?q?=20(LI=20roadmap=202026-W19=20=E5=BB=BA=E8=AD=B0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 對應第一份 arcrun-roadmap (block id e924c231) 提的: 「mira_feed_watcher 執行時間偏長(~35秒),無 error 資訊 建議:加入 checkpoint/step-level telemetry,監測瓶頸」 新增 TelemetryEvent: - node_success — 單一 Component node 跑完 - node_failure — 單一 Component node 失敗 寫入點:GraphExecutor.executeNode catch + 最終 trace.push 之後 - 只記 node.type === 'Component'(Input/Output 跳過避免噪音) - 含 workflow_name + component_id + duration_ms + (error_code on fail) - fire-and-forget, 不擋主流程 實測(wiki_synthesis trigger 後): - 4 個 node_success blocks 寫入 KBDB (4 個 kbdb_get) - duration 範圍 653ms-2003ms,立刻看到誰是瓶頸 - paused 的 classify (claude_api) 不算 success(trace 已記 paused 狀態) 下次 weekly_review compose_review 會看到 component-level breakdown, 能寫出「kbdb_get 平均 X ms、claude_api 平均 Y ms」等更細的分析。 Co-Authored-By: Claude Opus 4.7 --- cypher-executor/src/graph-executor.ts | 28 +++++++++++++++++++++++++-- cypher-executor/src/lib/telemetry.ts | 4 +++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/cypher-executor/src/graph-executor.ts b/cypher-executor/src/graph-executor.ts index c2542a1..869e86e 100644 --- a/cypher-executor/src/graph-executor.ts +++ b/cypher-executor/src/graph-executor.ts @@ -6,6 +6,7 @@ import { tryAuthDispatch } from './actions/auth-dispatcher'; import { expandPromptRecipe } from './lib/recipe-expander'; import { persistPausedRun, isResumablePending, parseRecipeOutput } from './lib/paused-runs'; import { buildMagicVars } from './lib/magic-vars'; +import { recordTelemetry } from './lib/telemetry'; export type ComponentLoader = (componentId: string) => Promise; export type WorkflowLoader = (workflowId: string) => Promise; @@ -352,14 +353,25 @@ export class GraphExecutor { if (e instanceof WorkflowPaused) throw e; const errMsg = e.message || String(e); + const duration_ms = Date.now() - start; trace.push({ nodeId: node.id, type: node.type, input: nodeInput, output: null, error: errMsg, - duration_ms: Date.now() - start, + duration_ms, }); + // Step-level telemetry:node 失敗事件(LI SDD M2.x 自評建議) + if (this.env && node.type === 'Component') { + recordTelemetry(this.env, this.apiKey, { + event_type: 'node_failure', + workflow_name: graph.name, + component_id: node.componentId, + error_code: 'node_error', + duration_ms, + }); + } // 若已是 ExecutionError(上游節點拋出),保留原始 trace 繼續往上傳 if (e instanceof ExecutionError) throw e; throw new ExecutionError( @@ -370,14 +382,26 @@ export class GraphExecutor { ); } + const duration_ms = Date.now() - start; trace.push({ nodeId: node.id, type: node.type, input: nodeInput, output: result, - duration_ms: Date.now() - start, + duration_ms, }); + // Step-level telemetry:node 成功事件(只記 Component,Input/Output 跳過) + // LI SDD M2.x:給 weekly_review 提的「效能基準線」建議用 — 每個 node duration 都可追 + if (this.env && node.type === 'Component') { + recordTelemetry(this.env, this.apiKey, { + event_type: 'node_success', + workflow_name: graph.name, + component_id: node.componentId, + duration_ms, + }); + } + // 處理出邊 const outEdges = graph.edges.filter(e => e.from === node.id); diff --git a/cypher-executor/src/lib/telemetry.ts b/cypher-executor/src/lib/telemetry.ts index 13b7e64..9471875 100644 --- a/cypher-executor/src/lib/telemetry.ts +++ b/cypher-executor/src/lib/telemetry.ts @@ -21,7 +21,9 @@ export type TelemetryEvent = | 'run_success' | 'run_fail' | 'validation_error' - | 'mcp_tool_call'; + | 'mcp_tool_call' + | 'node_success' // 單一 node 跑完(給 step-level 效能分析用) + | 'node_failure'; // 單一 node 失敗 export interface TelemetryRecord { event_type: TelemetryEvent;