feat(cypher-executor): step-level telemetry (LI roadmap 2026-W19 建議)
對應第一份 arcrun-roadmap (block id e924c231) 提的:
「mira_feed_watcher 執行時間偏長(~35秒),無 error 資訊
建議:加入 checkpoint/step-level telemetry,監測瓶頸」
新增 TelemetryEvent:
- node_success — 單一 Component node 跑完
- node_failure — 單一 Component node 失敗
寫入點:GraphExecutor.executeNode 的 catch 區塊(寫 node_failure)+ 最終 trace.push 之後(寫 node_success)
- 只在 this.env 存在且 node.type === 'Component' 時記錄(Input/Output 跳過避免噪音)
- 含 workflow_name + component_id + duration_ms;失敗時另帶 error_code(目前固定為 'node_error')
- fire-and-forget, 不擋主流程
實測(wiki_synthesis trigger 後):
- 4 個 node_success blocks 寫入 KBDB (4 個 kbdb_get)
- duration 範圍 653ms-2003ms,立刻看到誰是瓶頸
- paused 的 classify (claude_api) 不算 success:WorkflowPaused 會在寫入 telemetry 之前直接 rethrow,因此既不記 node_success 也不記 node_failure(trace 已記 paused 狀態)
下次 weekly_review compose_review 會看到 component-level breakdown,
能寫出「kbdb_get 平均 X ms、claude_api 平均 Y ms」等更細的分析。
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import { tryAuthDispatch } from './actions/auth-dispatcher';
|
|||||||
import { expandPromptRecipe } from './lib/recipe-expander';
|
import { expandPromptRecipe } from './lib/recipe-expander';
|
||||||
import { persistPausedRun, isResumablePending, parseRecipeOutput } from './lib/paused-runs';
|
import { persistPausedRun, isResumablePending, parseRecipeOutput } from './lib/paused-runs';
|
||||||
import { buildMagicVars } from './lib/magic-vars';
|
import { buildMagicVars } from './lib/magic-vars';
|
||||||
|
import { recordTelemetry } from './lib/telemetry';
|
||||||
|
|
||||||
export type ComponentLoader = (componentId: string) => Promise<ComponentRunner>;
|
export type ComponentLoader = (componentId: string) => Promise<ComponentRunner>;
|
||||||
export type WorkflowLoader = (workflowId: string) => Promise<ExecutionGraph>;
|
export type WorkflowLoader = (workflowId: string) => Promise<ExecutionGraph>;
|
||||||
@@ -352,14 +353,25 @@ export class GraphExecutor {
|
|||||||
if (e instanceof WorkflowPaused) throw e;
|
if (e instanceof WorkflowPaused) throw e;
|
||||||
|
|
||||||
const errMsg = e.message || String(e);
|
const errMsg = e.message || String(e);
|
||||||
|
const duration_ms = Date.now() - start;
|
||||||
trace.push({
|
trace.push({
|
||||||
nodeId: node.id,
|
nodeId: node.id,
|
||||||
type: node.type,
|
type: node.type,
|
||||||
input: nodeInput,
|
input: nodeInput,
|
||||||
output: null,
|
output: null,
|
||||||
error: errMsg,
|
error: errMsg,
|
||||||
duration_ms: Date.now() - start,
|
duration_ms,
|
||||||
});
|
});
|
||||||
|
// Step-level telemetry:node 失敗事件(LI SDD M2.x 自評建議)
|
||||||
|
if (this.env && node.type === 'Component') {
|
||||||
|
recordTelemetry(this.env, this.apiKey, {
|
||||||
|
event_type: 'node_failure',
|
||||||
|
workflow_name: graph.name,
|
||||||
|
component_id: node.componentId,
|
||||||
|
error_code: 'node_error',
|
||||||
|
duration_ms,
|
||||||
|
});
|
||||||
|
}
|
||||||
// 若已是 ExecutionError(上游節點拋出),保留原始 trace 繼續往上傳
|
// 若已是 ExecutionError(上游節點拋出),保留原始 trace 繼續往上傳
|
||||||
if (e instanceof ExecutionError) throw e;
|
if (e instanceof ExecutionError) throw e;
|
||||||
throw new ExecutionError(
|
throw new ExecutionError(
|
||||||
@@ -370,14 +382,26 @@ export class GraphExecutor {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const duration_ms = Date.now() - start;
|
||||||
trace.push({
|
trace.push({
|
||||||
nodeId: node.id,
|
nodeId: node.id,
|
||||||
type: node.type,
|
type: node.type,
|
||||||
input: nodeInput,
|
input: nodeInput,
|
||||||
output: result,
|
output: result,
|
||||||
duration_ms: Date.now() - start,
|
duration_ms,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Step-level telemetry:node 成功事件(只記 Component,Input/Output 跳過)
|
||||||
|
// LI SDD M2.x:給 weekly_review 提的「效能基準線」建議用 — 每個 node duration 都可追
|
||||||
|
if (this.env && node.type === 'Component') {
|
||||||
|
recordTelemetry(this.env, this.apiKey, {
|
||||||
|
event_type: 'node_success',
|
||||||
|
workflow_name: graph.name,
|
||||||
|
component_id: node.componentId,
|
||||||
|
duration_ms,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// 處理出邊
|
// 處理出邊
|
||||||
const outEdges = graph.edges.filter(e => e.from === node.id);
|
const outEdges = graph.edges.filter(e => e.from === node.id);
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,9 @@ export type TelemetryEvent =
|
|||||||
| 'run_success'
|
| 'run_success'
|
||||||
| 'run_fail'
|
| 'run_fail'
|
||||||
| 'validation_error'
|
| 'validation_error'
|
||||||
| 'mcp_tool_call';
|
| 'mcp_tool_call'
|
||||||
|
| 'node_success' // 單一 node 跑完(給 step-level 效能分析用)
|
||||||
|
| 'node_failure'; // 單一 node 失敗
|
||||||
|
|
||||||
export interface TelemetryRecord {
|
export interface TelemetryRecord {
|
||||||
event_type: TelemetryEvent;
|
event_type: TelemetryEvent;
|
||||||
|
|||||||
Reference in New Issue
Block a user