From d47a8bcabd0275c64817517b27d9bb7375cb9106 Mon Sep 17 00:00:00 2001 From: Losita <2810873701@qq.com> Date: Fri, 17 Apr 2026 12:27:04 +0800 Subject: [PATCH] =?UTF-8?q?Version:=200.9.25.dev.260417=20=E5=90=8E?= =?UTF-8?q?=E7=AB=AF=EF=BC=9A=201.=20AIHub=20=E6=A8=A1=E5=9E=8B=E5=88=86?= =?UTF-8?q?=E7=BA=A7=E4=BB=8E=20Worker/Strategist=20=E4=B8=A4=E7=BA=A7?= =?UTF-8?q?=E9=87=8D=E6=9E=84=E4=B8=BA=20Lite/Pro/Max=20=E4=B8=89=E7=BA=A7?= =?UTF-8?q?=20-=20AIHub=20=E7=BB=93=E6=9E=84=E4=BD=93=E4=BB=8E=20Worker=20?= =?UTF-8?q?+=20Strategist=20=E6=94=B9=E4=B8=BA=20Lite=20+=20Pro=20+=20Max?= =?UTF-8?q?=EF=BC=8C=E5=88=86=E5=88=AB=E5=AF=B9=E5=BA=94=E8=BD=BB=E9=87=8F?= =?UTF-8?q?=EF=BC=88=E6=A0=87=E9=A2=98=E7=94=9F=E6=88=90=EF=BC=89=E3=80=81?= =?UTF-8?q?=E6=A0=87=E5=87=86=EF=BC=88Chat=20=E8=B7=AF=E7=94=B1/=E9=97=B2?= =?UTF-8?q?=E8=81=8A/=E4=BA=A4=E4=BB=98=E6=80=BB=E7=BB=93=EF=BC=89?= =?UTF-8?q?=E3=80=81=E9=AB=98=E8=83=BD=E5=8A=9B=EF=BC=88Plan=20=E8=A7=84?= =?UTF-8?q?=E5=88=92/Execute=20ReAct=EF=BC=89=E4=B8=89=E4=B8=AA=E8=83=BD?= =?UTF-8?q?=E5=8A=9B=E5=B1=82=E7=BA=A7=20-=20config.example.yaml=20?= =?UTF-8?q?=E6=96=B0=E5=A2=9E=20liteModel=20/=20proModel=20/=20maxModel=20?= =?UTF-8?q?=E4=B8=89=E4=B8=AA=E6=A8=A1=E5=9E=8B=E9=85=8D=E7=BD=AE=E9=A1=B9?= =?UTF-8?q?=EF=BC=8C=E6=9B=BF=E4=BB=A3=E5=8E=9F=20workerModel=20/=20strate?= =?UTF-8?q?gistModel=20-=20=E5=90=AF=E5=8A=A8=E5=B1=82=20InitEino=20?= =?UTF-8?q?=E6=94=B9=E4=B8=BA=E5=88=9B=E5=BB=BA=E4=B8=89=E4=B8=AA=E7=8B=AC?= =?UTF-8?q?=E7=AB=8B=E6=A8=A1=E5=9E=8B=E5=AE=9E=E4=BE=8B=EF=BC=8C=E6=8A=BD?= =?UTF-8?q?=E5=8F=96=E5=85=AC=E5=85=B1=20baseURL=20=E5=92=8C=20apiKey=20?= =?UTF-8?q?=E5=87=8F=E5=B0=91=E9=87=8D=E5=A4=8D=20-=20pickChatModel=20?= =?UTF-8?q?=E7=BB=9F=E4=B8=80=E8=BF=94=E5=9B=9E=20Pro=20=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=EF=BC=8C=E6=97=A7=20strategist=20=E5=8F=82=E6=95=B0=E4=B8=8D?= =?UTF-8?q?=E5=86=8D=E7=94=9F=E6=95=88=EF=BC=9BpickTitleModel=20=E4=BB=8E?= =?UTF-8?q?=20Worker=20=E5=88=87=E5=88=B0=20Lite=20-=20runNewAgentGraph=20?= =?UTF-8?q?=E6=8C=89=20Plan/Execute=E2=86=92Max=E3=80=81Chat/Deliver?= =?UTF-8?q?=E2=86=92Pro=20=E5=88=86=E7=BA=A7=E6=B3=A8=E5=85=A5=EF=BC=9BGra?= =?UTF-8?q?ph=20=E5=87=BA=E9=94=99=E5=9B=9E=E9=80=80=E4=B9=9F=E5=88=87?= =?UTF-8?q?=E5=88=B0=20Pro=20-=20Memory=20=E6=A8=A1=E5=9D=97=E5=88=9D?= =?UTF-8?q?=E5=A7=8B=E5=8C=96=E4=BB=8E=20Worker=20=E6=94=B9=E4=B8=BA=20Pro?= =?UTF-8?q?=202.=20Plan=20=E8=8A=82=E7=82=B9=E4=BB=8E"=E4=B8=A4=E9=98=B6?= =?UTF-8?q?=E6=AE=B5=E8=AF=84=E4=BC=B0"=E7=AE=80=E5=8C=96=E4=B8=BA"?= =?UTF-8?q?=E5=8D=95=E8=BD=AE=E6=B7=B1=E5=BA=A6=E8=A7=84=E5=88=92"?= =?UTF-8?q?=EF=BC=8Cthinking=20=E5=BC=80=E5=85=B3=E6=94=B9=E4=B8=BA?= =?UTF-8?q?=E5=85=A8=E9=85=8D=E7=BD=AE=E5=8C=96=20-=20=E7=A7=BB=E9=99=A4?= =?UTF-8?q?=20Phase=201=EF=BC=88=E5=BF=AB=E9=80=9F=E8=AF=84=E4=BC=B0=20160?= =?UTF-8?q?0=20token=EF=BC=89+=20Phase=202=EF=BC=88=E6=B7=B1=E5=BA=A6?= =?UTF-8?q?=E8=A7=84=E5=88=92=203200=20token=EF=BC=89=E7=9A=84=E4=B8=A4?= =?UTF-8?q?=E8=BD=AE=E8=B0=83=E7=94=A8=E9=80=BB=E8=BE=91=EF=BC=8C=E6=94=B9?= =?UTF-8?q?=E4=B8=BA=E5=8D=95=E8=BD=AE=E4=B8=8D=E9=99=90=20token=20?= =?UTF-8?q?=E6=B7=B1=E5=BA=A6=E8=A7=84=E5=88=92=20-=20PlanDecision=20?= =?UTF-8?q?=E7=A7=BB=E9=99=A4=20need=5Fthinking=20=E5=AD=97=E6=AE=B5?= =?UTF-8?q?=EF=BC=8Cprompt=20=E8=A7=84=E5=88=99=E5=92=8C=20JSON=20contract?= =?UTF-8?q?=20=E5=90=8C=E6=AD=A5=E5=88=A0=E9=99=A4=E8=AF=A5=E5=AD=97?= =?UTF-8?q?=E6=AE=B5=20-=20=E5=90=84=E8=8A=82=E7=82=B9=EF=BC=88Plan=20/=20?= =?UTF-8?q?Execute=20/=20Deliver=EF=BC=89thinking=20=E5=BC=80=E5=85=B3?= =?UTF-8?q?=E4=BB=8E=E7=A1=AC=E7=BC=96=E7=A0=81=E6=94=B9=E4=B8=BA=E4=BB=8E?= =?UTF-8?q?=20AgentGraphDeps=20=E8=AF=BB=E5=8F=96=EF=BC=8C=E7=94=B1=20conf?= =?UTF-8?q?ig.yaml=20=E7=9A=84=20agent.thinking=20=E6=AE=B5=E6=8C=89?= =?UTF-8?q?=E8=8A=82=E7=82=B9=E6=B3=A8=E5=85=A5=20-=20=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=20agent.thinking=20=E9=85=8D=E7=BD=AE=E6=AE=B5=EF=BC=88plan=20?= =?UTF-8?q?/=20execute=20/=20deliver=20/=20memory=20=E5=9B=9B=E4=B8=AA?= =?UTF-8?q?=E7=8B=AC=E7=AB=8B=E5=B8=83=E5=B0=94=E5=BC=80=E5=85=B3=EF=BC=89?= =?UTF-8?q?=EF=BC=8Cconfig.example.yaml=20=E8=A1=A5=E9=BD=90=E9=BB=98?= =?UTF-8?q?=E8=AE=A4=E5=80=BC=20-=20=E6=96=B0=E5=A2=9E=20resolveThinkingMo?= =?UTF-8?q?de=20=E5=85=AC=E5=85=B1=E5=87=BD=E6=95=B0=EF=BC=8Cplan=20/=20ex?= =?UTF-8?q?ecute=20/=20deliver=20=E5=92=8C=20memory=20=E5=86=B3=E7=AD=96/?= =?UTF-8?q?=E6=8A=BD=E5=8F=96=E9=93=BE=E8=B7=AF=E7=BB=9F=E4=B8=80=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=203.=20Memory=20=E6=A8=A1=E5=9D=97=20LLM=20=E8=B0=83?= =?UTF-8?q?=E7=94=A8=E6=94=AF=E6=8C=81=20thinking=20=E5=BC=80=E5=85=B3=20-?= =?UTF-8?q?=20Config=20=E6=96=B0=E5=A2=9E=20LLMThinking=20=E5=AD=97?= =?UTF-8?q?=E6=AE=B5=EF=BC=8Cconfig=5Floader=20=E4=BB=8E=20agent.thinking.?= =?UTF-8?q?memory=20=E8=AF=BB=E5=8F=96=20-=20LLMDecisionOrchestrator.Compa?= =?UTF-8?q?re=20=E5=92=8C=20LLMWriteOrchestrator.ExtractFacts=20=E7=9A=84?= =?UTF-8?q?=20thinking=20=E6=A8=A1=E5=BC=8F=E4=BB=8E=E7=A1=AC=E7=BC=96?= =?UTF-8?q?=E7=A0=81=20Disabled=20=E6=94=B9=E4=B8=BA=E8=AF=BB=E5=8F=96?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=20=E5=89=8D=E7=AB=AF=EF=BC=9A=201.=20?= =?UTF-8?q?=E7=A7=BB=E9=99=A4=E5=8A=A9=E6=89=8B=E8=BE=93=E5=85=A5=E5=8C=BA?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E9=80=89=E6=8B=A9=E5=99=A8=E5=8F=8A=E5=85=A8?= =?UTF-8?q?=E9=83=A8=E5=81=8F=E5=A5=BD=E6=8C=81=E4=B9=85=E5=8C=96=E9=80=BB?= =?UTF-8?q?=E8=BE=91=20-=20=E5=88=A0=E9=99=A4=20ModelType=20=E7=B1=BB?= =?UTF-8?q?=E5=9E=8B=E3=80=81selectedModel=20ref=E3=80=81MODEL=5FPREFERENC?= =?UTF-8?q?E=5FSTORAGE=5FKEY=20=E5=B8=B8=E9=87=8F=20-=20=E5=88=A0=E9=99=A4?= =?UTF-8?q?=20isModelType=20/=20loadModelPreferenceMap=20/=20persistModelP?= =?UTF-8?q?referenceMap=20/=20savePreferredModel=20/=20resolvePreferredMod?= =?UTF-8?q?el=20/=20applyPreferredModelForConversation=20=E5=85=AD?= =?UTF-8?q?=E4=B8=AA=E5=87=BD=E6=95=B0=E5=8F=8A=20modelPreferenceMap=20ref?= =?UTF-8?q?=20-=20=E5=88=A0=E9=99=A4=20selectedModel=20watch=20=E7=9B=91?= =?UTF-8?q?=E5=90=AC=E3=80=81=E5=8F=91=E9=80=81=E6=B6=88=E6=81=AF=E6=97=B6?= =?UTF-8?q?=E7=9A=84=20savePreferredModel=20=E8=B0=83=E7=94=A8=E3=80=81?= =?UTF-8?q?=E5=88=87=E4=BC=9A=E8=AF=9D=E6=97=B6=E7=9A=84=20applyPreferredM?= =?UTF-8?q?odelForConversation=20=E8=B0=83=E7=94=A8=E3=80=81=E4=BC=9A?= =?UTF-8?q?=E8=AF=9D=E8=BF=81=E7=A7=BB=E6=97=B6=E7=9A=84=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E5=81=8F=E5=A5=BD=E8=BF=81=E7=A7=BB=20-=20fetchChatStream=20?= =?UTF-8?q?=E7=9A=84=20model=20=E5=8F=82=E6=95=B0=E7=A1=AC=E7=BC=96?= =?UTF-8?q?=E7=A0=81=E4=B8=BA=20'worker'=20-=20=E5=88=A0=E9=99=A4=E6=A8=A1?= =?UTF-8?q?=E6=9D=BF=E4=B8=AD"=E6=A8=A1=E5=9E=8B"=E4=B8=8B=E6=8B=89?= =?UTF-8?q?=E9=80=89=E6=8B=A9=E5=99=A8=EF=BC=88=E6=A0=87=E5=87=86/?= =?UTF-8?q?=E7=AD=96=E7=95=A5=EF=BC=89=E5=8F=8A=E5=AF=B9=E5=BA=94=E7=9A=84?= =?UTF-8?q?=E5=85=A8=E5=B1=80=E6=A0=B7=E5=BC=8F=20.assistant-model-select-?= =?UTF-8?q?panel=202.=20=E4=B8=8A=E4=B8=8B=E6=96=87=E7=AA=97=E5=8F=A3?= =?UTF-8?q?=E6=8C=87=E7=A4=BA=E5=99=A8=E7=AE=80=E5=8C=96=E4=B8=BA=E4=BB=85?= =?UTF-8?q?=E6=98=BE=E7=A4=BA=E6=80=BB=E5=8D=A0=E7=94=A8=20-=20ContextWind?= =?UTF-8?q?owMeter=20=E7=A7=BB=E9=99=A4=20msg0~msg3=20=E5=9B=9B=E6=AE=B5?= =?UTF-8?q?=E5=BD=A9=E8=89=B2=E5=88=86=E6=AE=B5=E9=80=BB=E8=BE=91=EF=BC=88?= =?UTF-8?q?ContextSegment=20=E6=8E=A5=E5=8F=A3=E3=80=81segments=20computed?= =?UTF-8?q?=E3=80=81v-for=20=E6=B8=B2=E6=9F=93=EF=BC=89=20-=20=E8=BF=9B?= =?UTF-8?q?=E5=BA=A6=E6=9D=A1=E6=94=B9=E4=B8=BA=E5=8D=95=E4=B8=80=E8=93=9D?= =?UTF-8?q?=E8=89=B2=E6=9D=A1=EF=BC=8C=E6=8C=89=20total/budget=20=E6=AF=94?= =?UTF-8?q?=E4=BE=8B=E5=A1=AB=E5=85=85=EF=BC=9B=E8=B6=85=E9=A2=84=E7=AE=97?= =?UTF-8?q?=E6=97=B6=E5=8F=98=E7=BA=A2=20-=20Tooltip=20=E7=AE=80=E5=8C=96?= =?UTF-8?q?=E4=B8=BA=E4=BB=85=E6=98=BE=E7=A4=BA"=E6=80=BB=E8=AE=A1=20X=20/?= =?UTF-8?q?=20=E9=A2=84=E7=AE=97=20Y=EF=BC=88Z%=EF=BC=89"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 仓库:无 --- backend/cmd/start.go | 2 +- backend/config.example.yaml | 19 ++- backend/inits/eino.go | 47 ++++-- backend/memory/model/config.go | 3 + .../orchestrator/llm_decision_orchestrator.go | 13 +- .../orchestrator/llm_write_orchestrator.go | 2 +- backend/memory/service/config_loader.go | 1 + backend/newAgent/model/graph_run_state.go | 5 + backend/newAgent/model/plan_contract.go | 1 - backend/newAgent/node/agent_nodes.go | 3 + backend/newAgent/node/deliver.go | 6 +- backend/newAgent/node/execute.go | 3 +- backend/newAgent/node/plan.go | 82 ++++------ backend/newAgent/prompt/plan.go | 8 +- backend/service/agentsvc/agent.go | 10 +- backend/service/agentsvc/agent_meta.go | 8 +- backend/service/agentsvc/agent_newagent.go | 18 ++- .../assistant/ContextWindowMeter.vue | 69 +++----- .../components/dashboard/AssistantPanel.vue | 153 +----------------- 19 files changed, 147 insertions(+), 306 deletions(-) diff --git a/backend/cmd/start.go b/backend/cmd/start.go index e9252ee..8883a73 100644 --- a/backend/cmd/start.go +++ b/backend/cmd/start.go @@ -83,7 +83,7 @@ func Start() { memoryMetrics := memoryobserve.NewMetricsRegistry() memoryModule := memory.NewModuleWithObserve( db, - infrallm.WrapArkClient(aiHub.Worker), + infrallm.WrapArkClient(aiHub.Pro), ragRuntime, memoryCfg, memory.ObserveDeps{ diff --git a/backend/config.example.yaml b/backend/config.example.yaml index d72acd1..6400a43 100644 --- a/backend/config.example.yaml +++ b/backend/config.example.yaml @@ -67,16 +67,27 @@ time: # 智能体模型与规划参数。 agent: - # 日常执行链路使用的主模型。 - workerModel: "doubao-seed-2-0-code-preview-260215" - # 规划、拆解、策略推导使用的模型。 - strategistModel: "doubao-seed-2-0-code-preview-260215" + # 轻量模型:标题生成等低复杂度、低延迟场景。 + liteModel: "doubao-seed-2-0-code-preview-260215" + # 标准模型:Chat 路由/闲聊/深度回答/Deliver 总结。 + proModel: "doubao-seed-2-0-code-preview-260215" + # 高能力模型:Plan 规划 + Execute ReAct 等深度推理场景。 + maxModel: "doubao-seed-2-0-code-preview-260215" # 模型服务根路径。 baseURL: "https://ark.cn-beijing.volces.com/api/v3" # 日内并发优化并发度,建议按模型配额调整。 dailyRefineConcurrency: 7 # 周级跨天配平额度上限,防止过度调整。 weeklyAdjustBudget: 5 + thinking: + # plan 节点(单轮深度规划),默认开 thinking。 + plan: true + # execute 节点(ReAct 深度推理),默认开 thinking。 + execute: true + # deliver 节点(交付总结),默认关 thinking。 + deliver: false + # 记忆模块(决策比对 + 抽取),默认关 thinking。 + memory: false # 通用 RAG 配置。 rag: diff --git a/backend/inits/eino.go b/backend/inits/eino.go index 3a00007..3865599 100644 --- a/backend/inits/eino.go +++ b/backend/inits/eino.go @@ -8,32 +8,53 @@ import ( "github.com/spf13/viper" ) -// AIHub 存储不同能力的模型实例 +// AIHub 存储三级模型的实例,按能力分级调度。 +// +// 分级策略: +// 1. Lite:轻量模型,用于标题生成等低复杂度、低延迟场景; +// 2. Pro:标准模型,用于 Chat 路由/闲聊/深度回答/Deliver 总结; +// 3. Max:高能力模型,用于 Plan 规划和 Execute ReAct 等需要深度推理的场景。 type AIHub struct { - Strategist *ark.ChatModel // 智力担当:处理复杂排程逻辑 - Worker *ark.ChatModel // 效率担当:处理简单任务、总结 + Lite *ark.ChatModel // 轻量模型:标题生成等低复杂度任务 + Pro *ark.ChatModel // 标准模型:Chat 路由、闲聊、交付总结 + Max *ark.ChatModel // 高能力模型:Plan 规划、Execute ReAct } func InitEino() (*AIHub, error) { ctx := context.Background() - worker, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{ - Model: viper.GetString("agent.workerModel"), // 使用的模型版本 - BaseURL: viper.GetString("agent.baseURL"), // Eino API 的基础 URL - APIKey: os.Getenv("ARK_API_KEY"), // API 密钥 + baseURL := viper.GetString("agent.baseURL") + apiKey := os.Getenv("ARK_API_KEY") + + // 1. Lite 模型:标题生成等低复杂度场景,优先控制成本和延迟。 + lite, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{ + Model: viper.GetString("agent.liteModel"), + BaseURL: baseURL, + APIKey: apiKey, }) if err != nil { return nil, err } - strategist, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{ - Model: viper.GetString("agent.strategistModel"), // 使用的模型版本 - BaseURL: viper.GetString("agent.baseURL"), // Eino API 的基础 URL - APIKey: os.Getenv("ARK_API_KEY"), // API 密钥 + // 2. Pro 模型:Chat 路由/闲聊/交付总结等标准复杂度场景。 + pro, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{ + Model: viper.GetString("agent.proModel"), + BaseURL: baseURL, + APIKey: apiKey, + }) + if err != nil { + return nil, err + } + // 3. Max 模型:Plan 规划和 Execute ReAct 等需要深度推理的场景。 + maxModel, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{ + Model: viper.GetString("agent.maxModel"), + BaseURL: baseURL, + APIKey: apiKey, }) if err != nil { return nil, err } return &AIHub{ - Strategist: strategist, - Worker: worker, + Lite: lite, + Pro: pro, + Max: maxModel, }, nil } diff --git a/backend/memory/model/config.go b/backend/memory/model/config.go index bd2b729..810b616 100644 --- a/backend/memory/model/config.go +++ b/backend/memory/model/config.go @@ -72,6 +72,9 @@ type Config struct { // 2. 默认 0.5,与"守门员"prompt 的 confidence>=0.5 输出规则配合; // 3. fallback 路径 confidence 设为 0.45,低于默认阈值,LLM 不可用时不写入。 WriteMinConfidence float64 + + // 记忆模块 LLM 调用是否开启 thinking,由 config.yaml 的 agent.thinking.memory 注入。 + LLMThinking bool } // NormalizeReadMode 统一读取模式字符串。 diff --git a/backend/memory/orchestrator/llm_decision_orchestrator.go b/backend/memory/orchestrator/llm_decision_orchestrator.go index 01425ee..7f45da7 100644 --- a/backend/memory/orchestrator/llm_decision_orchestrator.go +++ b/backend/memory/orchestrator/llm_decision_orchestrator.go @@ -62,10 +62,7 @@ func (o *LLMDecisionOrchestrator) Compare( infrallm.GenerateOptions{ Temperature: 0.1, MaxTokens: defaultDecisionCompareMaxTokens, - Thinking: infrallm.ThinkingModeDisabled, - Metadata: map[string]any{ - "stage": "memory_decision_compare", - }, + Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking), }, ) if err != nil { @@ -128,3 +125,11 @@ func buildDecisionCompareUserPrompt(fact memorymodel.NormalizedFact, candidate m candidate.MemoryType, candidate.Content, ) } + +// resolveMemoryThinkingMode 根据配置布尔值返回对应的 ThinkingMode。 +func resolveMemoryThinkingMode(enabled bool) infrallm.ThinkingMode { + if enabled { + return infrallm.ThinkingModeEnabled + } + return infrallm.ThinkingModeDisabled +} diff --git a/backend/memory/orchestrator/llm_write_orchestrator.go b/backend/memory/orchestrator/llm_write_orchestrator.go index b3c16ab..648df72 100644 --- a/backend/memory/orchestrator/llm_write_orchestrator.go +++ b/backend/memory/orchestrator/llm_write_orchestrator.go @@ -67,7 +67,7 @@ func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorym infrallm.GenerateOptions{ Temperature: clampTemperature(o.cfg.LLMTemperature), MaxTokens: defaultMemoryExtractMaxTokens, - Thinking: infrallm.ThinkingModeDisabled, + Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking), Metadata: map[string]any{ "stage": "memory_extract", "user_id": payload.UserID, diff --git a/backend/memory/service/config_loader.go b/backend/memory/service/config_loader.go index b87e6c8..7301c2c 100644 --- a/backend/memory/service/config_loader.go +++ b/backend/memory/service/config_loader.go @@ -40,6 +40,7 @@ func LoadConfigFromViper() memorymodel.Config { DecisionFallbackMode: viper.GetString("memory.decision.fallbackMode"), WriteMode: viper.GetString("memory.write.mode"), WriteMinConfidence: viper.GetFloat64("memory.write.minConfidence"), + LLMThinking: viper.GetBool("agent.thinking.memory"), } if cfg.Threshold <= 0 { diff --git a/backend/newAgent/model/graph_run_state.go b/backend/newAgent/model/graph_run_state.go index 1628070..8e2d1db 100644 --- a/backend/newAgent/model/graph_run_state.go +++ b/backend/newAgent/model/graph_run_state.go @@ -72,6 +72,11 @@ type AgentGraphDeps struct { RoughBuildFunc RoughBuildFunc // 按 Service 注入,粗排算法入口 WriteSchedulePreview WriteSchedulePreviewFunc // 按 Service 注入,排程预览写入入口 + // thinking 开关:由 config.yaml 的 agent.thinking 段注入,各节点按需读取。 + ThinkingPlan bool + ThinkingExecute bool + ThinkingDeliver bool + // 记忆预取管线:由 service 层启动的后台检索 goroutine 写入。 // channel 携带已渲染的文本内容(非原始 ItemDTO),节点直接写入 pinned block。 MemoryFuture chan string // buffered(1),携带 renderMemoryPinnedContentByMode 的输出 diff --git a/backend/newAgent/model/plan_contract.go b/backend/newAgent/model/plan_contract.go index ee14be4..b04e4d1 100644 --- a/backend/newAgent/model/plan_contract.go +++ b/backend/newAgent/model/plan_contract.go @@ -52,7 +52,6 @@ type PlanDecision struct { Action PlanAction `json:"action"` Reason string `json:"reason,omitempty"` Complexity PlanComplexity `json:"complexity"` - NeedThinking bool `json:"need_thinking"` PlanSteps []PlanStep `json:"plan_steps,omitempty"` NeedsRoughBuild bool `json:"needs_rough_build,omitempty"` TaskClassIDs []int `json:"task_class_ids,omitempty"` diff --git a/backend/newAgent/node/agent_nodes.go b/backend/newAgent/node/agent_nodes.go index 42b42e7..29c37df 100644 --- a/backend/newAgent/node/agent_nodes.go +++ b/backend/newAgent/node/agent_nodes.go @@ -120,6 +120,7 @@ func (n *AgentNodes) Plan(ctx context.Context, st *newagentmodel.AgentGraphState ChunkEmitter: st.EnsureChunkEmitter(), ResumeNode: "plan", AlwaysExecute: st.Request.AlwaysExecute, + ThinkingEnabled: st.Deps.ThinkingPlan, }, ); err != nil { return nil, err @@ -230,6 +231,7 @@ func (n *AgentNodes) Execute(ctx context.Context, st *newagentmodel.AgentGraphSt WriteSchedulePreview: st.Deps.WriteSchedulePreview, OriginalScheduleState: st.OriginalScheduleState, AlwaysExecute: st.Request.AlwaysExecute, + ThinkingEnabled: st.Deps.ThinkingExecute, }, ); err != nil { return nil, err @@ -277,6 +279,7 @@ func (n *AgentNodes) Deliver(ctx context.Context, st *newagentmodel.AgentGraphSt ConversationContext: st.EnsureConversationContext(), Client: st.Deps.ResolveDeliverClient(), ChunkEmitter: st.EnsureChunkEmitter(), + ThinkingEnabled: st.Deps.ThinkingDeliver, }, ); err != nil { return nil, err diff --git a/backend/newAgent/node/deliver.go b/backend/newAgent/node/deliver.go index 28978b8..d72ca07 100644 --- a/backend/newAgent/node/deliver.go +++ b/backend/newAgent/node/deliver.go @@ -32,6 +32,7 @@ type DeliverNodeInput struct { ConversationContext *newagentmodel.ConversationContext Client *infrallm.Client ChunkEmitter *newagentstream.ChunkEmitter + ThinkingEnabled bool // 是否开启 thinking,由 config.yaml 的 agent.thinking.deliver 注入 } // RunDeliverNode 执行一轮交付节点逻辑。 @@ -64,7 +65,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error { } // 2. 调 LLM 生成交付总结。 - summary := generateDeliverSummary(ctx, input.Client, flowState, conversationContext) + summary := generateDeliverSummary(ctx, input.Client, flowState, conversationContext, input.ThinkingEnabled) // 3. 伪流式推送总结。 if strings.TrimSpace(summary) != "" { @@ -98,6 +99,7 @@ func generateDeliverSummary( client *infrallm.Client, flowState *newagentmodel.CommonState, conversationContext *newagentmodel.ConversationContext, + thinkingEnabled bool, ) string { if flowState != nil { switch { @@ -119,7 +121,7 @@ func generateDeliverSummary( infrallm.GenerateOptions{ Temperature: 0.5, MaxTokens: 800, - Thinking: infrallm.ThinkingModeDisabled, + Thinking: resolveThinkingMode(thinkingEnabled), Metadata: map[string]any{ "stage": deliverStageName, }, diff --git a/backend/newAgent/node/execute.go b/backend/newAgent/node/execute.go index 8ada052..26532d1 100644 --- a/backend/newAgent/node/execute.go +++ b/backend/newAgent/node/execute.go @@ -59,6 +59,7 @@ type ExecuteNodeInput struct { WriteSchedulePreview newagentmodel.WriteSchedulePreviewFunc OriginalScheduleState *schedule.ScheduleState AlwaysExecute bool // true 时写工具跳过确认闸门直接执行 + ThinkingEnabled bool // 是否开启 thinking,由 config.yaml 的 agent.thinking.execute 注入 } // ExecuteRoundObservation 记录执行阶段每轮的关键观察。 @@ -203,7 +204,7 @@ func RunExecuteNode(ctx context.Context, input ExecuteNodeInput) error { infrallm.GenerateOptions{ Temperature: 1.0, // thinking 模式强制要求 temperature=1 MaxTokens: 16000, // 需为 thinking chain 留出足够预算 - Thinking: infrallm.ThinkingModeEnabled, + Thinking: resolveThinkingMode(input.ThinkingEnabled), Metadata: map[string]any{ "stage": executeStageName, "step_index": flowState.CurrentStep, diff --git a/backend/newAgent/node/plan.go b/backend/newAgent/node/plan.go index 28664ce..a7cb50d 100644 --- a/backend/newAgent/node/plan.go +++ b/backend/newAgent/node/plan.go @@ -35,19 +35,19 @@ type PlanNodeInput struct { ChunkEmitter *newagentstream.ChunkEmitter ResumeNode string AlwaysExecute bool // true 时计划生成后自动确认,不进入 confirm 节点 + ThinkingEnabled bool // 是否开启 thinking,由 config.yaml 的 agent.thinking.plan 注入 } // RunPlanNode 执行一轮规划节点逻辑。 // // 步骤说明: -// 1. 先校验最小依赖,并推送一条”正在规划”的状态,避免用户空等; -// 2. Phase 1(快速评估):不开 thinking,让 LLM 同时产出复杂度评估和规划结果; -// 3. Phase 2(深度规划):若 LLM 自评需要深度思考且规划已完成,开 thinking 重跑; -// 4. 若模型先对用户说了话,则先把 speak 伪流式推给前端,并写回 history; -// 5. 最后按 action 推进流程: -// 5.1 continue:继续停留在 planning; -// 5.2 ask_user:打开 pending interaction,后续交给 interrupt 收口; -// 5.3 plan_done:固化完整计划,刷新 pinned context,并进入 waiting_confirm。 +// 1. 先校验最小依赖,并推送一条"正在规划"的状态,避免用户空等; +// 2. 单轮深度规划:开 thinking、无 token 上限,让 LLM 一步到位产出完整计划; +// 3. 若模型先对用户说了话,则先把 speak 伪流式推给前端,并写回 history; +// 4. 最后按 action 推进流程: +// 4.1 continue:继续停留在 planning; +// 4.2 ask_user:打开 pending interaction,后续交给 interrupt 收口; +// 4.3 plan_done:固化完整计划,刷新 pinned context,并进入 waiting_confirm。 func RunPlanNode(ctx context.Context, input PlanNodeInput) error { runtimeState, conversationContext, emitter, err := preparePlanNodeInput(input) if err != nil { @@ -69,68 +69,31 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error { // 2. 构造本轮规划输入。 messages := newagentprompt.BuildPlanMessages(flowState, conversationContext, input.UserInput) - // 3. Phase 1:快速评估(开 thinking),让 LLM 同时产出复杂度评估和规划结果。 + // 3. 单轮深度规划:由配置决定是否开启 thinking,不做 token 上限约束。 decision, rawResult, err := infrallm.GenerateJSON[newagentmodel.PlanDecision]( ctx, input.Client, messages, infrallm.GenerateOptions{ Temperature: 0.2, - MaxTokens: 1600, - Thinking: infrallm.ThinkingModeEnabled, + Thinking: resolveThinkingMode(input.ThinkingEnabled), Metadata: map[string]any{ "stage": planStageName, - "phase": "assessment", + "phase": "planning", }, }, ) if err != nil { if rawResult != nil && strings.TrimSpace(rawResult.Text) != "" { - return fmt.Errorf("规划评估解析失败,原始输出=%s,错误=%w", strings.TrimSpace(rawResult.Text), err) + return fmt.Errorf("规划解析失败,原始输出=%s,错误=%w", strings.TrimSpace(rawResult.Text), err) } - return fmt.Errorf("规划评估阶段模型调用失败: %w", err) + return fmt.Errorf("规划阶段模型调用失败: %w", err) } if err := decision.Validate(); err != nil { - return fmt.Errorf("规划评估决策不合法: %w", err) + return fmt.Errorf("规划决策不合法: %w", err) } - // 4. Phase 2:若 LLM 自评需要深度思考且本轮规划已完成,则开启 thinking 重跑。 - // 条件:NeedThinking=true + Action=plan_done → 说明 LLM 认为当前无 thinking 的计划质量不够。 - // 其他 action(continue / ask_user)不需要 thinking,直接用 Phase 1 结果。 - if decision.NeedThinking && decision.Action == newagentmodel.PlanActionDone { - if err := emitter.EmitStatus( - planStatusBlockID, - planStageName, - "deep_planning", - "正在深入思考,生成更完善的计划。", - false, - ); err != nil { - return fmt.Errorf("深度规划状态推送失败: %w", err) - } - - deepDecision, _, deepErr := infrallm.GenerateJSON[newagentmodel.PlanDecision]( - ctx, - input.Client, - messages, - infrallm.GenerateOptions{ - Temperature: 0.2, - MaxTokens: 3200, - Thinking: infrallm.ThinkingModeEnabled, - Metadata: map[string]any{ - "stage": planStageName, - "phase": "deep_planning", - }, - }, - ) - if deepErr == nil && deepDecision != nil { - if validateErr := deepDecision.Validate(); validateErr == nil { - decision = deepDecision - } - } - // 深度规划失败时静默降级到 Phase 1 结果,不中断流程。 - } - - // 5. 若模型先对用户说了话,且不是 ask_user(ask_user 交给 interrupt 收口),则先以伪流式推送,再写回 history。 + // 4. 若模型先对用户说了话,且不是 ask_user(ask_user 交给 interrupt 收口),则先以伪流式推送,再写回 history。 if strings.TrimSpace(decision.Speak) != "" && decision.Action != newagentmodel.PlanActionAskUser { if err := emitter.EmitPseudoAssistantText( ctx, @@ -144,7 +107,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error { conversationContext.AppendHistory(schema.AssistantMessage(decision.Speak, nil)) } - // 6. 按规划动作推进流程状态。 + // 5. 按规划动作推进流程状态。 switch decision.Action { case newagentmodel.PlanActionContinue: flowState.Phase = newagentmodel.PhasePlanning @@ -169,10 +132,10 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error { } } // always_execute 开启时,计划层跳过确认闸门,直接进入执行阶段。 - // 这样可以与 Execute 节点的“写工具跳过确认”语义保持一致。 + // 这样可以与 Execute 节点的"写工具跳过确认"语义保持一致。 if input.AlwaysExecute { // 1. 自动执行模式不会经过 Confirm 卡片,因此这里先把完整计划明确展示给用户。 - // 2. 摘要格式复用 Confirm 节点,保证“手动确认”和“自动执行”两条链路文案一致。 + // 2. 摘要格式复用 Confirm 节点,保证"手动确认"和"自动执行"两条链路文案一致。 // 3. 推流后同步写入历史,确保后续 Execute 阶段的上下文也能看到这份计划。 summary := strings.TrimSpace(buildPlanSummary(decision.PlanSteps)) if summary != "" { @@ -296,3 +259,12 @@ func buildPinnedPlanText(steps []newagentmodel.PlanStep) string { } return strings.TrimSpace(strings.Join(lines, "\n\n")) } + +// resolveThinkingMode 根据配置布尔值返回对应的 ThinkingMode。 +// 供 plan / execute / deliver 节点统一使用。 +func resolveThinkingMode(enabled bool) infrallm.ThinkingMode { + if enabled { + return infrallm.ThinkingModeEnabled + } + return infrallm.ThinkingModeDisabled +} diff --git a/backend/newAgent/prompt/plan.go b/backend/newAgent/prompt/plan.go index 6b8a0ce..c5e3bd6 100644 --- a/backend/newAgent/prompt/plan.go +++ b/backend/newAgent/prompt/plan.go @@ -21,8 +21,7 @@ const planSystemPrompt = ` 5. plan_steps 必须使用自然语言,便于后端将完整 plan 重新注入到后续上下文顶部。 6. 只输出 JSON,不要输出 markdown,不要输出额外解释,不要在 JSON 外再补文字。 7. 每次输出前先评估任务复杂度:simple(简单明确,无复杂依赖)、moderate(多步操作,需要一定推理)、complex(需要深度推理、多方案比较或复杂依赖关系)。 -8. 根据复杂度判断 need_thinking:你是否需要深度思考才能生成高质量计划?当不确定时倾向于 false。 -9. 粗排识别规则:若满足以下两个条件,在 action=plan_done 时附加 needs_rough_build=true 和 task_class_ids: +8. 粗排识别规则:若满足以下两个条件,在 action=plan_done 时附加 needs_rough_build=true 和 task_class_ids: 条件1:用户输入中存在"任务类 ID"字段(见上下文"任务类 ID"部分); 条件2:用户意图明确是"批量安排/帮我排课/把任务类排进日程"等批量调度需求。 满足时:后端会在用户确认计划后自动运行粗排算法(硬性约束已由算法保证,无需 LLM 校验)。 @@ -99,7 +98,6 @@ func BuildPlanDecisionContractText() string { - action:只能是 %s / %s / %s - reason:给后端和日志看的简短说明 - complexity:任务复杂度,只能是 simple / moderate / complex -- need_thinking:是否需要深度思考才能生成高质量计划,只能是 true / false - plan_steps:仅当 action=%s 时允许返回;返回时必须是完整计划,不是增量 - plan_steps[].content:步骤正文,必填 - plan_steps[].done_when:可选,建议写"什么情况下算这一步做完" @@ -112,7 +110,6 @@ func BuildPlanDecisionContractText() string { "action": "%s", "reason": "当前信息已足够继续规划", "complexity": "moderate", - "need_thinking": false } { @@ -120,7 +117,6 @@ func BuildPlanDecisionContractText() string { "action": "%s", "reason": "当前时间范围仍不明确", "complexity": "simple", - "need_thinking": false } { @@ -128,7 +124,7 @@ func BuildPlanDecisionContractText() string { "action": "%s", "reason": "当前计划已具备执行条件", "complexity": "simple", - "need_thinking": false, + "plan_steps": [ { "content": "先确认本周可用时间范围", diff --git a/backend/service/agentsvc/agent.go b/backend/service/agentsvc/agent.go index c777bc8..97c1b51 100644 --- a/backend/service/agentsvc/agent.go +++ b/backend/service/agentsvc/agent.go @@ -104,14 +104,10 @@ func thinkingModeToBool(mode string) bool { // pickChatModel 根据请求选择模型。 // 当前约定: -// - strategist:策略模型; -// - 其余值默认 worker(包含空字符串场景)。 +// - 旧链路已全面切到 newAgent graph,这里仅作为 runNormalChatFlow 回退时的模型选择入口; +// - 统一返回 Pro 模型,旧 strategist 参数不再生效。 func (s *AgentService) pickChatModel(requestModel string) (*ark.ChatModel, string) { - modelName := strings.TrimSpace(requestModel) - if strings.EqualFold(modelName, "strategist") { - return s.AIHub.Strategist, "strategist" - } - return s.AIHub.Worker, "worker" + return s.AIHub.Pro, "pro" } // PersistChatHistory 是 Agent 聊天链路唯一的“消息持久化入口”。 diff --git a/backend/service/agentsvc/agent_meta.go b/backend/service/agentsvc/agent_meta.go index e63c501..c3ea28f 100644 --- a/backend/service/agentsvc/agent_meta.go +++ b/backend/service/agentsvc/agent_meta.go @@ -278,15 +278,15 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history [] } // pickTitleModel 选择用于标题生成的模型。 -// 优先 worker(成本低、速度快);worker 不可用时回退 strategist。 +// 优先 Lite(成本低、速度快);Lite 不可用时回退 Pro。 func (s *AgentService) pickTitleModel() *ark.ChatModel { if s.AIHub == nil { return nil } - if s.AIHub.Worker != nil { - return s.AIHub.Worker + if s.AIHub.Lite != nil { + return s.AIHub.Lite } - return s.AIHub.Strategist + return s.AIHub.Pro } // buildConversationTitleUserPrompt 把消息历史拼成可读文本供模型总结。 diff --git a/backend/service/agentsvc/agent_newagent.go b/backend/service/agentsvc/agent_newagent.go index 159bb36..9521733 100644 --- a/backend/service/agentsvc/agent_newagent.go +++ b/backend/service/agentsvc/agent_newagent.go @@ -15,6 +15,7 @@ import ( newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools" schedule "github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule" "github.com/cloudwego/eino/schema" + "github.com/spf13/viper" agentchat "github.com/LoveLosita/smartflow/backend/agent/chat" "github.com/LoveLosita/smartflow/backend/conv" @@ -149,10 +150,12 @@ func (s *AgentService) runNewAgentGraph( graphRequest.Normalize() // 7. 适配 LLM clients(从 AIHub 的 ark.ChatModel 转换为 newAgent LLM Client)。 - chatClient := infrallm.WrapArkClient(s.AIHub.Worker) - planClient := infrallm.WrapArkClient(s.AIHub.Worker) - executeClient := infrallm.WrapArkClient(s.AIHub.Worker) - deliverClient := infrallm.WrapArkClient(s.AIHub.Worker) + // 7.1 Chat/Deliver 使用 Pro 模型:路由分流、闲聊、交付总结属于标准复杂度。 + // 7.2 Plan/Execute 使用 Max 模型:规划和 ReAct 循环需要深度推理能力。 + chatClient := infrallm.WrapArkClient(s.AIHub.Pro) + planClient := infrallm.WrapArkClient(s.AIHub.Max) + executeClient := infrallm.WrapArkClient(s.AIHub.Max) + deliverClient := infrallm.WrapArkClient(s.AIHub.Pro) // 8. 适配 SSE emitter。 sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan) @@ -173,6 +176,9 @@ func (s *AgentService) runNewAgentGraph( RoughBuildFunc: s.makeRoughBuildFunc(), WriteSchedulePreview: s.makeWriteSchedulePreviewFunc(), MemoryFuture: memoryFuture, + ThinkingPlan: viper.GetBool("agent.thinking.plan"), + ThinkingExecute: viper.GetBool("agent.thinking.execute"), + ThinkingDeliver: viper.GetBool("agent.thinking.deliver"), } // 10. 构造 AgentGraphRunInput 并运行 graph。 @@ -190,8 +196,8 @@ func (s *AgentService) runNewAgentGraph( log.Printf("[ERROR] newAgent graph 执行失败 trace=%s chat=%s: %v", traceID, chatID, graphErr) pushErrNonBlocking(errChan, fmt.Errorf("graph 执行失败: %w", graphErr)) - // Graph 出错时回退普通聊天,保证可用性。 - s.runNormalChatFlow(requestCtx, s.AIHub.Worker, resolvedModelName, userMessage, "", nil, retryMeta, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan) + // Graph 出错时回退普通聊天,保证可用性。回退使用 Pro 模型。 + s.runNormalChatFlow(requestCtx, s.AIHub.Pro, resolvedModelName, userMessage, "", nil, retryMeta, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan) return } diff --git a/frontend/src/components/assistant/ContextWindowMeter.vue b/frontend/src/components/assistant/ContextWindowMeter.vue index 7978808..7ea4af0 100644 --- a/frontend/src/components/assistant/ContextWindowMeter.vue +++ b/frontend/src/components/assistant/ContextWindowMeter.vue @@ -3,14 +3,6 @@ import { computed } from 'vue' import type { ConversationContextStats } from '@/types/dashboard' -interface ContextSegment { - key: 'msg0' | 'msg1' | 'msg2' | 'msg3' - label: string - value: number - widthPercent: number - color: string -} - const props = withDefaults( defineProps<{ stats?: ConversationContextStats | null @@ -33,6 +25,14 @@ const usagePercent = computed(() => { return Math.round((safeStats.value.total / safeStats.value.budget) * 100) }) +const barWidthPercent = computed(() => { + if (!safeStats.value || safeStats.value.budget <= 0) { + return 0 + } + // 1. 按 total / budget 计算宽度,上限 100%(超预算时撑满进度条)。 + return Math.min(100, (safeStats.value.total / safeStats.value.budget) * 100) +}) + const isOverBudget = computed(() => { if (!safeStats.value) { return false @@ -40,31 +40,6 @@ const isOverBudget = computed(() => { return safeStats.value.total > safeStats.value.budget }) -const segments = computed(() => { - const stats = safeStats.value - if (!stats) { - return [] - } - - // 1. 进度条固定做成紧凑胶囊,因此按 max(total, budget) 计算比例,既保留预算留白,也兼容超预算占满。 - // 2. 四段颜色继续对应后端 msg0~msg3 的真实语义,避免前端为了视觉压缩而打乱统计含义。 - // 3. 零值段不渲染,减少窄尺寸下的噪点,让小组件也能保留基本可读性。 - const base = Math.max(stats.total, stats.budget, 1) - const rawSegments = [ - { key: 'msg0', label: '规则', value: stats.msg0, color: 'linear-gradient(90deg, #2556c7, #3b82f6)' }, - { key: 'msg1', label: '历史', value: stats.msg1, color: 'linear-gradient(90deg, #0f766e, #14b8a6)' }, - { key: 'msg2', label: '执行', value: stats.msg2, color: 'linear-gradient(90deg, #b45309, #f59e0b)' }, - { key: 'msg3', label: '当前', value: stats.msg3, color: 'linear-gradient(90deg, #15803d, #22c55e)' }, - ] as const - - return rawSegments - .filter((segment) => segment.value > 0) - .map((segment) => ({ - ...segment, - widthPercent: Math.max(0, Math.min(100, (segment.value / base) * 100)), - })) -}) - const usageText = computed(() => { if (props.loading) { return '...' @@ -86,9 +61,7 @@ const tooltipText = computed(() => { return props.disabled ? '新会话发送首条消息后展示上下文窗口统计' : '当前会话暂无上下文窗口统计' } - const segmentText = segments.value.map((segment) => `${segment.label} ${segment.value}`).join(' / ') - const usageSummary = `总计 ${safeStats.value.total} / 预算 ${safeStats.value.budget}(${usagePercent.value}%)` - return segmentText ? `${usageSummary};${segmentText}` : usageSummary + return `总计 ${safeStats.value.total} / 预算 ${safeStats.value.budget}(${usagePercent.value}%)` }) @@ -106,18 +79,7 @@ const tooltipText = computed(() => {