Version: 0.9.25.dev.260417

后端： 1. AIHub 模型分级从 Worker/Strategist 两级重构为 Lite/Pro/Max 三级 - AIHub 结构体从 Worker + Strategist 改为 Lite + Pro + Max，分别对应轻量（标题生成）、标准（Chat 路由/闲聊/交付总结）、高能力（Plan 规划/Execute ReAct）三个能力层级 - config.example.yaml 新增 liteModel / proModel / maxModel 三个模型配置项，替代原 workerModel / strategistModel - 启动层 InitEino 改为创建三个独立模型实例，抽取公共 baseURL 和 apiKey 减少重复 - pickChatModel 统一返回 Pro 模型，旧 strategist 参数不再生效；pickTitleModel 从 Worker 切到 Lite - runNewAgentGraph 按 Plan/Execute→Max、Chat/Deliver→Pro 分级注入；Graph 出错回退也切到 Pro - Memory 模块初始化从 Worker 改为 Pro 2. Plan 节点从"两阶段评估"简化为"单轮深度规划"，thinking 开关改为全配置化 - 移除 Phase 1（快速评估 1600 token）+ Phase 2（深度规划 3200 token）的两轮调用逻辑，改为单轮不限 token 深度规划 - PlanDecision 移除 need_thinking 字段，prompt 规则和 JSON contract 同步删除该字段 - 各节点（Plan / Execute / Deliver）thinking 开关从硬编码改为从 AgentGraphDeps 读取，由 config.yaml 的 agent.thinking 段按节点注入 - 新增 agent.thinking 配置段（plan / execute / deliver / memory 四个独立布尔开关），config.example.yaml 补齐默认值 - 新增 resolveThinkingMode 公共函数，供 plan / execute / deliver 节点统一使用；memory 决策/抽取链路使用同逻辑的 resolveMemoryThinkingMode（位于 memory orchestrator 包内） 3. Memory 模块 LLM 调用支持 thinking 开关 - Config 新增 LLMThinking 字段，config_loader 从 agent.thinking.memory 读取 - LLMDecisionOrchestrator.Compare 和 LLMWriteOrchestrator.ExtractFacts 的 thinking 模式从硬编码 Disabled 改为读取配置 前端： 1. 移除助手输入区模型选择器及全部偏好持久化逻辑 - 删除 ModelType 类型、selectedModel ref、MODEL_PREFERENCE_STORAGE_KEY 常量 - 删除 isModelType / loadModelPreferenceMap / persistModelPreferenceMap / savePreferredModel / resolvePreferredModel / applyPreferredModelForConversation 六个函数及 modelPreferenceMap ref - 删除 selectedModel watch 监听、发送消息时的 savePreferredModel 调用、切会话时的 applyPreferredModelForConversation 调用、会话迁移时的模型偏好迁移 - fetchChatStream 的 model 参数硬编码为 'worker' - 删除模板中"模型"下拉选择器（标准/策略）及对应的全局样式 .assistant-model-select-panel 2. 上下文窗口指示器简化为仅显示总占用 - ContextWindowMeter 移除 msg0~msg3 四段彩色分段逻辑（ContextSegment 接口、segments computed、v-for 渲染） - 进度条改为单一蓝色条，按 total/budget 比例填充；超预算时变红 - Tooltip 简化为仅显示"总计 X / 预算 Y（Z%）" 仓库：无
2026-04-17 12:27:04 +08:00
parent dd6638f8db
commit d47a8bcabd
19 changed files with 147 additions and 306 deletions
--- a/backend/cmd/start.go
+++ b/backend/cmd/start.go
@@ -83,7 +83,7 @@ func Start() {
 	memoryMetrics := memoryobserve.NewMetricsRegistry()
 	memoryModule := memory.NewModuleWithObserve(
 		db,
-		infrallm.WrapArkClient(aiHub.Worker),
+		infrallm.WrapArkClient(aiHub.Pro),
 		ragRuntime,
 		memoryCfg,
 		memory.ObserveDeps{
--- a/backend/config.example.yaml
+++ b/backend/config.example.yaml
@@ -67,16 +67,27 @@ time:

 # 智能体模型与规划参数。
 agent:
-  # 日常执行链路使用的主模型。
-  workerModel: "doubao-seed-2-0-code-preview-260215"
-  # 规划、拆解、策略推导使用的模型。
-  strategistModel: "doubao-seed-2-0-code-preview-260215"
+  # 轻量模型：标题生成等低复杂度、低延迟场景。
+  liteModel: "doubao-seed-2-0-code-preview-260215"
+  # 标准模型：Chat 路由/闲聊/深度回答/Deliver 总结。
+  proModel: "doubao-seed-2-0-code-preview-260215"
+  # 高能力模型：Plan 规划 + Execute ReAct 等深度推理场景。
+  maxModel: "doubao-seed-2-0-code-preview-260215"
  # 模型服务根路径。
  baseURL: "https://ark.cn-beijing.volces.com/api/v3"
  # 日内并发优化并发度，建议按模型配额调整。
  dailyRefineConcurrency: 7
  # 周级跨天配平额度上限，防止过度调整。
  weeklyAdjustBudget: 5
+  thinking:
+    # plan 节点（单轮深度规划），示例默认开 thinking；若该键缺失且代码未另设默认值，viper.GetBool 会返回 false（即关闭）。
+    plan: true
+    # execute 节点（ReAct 深度推理），示例默认开 thinking；此前为硬编码开启，升级后若缺少该键会变为关闭，请显式配置。
+    execute: true
+    # deliver 节点（交付总结），默认关 thinking。
+    deliver: false
+    # 记忆模块（决策比对 + 抽取），默认关 thinking。
+    memory: false

 # 通用 RAG 配置。
 rag:
--- a/backend/inits/eino.go
+++ b/backend/inits/eino.go
@@ -8,32 +8,53 @@ import (
 	"github.com/spf13/viper"
 )

-// AIHub 存储不同能力的模型实例
+// AIHub 存储三级模型的实例，按能力分级调度。
+//
+// 分级策略：
+// 1. Lite：轻量模型，用于标题生成等低复杂度、低延迟场景；
+// 2. Pro：标准模型，用于 Chat 路由/闲聊/深度回答/Deliver 总结；
+// 3. Max：高能力模型，用于 Plan 规划和 Execute ReAct 等需要深度推理的场景。
 type AIHub struct {
-	Strategist *ark.ChatModel // 智力担当：处理复杂排程逻辑
-	Worker     *ark.ChatModel // 效率担当：处理简单任务、总结
+	Lite *ark.ChatModel // 轻量模型：标题生成等低复杂度任务
+	Pro  *ark.ChatModel // 标准模型：Chat 路由、闲聊、交付总结
+	Max  *ark.ChatModel // 高能力模型：Plan 规划、Execute ReAct
 }

 func InitEino() (*AIHub, error) {
 	ctx := context.Background()
-	worker, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
-		Model:   viper.GetString("agent.workerModel"), // 使用的模型版本
-		BaseURL: viper.GetString("agent.baseURL"),     // Eino API 的基础 URL
-		APIKey:  os.Getenv("ARK_API_KEY"),             // API 密钥
+	baseURL := viper.GetString("agent.baseURL")
+	apiKey := os.Getenv("ARK_API_KEY")
+
+	// 1. Lite 模型：标题生成等低复杂度场景，优先控制成本和延迟。
+	lite, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
+		Model:   viper.GetString("agent.liteModel"),
+		BaseURL: baseURL,
+		APIKey:  apiKey,
 	})
 	if err != nil {
 		return nil, err
 	}
-	strategist, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
-		Model:   viper.GetString("agent.strategistModel"), // 使用的模型版本
-		BaseURL: viper.GetString("agent.baseURL"),         // Eino API 的基础 URL
-		APIKey:  os.Getenv("ARK_API_KEY"),                 // API 密钥
+	// 2. Pro 模型：Chat 路由/闲聊/交付总结等标准复杂度场景。
+	pro, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
+		Model:   viper.GetString("agent.proModel"),
+		BaseURL: baseURL,
+		APIKey:  apiKey,
+	})
+	if err != nil {
+		return nil, err
+	}
+	// 3. Max 模型：Plan 规划和 Execute ReAct 等需要深度推理的场景。
+	maxModel, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
+		Model:   viper.GetString("agent.maxModel"),
+		BaseURL: baseURL,
+		APIKey:  apiKey,
 	})
 	if err != nil {
 		return nil, err
 	}
 	return &AIHub{
-		Strategist: strategist,
-		Worker:     worker,
+		Lite: lite,
+		Pro:  pro,
+		Max:  maxModel,
 	}, nil
 }
--- a/backend/memory/model/config.go
+++ b/backend/memory/model/config.go
@@ -72,6 +72,9 @@ type Config struct {
 	// 2. 默认 0.5，与"守门员"prompt 的 confidence>=0.5 输出规则配合；
 	// 3. fallback 路径 confidence 设为 0.45，低于默认阈值，LLM 不可用时不写入。
 	WriteMinConfidence float64
+
+	// 记忆模块 LLM 调用是否开启 thinking，由 config.yaml 的 agent.thinking.memory 注入。
+	LLMThinking bool
 }

 // NormalizeReadMode 统一读取模式字符串。
--- a/backend/memory/orchestrator/llm_decision_orchestrator.go
+++ b/backend/memory/orchestrator/llm_decision_orchestrator.go
@@ -62,10 +62,7 @@ func (o *LLMDecisionOrchestrator) Compare(
 		infrallm.GenerateOptions{
 			Temperature: 0.1,
 			MaxTokens:   defaultDecisionCompareMaxTokens,
-			Thinking:    infrallm.ThinkingModeDisabled,
-			Metadata: map[string]any{
-				"stage": "memory_decision_compare",
-			},
+			Thinking:    resolveMemoryThinkingMode(o.cfg.LLMThinking),
 		},
 	)
 	if err != nil {
@@ -128,3 +125,11 @@ func buildDecisionCompareUserPrompt(fact memorymodel.NormalizedFact, candidate m
 		candidate.MemoryType, candidate.Content,
 	)
 }
+
+// resolveMemoryThinkingMode 根据配置布尔值返回对应的 ThinkingMode。
+func resolveMemoryThinkingMode(enabled bool) infrallm.ThinkingMode {
+	if enabled {
+		return infrallm.ThinkingModeEnabled
+	}
+	return infrallm.ThinkingModeDisabled
+}
--- a/backend/memory/orchestrator/llm_write_orchestrator.go
+++ b/backend/memory/orchestrator/llm_write_orchestrator.go
@@ -67,7 +67,7 @@ func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorym
 		infrallm.GenerateOptions{
 			Temperature: clampTemperature(o.cfg.LLMTemperature),
 			MaxTokens:   defaultMemoryExtractMaxTokens,
-			Thinking:    infrallm.ThinkingModeDisabled,
+			Thinking:    resolveMemoryThinkingMode(o.cfg.LLMThinking),
 			Metadata: map[string]any{
 				"stage":           "memory_extract",
 				"user_id":         payload.UserID,
--- a/backend/memory/service/config_loader.go
+++ b/backend/memory/service/config_loader.go
@@ -40,6 +40,7 @@ func LoadConfigFromViper() memorymodel.Config {
 		DecisionFallbackMode:      viper.GetString("memory.decision.fallbackMode"),
 		WriteMode:                 viper.GetString("memory.write.mode"),
 		WriteMinConfidence:        viper.GetFloat64("memory.write.minConfidence"),
+		LLMThinking:               viper.GetBool("agent.thinking.memory"),
 	}

 	if cfg.Threshold <= 0 {
--- a/backend/newAgent/model/graph_run_state.go
+++ b/backend/newAgent/model/graph_run_state.go
@@ -72,6 +72,11 @@ type AgentGraphDeps struct {
 	RoughBuildFunc       RoughBuildFunc           // 按 Service 注入，粗排算法入口
 	WriteSchedulePreview WriteSchedulePreviewFunc // 按 Service 注入，排程预览写入入口

+	// thinking 开关：由 config.yaml 的 agent.thinking 段注入，各节点按需读取。
+	ThinkingPlan    bool
+	ThinkingExecute bool
+	ThinkingDeliver bool
+
 	// 记忆预取管线：由 service 层启动的后台检索 goroutine 写入。
 	// channel 携带已渲染的文本内容（非原始 ItemDTO），节点直接写入 pinned block。
 	MemoryFuture   chan string // buffered(1)，携带 renderMemoryPinnedContentByMode 的输出
--- a/backend/newAgent/model/plan_contract.go
+++ b/backend/newAgent/model/plan_contract.go
@@ -52,7 +52,6 @@ type PlanDecision struct {
 	Action          PlanAction     `json:"action"`
 	Reason          string         `json:"reason,omitempty"`
 	Complexity      PlanComplexity `json:"complexity"`
-	NeedThinking    bool           `json:"need_thinking"`
 	PlanSteps       []PlanStep     `json:"plan_steps,omitempty"`
 	NeedsRoughBuild bool           `json:"needs_rough_build,omitempty"`
 	TaskClassIDs    []int          `json:"task_class_ids,omitempty"`
--- a/backend/newAgent/node/agent_nodes.go
+++ b/backend/newAgent/node/agent_nodes.go
@@ -120,6 +120,7 @@ func (n *AgentNodes) Plan(ctx context.Context, st *newagentmodel.AgentGraphState
 			ChunkEmitter:        st.EnsureChunkEmitter(),
 			ResumeNode:          "plan",
 			AlwaysExecute:       st.Request.AlwaysExecute,
+			ThinkingEnabled:     st.Deps.ThinkingPlan,
 		},
 	); err != nil {
 		return nil, err
@@ -230,6 +231,7 @@ func (n *AgentNodes) Execute(ctx context.Context, st *newagentmodel.AgentGraphSt
 			WriteSchedulePreview:  st.Deps.WriteSchedulePreview,
 			OriginalScheduleState: st.OriginalScheduleState,
 			AlwaysExecute:         st.Request.AlwaysExecute,
+			ThinkingEnabled:       st.Deps.ThinkingExecute,
 		},
 	); err != nil {
 		return nil, err
@@ -277,6 +279,7 @@ func (n *AgentNodes) Deliver(ctx context.Context, st *newagentmodel.AgentGraphSt
 			ConversationContext: st.EnsureConversationContext(),
 			Client:              st.Deps.ResolveDeliverClient(),
 			ChunkEmitter:        st.EnsureChunkEmitter(),
+			ThinkingEnabled:     st.Deps.ThinkingDeliver,
 		},
 	); err != nil {
 		return nil, err
--- a/backend/newAgent/node/deliver.go
+++ b/backend/newAgent/node/deliver.go
@@ -32,6 +32,7 @@ type DeliverNodeInput struct {
 	ConversationContext *newagentmodel.ConversationContext
 	Client              *infrallm.Client
 	ChunkEmitter        *newagentstream.ChunkEmitter
+	ThinkingEnabled     bool // 是否开启 thinking，由 config.yaml 的 agent.thinking.deliver 注入
 }

 // RunDeliverNode 执行一轮交付节点逻辑。
@@ -64,7 +65,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error {
 	}

 	// 2. 调 LLM 生成交付总结。
-	summary := generateDeliverSummary(ctx, input.Client, flowState, conversationContext)
+	summary := generateDeliverSummary(ctx, input.Client, flowState, conversationContext, input.ThinkingEnabled)

 	// 3. 伪流式推送总结。
 	if strings.TrimSpace(summary) != "" {
@@ -98,6 +99,7 @@ func generateDeliverSummary(
 	client *infrallm.Client,
 	flowState *newagentmodel.CommonState,
 	conversationContext *newagentmodel.ConversationContext,
+	thinkingEnabled bool,
 ) string {
 	if flowState != nil {
 		switch {
@@ -119,7 +121,7 @@ func generateDeliverSummary(
 		infrallm.GenerateOptions{
 			Temperature: 0.5,
 			MaxTokens:   800,
-			Thinking:    infrallm.ThinkingModeDisabled,
+			Thinking:    resolveThinkingMode(thinkingEnabled),
 			Metadata: map[string]any{
 				"stage": deliverStageName,
 			},
--- a/backend/newAgent/node/execute.go
+++ b/backend/newAgent/node/execute.go
@@ -59,6 +59,7 @@ type ExecuteNodeInput struct {
 	WriteSchedulePreview  newagentmodel.WriteSchedulePreviewFunc
 	OriginalScheduleState *schedule.ScheduleState
 	AlwaysExecute         bool // true 时写工具跳过确认闸门直接执行
+	ThinkingEnabled       bool // 是否开启 thinking，由 config.yaml 的 agent.thinking.execute 注入
 }

 // ExecuteRoundObservation 记录执行阶段每轮的关键观察。
@@ -203,7 +204,7 @@ func RunExecuteNode(ctx context.Context, input ExecuteNodeInput) error {
 		infrallm.GenerateOptions{
 			Temperature: 1.0,   // thinking 模式强制要求 temperature=1
 			MaxTokens:   16000, // 需为 thinking chain 留出足够预算
-			Thinking:    infrallm.ThinkingModeEnabled,
+			Thinking:    resolveThinkingMode(input.ThinkingEnabled),
 			Metadata: map[string]any{
 				"stage":      executeStageName,
 				"step_index": flowState.CurrentStep,
--- a/backend/newAgent/node/plan.go
+++ b/backend/newAgent/node/plan.go
@@ -35,19 +35,19 @@ type PlanNodeInput struct {
 	ChunkEmitter        *newagentstream.ChunkEmitter
 	ResumeNode          string
 	AlwaysExecute       bool // true 时计划生成后自动确认，不进入 confirm 节点
+	ThinkingEnabled     bool // 是否开启 thinking，由 config.yaml 的 agent.thinking.plan 注入
 }

 // RunPlanNode 执行一轮规划节点逻辑。
 //
 // 步骤说明：
-//  1. 先校验最小依赖，并推送一条”正在规划”的状态，避免用户空等；
-//  2. Phase 1（快速评估）：不开 thinking，让 LLM 同时产出复杂度评估和规划结果；
-//  3. Phase 2（深度规划）：若 LLM 自评需要深度思考且规划已完成，开 thinking 重跑；
-//  4. 若模型先对用户说了话，则先把 speak 伪流式推给前端，并写回 history；
-//  5. 最后按 action 推进流程：
-//     5.1 continue：继续停留在 planning；
-//     5.2 ask_user：打开 pending interaction，后续交给 interrupt 收口；
-//     5.3 plan_done：固化完整计划，刷新 pinned context，并进入 waiting_confirm。
+//  1. 先校验最小依赖，并推送一条"正在规划"的状态，避免用户空等；
+//  2. 单轮深度规划：是否开启 thinking 由 input.ThinkingEnabled（config.yaml 的 agent.thinking.plan）决定，不设 MaxTokens 上限，让 LLM 一步到位产出完整计划；
+//  3. 若模型先对用户说了话，则先把 speak 伪流式推给前端，并写回 history；
+//  4. 最后按 action 推进流程：
+//     4.1 continue：继续停留在 planning；
+//     4.2 ask_user：打开 pending interaction，后续交给 interrupt 收口；
+//     4.3 plan_done：固化完整计划，刷新 pinned context，并进入 waiting_confirm。
 func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 	runtimeState, conversationContext, emitter, err := preparePlanNodeInput(input)
 	if err != nil {
@@ -69,68 +69,31 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 	// 2. 构造本轮规划输入。
 	messages := newagentprompt.BuildPlanMessages(flowState, conversationContext, input.UserInput)

-	// 3. Phase 1：快速评估（开 thinking），让 LLM 同时产出复杂度评估和规划结果。
+	// 3. 单轮深度规划：由配置决定是否开启 thinking，不做 token 上限约束。
 	decision, rawResult, err := infrallm.GenerateJSON[newagentmodel.PlanDecision](
 		ctx,
 		input.Client,
 		messages,
 		infrallm.GenerateOptions{
 			Temperature: 0.2,
-			MaxTokens:   1600,
-			Thinking:    infrallm.ThinkingModeEnabled,
+			Thinking:    resolveThinkingMode(input.ThinkingEnabled),
 			Metadata: map[string]any{
 				"stage": planStageName,
-				"phase": "assessment",
+				"phase": "planning",
 			},
 		},
 	)
 	if err != nil {
 		if rawResult != nil && strings.TrimSpace(rawResult.Text) != "" {
-			return fmt.Errorf("规划评估解析失败，原始输出=%s，错误=%w", strings.TrimSpace(rawResult.Text), err)
+			return fmt.Errorf("规划解析失败，原始输出=%s，错误=%w", strings.TrimSpace(rawResult.Text), err)
 		}
-		return fmt.Errorf("规划评估阶段模型调用失败: %w", err)
+		return fmt.Errorf("规划阶段模型调用失败: %w", err)
 	}
 	if err := decision.Validate(); err != nil {
-		return fmt.Errorf("规划评估决策不合法: %w", err)
+		return fmt.Errorf("规划决策不合法: %w", err)
 	}

-	// 4. Phase 2：若 LLM 自评需要深度思考且本轮规划已完成，则开启 thinking 重跑。
-	//    条件：NeedThinking=true + Action=plan_done → 说明 LLM 认为当前无 thinking 的计划质量不够。
-	//    其他 action（continue / ask_user）不需要 thinking，直接用 Phase 1 结果。
-	if decision.NeedThinking && decision.Action == newagentmodel.PlanActionDone {
-		if err := emitter.EmitStatus(
-			planStatusBlockID,
-			planStageName,
-			"deep_planning",
-			"正在深入思考，生成更完善的计划。",
-			false,
-		); err != nil {
-			return fmt.Errorf("深度规划状态推送失败: %w", err)
-		}
-
-		deepDecision, _, deepErr := infrallm.GenerateJSON[newagentmodel.PlanDecision](
-			ctx,
-			input.Client,
-			messages,
-			infrallm.GenerateOptions{
-				Temperature: 0.2,
-				MaxTokens:   3200,
-				Thinking:    infrallm.ThinkingModeEnabled,
-				Metadata: map[string]any{
-					"stage": planStageName,
-					"phase": "deep_planning",
-				},
-			},
-		)
-		if deepErr == nil && deepDecision != nil {
-			if validateErr := deepDecision.Validate(); validateErr == nil {
-				decision = deepDecision
-			}
-		}
-		// 深度规划失败时静默降级到 Phase 1 结果，不中断流程。
-	}
-
-	// 5. 若模型先对用户说了话，且不是 ask_user（ask_user 交给 interrupt 收口），则先以伪流式推送，再写回 history。
+	// 4. 若模型先对用户说了话，且不是 ask_user（ask_user 交给 interrupt 收口），则先以伪流式推送，再写回 history。
 	if strings.TrimSpace(decision.Speak) != "" && decision.Action != newagentmodel.PlanActionAskUser {
 		if err := emitter.EmitPseudoAssistantText(
 			ctx,
@@ -144,7 +107,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 		conversationContext.AppendHistory(schema.AssistantMessage(decision.Speak, nil))
 	}

-	// 6. 按规划动作推进流程状态。
+	// 5. 按规划动作推进流程状态。
 	switch decision.Action {
 	case newagentmodel.PlanActionContinue:
 		flowState.Phase = newagentmodel.PhasePlanning
@@ -169,10 +132,10 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 			}
 		}
 		// always_execute 开启时，计划层跳过确认闸门，直接进入执行阶段。
-		// 这样可以与 Execute 节点的“写工具跳过确认”语义保持一致。
+		// 这样可以与 Execute 节点的"写工具跳过确认"语义保持一致。
 		if input.AlwaysExecute {
 			// 1. 自动执行模式不会经过 Confirm 卡片，因此这里先把完整计划明确展示给用户。
-			// 2. 摘要格式复用 Confirm 节点，保证“手动确认”和“自动执行”两条链路文案一致。
+			// 2. 摘要格式复用 Confirm 节点，保证"手动确认"和"自动执行"两条链路文案一致。
 			// 3. 推流后同步写入历史，确保后续 Execute 阶段的上下文也能看到这份计划。
 			summary := strings.TrimSpace(buildPlanSummary(decision.PlanSteps))
 			if summary != "" {
@@ -296,3 +259,12 @@ func buildPinnedPlanText(steps []newagentmodel.PlanStep) string {
 	}
 	return strings.TrimSpace(strings.Join(lines, "\n\n"))
 }
+
+// resolveThinkingMode 根据配置布尔值返回对应的 ThinkingMode。
+// 供 plan / execute / deliver 节点统一使用。
+func resolveThinkingMode(enabled bool) infrallm.ThinkingMode {
+	if enabled {
+		return infrallm.ThinkingModeEnabled
+	}
+	return infrallm.ThinkingModeDisabled
+}
--- a/backend/newAgent/prompt/plan.go
+++ b/backend/newAgent/prompt/plan.go
@@ -21,8 +21,7 @@ const planSystemPrompt = `
 5. plan_steps 必须使用自然语言，便于后端将完整 plan 重新注入到后续上下文顶部。
 6. 只输出 JSON，不要输出 markdown，不要输出额外解释，不要在 JSON 外再补文字。
 7. 每次输出前先评估任务复杂度：simple（简单明确，无复杂依赖）、moderate（多步操作，需要一定推理）、complex（需要深度推理、多方案比较或复杂依赖关系）。
-8. 根据复杂度判断 need_thinking：你是否需要深度思考才能生成高质量计划？当不确定时倾向于 false。
-9. 粗排识别规则：若满足以下两个条件，在 action=plan_done 时附加 needs_rough_build=true 和 task_class_ids：
+8. 粗排识别规则：若满足以下两个条件，在 action=plan_done 时附加 needs_rough_build=true 和 task_class_ids：
   条件1：用户输入中存在"任务类 ID"字段（见上下文"任务类 ID"部分）；
   条件2：用户意图明确是"批量安排/帮我排课/把任务类排进日程"等批量调度需求。
   满足时：后端会在用户确认计划后自动运行粗排算法（硬性约束已由算法保证，无需 LLM 校验）。
@@ -99,7 +98,6 @@ func BuildPlanDecisionContractText() string {
 - action：只能是 %s / %s / %s
 - reason：给后端和日志看的简短说明
 - complexity：任务复杂度，只能是 simple / moderate / complex
- need_thinking：是否需要深度思考才能生成高质量计划，只能是 true / false
 - plan_steps：仅当 action=%s 时允许返回；返回时必须是完整计划，不是增量
 - plan_steps[].content：步骤正文，必填
 - plan_steps[].done_when：可选，建议写"什么情况下算这一步做完"
@@ -112,7 +110,6 @@ func BuildPlanDecisionContractText() string {
  "action": "%s",
  "reason": "当前信息已足够继续规划",
  "complexity": "moderate",
-  "need_thinking": false
 }

 {
@@ -120,7 +117,6 @@ func BuildPlanDecisionContractText() string {
  "action": "%s",
  "reason": "当前时间范围仍不明确",
  "complexity": "simple",
-  "need_thinking": false
 }

 {
@@ -128,7 +124,7 @@ func BuildPlanDecisionContractText() string {
  "action": "%s",
  "reason": "当前计划已具备执行条件",
  "complexity": "simple",
-  "need_thinking": false,
+  
  "plan_steps": [
    {
      "content": "先确认本周可用时间范围",
--- a/backend/service/agentsvc/agent.go
+++ b/backend/service/agentsvc/agent.go
@@ -104,14 +104,10 @@ func thinkingModeToBool(mode string) bool {

 // pickChatModel 根据请求选择模型。
 // 当前约定：
-// - strategist：策略模型；
-// - 其余值默认 worker（包含空字符串场景）。
+// - 旧链路已全面切到 newAgent graph，这里仅作为 runNormalChatFlow 回退时的模型选择入口；
+// - 统一返回 Pro 模型，旧 strategist 参数不再生效。
 func (s *AgentService) pickChatModel(requestModel string) (*ark.ChatModel, string) {
-	modelName := strings.TrimSpace(requestModel)
-	if strings.EqualFold(modelName, "strategist") {
-		return s.AIHub.Strategist, "strategist"
-	}
-	return s.AIHub.Worker, "worker"
+	return s.AIHub.Pro, "pro"
 }

 // PersistChatHistory 是 Agent 聊天链路唯一的“消息持久化入口”。
--- a/backend/service/agentsvc/agent_meta.go
+++ b/backend/service/agentsvc/agent_meta.go
@@ -278,15 +278,15 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
 }

 // pickTitleModel 选择用于标题生成的模型。
-// 优先 worker（成本低、速度快）；worker 不可用时回退 strategist。
+// 优先 Lite（成本低、速度快）；Lite 不可用时回退 Pro。
 func (s *AgentService) pickTitleModel() *ark.ChatModel {
 	if s.AIHub == nil {
 		return nil
 	}
-	if s.AIHub.Worker != nil {
-		return s.AIHub.Worker
+	if s.AIHub.Lite != nil {
+		return s.AIHub.Lite
 	}
-	return s.AIHub.Strategist
+	return s.AIHub.Pro
 }

 // buildConversationTitleUserPrompt 把消息历史拼成可读文本供模型总结。
--- a/backend/service/agentsvc/agent_newagent.go
+++ b/backend/service/agentsvc/agent_newagent.go
@@ -15,6 +15,7 @@ import (
 	newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
 	schedule "github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
 	"github.com/cloudwego/eino/schema"
+	"github.com/spf13/viper"

 	agentchat "github.com/LoveLosita/smartflow/backend/agent/chat"
 	"github.com/LoveLosita/smartflow/backend/conv"
@@ -149,10 +150,12 @@ func (s *AgentService) runNewAgentGraph(
 	graphRequest.Normalize()

 	// 7. 适配 LLM clients（从 AIHub 的 ark.ChatModel 转换为 newAgent LLM Client）。
-	chatClient := infrallm.WrapArkClient(s.AIHub.Worker)
-	planClient := infrallm.WrapArkClient(s.AIHub.Worker)
-	executeClient := infrallm.WrapArkClient(s.AIHub.Worker)
-	deliverClient := infrallm.WrapArkClient(s.AIHub.Worker)
+	// 7.1 Chat/Deliver 使用 Pro 模型：路由分流、闲聊、交付总结属于标准复杂度。
+	// 7.2 Plan/Execute 使用 Max 模型：规划和 ReAct 循环需要深度推理能力。
+	chatClient := infrallm.WrapArkClient(s.AIHub.Pro)
+	planClient := infrallm.WrapArkClient(s.AIHub.Max)
+	executeClient := infrallm.WrapArkClient(s.AIHub.Max)
+	deliverClient := infrallm.WrapArkClient(s.AIHub.Pro)

 	// 8. 适配 SSE emitter。
 	sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan)
@@ -173,6 +176,9 @@ func (s *AgentService) runNewAgentGraph(
 		RoughBuildFunc:       s.makeRoughBuildFunc(),
 		WriteSchedulePreview: s.makeWriteSchedulePreviewFunc(),
 		MemoryFuture:         memoryFuture,
+		ThinkingPlan:         viper.GetBool("agent.thinking.plan"),
+		ThinkingExecute:      viper.GetBool("agent.thinking.execute"),
+		ThinkingDeliver:      viper.GetBool("agent.thinking.deliver"),
 	}

 	// 10. 构造 AgentGraphRunInput 并运行 graph。
@@ -190,8 +196,8 @@ func (s *AgentService) runNewAgentGraph(
 		log.Printf("[ERROR] newAgent graph 执行失败 trace=%s chat=%s: %v", traceID, chatID, graphErr)
 		pushErrNonBlocking(errChan, fmt.Errorf("graph 执行失败: %w", graphErr))

-		// Graph 出错时回退普通聊天，保证可用性。
-		s.runNormalChatFlow(requestCtx, s.AIHub.Worker, resolvedModelName, userMessage, "", nil, retryMeta, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
+		// Graph 出错时回退普通聊天，保证可用性。回退使用 Pro 模型。
+		s.runNormalChatFlow(requestCtx, s.AIHub.Pro, resolvedModelName, userMessage, "", nil, retryMeta, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
 		return
 	}