From d47a8bcabd0275c64817517b27d9bb7375cb9106 Mon Sep 17 00:00:00 2001
From: Losita <2810873701@qq.com>
Date: Fri, 17 Apr 2026 12:27:04 +0800
Subject: [PATCH] =?UTF-8?q?Version:=200.9.25.dev.260417=20=E5=90=8E?=
 =?UTF-8?q?=E7=AB=AF=EF=BC=9A=201.=20AIHub=20=E6=A8=A1=E5=9E=8B=E5=88=86?=
 =?UTF-8?q?=E7=BA=A7=E4=BB=8E=20Worker/Strategist=20=E4=B8=A4=E7=BA=A7?=
 =?UTF-8?q?=E9=87=8D=E6=9E=84=E4=B8=BA=20Lite/Pro/Max=20=E4=B8=89=E7=BA=A7?=
 =?UTF-8?q?=20-=20AIHub=20=E7=BB=93=E6=9E=84=E4=BD=93=E4=BB=8E=20Worker=20?=
 =?UTF-8?q?+=20Strategist=20=E6=94=B9=E4=B8=BA=20Lite=20+=20Pro=20+=20Max?=
 =?UTF-8?q?=EF=BC=8C=E5=88=86=E5=88=AB=E5=AF=B9=E5=BA=94=E8=BD=BB=E9=87=8F?=
 =?UTF-8?q?=EF=BC=88=E6=A0=87=E9=A2=98=E7=94=9F=E6=88=90=EF=BC=89=E3=80=81?=
 =?UTF-8?q?=E6=A0=87=E5=87=86=EF=BC=88Chat=20=E8=B7=AF=E7=94=B1/=E9=97=B2?=
 =?UTF-8?q?=E8=81=8A/=E4=BA=A4=E4=BB=98=E6=80=BB=E7=BB=93=EF=BC=89?=
 =?UTF-8?q?=E3=80=81=E9=AB=98=E8=83=BD=E5=8A=9B=EF=BC=88Plan=20=E8=A7=84?=
 =?UTF-8?q?=E5=88=92/Execute=20ReAct=EF=BC=89=E4=B8=89=E4=B8=AA=E8=83=BD?=
 =?UTF-8?q?=E5=8A=9B=E5=B1=82=E7=BA=A7=20-=20config.example.yaml=20?=
 =?UTF-8?q?=E6=96=B0=E5=A2=9E=20liteModel=20/=20proModel=20/=20maxModel=20?=
 =?UTF-8?q?=E4=B8=89=E4=B8=AA=E6=A8=A1=E5=9E=8B=E9=85=8D=E7=BD=AE=E9=A1=B9?=
 =?UTF-8?q?=EF=BC=8C=E6=9B=BF=E4=BB=A3=E5=8E=9F=20workerModel=20/=20strate?=
 =?UTF-8?q?gistModel=20-=20=E5=90=AF=E5=8A=A8=E5=B1=82=20InitEino=20?=
 =?UTF-8?q?=E6=94=B9=E4=B8=BA=E5=88=9B=E5=BB=BA=E4=B8=89=E4=B8=AA=E7=8B=AC?=
 =?UTF-8?q?=E7=AB=8B=E6=A8=A1=E5=9E=8B=E5=AE=9E=E4=BE=8B=EF=BC=8C=E6=8A=BD?=
 =?UTF-8?q?=E5=8F=96=E5=85=AC=E5=85=B1=20baseURL=20=E5=92=8C=20apiKey=20?=
 =?UTF-8?q?=E5=87=8F=E5=B0=91=E9=87=8D=E5=A4=8D=20-=20pickChatModel=20?=
 =?UTF-8?q?=E7=BB=9F=E4=B8=80=E8=BF=94=E5=9B=9E=20Pro=20=E6=A8=A1=E5=9E=8B?=
 =?UTF-8?q?=EF=BC=8C=E6=97=A7=20strategist=20=E5=8F=82=E6=95=B0=E4=B8=8D?=
 =?UTF-8?q?=E5=86=8D=E7=94=9F=E6=95=88=EF=BC=9BpickTitleModel=20=E4=BB=8E?=
 =?UTF-8?q?=20Worker=20=E5=88=87=E5=88=B0=20Lite=20-=20runNewAgentGraph=20?=
 =?UTF-8?q?=E6=8C=89=20Plan/Execute=E2=86=92Max=E3=80=81Chat/Deliver?=
 =?UTF-8?q?=E2=86=92Pro=20=E5=88=86=E7=BA=A7=E6=B3=A8=E5=85=A5=EF=BC=9BGra?=
 =?UTF-8?q?ph=20=E5=87=BA=E9=94=99=E5=9B=9E=E9=80=80=E4=B9=9F=E5=88=87?=
 =?UTF-8?q?=E5=88=B0=20Pro=20-=20Memory=20=E6=A8=A1=E5=9D=97=E5=88=9D?=
 =?UTF-8?q?=E5=A7=8B=E5=8C=96=E4=BB=8E=20Worker=20=E6=94=B9=E4=B8=BA=20Pro?=
 =?UTF-8?q?=202.=20Plan=20=E8=8A=82=E7=82=B9=E4=BB=8E"=E4=B8=A4=E9=98=B6?=
 =?UTF-8?q?=E6=AE=B5=E8=AF=84=E4=BC=B0"=E7=AE=80=E5=8C=96=E4=B8=BA"?=
 =?UTF-8?q?=E5=8D=95=E8=BD=AE=E6=B7=B1=E5=BA=A6=E8=A7=84=E5=88=92"?=
 =?UTF-8?q?=EF=BC=8Cthinking=20=E5=BC=80=E5=85=B3=E6=94=B9=E4=B8=BA?=
 =?UTF-8?q?=E5=85=A8=E9=85=8D=E7=BD=AE=E5=8C=96=20-=20=E7=A7=BB=E9=99=A4?=
 =?UTF-8?q?=20Phase=201=EF=BC=88=E5=BF=AB=E9=80=9F=E8=AF=84=E4=BC=B0=20160?=
 =?UTF-8?q?0=20token=EF=BC=89+=20Phase=202=EF=BC=88=E6=B7=B1=E5=BA=A6?=
 =?UTF-8?q?=E8=A7=84=E5=88=92=203200=20token=EF=BC=89=E7=9A=84=E4=B8=A4?=
 =?UTF-8?q?=E8=BD=AE=E8=B0=83=E7=94=A8=E9=80=BB=E8=BE=91=EF=BC=8C=E6=94=B9?=
 =?UTF-8?q?=E4=B8=BA=E5=8D=95=E8=BD=AE=E4=B8=8D=E9=99=90=20token=20?=
 =?UTF-8?q?=E6=B7=B1=E5=BA=A6=E8=A7=84=E5=88=92=20-=20PlanDecision=20?=
 =?UTF-8?q?=E7=A7=BB=E9=99=A4=20need=5Fthinking=20=E5=AD=97=E6=AE=B5?=
 =?UTF-8?q?=EF=BC=8Cprompt=20=E8=A7=84=E5=88=99=E5=92=8C=20JSON=20contract?=
 =?UTF-8?q?=20=E5=90=8C=E6=AD=A5=E5=88=A0=E9=99=A4=E8=AF=A5=E5=AD=97?=
 =?UTF-8?q?=E6=AE=B5=20-=20=E5=90=84=E8=8A=82=E7=82=B9=EF=BC=88Plan=20/=20?=
 =?UTF-8?q?Execute=20/=20Deliver=EF=BC=89thinking=20=E5=BC=80=E5=85=B3?=
 =?UTF-8?q?=E4=BB=8E=E7=A1=AC=E7=BC=96=E7=A0=81=E6=94=B9=E4=B8=BA=E4=BB=8E?=
 =?UTF-8?q?=20AgentGraphDeps=20=E8=AF=BB=E5=8F=96=EF=BC=8C=E7=94=B1=20conf?=
 =?UTF-8?q?ig.yaml=20=E7=9A=84=20agent.thinking=20=E6=AE=B5=E6=8C=89?=
 =?UTF-8?q?=E8=8A=82=E7=82=B9=E6=B3=A8=E5=85=A5=20-=20=E6=96=B0=E5=A2=9E?=
 =?UTF-8?q?=20agent.thinking=20=E9=85=8D=E7=BD=AE=E6=AE=B5=EF=BC=88plan=20?=
 =?UTF-8?q?/=20execute=20/=20deliver=20/=20memory=20=E5=9B=9B=E4=B8=AA?=
 =?UTF-8?q?=E7=8B=AC=E7=AB=8B=E5=B8=83=E5=B0=94=E5=BC=80=E5=85=B3=EF=BC=89?=
 =?UTF-8?q?=EF=BC=8Cconfig.example.yaml=20=E8=A1=A5=E9=BD=90=E9=BB=98?=
 =?UTF-8?q?=E8=AE=A4=E5=80=BC=20-=20=E6=96=B0=E5=A2=9E=20resolveThinkingMo?=
 =?UTF-8?q?de=20=E5=85=AC=E5=85=B1=E5=87=BD=E6=95=B0=EF=BC=8Cplan=20/=20ex?=
 =?UTF-8?q?ecute=20/=20deliver=20=E5=92=8C=20memory=20=E5=86=B3=E7=AD=96/?=
 =?UTF-8?q?=E6=8A=BD=E5=8F=96=E9=93=BE=E8=B7=AF=E7=BB=9F=E4=B8=80=E4=BD=BF?=
 =?UTF-8?q?=E7=94=A8=203.=20Memory=20=E6=A8=A1=E5=9D=97=20LLM=20=E8=B0=83?=
 =?UTF-8?q?=E7=94=A8=E6=94=AF=E6=8C=81=20thinking=20=E5=BC=80=E5=85=B3=20-?=
 =?UTF-8?q?=20Config=20=E6=96=B0=E5=A2=9E=20LLMThinking=20=E5=AD=97?=
 =?UTF-8?q?=E6=AE=B5=EF=BC=8Cconfig=5Floader=20=E4=BB=8E=20agent.thinking.?=
 =?UTF-8?q?memory=20=E8=AF=BB=E5=8F=96=20-=20LLMDecisionOrchestrator.Compa?=
 =?UTF-8?q?re=20=E5=92=8C=20LLMWriteOrchestrator.ExtractFacts=20=E7=9A=84?=
 =?UTF-8?q?=20thinking=20=E6=A8=A1=E5=BC=8F=E4=BB=8E=E7=A1=AC=E7=BC=96?=
 =?UTF-8?q?=E7=A0=81=20Disabled=20=E6=94=B9=E4=B8=BA=E8=AF=BB=E5=8F=96?=
 =?UTF-8?q?=E9=85=8D=E7=BD=AE=20=E5=89=8D=E7=AB=AF=EF=BC=9A=201.=20?=
 =?UTF-8?q?=E7=A7=BB=E9=99=A4=E5=8A=A9=E6=89=8B=E8=BE=93=E5=85=A5=E5=8C=BA?=
 =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E9=80=89=E6=8B=A9=E5=99=A8=E5=8F=8A=E5=85=A8?=
 =?UTF-8?q?=E9=83=A8=E5=81=8F=E5=A5=BD=E6=8C=81=E4=B9=85=E5=8C=96=E9=80=BB?=
 =?UTF-8?q?=E8=BE=91=20-=20=E5=88=A0=E9=99=A4=20ModelType=20=E7=B1=BB?=
 =?UTF-8?q?=E5=9E=8B=E3=80=81selectedModel=20ref=E3=80=81MODEL=5FPREFERENC?=
 =?UTF-8?q?E=5FSTORAGE=5FKEY=20=E5=B8=B8=E9=87=8F=20-=20=E5=88=A0=E9=99=A4?=
 =?UTF-8?q?=20isModelType=20/=20loadModelPreferenceMap=20/=20persistModelP?=
 =?UTF-8?q?referenceMap=20/=20savePreferredModel=20/=20resolvePreferredMod?=
 =?UTF-8?q?el=20/=20applyPreferredModelForConversation=20=E5=85=AD?=
 =?UTF-8?q?=E4=B8=AA=E5=87=BD=E6=95=B0=E5=8F=8A=20modelPreferenceMap=20ref?=
 =?UTF-8?q?=20-=20=E5=88=A0=E9=99=A4=20selectedModel=20watch=20=E7=9B=91?=
 =?UTF-8?q?=E5=90=AC=E3=80=81=E5=8F=91=E9=80=81=E6=B6=88=E6=81=AF=E6=97=B6?=
 =?UTF-8?q?=E7=9A=84=20savePreferredModel=20=E8=B0=83=E7=94=A8=E3=80=81?=
 =?UTF-8?q?=E5=88=87=E4=BC=9A=E8=AF=9D=E6=97=B6=E7=9A=84=20applyPreferredM?=
 =?UTF-8?q?odelForConversation=20=E8=B0=83=E7=94=A8=E3=80=81=E4=BC=9A?=
 =?UTF-8?q?=E8=AF=9D=E8=BF=81=E7=A7=BB=E6=97=B6=E7=9A=84=E6=A8=A1=E5=9E=8B?=
 =?UTF-8?q?=E5=81=8F=E5=A5=BD=E8=BF=81=E7=A7=BB=20-=20fetchChatStream=20?=
 =?UTF-8?q?=E7=9A=84=20model=20=E5=8F=82=E6=95=B0=E7=A1=AC=E7=BC=96?=
 =?UTF-8?q?=E7=A0=81=E4=B8=BA=20'worker'=20-=20=E5=88=A0=E9=99=A4=E6=A8=A1?=
 =?UTF-8?q?=E6=9D=BF=E4=B8=AD"=E6=A8=A1=E5=9E=8B"=E4=B8=8B=E6=8B=89?=
 =?UTF-8?q?=E9=80=89=E6=8B=A9=E5=99=A8=EF=BC=88=E6=A0=87=E5=87=86/?=
 =?UTF-8?q?=E7=AD=96=E7=95=A5=EF=BC=89=E5=8F=8A=E5=AF=B9=E5=BA=94=E7=9A=84?=
 =?UTF-8?q?=E5=85=A8=E5=B1=80=E6=A0=B7=E5=BC=8F=20.assistant-model-select-?=
 =?UTF-8?q?panel=202.=20=E4=B8=8A=E4=B8=8B=E6=96=87=E7=AA=97=E5=8F=A3?=
 =?UTF-8?q?=E6=8C=87=E7=A4=BA=E5=99=A8=E7=AE=80=E5=8C=96=E4=B8=BA=E4=BB=85?=
 =?UTF-8?q?=E6=98=BE=E7=A4=BA=E6=80=BB=E5=8D=A0=E7=94=A8=20-=20ContextWind?=
 =?UTF-8?q?owMeter=20=E7=A7=BB=E9=99=A4=20msg0~msg3=20=E5=9B=9B=E6=AE=B5?=
 =?UTF-8?q?=E5=BD=A9=E8=89=B2=E5=88=86=E6=AE=B5=E9=80=BB=E8=BE=91=EF=BC=88?=
 =?UTF-8?q?ContextSegment=20=E6=8E=A5=E5=8F=A3=E3=80=81segments=20computed?=
 =?UTF-8?q?=E3=80=81v-for=20=E6=B8=B2=E6=9F=93=EF=BC=89=20-=20=E8=BF=9B?=
 =?UTF-8?q?=E5=BA=A6=E6=9D=A1=E6=94=B9=E4=B8=BA=E5=8D=95=E4=B8=80=E8=93=9D?=
 =?UTF-8?q?=E8=89=B2=E6=9D=A1=EF=BC=8C=E6=8C=89=20total/budget=20=E6=AF=94?=
 =?UTF-8?q?=E4=BE=8B=E5=A1=AB=E5=85=85=EF=BC=9B=E8=B6=85=E9=A2=84=E7=AE=97?=
 =?UTF-8?q?=E6=97=B6=E5=8F=98=E7=BA=A2=20-=20Tooltip=20=E7=AE=80=E5=8C=96?=
 =?UTF-8?q?=E4=B8=BA=E4=BB=85=E6=98=BE=E7=A4=BA"=E6=80=BB=E8=AE=A1=20X=20/?=
 =?UTF-8?q?=20=E9=A2=84=E7=AE=97=20Y=EF=BC=88Z%=EF=BC=89"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

仓库：无
---
 backend/cmd/start.go                          |   2 +-
 backend/config.example.yaml                   |  19 ++-
 backend/inits/eino.go                         |  47 ++++--
 backend/memory/model/config.go                |   3 +
 .../orchestrator/llm_decision_orchestrator.go |  13 +-
 .../orchestrator/llm_write_orchestrator.go    |   2 +-
 backend/memory/service/config_loader.go       |   1 +
 backend/newAgent/model/graph_run_state.go     |   5 +
 backend/newAgent/model/plan_contract.go       |   1 -
 backend/newAgent/node/agent_nodes.go          |   3 +
 backend/newAgent/node/deliver.go              |   6 +-
 backend/newAgent/node/execute.go              |   3 +-
 backend/newAgent/node/plan.go                 |  82 ++++------
 backend/newAgent/prompt/plan.go               |   8 +-
 backend/service/agentsvc/agent.go             |  10 +-
 backend/service/agentsvc/agent_meta.go        |   8 +-
 backend/service/agentsvc/agent_newagent.go    |  18 ++-
 .../assistant/ContextWindowMeter.vue          |  69 +++-----
 .../components/dashboard/AssistantPanel.vue   | 153 +-----------------
 19 files changed, 147 insertions(+), 306 deletions(-)

diff --git a/backend/cmd/start.go b/backend/cmd/start.go
index e9252ee..8883a73 100644
--- a/backend/cmd/start.go
+++ b/backend/cmd/start.go
@@ -83,7 +83,7 @@ func Start() {
 	memoryMetrics := memoryobserve.NewMetricsRegistry()
 	memoryModule := memory.NewModuleWithObserve(
 		db,
-		infrallm.WrapArkClient(aiHub.Worker),
+		infrallm.WrapArkClient(aiHub.Pro),
 		ragRuntime,
 		memoryCfg,
 		memory.ObserveDeps{
diff --git a/backend/config.example.yaml b/backend/config.example.yaml
index d72acd1..6400a43 100644
--- a/backend/config.example.yaml
+++ b/backend/config.example.yaml
@@ -67,16 +67,27 @@ time:
 
 # 智能体模型与规划参数。
 agent:
-  # 日常执行链路使用的主模型。
-  workerModel: "doubao-seed-2-0-code-preview-260215"
-  # 规划、拆解、策略推导使用的模型。
-  strategistModel: "doubao-seed-2-0-code-preview-260215"
+  # 轻量模型：标题生成等低复杂度、低延迟场景。
+  liteModel: "doubao-seed-2-0-code-preview-260215"
+  # 标准模型：Chat 路由/闲聊/深度回答/Deliver 总结。
+  proModel: "doubao-seed-2-0-code-preview-260215"
+  # 高能力模型：Plan 规划 + Execute ReAct 等深度推理场景。
+  maxModel: "doubao-seed-2-0-code-preview-260215"
   # 模型服务根路径。
   baseURL: "https://ark.cn-beijing.volces.com/api/v3"
   # 日内并发优化并发度，建议按模型配额调整。
   dailyRefineConcurrency: 7
   # 周级跨天配平额度上限，防止过度调整。
   weeklyAdjustBudget: 5
+  thinking:
+    # plan 节点（单轮深度规划），默认开 thinking。
+    plan: true
+    # execute 节点（ReAct 深度推理），默认开 thinking。
+    execute: true
+    # deliver 节点（交付总结），默认关 thinking。
+    deliver: false
+    # 记忆模块（决策比对 + 抽取），默认关 thinking。
+    memory: false
 
 # 通用 RAG 配置。
 rag:
diff --git a/backend/inits/eino.go b/backend/inits/eino.go
index 3a00007..3865599 100644
--- a/backend/inits/eino.go
+++ b/backend/inits/eino.go
@@ -8,32 +8,53 @@ import (
 	"github.com/spf13/viper"
 )
 
-// AIHub 存储不同能力的模型实例
+// AIHub 存储三级模型的实例，按能力分级调度。
+//
+// 分级策略：
+// 1. Lite：轻量模型，用于标题生成等低复杂度、低延迟场景；
+// 2. Pro：标准模型，用于 Chat 路由/闲聊/深度回答/Deliver 总结；
+// 3. Max：高能力模型，用于 Plan 规划和 Execute ReAct 等需要深度推理的场景。
 type AIHub struct {
-	Strategist *ark.ChatModel // 智力担当：处理复杂排程逻辑
-	Worker     *ark.ChatModel // 效率担当：处理简单任务、总结
+	Lite *ark.ChatModel // 轻量模型：标题生成等低复杂度任务
+	Pro  *ark.ChatModel // 标准模型：Chat 路由、闲聊、交付总结
+	Max  *ark.ChatModel // 高能力模型：Plan 规划、Execute ReAct
 }
 
 func InitEino() (*AIHub, error) {
 	ctx := context.Background()
-	worker, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
-		Model:   viper.GetString("agent.workerModel"), // 使用的模型版本
-		BaseURL: viper.GetString("agent.baseURL"),     // Eino API 的基础 URL
-		APIKey:  os.Getenv("ARK_API_KEY"),             // API 密钥
+	baseURL := viper.GetString("agent.baseURL")
+	apiKey := os.Getenv("ARK_API_KEY")
+
+	// 1. Lite 模型：标题生成等低复杂度场景，优先控制成本和延迟。
+	lite, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
+		Model:   viper.GetString("agent.liteModel"),
+		BaseURL: baseURL,
+		APIKey:  apiKey,
 	})
 	if err != nil {
 		return nil, err
 	}
-	strategist, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
-		Model:   viper.GetString("agent.strategistModel"), // 使用的模型版本
-		BaseURL: viper.GetString("agent.baseURL"),         // Eino API 的基础 URL
-		APIKey:  os.Getenv("ARK_API_KEY"),                 // API 密钥
+	// 2. Pro 模型：Chat 路由/闲聊/交付总结等标准复杂度场景。
+	pro, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
+		Model:   viper.GetString("agent.proModel"),
+		BaseURL: baseURL,
+		APIKey:  apiKey,
+	})
+	if err != nil {
+		return nil, err
+	}
+	// 3. Max 模型：Plan 规划和 Execute ReAct 等需要深度推理的场景。
+	maxModel, err := ark.NewChatModel(ctx, &ark.ChatModelConfig{
+		Model:   viper.GetString("agent.maxModel"),
+		BaseURL: baseURL,
+		APIKey:  apiKey,
 	})
 	if err != nil {
 		return nil, err
 	}
 	return &AIHub{
-		Strategist: strategist,
-		Worker:     worker,
+		Lite: lite,
+		Pro:  pro,
+		Max:  maxModel,
 	}, nil
 }
diff --git a/backend/memory/model/config.go b/backend/memory/model/config.go
index bd2b729..810b616 100644
--- a/backend/memory/model/config.go
+++ b/backend/memory/model/config.go
@@ -72,6 +72,9 @@ type Config struct {
 	// 2. 默认 0.5，与"守门员"prompt 的 confidence>=0.5 输出规则配合；
 	// 3. fallback 路径 confidence 设为 0.45，低于默认阈值，LLM 不可用时不写入。
 	WriteMinConfidence float64
+
+	// 记忆模块 LLM 调用是否开启 thinking，由 config.yaml 的 agent.thinking.memory 注入。
+	LLMThinking bool
 }
 
 // NormalizeReadMode 统一读取模式字符串。
diff --git a/backend/memory/orchestrator/llm_decision_orchestrator.go b/backend/memory/orchestrator/llm_decision_orchestrator.go
index 01425ee..7f45da7 100644
--- a/backend/memory/orchestrator/llm_decision_orchestrator.go
+++ b/backend/memory/orchestrator/llm_decision_orchestrator.go
@@ -62,10 +62,11 @@ func (o *LLMDecisionOrchestrator) Compare(
 		infrallm.GenerateOptions{
 			Temperature: 0.1,
 			MaxTokens:   defaultDecisionCompareMaxTokens,
-			Thinking:    infrallm.ThinkingModeDisabled,
+			// Thinking follows agent.thinking.memory; the stage metadata is kept unchanged, as in ExtractFacts.
+			Thinking:    resolveMemoryThinkingMode(o.cfg.LLMThinking),
 			Metadata: map[string]any{
 				"stage": "memory_decision_compare",
 			},
 		},
 	)
 	if err != nil {
@@ -128,3 +129,11 @@ func buildDecisionCompareUserPrompt(fact memorymodel.NormalizedFact, candidate m
 		candidate.MemoryType, candidate.Content,
 	)
 }
+
+// resolveMemoryThinkingMode maps the memory module's boolean thinking switch to an infrallm.ThinkingMode.
+func resolveMemoryThinkingMode(enabled bool) infrallm.ThinkingMode {
+	if !enabled {
+		return infrallm.ThinkingModeDisabled
+	}
+	return infrallm.ThinkingModeEnabled
+}
diff --git a/backend/memory/orchestrator/llm_write_orchestrator.go b/backend/memory/orchestrator/llm_write_orchestrator.go
index b3c16ab..648df72 100644
--- a/backend/memory/orchestrator/llm_write_orchestrator.go
+++ b/backend/memory/orchestrator/llm_write_orchestrator.go
@@ -67,7 +67,7 @@ func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorym
 		infrallm.GenerateOptions{
 			Temperature: clampTemperature(o.cfg.LLMTemperature),
 			MaxTokens:   defaultMemoryExtractMaxTokens,
-			Thinking:    infrallm.ThinkingModeDisabled,
+			Thinking:    resolveMemoryThinkingMode(o.cfg.LLMThinking),
 			Metadata: map[string]any{
 				"stage":           "memory_extract",
 				"user_id":         payload.UserID,
diff --git a/backend/memory/service/config_loader.go b/backend/memory/service/config_loader.go
index b87e6c8..7301c2c 100644
--- a/backend/memory/service/config_loader.go
+++ b/backend/memory/service/config_loader.go
@@ -40,6 +40,7 @@ func LoadConfigFromViper() memorymodel.Config {
 		DecisionFallbackMode:      viper.GetString("memory.decision.fallbackMode"),
 		WriteMode:                 viper.GetString("memory.write.mode"),
 		WriteMinConfidence:        viper.GetFloat64("memory.write.minConfidence"),
+		LLMThinking:               viper.GetBool("agent.thinking.memory"),
 	}
 
 	if cfg.Threshold <= 0 {
diff --git a/backend/newAgent/model/graph_run_state.go b/backend/newAgent/model/graph_run_state.go
index 1628070..8e2d1db 100644
--- a/backend/newAgent/model/graph_run_state.go
+++ b/backend/newAgent/model/graph_run_state.go
@@ -72,6 +72,11 @@ type AgentGraphDeps struct {
 	RoughBuildFunc       RoughBuildFunc           // 按 Service 注入，粗排算法入口
 	WriteSchedulePreview WriteSchedulePreviewFunc // 按 Service 注入，排程预览写入入口
 
+	// thinking 开关：由 config.yaml 的 agent.thinking 段注入，各节点按需读取。
+	ThinkingPlan    bool
+	ThinkingExecute bool
+	ThinkingDeliver bool
+
 	// 记忆预取管线：由 service 层启动的后台检索 goroutine 写入。
 	// channel 携带已渲染的文本内容（非原始 ItemDTO），节点直接写入 pinned block。
 	MemoryFuture   chan string // buffered(1)，携带 renderMemoryPinnedContentByMode 的输出
diff --git a/backend/newAgent/model/plan_contract.go b/backend/newAgent/model/plan_contract.go
index ee14be4..b04e4d1 100644
--- a/backend/newAgent/model/plan_contract.go
+++ b/backend/newAgent/model/plan_contract.go
@@ -52,7 +52,6 @@ type PlanDecision struct {
 	Action          PlanAction     `json:"action"`
 	Reason          string         `json:"reason,omitempty"`
 	Complexity      PlanComplexity `json:"complexity"`
-	NeedThinking    bool           `json:"need_thinking"`
 	PlanSteps       []PlanStep     `json:"plan_steps,omitempty"`
 	NeedsRoughBuild bool           `json:"needs_rough_build,omitempty"`
 	TaskClassIDs    []int          `json:"task_class_ids,omitempty"`
diff --git a/backend/newAgent/node/agent_nodes.go b/backend/newAgent/node/agent_nodes.go
index 42b42e7..29c37df 100644
--- a/backend/newAgent/node/agent_nodes.go
+++ b/backend/newAgent/node/agent_nodes.go
@@ -120,6 +120,7 @@ func (n *AgentNodes) Plan(ctx context.Context, st *newagentmodel.AgentGraphState
 			ChunkEmitter:        st.EnsureChunkEmitter(),
 			ResumeNode:          "plan",
 			AlwaysExecute:       st.Request.AlwaysExecute,
+			ThinkingEnabled:     st.Deps.ThinkingPlan,
 		},
 	); err != nil {
 		return nil, err
@@ -230,6 +231,7 @@ func (n *AgentNodes) Execute(ctx context.Context, st *newagentmodel.AgentGraphSt
 			WriteSchedulePreview:  st.Deps.WriteSchedulePreview,
 			OriginalScheduleState: st.OriginalScheduleState,
 			AlwaysExecute:         st.Request.AlwaysExecute,
+			ThinkingEnabled:       st.Deps.ThinkingExecute,
 		},
 	); err != nil {
 		return nil, err
@@ -277,6 +279,7 @@ func (n *AgentNodes) Deliver(ctx context.Context, st *newagentmodel.AgentGraphSt
 			ConversationContext: st.EnsureConversationContext(),
 			Client:              st.Deps.ResolveDeliverClient(),
 			ChunkEmitter:        st.EnsureChunkEmitter(),
+			ThinkingEnabled:     st.Deps.ThinkingDeliver,
 		},
 	); err != nil {
 		return nil, err
diff --git a/backend/newAgent/node/deliver.go b/backend/newAgent/node/deliver.go
index 28978b8..d72ca07 100644
--- a/backend/newAgent/node/deliver.go
+++ b/backend/newAgent/node/deliver.go
@@ -32,6 +32,7 @@ type DeliverNodeInput struct {
 	ConversationContext *newagentmodel.ConversationContext
 	Client              *infrallm.Client
 	ChunkEmitter        *newagentstream.ChunkEmitter
+	ThinkingEnabled     bool // 是否开启 thinking，由 config.yaml 的 agent.thinking.deliver 注入
 }
 
 // RunDeliverNode 执行一轮交付节点逻辑。
@@ -64,7 +65,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error {
 	}
 
 	// 2. 调 LLM 生成交付总结。
-	summary := generateDeliverSummary(ctx, input.Client, flowState, conversationContext)
+	summary := generateDeliverSummary(ctx, input.Client, flowState, conversationContext, input.ThinkingEnabled)
 
 	// 3. 伪流式推送总结。
 	if strings.TrimSpace(summary) != "" {
@@ -98,6 +99,7 @@ func generateDeliverSummary(
 	client *infrallm.Client,
 	flowState *newagentmodel.CommonState,
 	conversationContext *newagentmodel.ConversationContext,
+	thinkingEnabled bool,
 ) string {
 	if flowState != nil {
 		switch {
@@ -119,7 +121,7 @@ func generateDeliverSummary(
 		infrallm.GenerateOptions{
 			Temperature: 0.5,
 			MaxTokens:   800,
-			Thinking:    infrallm.ThinkingModeDisabled,
+			Thinking:    resolveThinkingMode(thinkingEnabled),
 			Metadata: map[string]any{
 				"stage": deliverStageName,
 			},
diff --git a/backend/newAgent/node/execute.go b/backend/newAgent/node/execute.go
index 8ada052..26532d1 100644
--- a/backend/newAgent/node/execute.go
+++ b/backend/newAgent/node/execute.go
@@ -59,6 +59,7 @@ type ExecuteNodeInput struct {
 	WriteSchedulePreview  newagentmodel.WriteSchedulePreviewFunc
 	OriginalScheduleState *schedule.ScheduleState
 	AlwaysExecute         bool // true 时写工具跳过确认闸门直接执行
+	ThinkingEnabled       bool // 是否开启 thinking，由 config.yaml 的 agent.thinking.execute 注入
 }
 
 // ExecuteRoundObservation 记录执行阶段每轮的关键观察。
@@ -203,7 +204,7 @@ func RunExecuteNode(ctx context.Context, input ExecuteNodeInput) error {
 		infrallm.GenerateOptions{
 			Temperature: 1.0,   // thinking 模式强制要求 temperature=1
 			MaxTokens:   16000, // 需为 thinking chain 留出足够预算
-			Thinking:    infrallm.ThinkingModeEnabled,
+			Thinking:    resolveThinkingMode(input.ThinkingEnabled),
 			Metadata: map[string]any{
 				"stage":      executeStageName,
 				"step_index": flowState.CurrentStep,
diff --git a/backend/newAgent/node/plan.go b/backend/newAgent/node/plan.go
index 28664ce..a7cb50d 100644
--- a/backend/newAgent/node/plan.go
+++ b/backend/newAgent/node/plan.go
@@ -35,19 +35,19 @@ type PlanNodeInput struct {
 	ChunkEmitter        *newagentstream.ChunkEmitter
 	ResumeNode          string
 	AlwaysExecute       bool // true 时计划生成后自动确认，不进入 confirm 节点
+	ThinkingEnabled     bool // 是否开启 thinking，由 config.yaml 的 agent.thinking.plan 注入
 }
 
 // RunPlanNode 执行一轮规划节点逻辑。
 //
 // 步骤说明：
-//  1. 先校验最小依赖，并推送一条”正在规划”的状态，避免用户空等；
-//  2. Phase 1（快速评估）：不开 thinking，让 LLM 同时产出复杂度评估和规划结果；
-//  3. Phase 2（深度规划）：若 LLM 自评需要深度思考且规划已完成，开 thinking 重跑；
-//  4. 若模型先对用户说了话，则先把 speak 伪流式推给前端，并写回 history；
-//  5. 最后按 action 推进流程：
-//     5.1 continue：继续停留在 planning；
-//     5.2 ask_user：打开 pending interaction，后续交给 interrupt 收口；
-//     5.3 plan_done：固化完整计划，刷新 pinned context，并进入 waiting_confirm。
+//  1. 先校验最小依赖，并推送一条"正在规划"的状态，避免用户空等；
+//  2. 单轮深度规划：开 thinking、无 token 上限，让 LLM 一步到位产出完整计划；
+//  3. 若模型先对用户说了话，则先把 speak 伪流式推给前端，并写回 history；
+//  4. 最后按 action 推进流程：
+//     4.1 continue：继续停留在 planning；
+//     4.2 ask_user：打开 pending interaction，后续交给 interrupt 收口；
+//     4.3 plan_done：固化完整计划，刷新 pinned context，并进入 waiting_confirm。
 func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 	runtimeState, conversationContext, emitter, err := preparePlanNodeInput(input)
 	if err != nil {
@@ -69,68 +69,31 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 	// 2. 构造本轮规划输入。
 	messages := newagentprompt.BuildPlanMessages(flowState, conversationContext, input.UserInput)
 
-	// 3. Phase 1：快速评估（开 thinking），让 LLM 同时产出复杂度评估和规划结果。
+	// 3. 单轮深度规划：由配置决定是否开启 thinking，不做 token 上限约束。
 	decision, rawResult, err := infrallm.GenerateJSON[newagentmodel.PlanDecision](
 		ctx,
 		input.Client,
 		messages,
 		infrallm.GenerateOptions{
 			Temperature: 0.2,
-			MaxTokens:   1600,
-			Thinking:    infrallm.ThinkingModeEnabled,
+			Thinking:    resolveThinkingMode(input.ThinkingEnabled),
 			Metadata: map[string]any{
 				"stage": planStageName,
-				"phase": "assessment",
+				"phase": "planning",
 			},
 		},
 	)
 	if err != nil {
 		if rawResult != nil && strings.TrimSpace(rawResult.Text) != "" {
-			return fmt.Errorf("规划评估解析失败，原始输出=%s，错误=%w", strings.TrimSpace(rawResult.Text), err)
+			return fmt.Errorf("规划解析失败，原始输出=%s，错误=%w", strings.TrimSpace(rawResult.Text), err)
 		}
-		return fmt.Errorf("规划评估阶段模型调用失败: %w", err)
+		return fmt.Errorf("规划阶段模型调用失败: %w", err)
 	}
 	if err := decision.Validate(); err != nil {
-		return fmt.Errorf("规划评估决策不合法: %w", err)
+		return fmt.Errorf("规划决策不合法: %w", err)
 	}
 
-	// 4. Phase 2：若 LLM 自评需要深度思考且本轮规划已完成，则开启 thinking 重跑。
-	//    条件：NeedThinking=true + Action=plan_done → 说明 LLM 认为当前无 thinking 的计划质量不够。
-	//    其他 action（continue / ask_user）不需要 thinking，直接用 Phase 1 结果。
-	if decision.NeedThinking && decision.Action == newagentmodel.PlanActionDone {
-		if err := emitter.EmitStatus(
-			planStatusBlockID,
-			planStageName,
-			"deep_planning",
-			"正在深入思考，生成更完善的计划。",
-			false,
-		); err != nil {
-			return fmt.Errorf("深度规划状态推送失败: %w", err)
-		}
-
-		deepDecision, _, deepErr := infrallm.GenerateJSON[newagentmodel.PlanDecision](
-			ctx,
-			input.Client,
-			messages,
-			infrallm.GenerateOptions{
-				Temperature: 0.2,
-				MaxTokens:   3200,
-				Thinking:    infrallm.ThinkingModeEnabled,
-				Metadata: map[string]any{
-					"stage": planStageName,
-					"phase": "deep_planning",
-				},
-			},
-		)
-		if deepErr == nil && deepDecision != nil {
-			if validateErr := deepDecision.Validate(); validateErr == nil {
-				decision = deepDecision
-			}
-		}
-		// 深度规划失败时静默降级到 Phase 1 结果，不中断流程。
-	}
-
-	// 5. 若模型先对用户说了话，且不是 ask_user（ask_user 交给 interrupt 收口），则先以伪流式推送，再写回 history。
+	// 4. 若模型先对用户说了话，且不是 ask_user（ask_user 交给 interrupt 收口），则先以伪流式推送，再写回 history。
 	if strings.TrimSpace(decision.Speak) != "" && decision.Action != newagentmodel.PlanActionAskUser {
 		if err := emitter.EmitPseudoAssistantText(
 			ctx,
@@ -144,7 +107,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 		conversationContext.AppendHistory(schema.AssistantMessage(decision.Speak, nil))
 	}
 
-	// 6. 按规划动作推进流程状态。
+	// 5. 按规划动作推进流程状态。
 	switch decision.Action {
 	case newagentmodel.PlanActionContinue:
 		flowState.Phase = newagentmodel.PhasePlanning
@@ -169,10 +132,10 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 			}
 		}
 		// always_execute 开启时，计划层跳过确认闸门，直接进入执行阶段。
-		// 这样可以与 Execute 节点的“写工具跳过确认”语义保持一致。
+		// 这样可以与 Execute 节点的"写工具跳过确认"语义保持一致。
 		if input.AlwaysExecute {
 			// 1. 自动执行模式不会经过 Confirm 卡片，因此这里先把完整计划明确展示给用户。
-			// 2. 摘要格式复用 Confirm 节点，保证“手动确认”和“自动执行”两条链路文案一致。
+			// 2. 摘要格式复用 Confirm 节点，保证"手动确认"和"自动执行"两条链路文案一致。
 			// 3. 推流后同步写入历史，确保后续 Execute 阶段的上下文也能看到这份计划。
 			summary := strings.TrimSpace(buildPlanSummary(decision.PlanSteps))
 			if summary != "" {
@@ -296,3 +259,12 @@ func buildPinnedPlanText(steps []newagentmodel.PlanStep) string {
 	}
 	return strings.TrimSpace(strings.Join(lines, "\n\n"))
 }
+
+// resolveThinkingMode converts a boolean thinking switch into the matching infrallm.ThinkingMode.
+// Shared by the plan / execute / deliver nodes in this package.
+func resolveThinkingMode(enabled bool) infrallm.ThinkingMode {
+	if !enabled {
+		return infrallm.ThinkingModeDisabled
+	}
+	return infrallm.ThinkingModeEnabled
+}
diff --git a/backend/newAgent/prompt/plan.go b/backend/newAgent/prompt/plan.go
index 6b8a0ce..c5e3bd6 100644
--- a/backend/newAgent/prompt/plan.go
+++ b/backend/newAgent/prompt/plan.go
@@ -21,8 +21,7 @@ const planSystemPrompt = `
 5. plan_steps 必须使用自然语言，便于后端将完整 plan 重新注入到后续上下文顶部。
 6. 只输出 JSON，不要输出 markdown，不要输出额外解释，不要在 JSON 外再补文字。
 7. 每次输出前先评估任务复杂度：simple（简单明确，无复杂依赖）、moderate（多步操作，需要一定推理）、complex（需要深度推理、多方案比较或复杂依赖关系）。
-8. 根据复杂度判断 need_thinking：你是否需要深度思考才能生成高质量计划？当不确定时倾向于 false。
-9. 粗排识别规则：若满足以下两个条件，在 action=plan_done 时附加 needs_rough_build=true 和 task_class_ids：
+8. 粗排识别规则：若满足以下两个条件，在 action=plan_done 时附加 needs_rough_build=true 和 task_class_ids：
    条件1：用户输入中存在"任务类 ID"字段（见上下文"任务类 ID"部分）；
    条件2：用户意图明确是"批量安排/帮我排课/把任务类排进日程"等批量调度需求。
    满足时：后端会在用户确认计划后自动运行粗排算法（硬性约束已由算法保证，无需 LLM 校验）。
@@ -99,7 +98,6 @@ func BuildPlanDecisionContractText() string {
 - action：只能是 %s / %s / %s
 - reason：给后端和日志看的简短说明
 - complexity：任务复杂度，只能是 simple / moderate / complex
-- need_thinking：是否需要深度思考才能生成高质量计划，只能是 true / false
 - plan_steps：仅当 action=%s 时允许返回；返回时必须是完整计划，不是增量
 - plan_steps[].content：步骤正文，必填
 - plan_steps[].done_when：可选，建议写"什么情况下算这一步做完"
@@ -112,7 +110,6 @@ func BuildPlanDecisionContractText() string {
   "action": "%s",
   "reason": "当前信息已足够继续规划",
-  "complexity": "moderate",
-  "need_thinking": false
+  "complexity": "moderate"
 }
 
 {
@@ -120,7 +117,6 @@ func BuildPlanDecisionContractText() string {
   "action": "%s",
   "reason": "当前时间范围仍不明确",
-  "complexity": "simple",
-  "need_thinking": false
+  "complexity": "simple"
 }
 
 {
@@ -128,7 +124,6 @@ func BuildPlanDecisionContractText() string {
   "action": "%s",
   "reason": "当前计划已具备执行条件",
   "complexity": "simple",
-  "need_thinking": false,
   "plan_steps": [
     {
       "content": "先确认本周可用时间范围",
diff --git a/backend/service/agentsvc/agent.go b/backend/service/agentsvc/agent.go
index c777bc8..97c1b51 100644
--- a/backend/service/agentsvc/agent.go
+++ b/backend/service/agentsvc/agent.go
@@ -104,14 +104,10 @@ func thinkingModeToBool(mode string) bool {
 
 // pickChatModel 根据请求选择模型。
 // 当前约定：
-// - strategist：策略模型；
-// - 其余值默认 worker（包含空字符串场景）。
+// - 旧链路已全面切到 newAgent graph，这里仅作为 runNormalChatFlow 回退时的模型选择入口；
+// - 统一返回 Pro 模型，旧 strategist 参数不再生效。
 func (s *AgentService) pickChatModel(requestModel string) (*ark.ChatModel, string) {
-	modelName := strings.TrimSpace(requestModel)
-	if strings.EqualFold(modelName, "strategist") {
-		return s.AIHub.Strategist, "strategist"
-	}
-	return s.AIHub.Worker, "worker"
+	return s.AIHub.Pro, "pro"
 }
 
 // PersistChatHistory 是 Agent 聊天链路唯一的“消息持久化入口”。
diff --git a/backend/service/agentsvc/agent_meta.go b/backend/service/agentsvc/agent_meta.go
index e63c501..c3ea28f 100644
--- a/backend/service/agentsvc/agent_meta.go
+++ b/backend/service/agentsvc/agent_meta.go
@@ -278,15 +278,15 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
 }
 
 // pickTitleModel 选择用于标题生成的模型。
-// 优先 worker（成本低、速度快）；worker 不可用时回退 strategist。
+// 优先 Lite（成本低、速度快）；Lite 不可用时回退 Pro。
 func (s *AgentService) pickTitleModel() *ark.ChatModel {
 	if s.AIHub == nil {
 		return nil
 	}
-	if s.AIHub.Worker != nil {
-		return s.AIHub.Worker
+	if s.AIHub.Lite != nil {
+		return s.AIHub.Lite
 	}
-	return s.AIHub.Strategist
+	return s.AIHub.Pro
 }
 
 // buildConversationTitleUserPrompt 把消息历史拼成可读文本供模型总结。
diff --git a/backend/service/agentsvc/agent_newagent.go b/backend/service/agentsvc/agent_newagent.go
index 159bb36..9521733 100644
--- a/backend/service/agentsvc/agent_newagent.go
+++ b/backend/service/agentsvc/agent_newagent.go
@@ -15,6 +15,7 @@ import (
 	newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
 	schedule "github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
 	"github.com/cloudwego/eino/schema"
+	"github.com/spf13/viper"
 
 	agentchat "github.com/LoveLosita/smartflow/backend/agent/chat"
 	"github.com/LoveLosita/smartflow/backend/conv"
@@ -149,10 +150,12 @@ func (s *AgentService) runNewAgentGraph(
 	graphRequest.Normalize()
 
 	// 7. 适配 LLM clients（从 AIHub 的 ark.ChatModel 转换为 newAgent LLM Client）。
-	chatClient := infrallm.WrapArkClient(s.AIHub.Worker)
-	planClient := infrallm.WrapArkClient(s.AIHub.Worker)
-	executeClient := infrallm.WrapArkClient(s.AIHub.Worker)
-	deliverClient := infrallm.WrapArkClient(s.AIHub.Worker)
+	// 7.1 Chat/Deliver 使用 Pro 模型：路由分流、闲聊、交付总结属于标准复杂度。
+	// 7.2 Plan/Execute 使用 Max 模型：规划和 ReAct 循环需要深度推理能力。
+	chatClient := infrallm.WrapArkClient(s.AIHub.Pro)
+	planClient := infrallm.WrapArkClient(s.AIHub.Max)
+	executeClient := infrallm.WrapArkClient(s.AIHub.Max)
+	deliverClient := infrallm.WrapArkClient(s.AIHub.Pro)
 
 	// 8. 适配 SSE emitter。
 	sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan)
@@ -173,6 +176,9 @@ func (s *AgentService) runNewAgentGraph(
 		RoughBuildFunc:       s.makeRoughBuildFunc(),
 		WriteSchedulePreview: s.makeWriteSchedulePreviewFunc(),
 		MemoryFuture:         memoryFuture,
+		ThinkingPlan:         !viper.IsSet("agent.thinking.plan") || viper.GetBool("agent.thinking.plan"),       // unset key keeps the pre-switch default: on
+		ThinkingExecute:      !viper.IsSet("agent.thinking.execute") || viper.GetBool("agent.thinking.execute"), // unset key keeps the pre-switch default: on
+		ThinkingDeliver:      viper.GetBool("agent.thinking.deliver"),
 	}
 
 	// 10. 构造 AgentGraphRunInput 并运行 graph。
@@ -190,8 +196,8 @@ func (s *AgentService) runNewAgentGraph(
 		log.Printf("[ERROR] newAgent graph 执行失败 trace=%s chat=%s: %v", traceID, chatID, graphErr)
 		pushErrNonBlocking(errChan, fmt.Errorf("graph 执行失败: %w", graphErr))
 
-		// Graph 出错时回退普通聊天，保证可用性。
-		s.runNormalChatFlow(requestCtx, s.AIHub.Worker, resolvedModelName, userMessage, "", nil, retryMeta, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
+		// Graph 出错时回退普通聊天，保证可用性。回退使用 Pro 模型。
+		s.runNormalChatFlow(requestCtx, s.AIHub.Pro, resolvedModelName, userMessage, "", nil, retryMeta, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
 		return
 	}
 
diff --git a/frontend/src/components/assistant/ContextWindowMeter.vue b/frontend/src/components/assistant/ContextWindowMeter.vue
index 7978808..7ea4af0 100644
--- a/frontend/src/components/assistant/ContextWindowMeter.vue
+++ b/frontend/src/components/assistant/ContextWindowMeter.vue
@@ -3,14 +3,6 @@ import { computed } from 'vue'
 
 import type { ConversationContextStats } from '@/types/dashboard'
 
-interface ContextSegment {
-  key: 'msg0' | 'msg1' | 'msg2' | 'msg3'
-  label: string
-  value: number
-  widthPercent: number
-  color: string
-}
-
 const props = withDefaults(
   defineProps<{
     stats?: ConversationContextStats | null
@@ -33,6 +25,14 @@ const usagePercent = computed(() => {
   return Math.round((safeStats.value.total / safeStats.value.budget) * 100)
 })
 
+// Fill width of the usage bar, expressed as a percentage of the track.
+const barWidthPercent = computed(() => {
+  const stats = safeStats.value
+  const ratio = (!stats || stats.budget <= 0) ? 0 : stats.total / stats.budget
+  // Cap at 100 so an over-budget conversation simply fills the whole track.
+  return Math.min(100, ratio * 100)
+})
+
 const isOverBudget = computed(() => {
   if (!safeStats.value) {
     return false
@@ -40,31 +40,6 @@ const isOverBudget = computed(() => {
   return safeStats.value.total > safeStats.value.budget
 })
 
-const segments = computed<ContextSegment[]>(() => {
-  const stats = safeStats.value
-  if (!stats) {
-    return []
-  }
-
-  // 1. 进度条固定做成紧凑胶囊，因此按 max(total, budget) 计算比例，既保留预算留白，也兼容超预算占满。
-  // 2. 四段颜色继续对应后端 msg0~msg3 的真实语义，避免前端为了视觉压缩而打乱统计含义。
-  // 3. 零值段不渲染，减少窄尺寸下的噪点，让小组件也能保留基本可读性。
-  const base = Math.max(stats.total, stats.budget, 1)
-  const rawSegments = [
-    { key: 'msg0', label: '规则', value: stats.msg0, color: 'linear-gradient(90deg, #2556c7, #3b82f6)' },
-    { key: 'msg1', label: '历史', value: stats.msg1, color: 'linear-gradient(90deg, #0f766e, #14b8a6)' },
-    { key: 'msg2', label: '执行', value: stats.msg2, color: 'linear-gradient(90deg, #b45309, #f59e0b)' },
-    { key: 'msg3', label: '当前', value: stats.msg3, color: 'linear-gradient(90deg, #15803d, #22c55e)' },
-  ] as const
-
-  return rawSegments
-    .filter((segment) => segment.value > 0)
-    .map((segment) => ({
-      ...segment,
-      widthPercent: Math.max(0, Math.min(100, (segment.value / base) * 100)),
-    }))
-})
-
 const usageText = computed(() => {
   if (props.loading) {
     return '...'
@@ -86,9 +61,7 @@ const tooltipText = computed(() => {
     return props.disabled ? '新会话发送首条消息后展示上下文窗口统计' : '当前会话暂无上下文窗口统计'
   }
 
-  const segmentText = segments.value.map((segment) => `${segment.label} ${segment.value}`).join(' / ')
-  const usageSummary = `总计 ${safeStats.value.total} / 预算 ${safeStats.value.budget}（${usagePercent.value}%）`
-  return segmentText ? `${usageSummary}；${segmentText}` : usageSummary
+  return `总计 ${safeStats.value.total} / 预算 ${safeStats.value.budget}（${usagePercent.value}%）`
 })
 </script>
 
@@ -106,18 +79,7 @@ const tooltipText = computed(() => {
 
     <div class="assistant-context-meter__track" aria-hidden="true">
       <div v-if="loading" class="assistant-context-meter__loading-bar" />
-
-      <template v-else>
-        <div
-          v-for="segment in segments"
-          :key="segment.key"
-          class="assistant-context-meter__segment"
-          :style="{
-            width: `${segment.widthPercent}%`,
-            background: segment.color,
-          }"
-        />
-      </template>
+      <div v-else-if="barWidthPercent > 0" class="assistant-context-meter__bar" :style="{ width: `${barWidthPercent}%` }" />
     </div>
 
     <span class="assistant-context-meter__value">{{ usageText }}</span>
@@ -195,7 +157,6 @@ const tooltipText = computed(() => {
   background:
     linear-gradient(180deg, rgba(232, 238, 246, 0.95), rgba(243, 247, 251, 0.95)),
     #edf2f7;
-  display: flex;
 }
 
 .assistant-context-meter--disabled .assistant-context-meter__track {
@@ -204,9 +165,15 @@ const tooltipText = computed(() => {
     #eef2f7;
 }
 
-.assistant-context-meter__segment {
+.assistant-context-meter__bar {
   height: 100%;
-  flex: 0 0 auto;
+  border-radius: inherit;
+  background: linear-gradient(90deg, #2556c7, #3b82f6);
+  transition: width 0.3s ease;
+}
+
+.assistant-context-meter--danger .assistant-context-meter__bar {
+  background: linear-gradient(90deg, #b42318, #ef4444);
 }
 
 .assistant-context-meter__loading-bar {
diff --git a/frontend/src/components/dashboard/AssistantPanel.vue b/frontend/src/components/dashboard/AssistantPanel.vue
index abb3d64..e262295 100644
--- a/frontend/src/components/dashboard/AssistantPanel.vue
+++ b/frontend/src/components/dashboard/AssistantPanel.vue
@@ -48,7 +48,6 @@ interface StreamEventPayload {
   error?: StreamErrorPayload
 }
 
-type ModelType = 'worker' | 'strategist'
 
 interface ConversationGroup {
   key: string
@@ -86,7 +85,7 @@ const conversationLoadingMore = ref(false)
 const chatLoading = ref(false)
 const historyExpanded = ref(true)
 const selectedConversationId = ref('')
-const selectedModel = ref<ModelType>('worker')
+
 const selectedThinkingMode = ref<ThinkingModeType>('auto')
 const messageInput = ref('')
 const historyPanelWidth = ref(props.initialHistoryWidth)
@@ -120,7 +119,7 @@ const quickActions = [
   '给我一个更稳妥的推进方案',
 ]
 
-const MODEL_PREFERENCE_STORAGE_KEY = 'smartflow.assistant.model.byConversation.v1'
+
 const DEFAULT_PLANNING_PROMPT = '请基于这些任务类帮我做一版智能编排。'
 
 let messageScrollRaf = 0
@@ -336,85 +335,6 @@ const contextStatsDisabled = computed(() => {
   return !selectedConversationId.value || isDraftConversationId(selectedConversationId.value)
 })
 
-function isModelType(value: unknown): value is ModelType {
-  return value === 'worker' || value === 'strategist'
-}
-
-function loadModelPreferenceMap() {
-  if (typeof window === 'undefined') {
-    return {} as Record<string, ModelType>
-  }
-
-  try {
-    const raw = window.localStorage.getItem(MODEL_PREFERENCE_STORAGE_KEY)
-    if (!raw) {
-      return {} as Record<string, ModelType>
-    }
-
-    const parsed = JSON.parse(raw) as unknown
-    const normalized: Record<string, ModelType> = {}
-    const entries = typeof parsed === 'object' && parsed ? Object.entries(parsed) : []
-
-    // 1. 只接收结构合法且值在白名单内的记录，避免脏数据把模型值污染为非法字符串。
-    // 2. 键为空字符串的记录直接丢弃，防止“新建会话未落库”场景写入无效索引。
-    // 3. 解析失败时回退为空对象，不阻塞聊天主流程。
-    for (const [conversationId, model] of entries) {
-      if (!conversationId || !isModelType(model)) {
-        continue
-      }
-      normalized[conversationId] = model
-    }
-
-    return normalized
-  } catch {
-    return {} as Record<string, ModelType>
-  }
-}
-
-const modelPreferenceMap = ref<Record<string, ModelType>>(loadModelPreferenceMap())
-
-function persistModelPreferenceMap() {
-  if (typeof window === 'undefined') {
-    return
-  }
-
-  try {
-    window.localStorage.setItem(MODEL_PREFERENCE_STORAGE_KEY, JSON.stringify(modelPreferenceMap.value))
-  } catch {
-    // 1. 本地存储失败只影响“记忆体验”，不影响消息收发主链路。
-    // 2. 这里静默处理，避免用户每次切模型都被错误提示打断。
-    // 3. 若用户清理缓存或隐私模式限制写入，后续会自动退化为会话内临时选择。
-  }
-}
-
-function savePreferredModel(conversationId: string, model: ModelType) {
-  if (!conversationId || modelPreferenceMap.value[conversationId] === model) {
-    return
-  }
-
-  modelPreferenceMap.value = {
-    ...modelPreferenceMap.value,
-    [conversationId]: model,
-  }
-  persistModelPreferenceMap()
-}
-
-function resolvePreferredModel(conversationId: string) {
-  if (!conversationId) {
-    return null
-  }
-
-  return modelPreferenceMap.value[conversationId] ?? null
-}
-
-function applyPreferredModelForConversation(conversationId: string) {
-  const preferredModel = resolvePreferredModel(conversationId)
-  if (!preferredModel || preferredModel === selectedModel.value) {
-    return
-  }
-
-  selectedModel.value = preferredModel
-}
 
 function ensureConversationBucket(conversationId: string) {
   if (!conversationMessagesMap[conversationId]) {
@@ -476,16 +396,6 @@ function migrateConversationState(fromConversationId: string, toConversationId:
     delete conversationMetaMap[fromConversationId]
   }
 
-  if (modelPreferenceMap.value[fromConversationId]) {
-    const migratedModelMap = { ...modelPreferenceMap.value }
-    if (!migratedModelMap[toConversationId]) {
-      migratedModelMap[toConversationId] = migratedModelMap[fromConversationId]!
-    }
-    delete migratedModelMap[fromConversationId]
-    modelPreferenceMap.value = migratedModelMap
-    persistModelPreferenceMap()
-  }
-
   const latestMap = new Map<string, ConversationListItem>()
   const deduplicated: ConversationListItem[] = []
   const seen = new Set<string>()
@@ -1299,7 +1209,6 @@ async function loadConversationContextStats(conversationId: string, forceReload
 async function selectConversation(conversationId: string) {
   cancelEditUserMessage()
   selectedConversationId.value = conversationId
-  applyPreferredModelForConversation(conversationId)
   await Promise.allSettled([
     loadConversationMessages(conversationId),
     ensureConversationMeta(conversationId),
@@ -1502,7 +1411,7 @@ async function streamAssistantReply(
   const response = await fetchChatStream({
     conversation_id: isDraftConversationId(draftConversationId) ? undefined : draftConversationId,
     message: text,
-    model: selectedModel.value,
+    model: 'worker',
     thinking: selectedThinkingMode.value,
     extra: requestExtra,
   })
@@ -1577,8 +1486,6 @@ async function sendMessage(preset?: string) {
   if (!selectedConversationId.value || shouldStartFreshPlanningConversation) {
     selectedConversationId.value = draftConversationId
   }
-  savePreferredModel(draftConversationId, selectedModel.value)
-
   ensureConversationBucket(draftConversationId)
   unavailableHistoryMap[draftConversationId] = false
 
@@ -1734,16 +1641,6 @@ watch(
   },
 )
 
-watch(
-  selectedModel,
-  (nextModel) => {
-    const conversationId = selectedConversationId.value
-    if (!conversationId) {
-      return
-    }
-    savePreferredModel(conversationId, nextModel)
-  },
-)
 
 onMounted(async () => {
   reasoningTicker = window.setInterval(() => {
@@ -2126,20 +2023,6 @@ onBeforeUnmount(() => {
                     </el-select>
                   </div>
 
-                  <div class="assistant-toolbar__pill assistant-toolbar__pill--select assistant-toolbar__pill--ds-model">
-                    <span class="assistant-toolbar__select-label">模型</span>
-                    <el-select
-                      v-model="selectedModel"
-                      class="assistant-toolbar__select-box"
-                      size="small"
-                      popper-class="assistant-model-select-panel"
-                      placement="top-start"
-                      :teleported="true"
-                    >
-                      <el-option value="worker" label="标准" />
-                      <el-option value="strategist" label="策略" />
-                    </el-select>
-                  </div>
 
                   <ContextWindowMeter
                     class="assistant-toolbar__context-meter"
@@ -3183,7 +3066,6 @@ onBeforeUnmount(() => {
   font-weight: 600;
 }
 
-.assistant-toolbar__pill--ds-model,
 .assistant-toolbar__pill--ds-thinking {
   height: 32px;
   padding: 0 8px 0 10px;
@@ -3200,10 +3082,6 @@ onBeforeUnmount(() => {
   min-width: 138px;
 }
 
-.assistant-toolbar__pill--ds-model {
-  min-width: 144px;
-}
-
 .assistant-toolbar__context-meter {
   width: 144px;
   min-width: 144px;
@@ -3435,30 +3313,5 @@ onBeforeUnmount(() => {
 }
 </style>
 <style>
-.assistant-model-select-panel.el-popper {
-  border-radius: 12px;
-  border: 1px solid rgba(15, 23, 42, 0.1);
-  box-shadow: 0 10px 28px rgba(15, 23, 42, 0.14);
-  padding: 6px;
-}
 
-.assistant-model-select-panel .el-select-dropdown__item {
-  height: 36px;
-  line-height: 36px;
-  border-radius: 8px;
-  padding: 0 12px;
-  color: #4d5d73;
-  font-size: 14px;
-  font-weight: 600;
-}
-
-.assistant-model-select-panel .el-select-dropdown__item.hover,
-.assistant-model-select-panel .el-select-dropdown__item:hover {
-  background: rgba(51, 95, 194, 0.1);
-}
-
-.assistant-model-select-panel .el-select-dropdown__item.is-selected {
-  color: #2f56b0;
-  background: rgba(51, 95, 194, 0.16);
-}
 </style>