Version: 0.9.22.dev.260416

后端：
1. 品牌文案与聊天定位统一切到 SmartMate，并放宽非排程问答能力
   - 系统人设、路由、排程、查询、交付提示统一从 SmartFlow 改为 SmartMate
   - 明确普通问答/生活建议/开放讨论可正常回答，deep_answer 不再输出“让我想想”等占位话术
   - thinkingMode=auto 时，deep_answer 默认开启 thinking，execute 继续跟随路由决策，其余路由默认关闭
2. Memory 读取链路升级为“结构化强约束 + 语义候选”hybrid 模式，并补齐注入渲染 / Execute 消费
   - 新增 read.mode、四类记忆预算、inject.renderMode 等配置及默认值
   - 落地 HybridRetrieve，统一 MySQL/RAG 读侧作用域、三级去重（ID/hash/text）、统一重排与按类型预算裁剪
   - 新增 FindPinnedByUser、content_hash DTO/兜底补算、legacy/RAG 共用读侧查询口径与 fallback 逻辑
   - 记忆注入支持 flat/typed_v2 两种渲染，execute msg3 正式消费 memory_context，主链路注入 MemoryReader 时同步透传 memory 配置
3. Memory 第二步/第三步 handoff 与治理文档补齐
   - HANDOFF_Memory向Mem0靠拢三步冲刺计划.md 从 newAgent 迁到 memory 目录，并补充“我的记忆”增删改查与最小留痕口径
   - 新增 backend/memory/记忆模块第二步计划.md、backend/memory/第三步治理与观测落地计划.md，分别拆解 hybrid 读取注入闭环与治理/观测/清理路线
   - 同步更新 backend/memory/Log.txt 调试日志

前端：
1. 助手输入区新增“智能编排”任务类选择器，并把 task_class_ids 作为请求 extra 透传
   - 新建 frontend/src/components/assistant/TaskClassPlanningPicker.vue，支持拉取任务类列表、临时勾选、已选标签回显与清空
   - 更新 frontend/src/components/dashboard/AssistantPanel.vue、frontend/src/types/dashboard.ts：Chat extra 正式建模 task_class_ids / retry 字段；当本轮带编排任务类时强制新起会话，避免把现有会话历史误混入新编排
2. 会话上下文窗口统计接入前端展示
   - 更新 frontend/src/api/agent.ts、新建 frontend/src/components/assistant/ContextWindowMeter.vue、更新 frontend/src/components/dashboard/AssistantPanel.vue、frontend/src/types/dashboard.ts：接入 /agent/context-stats，兼容 object/string/null 三种返回；在输入工具栏展示 msg0~msg3 占比与预算使用率
3. 助手面板交互细节优化
   - 更新 frontend/src/components/dashboard/AssistantPanel.vue：thinking 开关改为 auto/true/false 三态选择；切会话与重试后同步刷新 context stats；历史列表首屏不足时自动继续分页直到形成滚动区

仓库：无
2026-04-16 18:29:17 +08:00
parent 634a9fb926
commit a1b2ffedb8
38 changed files with 3150 additions and 277 deletions
--- a/backend/agent/chat/prompt.go
+++ b/backend/agent/chat/prompt.go
@@ -1,8 +1,11 @@
 package agentchat

 const (
-	// SystemPrompt 全局系统人设：定义 SmartFlow 的基本调性
-	SystemPrompt = `你叫 SmartFlow，是专为重邮（CQUPT）学子打造的智能排程专家。
-你的回复应当专业、干练，偶尔可以带一点程序员式的冷幽默。
+	// SystemPrompt 全局系统人设：定义 SmartMate 的基本调性
+	SystemPrompt = `你叫 SmartMate，是时伴（SmartMate）的中文 AI 排程伙伴，面向大学生提供陪伴式日程管理与日常协助。
+你擅长课表与任务安排、任务管理、学习规划和随口记，也可以正常回答日常问答、生活建议、信息整理、分析讨论等非排程问题。
+你的目标是像一个越用越懂用户的伙伴一样，结合历史对话、长期记忆和当前上下文，给出贴心、清晰、可信的帮助。
+你的回复应当专业、自然、有陪伴感，偶尔可以带一点轻松幽默。
+如果用户的问题与日程无关，不要因为“不属于排程”就拒绝、回避或强行转到任务安排；只要不需要工具且你有把握，就直接回答。
 重要约束：你无法直接写入数据库。除非系统明确告知“任务已落库成功”，否则禁止使用“已安排/已记录/已帮你记下”等完成态表述。`
 )
--- a/backend/agent/prompt/quicknote.go
+++ b/backend/agent/prompt/quicknote.go
@@ -2,7 +2,7 @@ package agentprompt

 const (
 	// QuickNotePlanPrompt 用于“单请求聚合规划”。
-	QuickNotePlanPrompt = `你是 SmartFlow 的任务聚合规划器。
+	QuickNotePlanPrompt = `你是 SmartMate 的任务聚合规划器。
 你将基于用户输入，一次性输出任务规划结果，供后端直接写库。

 必须完成以下五件事：
@@ -20,7 +20,7 @@ const (
 - banter 不得新增或修改任务事实（任务名、时间、优先级）。`

 	// QuickNoteIntentPrompt 用于第一阶段：判断用户输入是否属于“随口记”。
-	QuickNoteIntentPrompt = `你是 SmartFlow 的“随口记分诊器”。
+	QuickNoteIntentPrompt = `你是 SmartMate 的“随口记分诊器”。
 请判断用户输入是否表达了“帮我记一个任务/日程”的需求。
 - 若是，请提取任务标题与时间线索。
 - 时间处理必须严谨：若出现相对时间（如明天/后天/下周一/今晚），必须基于上文给出的“当前时间”换算为绝对时间。
@@ -28,14 +28,14 @@ const (
 - 不要声称已经写入数据库。`

 	// QuickNotePriorityPrompt 用于第二阶段：将任务归类到四象限优先级，并评估紧急分界线。
-	QuickNotePriorityPrompt = `你是 SmartFlow 的任务优先级评估器。
+	QuickNotePriorityPrompt = `你是 SmartMate 的任务优先级评估器。
 根据任务内容、时间约束和执行成本，输出优先级 priority_group：
 1=重要且紧急，2=重要不紧急，3=简单不重要，4=不简单不重要。
 请给出简短理由，理由必须可解释。
 若你认为该任务需要后续自动平移，请额外输出 urgency_threshold_at（绝对时间，yyyy-MM-dd HH:mm）；否则输出空字符串。`

 	// QuickNoteReplyBanterPrompt 用于随口记成功后的“轻松跟进句”生成。
-	QuickNoteReplyBanterPrompt = `你是 SmartFlow 的中文口语化回复润色助手。
+	QuickNoteReplyBanterPrompt = `你是 SmartMate 的中文口语化回复润色助手。
 请根据用户原话生成一句轻松自然的跟进话术，让回复更有温度。
 要求：
 - 只输出一句中文，不超过30字。
--- a/backend/agent/prompt/route.go
+++ b/backend/agent/prompt/route.go
@@ -6,7 +6,7 @@ import (
 )

 const routeSystemPrompt = `
-你是 SmartFlow 的一级路由助手。
+你是 SmartMate 的一级路由助手。
 你的职责不是回答用户，而是判断这条消息更适合走哪条能力链路。

 当前 Agent 仍在逐批迁移阶段，因此这里只先保留 prompt 落点与职责说明。
--- a/backend/agent/prompt/schedule.go
+++ b/backend/agent/prompt/schedule.go
@@ -7,7 +7,7 @@ const (
 	// 1. 负责把自然语言转成结构化 JSON，供后端节点分流与执行；
 	// 2. 负责抽取 task_class_ids / strategy / task_tags 等关键字段；
 	// 3. 不负责做排程计算，不负责做工具调用。
-	SchedulePlanIntentPrompt = `你是 SmartFlow 的排程意图分析器。
+	SchedulePlanIntentPrompt = `你是 SmartMate 的排程意图分析器。
 请根据用户输入，提取排程意图与约束条件。

 必须完成以下任务：
@@ -52,7 +52,7 @@ const (
 	// 1. 只处理“单天”数据，避免跨天决策污染；
 	// 2. 通过工具调用做小步调整；
 	// 3. 不负责周级配平，不负责最终总结。
-	SchedulePlanDailyReactPrompt = `你是 SmartFlow 日内排程优化器。
+	SchedulePlanDailyReactPrompt = `你是 SmartMate 日内排程优化器。

 你将收到一天内的日程安排（JSON 数组），其中：
 - status="existing"：已确定的课程或任务，不可移动
@@ -96,7 +96,7 @@ const (
 	// 2. 显式区分总预算与有效预算，避免模型对“次数扣减”产生困惑；
 	// 3. 明确“输入数据已过后端硬校验”，避免模型把合法嵌入误判为冲突；
 	// 4. 工具失败结果会回传到下一轮，模型只需“走一步看一步”。
-	SchedulePlanWeeklyReactPrompt = `你是 SmartFlow 周级排程配平器。
+	SchedulePlanWeeklyReactPrompt = `你是 SmartMate 周级排程配平器。

 单日内的排程已优化完毕，你当前只负责“单周微调”。

@@ -159,7 +159,7 @@ const (
 	// 1. 只做读数据总结，不参与工具调用与状态修改；
 	// 2. 输出面向用户的自然语言；
 	// 3. 失败由上层兜底文案处理。
-	SchedulePlanFinalCheckPrompt = `你是 SmartFlow 排程方案总结专家。
+	SchedulePlanFinalCheckPrompt = `你是 SmartMate 排程方案总结专家。
 你的任务是为用户生成一段友好、自然的排程总结。

 要求：
--- a/backend/agent/prompt/schedule_refine.go
+++ b/backend/agent/prompt/schedule_refine.go
@@ -2,7 +2,7 @@ package agentprompt

 const (
 	// ScheduleRefineContractPrompt 负责把用户自然语言微调请求抽取为结构化契约。
-	ScheduleRefineContractPrompt = `你是 SmartFlow 的排程微调契约分析器。
+	ScheduleRefineContractPrompt = `你是 SmartMate 的排程微调契约分析器。
 你会收到：当前时间、用户请求、已有排程摘要。
 请只输出 JSON，不要 Markdown，不要解释，不要代码块：
 {
@@ -32,7 +32,7 @@ const (
 5. hard_assertions 必须尽量结构化，避免只给自然语言目标。`

 	// ScheduleRefinePlannerPrompt 只负责生成“执行路径”，不直接执行动作。
-	ScheduleRefinePlannerPrompt = `你是 SmartFlow 的排程微调 Planner。
+	ScheduleRefinePlannerPrompt = `你是 SmartMate 的排程微调 Planner。
 你会收到：用户请求、契约、最近动作观察。
 请只输出 JSON，不要 Markdown，不要解释，不要代码块：
 {
@@ -48,7 +48,7 @@ const (
 5. 不要输出半截 JSON。`

 	// ScheduleRefineReactPrompt 用于“单任务微步 ReAct”执行器。
-	ScheduleRefineReactPrompt = `你是 SmartFlow 的单任务微步 ReAct 执行器。
+	ScheduleRefineReactPrompt = `你是 SmartMate 的单任务微步 ReAct 执行器。
 当前只处理一个任务（CURRENT_TASK），不能发散到其它任务的主动改动。
 你每轮只能做两件事之一：
 1) 调用一个工具（基础工具或复合工具）
@@ -122,7 +122,7 @@ const (
 18. 为保证解析稳定：goal_check<=50字，decision<=90字，summary<=60字。`

 	// ScheduleRefinePostReflectPrompt 要求模型基于真实工具结果做复盘，不允许“脑补成功”。
-	ScheduleRefinePostReflectPrompt = `你是 SmartFlow 的 ReAct 复盘器。
+	ScheduleRefinePostReflectPrompt = `你是 SmartMate 的 ReAct 复盘器。
 你会收到：本轮工具参数、后端真实执行结果、上一轮上下文。
 请只输出 JSON，不要 Markdown，不要解释：
 {
@@ -137,7 +137,7 @@ const (
 3. should_stop=true 仅用于“目标已满足”或“继续收益很低”。`

 	// ScheduleRefineReviewPrompt 用于终审语义校验。
-	ScheduleRefineReviewPrompt = `你是 SmartFlow 的终审校验器。
+	ScheduleRefineReviewPrompt = `你是 SmartMate 的终审校验器。
 请判断“当前排程”是否满足“本轮用户微调请求 + 契约硬要求”。
 只输出 JSON：
 {
@@ -151,7 +151,7 @@ const (
 2. pass=false 时 reason 必须给出核心差距。`

 	// ScheduleRefineSummaryPrompt 用于最终面向用户的自然语言总结。
-	ScheduleRefineSummaryPrompt = `你是 SmartFlow 的排程结果解读助手。
+	ScheduleRefineSummaryPrompt = `你是 SmartMate 的排程结果解读助手。
 请基于输入输出 2~4 句中文总结：
 1) 先说明本轮改了什么；
 2) 再说明改动收益；
@@ -159,7 +159,7 @@ const (
 不要输出 JSON。`

 	// ScheduleRefineRepairPrompt 用于终审失败后的单次修复动作。
-	ScheduleRefineRepairPrompt = `你是 SmartFlow 的修复执行器。
+	ScheduleRefineRepairPrompt = `你是 SmartMate 的修复执行器。
 当前方案未通过终审，请根据“未满足点”只做一次修复动作。
 只允许输出一个 tool_call（Move 或 Swap），不允许 done。

--- a/backend/agent/prompt/taskquery.go
+++ b/backend/agent/prompt/taskquery.go
@@ -5,7 +5,7 @@ import (
 	"strings"
 )

-const TaskQueryPlanPrompt = `你是 SmartFlow 的任务查询规划器。请根据用户原话，输出结构化查询计划 JSON，供后端直接执行。
+const TaskQueryPlanPrompt = `你是 SmartMate 的任务查询规划器。请根据用户原话，输出结构化查询计划 JSON，供后端直接执行。
 只允许输出 JSON，不要输出解释、代码块或多余文字。

 输出字段：
@@ -30,7 +30,7 @@ const TaskQueryPlanPrompt = `你是 SmartFlow 的任务查询规划器。请根
 6. 如果用户语义更偏向“来点事做做”“给我点轻松的任务”，优先考虑 3、4 象限。
 7. 允许多选象限。`

-const TaskQueryReflectPrompt = `你是 SmartFlow 的任务查询结果审阅器。你会看到：用户原话、当前查询计划、查询结果摘要、当前重试次数。
+const TaskQueryReflectPrompt = `你是 SmartMate 的任务查询结果审阅器。你会看到：用户原话、当前查询计划、查询结果摘要、当前重试次数。
 请只输出 JSON，不要输出解释、代码块或多余文字。

 输出字段：
--- a/backend/agent/router/action_route.go
+++ b/backend/agent/router/action_route.go
@@ -36,7 +36,7 @@ var (
 	routeReasonRegex = regexp.MustCompile(`(?is)<\s*smartflow_reason\s*>(.*?)<\s*/\s*smartflow_reason\s*>`)
 )

-const routeControlPrompt = `你是 SmartFlow 的请求分流控制器。
+const routeControlPrompt = `你是 SmartMate 的请求分流控制器。
 你的唯一任务是给后端返回“可机读控制码”，不要做用户可见回复，不要解释。

 动作定义：
--- a/backend/cmd/start.go
+++ b/backend/cmd/start.go
@@ -171,7 +171,7 @@ func Start() {
 	agentService.SetScheduleProvider(newagentconv.NewScheduleProvider(scheduleRepo, taskClassRepo))
 	agentService.SetSchedulePersistor(newagentconv.NewSchedulePersistorAdapter(manager))
 	agentService.SetCompactionStore(agentRepo)
-	agentService.SetMemoryReader(memoryModule)
+	agentService.SetMemoryReader(memoryModule, memoryCfg)

 	// API 层初始化。
 	userApi := api.NewUserHandler(userService)
--- a/backend/config.example.yaml
+++ b/backend/config.example.yaml
@@ -1,12 +1,20 @@
-# 应用配置文件示例
-# 包含服务器、数据库等基础配置
-# 请根据实际情况修改并保存为 config.yaml
+# SmartFlow 后端配置示例
+#
+# 使用说明：
+# 1. 请复制为 config.yaml 后再按实际环境填写。
+# 2. 示例文件强调“结构清晰”和“字段语义明确”，不是生产推荐值。
+# 3. 若你只想看 memory 相关配置，优先看本文件下半部分的 memory / rag / websearch 段。

+# 服务启动与 HTTP 行为。
 server:
+  # HTTP 监听端口。
  port: 8080
+  # gin 运行模式：debug / release。
  mode: debug
+  # 单次请求默认超时时间。
  timeout: 30s

+# MySQL 主库配置。
 database:
  host: localhost
  port: 3306
@@ -17,22 +25,28 @@ database:
  parseTime: true
  loc: Local

+# 登录态与鉴权令牌配置。
 jwt:
  accessSecret: "put_your_jwt_access_secret_here"
  refreshSecret: "put_your_jwt_refresh_secret_here"
+  # access token 有效期，面向接口鉴权。
  accessTokenExpire: 15min
+  # refresh token 有效期，面向续签。
  refreshTokenExpire: 7d

+# 应用日志输出配置。
 log:
  level: info
  path: logs/

+# Redis 缓存与轻量状态存储。
 redis:
  host: localhost
  port: 6379
  password: ""
  db: 0

+# Kafka outbox 事件总线配置。
 kafka:
  enabled: true
  brokers:
@@ -43,75 +57,148 @@ kafka:
  retryBatchSize: 100
  maxRetry: 20

+# 时间与学期边界配置。
 time:
-    zone: "Asia/Shanghai"
-    semesterStartDate: "2026-03-02" #学期开始日期，一定要设定为周一，以便于计算周数
-    semesterEndDate: "2026-07-19" #学期结束日期，一定要设定为周日，确保最后一周完整
+  zone: "Asia/Shanghai"
+  # 学期开始日期，一定要设定为周一，以便于计算周数。
+  semesterStartDate: "2026-03-02"
+  # 学期结束日期，一定要设定为周日，确保最后一周完整。
+  semesterEndDate: "2026-07-19"

+# 智能体模型与规划参数。
 agent:
-  workerModel: "doubao-seed-2-0-code-preview-260215" # 智能体使用的Worker模型，需根据实际情况调整
-  strategistModel: "doubao-seed-2-0-code-preview-260215" # 策略师使用的Worker模型，需根据实际情况调整
-  baseURL: "https://ark.cn-beijing.volces.com/api/v3" # Worker服务的基础URL，需根据实际情况调整
-  dailyRefineConcurrency: 7 # 日内并发优化并发度，建议按模型配额调整
-  weeklyAdjustBudget: 5 # 周级跨天配平额度上限，防止过度调整
+  # 日常执行链路使用的主模型。
+  workerModel: "doubao-seed-2-0-code-preview-260215"
+  # 规划、拆解、策略推导使用的模型。
+  strategistModel: "doubao-seed-2-0-code-preview-260215"
+  # 模型服务根路径。
+  baseURL: "https://ark.cn-beijing.volces.com/api/v3"
+  # 日内并发优化并发度，建议按模型配额调整。
+  dailyRefineConcurrency: 7
+  # 周级跨天配平额度上限，防止过度调整。
+  weeklyAdjustBudget: 5

+# 通用 RAG 配置。
 rag:
+  # 总开关；关闭后不再走通用向量检索链路。
  enabled: true
-  store: "milvus" # 可选：inmemory / milvus
+  # 当前向量存储类型，可选：inmemory / milvus。
+  store: "milvus"
+  # 召回候选上限。
  topK: 8
+  # 召回相似度阈值。
  threshold: 0.55
  retrieve:
+    # 单次检索超时时间，避免主链路长时间阻塞。
    timeoutMs: 1500
  ingest:
+    # 文档切块大小；过大影响召回精度，过小影响上下文完整度。
    chunkSize: 400
+    # 相邻 chunk 重叠字符数。
    chunkOverlap: 80
  embed:
-    provider: "eino" # 可选：mock / eino
-    model: "doubao-embedding-vision-251215" # 例如 Ark/OpenAI 兼容 embedding 模型名
-    baseURL: "https://ark.cn-beijing.volces.com/api/v3" # 这里填服务根路径，SDK 会自动拼接 /embeddings；API Key 统一从环境变量 ARK_API_KEY 读取
+    # embedding 供应商实现，可选：mock / eino。
+    provider: "eino"
+    # embedding 模型名。
+    model: "doubao-embedding-vision-251215"
+    # embedding 服务根路径（SDK 会自动拼接 /embeddings）；API Key 统一从环境变量 ARK_API_KEY 读取。
+    baseURL: "https://ark.cn-beijing.volces.com/api/v3"
    timeoutMs: 1200
+    # 向量维度，必须与向量库 collection 配置一致。
    dimension: 1024
  reranker:
+    # 是否启用重排。
    enabled: false
-    provider: "noop" # 当前默认 noop，后续可扩展
+    # 当前默认 noop，后续可扩展。
+    provider: "noop"
  milvus:
-    address: "http://localhost:19530" # Milvus REST 入口，当前联调确认不要填 9091 健康检查口
+    # Milvus REST 地址，不要填 9091 健康检查口。
+    address: "http://localhost:19530"
    token: "root:Milvus"
    dbName: ""
+    # 通用 RAG chunk collection。
    collectionName: "smartflow_rag_chunks"
    metricType: "COSINE"
    requestTimeoutMs: 1500

+# 记忆模块配置。
 memory:
+  # memory 总开关；关闭后不做抽取、写入、召回、注入。
  enabled: true
  rag:
+    # 是否允许 memory 读写链路使用向量召回能力。
+    # 关闭后，memory 里的“语义候选”会退回 MySQL 路径，不等于整个 memory 模块关闭。
    enabled: true
+  read:
+    # 读取模式：
+    # 1. legacy：旧读链路，语义上是“RAG 优先，失败再走 legacy”。
+    # 2. hybrid：新读链路，先取强约束，再补语义候选，再统一去重/排序/预算裁剪。
+    # 注意：如果你想强制纯 MySQL 召回，建议同时设置 read.mode=legacy 且 memory.rag.enabled=false。
+    mode: legacy
+    # constraint 类型最大注入条数。
+    constraintLimit: 5
+    # preference 类型最大注入条数。
+    preferenceLimit: 5
+    # fact 类型最大注入条数。
+    factLimit: 5
+    # todo_hint 类型最大注入条数。
+    todoHintLimit: 3
+  inject:
+    # 注入渲染模式：
+    # flat 为旧扁平列表；typed_v2 为按类型分段，便于模型区分“硬约束”和“参考事实”。
+    renderMode: flat
  prompt:
+    # 留空表示走代码内默认抽取 prompt。
    extract: ""
+    # 留空表示走代码内默认决策 prompt。
    decision: ""
+  # memory 向量召回阈值。
  threshold: 0.55
+  # 是否启用重排；当前默认关闭。
  enableReranker: false
  llm:
+    # 记忆抽取/决策使用的 LLM 随机度，默认尽量保守，提升可复现性。
    temperature: 0.1
    topP: 0.2
  job:
+    # 异步记忆任务最大重试次数。
    maxRetry: 6
  worker:
+    # worker 轮询间隔。
    pollEvery: 2s
+    # 单次认领任务数。
    claimBatch: 1
  decision:
-    enabled: false            # 决策层总开关，默认关闭。开启后写入链路走"召回→比对→汇总"流程
-    candidateTopK: 5          # Milvus 语义召回候选数上限
-    candidateMinScore: 0.6    # Milvus 语义召回最低相似度阈值
-    fallbackMode: legacy_add  # 决策流程整体失败时的降级策略：legacy_add（退回旧路径）/ drop（丢弃）
-  write:
-    mode: legacy              # 写入模式：legacy（旧路径）/ decision（决策流程），仅 decision.enabled=true 时生效
-
-websearch:
-  provider: bocha         # 可选：mock | bocha（mock 为空实现，跑通链路用）
-  apiKey: ""              # 搜索供应商 API Key（bocha 模式必填，否则降级为 mock）
-  timeout: 10s            # 单次搜索请求超时
-  fetchTimeout: 15s       # 单次 URL 抓取超时
-  fetchMaxChars: 4000     # 抓取正文最大字符数
-  rag:
+    # 决策层总开关。
+    # 开启后，写入链路会从“直接新增”升级成“召回旧记忆 -> 比对 -> 决策动作”。
+    enabled: false
+    # 决策层语义候选数上限。
+    candidateTopK: 5
+    # 决策层语义候选最低相似度阈值。
+    candidateMinScore: 0.6
+    # 决策流程整体失败时的降级策略：
+    # legacy_add：退回旧路径直接新增
+    # drop：直接丢弃本次写入
+    fallbackMode: legacy_add
+  write:
+    # 写入模式：
+    # legacy：沿用旧写入路径
+    # decision：启用决策式写入
+    # 注意：只有 decision.enabled=true 时，这个值才真正生效。
+    mode: legacy
+
+# 联网搜索能力配置。
+websearch:
+  # 可选：mock | bocha。
+  provider: bocha
+  # 搜索供应商 API Key；bocha 模式必填，否则会降级为 mock。
+  apiKey: ""
+  # 单次搜索请求超时。
+  timeout: 10s
+  # 单次 URL 抓取超时。
+  fetchTimeout: 15s
+  # 抓取正文最大字符数。
+  fetchMaxChars: 4000
+  rag:
+    # 是否把 websearch 结果继续送入 RAG 处理。
    enabled: false
--- a/backend/newAgent/HANDOFF_Memory向Mem0靠拢三步冲刺计划.md
+++ b/backend/newAgent/HANDOFF_Memory向Mem0靠拢三步冲刺计划.md
@@ -477,7 +477,7 @@

 1. 为写入决策、读取召回、注入渲染补齐结构化日志和指标。
 2. 增加历史重复清理能力。
-3. 补齐 `update/restore` 等审计语义。
+3. 补齐“我的记忆”增删改查语义，以及必要的最小变更留痕。
 4. 明确 feature flag 切流策略与回滚手册。
 5. 更新文档，避免后续维护者只看到旧 README。

@@ -502,14 +502,14 @@

 ### 8.4 历史数据清理建议

-建议不要直接写危险 SQL 一把梭清表，而是通过可审计的治理流程清理历史脏数据：
+建议不要直接写危险 SQL 一把梭清表，而是通过可留痕的治理流程清理历史脏数据：

 1. 按 `user_id + memory_type + content_hash + status=active` 扫描重复组。
 2. 为每组挑一个保留主记录：
   - 优先保留最近更新
   - 或优先保留置信度更高
 3. 其余重复项改为 `archived` 或 `deleted`。
-4. 对每次治理动作写审计日志。
+4. 对每次治理动作写最小变更留痕。

 建议第一版优先做“离线治理工具”或“手动触发 job”，不要直接绑到主 worker 周期任务里。

@@ -549,7 +549,7 @@

 1. 能从日志看清某条记忆为何被 `ADD/UPDATE/DELETE/NONE`。
 2. 能从指标看清读侧命中、去重、降级、回滚情况。
-3. 能对历史重复数据做可审计清理。
+3. 能对历史重复数据做可留痕清理。
 4. 出现异常时可在分钟级通过开关退回 legacy。
 5. 文档与代码现状一致，不再依赖口头传递。

--- a/backend/memory/Log.txt
+++ b/backend/memory/Log.txt
@@ -1,43 +1,78 @@
-2026/04/16 11:24:55 D:/SmartFlow-Agent/backend/dao/agent.go:306 record not found
-[44.328ms] [rows:0] SELECT * FROM `agent_chats` WHERE user_id = 1 AND chat_id = 'df7ce26d-6952-493d-ac7f-3bfe98cbc338' ORDER BY `agent_chats`.`id` LIMIT 1
-2026/04/16 11:24:55 [DEBUG] loadOrCreateRuntimeState chatID=df7ce26d-6952-493d-ac7f-3bfe98cbc338 ok=false err=<nil> hasRuntime=false hasPending=false hasCtx=false hasSchedule=false hasOriginal=false
-2026/04/16 11:24:55 [GORM-Cache] Invalidated conversation history cache for user 1 conversation df7ce26d-6952-493d-ac7f-3bfe98cbc338
-2026/04/16 11:24:56 rag level=info component=store operation=ensure_collection action=search collection=smartflow_rag_chunks corpus=memory latency_ms=40 metric_type=COSINE status=created store=milvus vector_dim=1024
-2026/04/16 11:24:57 rag level=error component=store operation=search action=search collection=smartflow_rag_chunks corpus=memory error=Post "http://localhost:19530/v2/vectordb/entities/search": context deadline exceeded error_code=DEADLINE_EXCEEDED filter_count=3 latency_ms=1304 status=failed store=milvus top_k=5 vector_dim=1024
-2026/04/16 11:24:57 rag level=error component=runtime operation=retrieve action=search corpus=memory error=Post "http://localhost:19530/v2/vectordb/entities/search": context deadline exceeded error_code=DEADLINE_EXCEEDED latency_ms=1500 query_len=48 status=failed threshold=0.55 top_k=5
-2026/04/16 11:25:03 [DEBUG] chat routing chat=df7ce26d-6952-493d-ac7f-3bfe98cbc338 route=direct_reply needs_rough_build=false needs_refine_after_rough_build=false allow_reorder=false thinking=false has_rough_build_done=false task_class_count=0 raw=<SMARTFLOW_ROUTE nonce="84656bca-1aa3-4308-bb7d-5127badf9d47" route="direct_reply"/>
-[GIN] 2026/04/16 - 11:25:04 | 200 |    9.3560115s |       127.0.0.1 | POST     "/api/v1/agent/chat"
-2026/04/16 11:25:05 outbox due messages=3, start dispatch
-2026/04/16 11:25:06 [GORM-Cache] Invalidated conversation history cache for user 1 conversation df7ce26d-6952-493d-ac7f-3bfe98cbc338
-2026/04/16 11:25:07 [GORM-Cache] Invalidated conversation history cache for user 1 conversation df7ce26d-6952-493d-ac7f-3bfe98cbc338
-2026/04/16 11:25:08 outbox due messages=1, start dispatch
-2026/04/16 11:25:09 outbox due messages=1, start dispatch
-2026/04/16 11:25:18 rag level=info component=store operation=search action=search collection=smartflow_rag_chunks corpus=memory filter_count=3 latency_ms=7 result_count=0 status=success store=milvus top_k=5 vector_dim=1024
-2026/04/16 11:25:18 rag level=info component=runtime operation=retrieve action=search corpus=memory fallback_used=false hit_count=0 latency_ms=100 query_len=21 raw_count=0 status=success threshold=0.6 top_k=5
-2026/04/16 11:25:18 [DEBUG][去重] 语义召回候选: job_id=18 user_id=1 memory_type=preference candidate_count=0
-2026/04/16 11:25:18 [DEBUG][去重] 汇总决策: job_id=18 action=ADD target_id=0 reason="无相关旧记忆，直接新增"
-2026/04/16 11:25:19 rag level=info component=store operation=upsert action=add collection=smartflow_rag_chunks corpus=memory latency_ms=53 row_count=1 status=success store=milvus vector_dim=1024
-2026/04/16 11:25:19 rag level=info component=runtime operation=ingest action=add chunk_count=1 corpus=memory document_count=1 latency_ms=158 status=success
-2026/04/16 11:25:19 [去重] 决策流程完成: job_id=18 user_id=1 新增=1 更新=0 删除=0 跳过=0
+GOROOT=C:\Program Files\Go #gosetup
+GOPATH=C:\Users\Dev\go #gosetup
+"C:\Program Files\Go\bin\go.exe" build -o C:\Users\Dev\AppData\Local\JetBrains\GoLand2025.3\tmp\GoLand\___7go_build_main_go.exe D:\SmartFlow-Agent\backend\main.go #gosetup
+C:\Users\Dev\AppData\Local\JetBrains\GoLand2025.3\tmp\GoLand\___7go_build_main_go.exe #gosetup
+2026/04/16 16:00:27 Config loaded successfully
+2026/04/16 16:00:36 Database connected successfully
+2026/04/16 16:00:36 Database auto migration completed
+2026/04/16 16:00:36 RAG runtime initialized: store=milvus embed=eino reranker=noop
+2026/04/16 16:00:36 outbox engine starting: topic=smartflow.agent.outbox brokers=[localhost:9092] retry_scan=1s batch=100
+2026/04/16 16:00:36 Kafka topic is ready: smartflow.agent.outbox
+2026/04/16 16:00:36 Outbox event bus started
+2026/04/16 16:00:36 Memory worker started
+2026/04/16 16:00:36 WebSearch provider: bocha
+2026/04/16 16:00:36 Routes setup completed
+2026/04/16 16:00:36 Server starting on port 8080...
+[GIN-debug] [WARNING] Creating an Engine instance with the Logger and Recovery middleware already attached.

-2026/04/16 11:25:44 D:/SmartFlow-Agent/backend/dao/agent.go:306 record not found
-[2.018ms] [rows:0] SELECT * FROM `agent_chats` WHERE user_id = 1 AND chat_id = '6279c9f0-0685-4484-bb33-d4216ef6107c' ORDER BY `agent_chats`.`id` LIMIT 1
-2026/04/16 11:25:44 [GORM-Cache] Invalidated conversation history cache for user 1 conversation 6279c9f0-0685-4484-bb33-d4216ef6107c
-2026/04/16 11:25:44 [DEBUG] loadOrCreateRuntimeState chatID=6279c9f0-0685-4484-bb33-d4216ef6107c ok=false err=<nil> hasRuntime=false hasPending=false hasCtx=false hasSchedule=false hasOriginal=false
-2026/04/16 11:25:44 rag level=info component=store operation=search action=search collection=smartflow_rag_chunks corpus=memory filter_count=3 latency_ms=46 result_count=0 status=success store=milvus top_k=5 vector_dim=1024
-2026/04/16 11:25:44 rag level=info component=runtime operation=retrieve action=search corpus=memory fallback_used=false hit_count=0 latency_ms=145 query_len=45 raw_count=0 status=success threshold=0.55 top_k=5
-2026/04/16 11:25:48 [DEBUG] chat routing chat=6279c9f0-0685-4484-bb33-d4216ef6107c route=direct_reply needs_rough_build=false needs_refine_after_rough_build=false allow_reorder=false thinking=false has_rough_build_done=false task_class_count=0 raw=<SMARTFLOW_ROUTE nonce="a868c365-4f8c-4d56-ac90-a8504842f81c" route="direct_reply"/>
-[GIN] 2026/04/16 - 11:25:49 | 200 |    5.3825319s |       127.0.0.1 | POST     "/api/v1/agent/chat"
-2026/04/16 11:25:50 outbox due messages=3, start dispatch
-2026/04/16 11:25:51 [GORM-Cache] Invalidated conversation history cache for user 1 conversation 6279c9f0-0685-4484-bb33-d4216ef6107c
-2026/04/16 11:25:52 [GORM-Cache] Invalidated conversation history cache for user 1 conversation 6279c9f0-0685-4484-bb33-d4216ef6107c
-2026/04/16 11:25:53 outbox due messages=2, start dispatch
-2026/04/16 11:25:58 rag level=info component=store operation=search action=search collection=smartflow_rag_chunks corpus=memory filter_count=3 latency_ms=53 result_count=1 status=success store=milvus top_k=5 vector_dim=1024
-2026/04/16 11:25:58 rag level=info component=runtime operation=retrieve action=search corpus=memory fallback_used=false hit_count=1 latency_ms=143 query_len=18 raw_count=1 status=success threshold=0.6 top_k=5
-2026/04/16 11:25:58 [WARN][去重] DocumentID 解析失败，跳过候选: document_id="memory:uid:1:6bf14130e4dfc8bd"
-2026/04/16 11:25:58 [WARN][去重] Milvus 返回 1 条结果但 DocumentID 全部解析失败，降级到 MySQL: user_id=1 memory_type=preference
-2026/04/16 11:25:58 [DEBUG][去重] 语义召回候选: job_id=19 user_id=1 memory_type=preference candidate_count=1
-2026/04/16 11:25:58 [DEBUG][去重] 候选详情: memory_id=17 score=0.0000 content="用户喜欢听音乐"
-2026/04/16 11:26:04 [DEBUG][去重] LLM 比对结果: candidate_id=17 score=0.0000 relation=duplicate reason="听歌和听音乐表达相同意思" candidate_content="用户喜欢听音乐"
-2026/04/16 11:26:04 [DEBUG][去重] 汇总决策: job_id=19 action=NONE target_id=0 reason="存在完全重复的旧记忆，跳过写入"
-2026/04/16 11:26:04 [去重] 决策流程完成: job_id=19 user_id=1 新增=0 更新=0 删除=0 跳过=1
+[GIN-debug] [WARNING] Running in "debug" mode. Switch to "release" mode in production.
+ - using env:	export GIN_MODE=release
+ - using code:	gin.SetMode(gin.ReleaseMode)
+
+[GIN-debug] GET    /api/v1/health            --> github.com/LoveLosita/smartflow/backend/routers.RegisterRouters.func1 (3 handlers)
+[GIN-debug] POST   /api/v1/user/register     --> github.com/LoveLosita/smartflow/backend/api.(*UserHandler).UserRegister-fm (3 handlers)
+[GIN-debug] POST   /api/v1/user/login        --> github.com/LoveLosita/smartflow/backend/api.(*UserHandler).UserLogin-fm (3 handlers)
+[GIN-debug] POST   /api/v1/user/refresh-token --> github.com/LoveLosita/smartflow/backend/api.(*UserHandler).RefreshTokenHandler-fm (3 handlers)
+[GIN-debug] POST   /api/v1/user/logout       --> github.com/LoveLosita/smartflow/backend/api.(*UserHandler).UserLogout-fm (5 handlers)
+[GIN-debug] POST   /api/v1/task/create       --> github.com/LoveLosita/smartflow/backend/api.(*TaskHandler).AddTask-fm (6 handlers)
+[GIN-debug] PUT    /api/v1/task/complete     --> github.com/LoveLosita/smartflow/backend/api.(*TaskHandler).CompleteTask-fm (6 handlers)
+[GIN-debug] PUT    /api/v1/task/undo-complete --> github.com/LoveLosita/smartflow/backend/api.(*TaskHandler).UndoCompleteTask-fm (6 handlers)
+[GIN-debug] GET    /api/v1/task/get          --> github.com/LoveLosita/smartflow/backend/api.(*TaskHandler).GetUserTasks-fm (5 handlers)
+[GIN-debug] POST   /api/v1/course/validate   --> github.com/LoveLosita/smartflow/backend/api.(*CourseHandler).CheckUserCourse-fm (5 handlers)
+[GIN-debug] POST   /api/v1/course/import     --> github.com/LoveLosita/smartflow/backend/api.(*CourseHandler).AddUserCourses-fm (6 handlers)
+[GIN-debug] POST   /api/v1/task-class/add    --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserAddTaskClass-fm (6 handlers)
+[GIN-debug] GET    /api/v1/task-class/list   --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserGetTaskClassInfos-fm (5 handlers)
+[GIN-debug] GET    /api/v1/task-class/get    --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserGetCompleteTaskClass-fm (5 handlers)
+[GIN-debug] PUT    /api/v1/task-class/update --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserUpdateTaskClass-fm (6 handlers)
+[GIN-debug] POST   /api/v1/task-class/insert-into-schedule --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserAddTaskClassItemIntoSchedule-fm (6 handlers)
+[GIN-debug] DELETE /api/v1/task-class/delete-item --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).DeleteTaskClassItem-fm (6 handlers)
+[GIN-debug] DELETE /api/v1/task-class/delete-class --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).DeleteTaskClass-fm (6 handlers)
+[GIN-debug] PUT    /api/v1/task-class/apply-batch-into-schedule --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserInsertBatchTaskClassItemsIntoSchedule-fm (6 handlers)
+[GIN-debug] GET    /api/v1/schedule/today    --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).GetUserTodaySchedule-fm (5 handlers)
+[GIN-debug] GET    /api/v1/schedule/week     --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).GetUserWeeklySchedule-fm (5 handlers)
+[GIN-debug] DELETE /api/v1/schedule/delete   --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).DeleteScheduleEvent-fm (6 handlers)
+[GIN-debug] GET    /api/v1/schedule/recent-completed --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).GetUserRecentCompletedSchedules-fm (5 handlers)
+[GIN-debug] GET    /api/v1/schedule/current  --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).GetUserOngoingSchedule-fm (5 handlers)
+[GIN-debug] DELETE /api/v1/schedule/undo-task-item --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).UserRevocateTaskItemFromSchedule-fm (6 handlers)
+[GIN-debug] GET    /api/v1/schedule/smart-planning --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).SmartPlanning-fm (5 handlers)
+[GIN-debug] POST   /api/v1/schedule/smart-planning-multi --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).SmartPlanningMulti-fm (5 handlers)
+[GIN-debug] POST   /api/v1/agent/chat        --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).ChatAgent-fm (6 handlers)
+[GIN-debug] GET    /api/v1/agent/conversation-meta --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).GetConversationMeta-fm (5 handlers)
+[GIN-debug] GET    /api/v1/agent/conversation-list --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).GetConversationList-fm (5 handlers)
+[GIN-debug] GET    /api/v1/agent/conversation-history --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).GetConversationHistory-fm (5 handlers)
+[GIN-debug] GET    /api/v1/agent/schedule-preview --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).GetSchedulePlanPreview-fm (5 handlers)
+[GIN-debug] GET    /api/v1/agent/context-stats --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).GetContextStats-fm (5 handlers)
+[GIN-debug] [WARNING] You trusted all proxies, this is NOT safe. We recommend you to set a value.
+Please check https://github.com/gin-gonic/gin/blob/master/docs/doc.md#dont-trust-all-proxies for details.
+[GIN-debug] Listening and serving HTTP on :8080
+[GIN] 2026/04/16 - 16:00:38 | 200 |     47.9273ms |       127.0.0.1 | GET      "/api/v1/agent/conversation-list?page=1&page_size=12&limit=12&status=active"
+[GIN] 2026/04/16 - 16:00:38 | 200 |     12.4182ms |       127.0.0.1 | GET      "/api/v1/agent/conversation-meta?conversation_id=d1dda8e1-b7f0-4721-ad84-529ecad5d637"
+[GIN] 2026/04/16 - 16:00:38 | 200 |     88.1335ms |       127.0.0.1 | GET      "/api/v1/agent/conversation-history?conversation_id=d1dda8e1-b7f0-4721-ad84-529ecad5d637"
+
+2026/04/16 16:01:07 D:/SmartFlow-Agent/backend/dao/agent.go:306 record not found
+[42.474ms] [rows:0] SELECT * FROM `agent_chats` WHERE user_id = 1 AND chat_id = '284c4b76-d6cc-40a6-b3de-fa4c8288022b' ORDER BY `agent_chats`.`id` LIMIT 1
+2026/04/16 16:01:07 [GORM-Cache] Invalidated conversation history cache for user 1 conversation 284c4b76-d6cc-40a6-b3de-fa4c8288022b
+2026/04/16 16:01:07 [DEBUG] loadOrCreateRuntimeState chatID=284c4b76-d6cc-40a6-b3de-fa4c8288022b ok=false err=<nil> hasRuntime=false hasPending=false hasCtx=false hasSchedule=false hasOriginal=false
+2026/04/16 16:01:08 rag level=info component=store operation=ensure_collection action=search collection=smartflow_rag_chunks corpus=memory latency_ms=4 metric_type=COSINE status=already_exists store=milvus vector_dim=1024
+2026/04/16 16:01:08 rag level=info component=store operation=search action=search collection=smartflow_rag_chunks corpus=memory filter_count=3 latency_ms=51 result_count=0 status=success store=milvus top_k=18 vector_dim=1024
+2026/04/16 16:01:08 rag level=info component=runtime operation=retrieve action=search corpus=memory fallback_used=false hit_count=0 latency_ms=255 query_len=51 raw_count=0 status=success threshold=0.55 top_k=18
+2026/04/16 16:01:26 [DEBUG] chat routing chat=284c4b76-d6cc-40a6-b3de-fa4c8288022b route=direct_reply needs_rough_build=false needs_refine_after_rough_build=false allow_reorder=false thinking=false has_rough_build_done=false task_class_count=0 raw=<SMARTFLOW_ROUTE nonce="9b04f5df-3452-4a15-a39f-0449c1851729" route="direct_reply"/>
+[GIN] 2026/04/16 - 16:01:27 | 200 |   19.3318195s |       127.0.0.1 | POST     "/api/v1/agent/chat"
+[GIN] 2026/04/16 - 16:01:27 | 200 |     84.0901ms |       127.0.0.1 | GET      "/api/v1/agent/conversation-list?page=1&page_size=12&limit=12&status=active"
+2026/04/16 16:01:27 outbox due messages=3, start dispatch
+[GIN] 2026/04/16 - 16:01:27 | 200 |        2.24ms |       127.0.0.1 | GET      "/api/v1/agent/conversation-meta?conversation_id=284c4b76-d6cc-40a6-b3de-fa4c8288022b"
+[GIN] 2026/04/16 - 16:01:27 | 200 |     46.4062ms |       127.0.0.1 | GET      "/api/v1/agent/conversation-history?conversation_id=284c4b76-d6cc-40a6-b3de-fa4c8288022b"
+2026/04/16 16:01:28 [GORM-Cache] Invalidated conversation history cache for user 1 conversation 284c4b76-d6cc-40a6-b3de-fa4c8288022b
+2026/04/16 16:01:29 [GORM-Cache] Invalidated conversation history cache for user 1 conversation 284c4b76-d6cc-40a6-b3de-fa4c8288022b
+2026/04/16 16:01:30 outbox due messages=1, start dispatch
+2026/04/16 16:01:31 异步生成会话标题失败(模型生成失败) chat=284c4b76-d6cc-40a6-b3de-fa4c8288022b err=failed to create chat completion: context deadline exceeded
--- a/backend/memory/model/config.go
+++ b/backend/memory/model/config.go
@@ -1,6 +1,30 @@
 package model

-import "time"
+import (
+	"strings"
+	"time"
+)
+
+const (
+	// MemoryReadModeLegacy selects the old read path: RAG first, falling back to the legacy lookup on failure.
+	MemoryReadModeLegacy = "legacy"
+	// MemoryReadModeHybrid selects the hybrid read path: structured hard constraints plus semantic candidates.
+	MemoryReadModeHybrid = "hybrid"
+
+	// MemoryInjectRenderModeFlat keeps the existing flat-list rendering.
+	MemoryInjectRenderModeFlat = "flat"
+	// MemoryInjectRenderModeTypedV2 renders memories in sections grouped by memory type.
+	MemoryInjectRenderModeTypedV2 = "typed_v2"
+
+	// DefaultReadConstraintLimit is the default budget cap for constraint memories.
+	DefaultReadConstraintLimit = 5
+	// DefaultReadPreferenceLimit is the default budget cap for preference memories.
+	DefaultReadPreferenceLimit = 5
+	// DefaultReadFactLimit is the default budget cap for fact memories.
+	DefaultReadFactLimit = 5
+	// DefaultReadTodoHintLimit is the default budget cap for todo_hint memories.
+	DefaultReadTodoHintLimit = 3
+)

 // Config 是记忆模块配置对象（Day1 首版）。
 //
@@ -11,6 +35,13 @@ type Config struct {
 	Enabled    bool
 	RAGEnabled bool

+	ReadMode            string
+	ReadConstraintLimit int
+	ReadPreferenceLimit int
+	ReadFactLimit       int
+	ReadTodoHintLimit   int
+	InjectRenderMode    string
+
 	ExtractPrompt  string
 	DecisionPrompt string

@@ -35,3 +66,68 @@ type Config struct {
 	DecisionFallbackMode      string  // "legacy_add"（退回旧路径直接新增）/ "drop"（丢弃）
 	WriteMode                 string  // "legacy"（旧路径）/ "decision"（决策流程），仅 DecisionEnabled=true 时生效
 }
+
+// NormalizeReadMode canonicalizes a read-mode string; anything other than "hybrid" (trimmed, case-insensitive) maps to legacy.
+func NormalizeReadMode(mode string) string {
+	switch strings.ToLower(strings.TrimSpace(mode)) {
+	case MemoryReadModeHybrid:
+		return MemoryReadModeHybrid
+	default:
+		return MemoryReadModeLegacy
+	}
+}
+
+// NormalizeInjectRenderMode canonicalizes an inject render-mode string; anything other than "typed_v2" (trimmed, case-insensitive) maps to flat.
+func NormalizeInjectRenderMode(mode string) string {
+	switch strings.ToLower(strings.TrimSpace(mode)) {
+	case MemoryInjectRenderModeTypedV2:
+		return MemoryInjectRenderModeTypedV2
+	default:
+		return MemoryInjectRenderModeFlat
+	}
+}
+
+// EffectiveReadConstraintLimit returns the constraint budget in effect: ReadConstraintLimit if positive, otherwise DefaultReadConstraintLimit.
+func (c Config) EffectiveReadConstraintLimit() int {
+	return normalizePositiveLimit(c.ReadConstraintLimit, DefaultReadConstraintLimit)
+}
+
+// EffectiveReadPreferenceLimit returns the preference budget in effect: ReadPreferenceLimit if positive, otherwise DefaultReadPreferenceLimit.
+func (c Config) EffectiveReadPreferenceLimit() int {
+	return normalizePositiveLimit(c.ReadPreferenceLimit, DefaultReadPreferenceLimit)
+}
+
+// EffectiveReadFactLimit returns the fact budget in effect: ReadFactLimit if positive, otherwise DefaultReadFactLimit.
+func (c Config) EffectiveReadFactLimit() int {
+	return normalizePositiveLimit(c.ReadFactLimit, DefaultReadFactLimit)
+}
+
+// EffectiveReadTodoHintLimit returns the todo_hint budget in effect: ReadTodoHintLimit if positive, otherwise DefaultReadTodoHintLimit.
+func (c Config) EffectiveReadTodoHintLimit() int {
+	return normalizePositiveLimit(c.ReadTodoHintLimit, DefaultReadTodoHintLimit)
+}
+
+// EffectiveReadMode returns the read mode in effect, i.e. ReadMode passed through NormalizeReadMode.
+func (c Config) EffectiveReadMode() string {
+	return NormalizeReadMode(c.ReadMode)
+}
+
+// EffectiveInjectRenderMode returns the render mode in effect, i.e. InjectRenderMode passed through NormalizeInjectRenderMode.
+func (c Config) EffectiveInjectRenderMode() string {
+	return NormalizeInjectRenderMode(c.InjectRenderMode)
+}
+
+// TotalReadBudget returns the overall budget cap: the sum of the four effective per-type limits.
+func (c Config) TotalReadBudget() int {
+	return c.EffectiveReadConstraintLimit() +
+		c.EffectiveReadPreferenceLimit() +
+		c.EffectiveReadFactLimit() +
+		c.EffectiveReadTodoHintLimit()
+}
+
+func normalizePositiveLimit(value int, defaultValue int) int { // returns value when it is positive, defaultValue otherwise
+	if value <= 0 { // zero or negative: fall back to defaultValue
+		return defaultValue
+	}
+	return value
+}
--- a/backend/memory/model/item.go
+++ b/backend/memory/model/item.go
@@ -16,6 +16,7 @@ type ItemDTO struct {
 	MemoryType       string
 	Title            string
 	Content          string
+	ContentHash      string
 	Confidence       float64
 	Importance       float64
 	SensitivityLevel int
--- a/backend/memory/repo/item_repo.go
+++ b/backend/memory/repo/item_repo.go
@@ -91,6 +91,72 @@ func (r *ItemRepo) FindByQuery(ctx context.Context, query memorymodel.ItemQuery)
 	return items, err
 }

+// FindPinnedByUser loads the structured memories that should be injected first.
+//
+// Steps (a non-positive preferenceLimit falls back to memorymodel.DefaultReadPreferenceLimit):
+// 1. Query active constraints in the shared user/conversation/assistant/run scope, so hard constraints never depend on semantic recall;
+// 2. Query active preferences with confidence >= 0.8, ordered by importance then updated_at (descending) and cut to preferenceLimit;
+// 3. Return constraints first and preferences after them; dedup, ranking and budget trimming are left to the service layer;
+// 4. No final budget is applied here because the read side still merges these rows with the semantic candidates before re-ranking.
+func (r *ItemRepo) FindPinnedByUser(
+	ctx context.Context,
+	query memorymodel.ItemQuery,
+	preferenceLimit int,
+) ([]model.MemoryItem, error) {
+	if r == nil || r.db == nil {
+		return nil, errors.New("memory item repo is nil")
+	}
+	if query.UserID <= 0 {
+		return nil, errors.New("memory item query user_id is invalid")
+	}
+
+	includeConstraint := allowPinnedMemoryType(query.MemoryTypes, memorymodel.MemoryTypeConstraint)
+	includePreference := allowPinnedMemoryType(query.MemoryTypes, memorymodel.MemoryTypePreference)
+	if !includeConstraint && !includePreference {
+		return nil, nil
+	}
+
+	base := r.db.WithContext(ctx).Model(&model.MemoryItem{}).Where("user_id = ?", query.UserID)
+	base = applyScopedEquality(base, "conversation_id", query.ConversationID, query.IncludeGlobal)
+	base = applyScopedEquality(base, "assistant_id", query.AssistantID, query.IncludeGlobal)
+	base = applyScopedEquality(base, "run_id", query.RunID, query.IncludeGlobal)
+	base = applyPinnedUnexpiredScope(base, query)
+
+	// Normalize the limit before it sizes the slice: make panics on a negative capacity (preferenceLimit < -4).
+	if preferenceLimit <= 0 {
+		preferenceLimit = memorymodel.DefaultReadPreferenceLimit
+	}
+	result := make([]model.MemoryItem, 0, preferenceLimit+4)
+	if includeConstraint {
+		var constraints []model.MemoryItem
+		err := base.Session(&gorm.Session{}).
+			Where("memory_type = ? AND status = ?", memorymodel.MemoryTypeConstraint, model.MemoryItemStatusActive).
+			Order("importance DESC").
+			Order("updated_at DESC").
+			Find(&constraints).Error
+		if err != nil {
+			return nil, err
+		}
+		result = append(result, constraints...)
+	}
+
+	if includePreference {
+		var preferences []model.MemoryItem
+		err := base.Session(&gorm.Session{}).
+			Where("memory_type = ? AND confidence >= ? AND status = ?", memorymodel.MemoryTypePreference, 0.8, model.MemoryItemStatusActive).
+			Order("importance DESC").
+			Order("updated_at DESC").
+			Limit(preferenceLimit).
+			Find(&preferences).Error
+		if err != nil {
+			return nil, err
+		}
+		result = append(result, preferences...)
+	}
+
+	return result, nil
+}
+
 // GetByIDForUser 读取某个用户的一条记忆条目。
 func (r *ItemRepo) GetByIDForUser(ctx context.Context, userID int, memoryID int64) (*model.MemoryItem, error) {
 	if r == nil || r.db == nil {
@@ -292,3 +358,27 @@ func applyScopedEquality(db *gorm.DB, column, value string, includeGlobal bool)
 	}
 	return db.Where(column+" = ?", value)
 }
+
+func applyPinnedUnexpiredScope(db *gorm.DB, query memorymodel.ItemQuery) *gorm.DB { // adds a "ttl_at is NULL or in the future" filter when query.OnlyUnexpired is set
+	if db == nil || !query.OnlyUnexpired {
+		return db
+	}
+	now := query.Now
+	if now.IsZero() {
+		now = time.Now() // no reference time supplied by the caller; use the current time
+	}
+	return db.Where("(ttl_at IS NULL OR ttl_at > ?)", now)
+}
+
+func allowPinnedMemoryType(memoryTypes []string, target string) bool { // reports whether target passes the caller's memory-type filter
+	if len(memoryTypes) == 0 {
+		return true // empty filter: every type is allowed
+	}
+	target = memorymodel.NormalizeMemoryType(target)
+	for _, item := range memoryTypes {
+		if memorymodel.NormalizeMemoryType(item) == target { // compare in normalized form on both sides
+			return true
+		}
+	}
+	return false
+}
--- a/backend/memory/service/common.go
+++ b/backend/memory/service/common.go
@@ -4,6 +4,7 @@ import (
 	"strings"

 	memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
+	memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
 	"github.com/LoveLosita/smartflow/backend/model"
 )

@@ -17,6 +18,7 @@ func toItemDTO(item model.MemoryItem) memorymodel.ItemDTO {
 		MemoryType:       item.MemoryType,
 		Title:            item.Title,
 		Content:          item.Content,
+		ContentHash:      fallbackContentHash(item.MemoryType, item.Content, strValue(item.ContentHash)),
 		Confidence:       item.Confidence,
 		Importance:       item.Importance,
 		SensitivityLevel: item.SensitivityLevel,
@@ -117,3 +119,31 @@ func strValue(v *string) string {
 	}
 	return strings.TrimSpace(*v)
 }
+
+// fallbackContentHash returns the content hash an item should use for service-level dedup.
+//
+// Notes:
+// 1. A non-blank currentHash (the stored content_hash) is reused as-is, so one record never ends up with hashes from different algorithms;
+// 2. If legacy data or RAG metadata carries no hash, one is computed from "normalized type + normalized content";
+// 3. If the normalized type or the normalized content is empty, "" is returned so callers can fall back to text-based dedup.
+func fallbackContentHash(memoryType, content, currentHash string) string {
+	currentHash = strings.TrimSpace(currentHash)
+	if currentHash != "" {
+		return currentHash
+	}
+
+	normalizedType := memorymodel.NormalizeMemoryType(memoryType)
+	normalizedContent := normalizeContentForHash(content)
+	if normalizedType == "" || normalizedContent == "" {
+		return ""
+	}
+	return memoryutils.HashContent(normalizedType, normalizedContent)
+}
+
+func normalizeContentForHash(content string) string { // canonical text form used for hashing and text dedup
+	content = strings.TrimSpace(content)
+	if content == "" {
+		return ""
+	}
+	return strings.ToLower(strings.Join(strings.Fields(content), " ")) // collapse whitespace runs to single spaces, then lowercase
+}
--- a/backend/memory/service/config_loader.go
+++ b/backend/memory/service/config_loader.go
@@ -15,17 +15,23 @@ import (
 // 3. 轮询与重试参数给出保守默认值，避免对主链路造成压力。
 func LoadConfigFromViper() memorymodel.Config {
 	cfg := memorymodel.Config{
-		Enabled:          viper.GetBool("memory.enabled"),
-		RAGEnabled:       viper.GetBool("memory.rag.enabled"),
-		ExtractPrompt:    viper.GetString("memory.prompt.extract"),
-		DecisionPrompt:   viper.GetString("memory.prompt.decision"),
-		Threshold:        viper.GetFloat64("memory.threshold"),
-		EnableReranker:   viper.GetBool("memory.enableReranker"),
-		LLMTemperature:   viper.GetFloat64("memory.llm.temperature"),
-		LLMTopP:          viper.GetFloat64("memory.llm.topP"),
-		JobMaxRetry:      viper.GetInt("memory.job.maxRetry"),
-		WorkerPollEvery:  viper.GetDuration("memory.worker.pollEvery"),
-		WorkerClaimBatch: viper.GetInt("memory.worker.claimBatch"),
+		Enabled:             viper.GetBool("memory.enabled"),
+		RAGEnabled:          viper.GetBool("memory.rag.enabled"),
+		ReadMode:            memorymodel.NormalizeReadMode(viper.GetString("memory.read.mode")),
+		InjectRenderMode:    memorymodel.NormalizeInjectRenderMode(viper.GetString("memory.inject.renderMode")),
+		ExtractPrompt:       viper.GetString("memory.prompt.extract"),
+		DecisionPrompt:      viper.GetString("memory.prompt.decision"),
+		Threshold:           viper.GetFloat64("memory.threshold"),
+		EnableReranker:      viper.GetBool("memory.enableReranker"),
+		LLMTemperature:      viper.GetFloat64("memory.llm.temperature"),
+		LLMTopP:             viper.GetFloat64("memory.llm.topP"),
+		JobMaxRetry:         viper.GetInt("memory.job.maxRetry"),
+		WorkerPollEvery:     viper.GetDuration("memory.worker.pollEvery"),
+		WorkerClaimBatch:    viper.GetInt("memory.worker.claimBatch"),
+		ReadConstraintLimit: viper.GetInt("memory.read.constraintLimit"),
+		ReadPreferenceLimit: viper.GetInt("memory.read.preferenceLimit"),
+		ReadFactLimit:       viper.GetInt("memory.read.factLimit"),
+		ReadTodoHintLimit:   viper.GetInt("memory.read.todoHintLimit"),

 		// 决策层配置：默认关闭，灰度开启后才会生效。
 		DecisionEnabled:           viper.GetBool("memory.decision.enabled"),
@@ -53,6 +59,12 @@ func LoadConfigFromViper() memorymodel.Config {
 	if cfg.WorkerClaimBatch <= 0 {
 		cfg.WorkerClaimBatch = 1
 	}
+	cfg.ReadConstraintLimit = cfg.EffectiveReadConstraintLimit()
+	cfg.ReadPreferenceLimit = cfg.EffectiveReadPreferenceLimit()
+	cfg.ReadFactLimit = cfg.EffectiveReadFactLimit()
+	cfg.ReadTodoHintLimit = cfg.EffectiveReadTodoHintLimit()
+	cfg.ReadMode = cfg.EffectiveReadMode()
+	cfg.InjectRenderMode = cfg.EffectiveInjectRenderMode()

 	// 决策层配置默认值兜底。
 	// 说明：
--- a/backend/memory/service/read_scope.go
+++ b/backend/memory/service/read_scope.go
@@ -0,0 +1,83 @@
+package service
+
+import (
+	"time"
+
+	infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
+	memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
+)
+
+// buildReadScopedItemQuery builds the MySQL query conditions shared by the read-side paths.
+//
+// Responsibilities:
+// 1. It only maps a RetrieveRequest onto the "read-side scope" query parameters;
+// 2. It does not query the database, and does not sort, trim or inject anything;
+// 3. conversation_id is deliberately passed as "" so that it is not meant to filter; it stays in the memory record metadata for audit and tracing.
+//
+// Steps:
+// 1. The read side always uses user_id as the hard isolation boundary, so memories never leak across users.
+// 2. assistant_id / run_id are still passed through as filters: they express assistant-instance and run boundaries, not whether recall may cross conversations.
+// 3. conversation_id is explicitly cleared because the chat context window already covers same-conversation information; read-side memory is mainly valuable across conversations.
+func buildReadScopedItemQuery(
+	req memorymodel.RetrieveRequest,
+	now time.Time,
+	statuses []string,
+	limit int,
+) memorymodel.ItemQuery {
+	return memorymodel.ItemQuery{
+		UserID:         req.UserID,
+		ConversationID: "",
+		AssistantID:    req.AssistantID,
+		RunID:          req.RunID,
+		Statuses:       statuses,
+		MemoryTypes:    normalizeRetrieveMemoryTypes(req.MemoryTypes),
+		IncludeGlobal:  true,
+		OnlyUnexpired:  true,
+		Limit:          limit,
+		Now:            now,
+	}
+}
+
+// buildReadScopedRAGRequest builds the RAG retrieval request shared by the read-side paths.
+//
+// Responsibilities:
+// 1. It only produces the memory retrieval request; it does not run the vector search;
+// 2. It does not handle re-ranking beyond the threshold, fallback or dedup;
+// 3. conversation_id likewise stays only in the document metadata and is no longer a hard filter for chat reads.
+//
+// Steps:
+// 1. user_id remains the one hard filter that must be kept, so recall stays limited to the current user.
+// 2. conversation_id is explicitly cleared so that memories from older conversations are not removed by a metadata filter before similarity is computed.
+// 3. assistant_id / run_id are passed through, leaving room for finer-grained isolation if multi-assistant scenarios appear later.
+func buildReadScopedRAGRequest(
+	req memorymodel.RetrieveRequest,
+	topK int,
+	threshold float64,
+) infrarag.MemoryRetrieveRequest {
+	return infrarag.MemoryRetrieveRequest{
+		Query:          req.Query,
+		TopK:           topK,
+		Threshold:      threshold,
+		Action:         "search",
+		UserID:         req.UserID,
+		ConversationID: "",
+		AssistantID:    req.AssistantID,
+		RunID:          req.RunID,
+		MemoryTypes:    normalizeRetrieveMemoryTypes(req.MemoryTypes),
+	}
+}
+
+// shouldReturnSemanticRAGResult reports whether the RAG result can be used directly.
+//
+// Responsibilities:
+// 1. It only expresses the business rule "is the RAG result good enough to skip the MySQL fallback";
+// 2. It performs no retrieval and no logging;
+// 3. Returning false does not signal an error; it only tells the caller to go on and try the database fallback.
+//
+// Steps:
+// 1. When RAG returned an error, never short-circuit; the MySQL fallback must run.
+// 2. When RAG returned zero hits, do not short-circuit either; otherwise "ran fine but found nothing" would be taken as the final answer.
+// 3. Only "no error and a non-empty result" allows returning the RAG result directly.
+func shouldReturnSemanticRAGResult(items []memorymodel.ItemDTO, err error) bool {
+	return err == nil && len(items) > 0
+}
--- a/backend/memory/service/read_service.go
+++ b/backend/memory/service/read_service.go
@@ -71,6 +71,9 @@ func (s *ReadService) Retrieve(ctx context.Context, req memorymodel.RetrieveRequ
 	}

 	limit := normalizeLimit(req.Limit, defaultRetrieveLimit, maxRetrieveLimit)
+	if s.cfg.EffectiveReadMode() == memorymodel.MemoryReadModeHybrid {
+		return s.HybridRetrieve(ctx, req, effectiveSetting, limit, now)
+	}
 	if s.cfg.RAGEnabled && s.ragRuntime != nil && strings.TrimSpace(req.Query) != "" {
 		items, ragErr := s.retrieveByRAG(ctx, req, effectiveSetting, limit, now)
 		if ragErr == nil && len(items) > 0 {
@@ -91,18 +94,12 @@ func (s *ReadService) retrieveByLegacy(
 	if !effectiveSetting.MemoryEnabled {
 		return nil, nil
 	}
-	query := memorymodel.ItemQuery{
-		UserID:         req.UserID,
-		ConversationID: req.ConversationID,
-		AssistantID:    req.AssistantID,
-		RunID:          req.RunID,
-		Statuses:       []string{model.MemoryItemStatusActive},
-		MemoryTypes:    normalizeRetrieveMemoryTypes(req.MemoryTypes),
-		IncludeGlobal:  true,
-		OnlyUnexpired:  true,
-		Limit:          normalizeLimit(limit*3, limit*3, maxRetrieveLimit*3),
-		Now:            now,
-	}
+	query := buildReadScopedItemQuery(
+		req,
+		now,
+		[]string{model.MemoryItemStatusActive},
+		normalizeLimit(limit*3, limit*3, maxRetrieveLimit*3),
+	)

 	items, err := s.itemRepo.FindByQuery(ctx, query)
 	if err != nil {
@@ -114,8 +111,8 @@ func (s *ReadService) retrieveByLegacy(
 	}

 	sort.SliceStable(items, func(i, j int) bool {
-		left := scoreRetrievedItem(items[i], now, req.ConversationID)
-		right := scoreRetrievedItem(items[j], now, req.ConversationID)
+		left := scoreRetrievedItem(items[i], now)
+		right := scoreRetrievedItem(items[j], now)
 		if left == right {
 			return items[i].ID > items[j].ID
 		}
@@ -140,17 +137,7 @@ func (s *ReadService) retrieveByRAG(
 		return nil, nil
 	}

-	result, err := s.ragRuntime.RetrieveMemory(ctx, infrarag.MemoryRetrieveRequest{
-		Query:          req.Query,
-		TopK:           limit,
-		Threshold:      s.cfg.Threshold,
-		Action:         "search",
-		UserID:         req.UserID,
-		ConversationID: req.ConversationID,
-		AssistantID:    req.AssistantID,
-		RunID:          req.RunID,
-		MemoryTypes:    normalizeRetrieveMemoryTypes(req.MemoryTypes),
-	})
+	result, err := s.ragRuntime.RetrieveMemory(ctx, buildReadScopedRAGRequest(req, limit, s.cfg.Threshold))
 	if err != nil || result == nil || len(result.Items) == 0 {
 		return nil, err
 	}
@@ -193,14 +180,17 @@ func normalizeRetrieveMemoryTypes(raw []string) []string {
 	}
 }

-func scoreRetrievedItem(item model.MemoryItem, now time.Time, conversationID string) float64 {
+// scoreRetrievedItem 计算 legacy 读链路的确定性排序分数。
+//
+// 说明：
+// 1. 这里只保留 importance / confidence / recency / explicit / type 这些稳定特征；
+// 2. conversation_id 已不再参与读侧打分，因为同对话信息本就已经在上下文窗口内；
+// 3. 若后续需要引入语义分或 reranker，应在 DTO 层补齐对应字段后再统一并入。
+func scoreRetrievedItem(item model.MemoryItem, now time.Time) float64 {
 	score := 0.35*clamp01(item.Importance) + 0.3*clamp01(item.Confidence) + 0.2*recencyScore(item, now)
 	if item.IsExplicit {
 		score += 0.1
 	}
-	if strValue(item.ConversationID) != "" && strValue(item.ConversationID) == conversationID {
-		score += 0.08
-	}
 	switch item.MemoryType {
 	case memorymodel.MemoryTypeConstraint:
 		score += 0.12
@@ -262,15 +252,18 @@ func collectMemoryIDs(items []model.MemoryItem) []int64 {
 func buildMemoryDTOFromRetrieveHit(hit infrarag.RetrieveHit) (memorymodel.ItemDTO, int64) {
 	memoryID := parseMemoryIDFromDocumentID(hit.DocumentID)
 	metadata := hit.Metadata
+	content := strings.TrimSpace(hit.Text)
+	memoryType := readString(metadata["memory_type"])
 	dto := memorymodel.ItemDTO{
 		ID:               memoryID,
 		UserID:           int(readFloatLike(metadata["user_id"])),
 		ConversationID:   readString(metadata["conversation_id"]),
 		AssistantID:      readString(metadata["assistant_id"]),
 		RunID:            readString(metadata["run_id"]),
-		MemoryType:       readString(metadata["memory_type"]),
+		MemoryType:       memoryType,
 		Title:            readString(metadata["title"]),
-		Content:          strings.TrimSpace(hit.Text),
+		Content:          content,
+		ContentHash:      fallbackContentHash(memoryType, content, readString(metadata["content_hash"])),
 		Confidence:       readFloatLike(metadata["confidence"]),
 		Importance:       readFloatLike(metadata["importance"]),
 		SensitivityLevel: int(readFloatLike(metadata["sensitivity_level"])),
--- a/backend/memory/service/retrieve_merge.go
+++ b/backend/memory/service/retrieve_merge.go
@@ -0,0 +1,333 @@
+package service
+
+import (
+	"context"
+	"strings"
+	"time"
+
+	memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
+	memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
+	"github.com/LoveLosita/smartflow/backend/model"
+)
+
+// HybridRetrieve is the single entry point of the read-side hybrid recall path.
+//
+// Steps:
+// 1. The structured route first loads constraints and high-confidence preferences, giving the model a stable base of hard constraints;
+// 2. Semantic candidates are then added, RAG first; a RAG error or zero hits both fall back to MySQL, which keeps the path resilient;
+// 3. Both result sets go through three-level dedup, ranking and per-type budget trimming; last_access_at is refreshed only for the items finally returned;
+// 4. The old legacy path is kept intact so that a config change can roll back quickly.
+func (s *ReadService) HybridRetrieve(
+	ctx context.Context,
+	req memorymodel.RetrieveRequest,
+	effectiveSetting model.MemoryUserSetting,
+	limit int,
+	now time.Time,
+) ([]memorymodel.ItemDTO, error) {
+	if s == nil || s.itemRepo == nil {
+		return nil, nil
+	}
+	if !effectiveSetting.MemoryEnabled {
+		return nil, nil
+	}
+
+	pinnedItems, err := s.retrievePinnedCandidates(ctx, req, effectiveSetting, now)
+	if err != nil {
+		return nil, err
+	}
+	semanticItems, err := s.retrieveSemanticCandidates(ctx, req, effectiveSetting, limit, now)
+	if err != nil {
+		return nil, err
+	}
+
+	merged := make([]memorymodel.ItemDTO, 0, len(pinnedItems)+len(semanticItems))
+	merged = append(merged, pinnedItems...)
+	merged = append(merged, semanticItems...)
+	if len(merged) == 0 {
+		return nil, nil
+	}
+
+	merged = dedupByID(merged)
+	merged = dedupByHash(merged)
+	merged = dedupByText(merged)
+	merged = RankItems(merged, now)
+	merged = applyTypeBudget(merged, s.cfg)
+	if len(merged) == 0 {
+		return nil, nil
+	}
+
+	_ = s.itemRepo.TouchLastAccessAt(ctx, collectItemDTOIDs(merged), now) // error deliberately discarded: the items are returned whether or not the touch succeeds
+	return merged, nil
+}
+
+func (s *ReadService) retrievePinnedCandidates(
+	ctx context.Context,
+	req memorymodel.RetrieveRequest,
+	effectiveSetting model.MemoryUserSetting,
+	now time.Time,
+) ([]memorymodel.ItemDTO, error) {
+	query := buildReadScopedItemQuery(req, now, nil, 0) // statuses and limit stay empty: FindPinnedByUser filters on active status and takes the preference limit as its own argument
+	items, err := s.itemRepo.FindPinnedByUser(ctx, query, s.cfg.EffectiveReadPreferenceLimit())
+	if err != nil {
+		return nil, err
+	}
+	items = memoryutils.FilterItemsBySetting(items, effectiveSetting)
+	return toItemDTOs(items), nil
+}
+
+func (s *ReadService) retrieveSemanticCandidates(
+	ctx context.Context,
+	req memorymodel.RetrieveRequest,
+	effectiveSetting model.MemoryUserSetting,
+	limit int,
+	now time.Time,
+) ([]memorymodel.ItemDTO, error) {
+	queryText := strings.TrimSpace(req.Query)
+	if queryText == "" {
+		return nil, nil // no query text: there is nothing to search for semantically
+	}
+
+	candidateLimit := hybridSemanticTopK(s.cfg, limit)
+	if s.cfg.RAGEnabled && s.ragRuntime != nil {
+		items, err := s.retrieveSemanticCandidatesByRAG(ctx, req, effectiveSetting, candidateLimit, now)
+		if shouldReturnSemanticRAGResult(items, err) { // a RAG error or zero hits falls through to MySQL; the RAG error itself is dropped here
+			return items, nil
+		}
+	}
+	return s.retrieveSemanticCandidatesByMySQL(ctx, req, effectiveSetting, candidateLimit, now)
+}
+
+func (s *ReadService) retrieveSemanticCandidatesByRAG(
+	ctx context.Context,
+	req memorymodel.RetrieveRequest,
+	effectiveSetting model.MemoryUserSetting,
+	candidateLimit int,
+	now time.Time,
+) ([]memorymodel.ItemDTO, error) {
+	result, err := s.ragRuntime.RetrieveMemory(ctx, buildReadScopedRAGRequest(req, candidateLimit, s.cfg.Threshold))
+	if err != nil {
+		return nil, err
+	}
+	if result == nil || len(result.Items) == 0 {
+		return nil, nil
+	}
+
+	items := make([]memorymodel.ItemDTO, 0, len(result.Items))
+	for _, hit := range result.Items {
+		dto, memoryID := buildMemoryDTOFromRetrieveHit(hit)
+		if !effectiveSetting.ImplicitMemoryEnabled && !dto.IsExplicit { // implicit memories are disabled in the effective setting
+			continue
+		}
+		if !effectiveSetting.SensitiveMemoryEnabled && dto.SensitivityLevel > 0 { // sensitive memories are disabled in the effective setting
+			continue
+		}
+		if dto.ID <= 0 && memoryID > 0 { // backfill the ID parsed from the hit when the DTO carries none
+			dto.ID = memoryID
+		}
+		items = append(items, dto)
+	}
+	return items, nil
+}
+
+func (s *ReadService) retrieveSemanticCandidatesByMySQL(
+	ctx context.Context,
+	req memorymodel.RetrieveRequest,
+	effectiveSetting model.MemoryUserSetting,
+	candidateLimit int,
+	now time.Time,
+) ([]memorymodel.ItemDTO, error) {
+	query := buildReadScopedItemQuery(
+		req,
+		now,
+		[]string{model.MemoryItemStatusActive},
+		normalizeLimit(candidateLimit*3, candidateLimit*3, maxRetrieveLimit*3), // asks for 3x the candidate limit; presumably clamped to maxRetrieveLimit*3 by normalizeLimit — confirm
+	)
+
+	items, err := s.itemRepo.FindByQuery(ctx, query)
+	if err != nil {
+		return nil, err
+	}
+	items = memoryutils.FilterItemsBySetting(items, effectiveSetting)
+	return toItemDTOs(items), nil
+}
+
+// dedupByID removes duplicates by memory ID; when an ID repeats, the later occurrence replaces the earlier one.
+func dedupByID(items []memorymodel.ItemDTO) []memorymodel.ItemDTO {
+	if len(items) == 0 {
+		return nil
+	}
+
+	seen := make(map[int64]struct{}, len(items))
+	result := make([]memorymodel.ItemDTO, 0, len(items))
+	for i := len(items) - 1; i >= 0; i-- { // walk backwards so the last occurrence of each ID is the one kept
+		item := items[i]
+		if item.ID <= 0 { // items without a positive ID cannot be matched by ID and are always kept
+			result = append(result, item)
+			continue
+		}
+		if _, exists := seen[item.ID]; exists {
+			continue
+		}
+		seen[item.ID] = struct{}{}
+		result = append(result, item)
+	}
+	reverseItemDTOs(result) // restore the original relative order after the backward walk
+	return result
+}
+
+// dedupByHash dedups by content hash (stored, or recomputed by fallbackContentHash); items with an empty key are kept, and among duplicates the higher-importance one wins.
+func dedupByHash(items []memorymodel.ItemDTO) []memorymodel.ItemDTO {
+	return dedupByKey(items, func(item memorymodel.ItemDTO) string {
+		return fallbackContentHash(item.MemoryType, item.Content, item.ContentHash)
+	})
+}
+
+// dedupByText is the last-resort dedup keyed on "type label + normalized text", covering legacy data that carries no hash.
+func dedupByText(items []memorymodel.ItemDTO) []memorymodel.ItemDTO {
+	return dedupByKey(items, func(item memorymodel.ItemDTO) string {
+		text := strings.TrimSpace(item.Content)
+		if text == "" {
+			text = strings.TrimSpace(item.Title) // no content: fall back to the title as the comparison text
+		}
+		if text == "" {
+			return "" // nothing to compare on; an empty key makes dedupByKey keep the item
+		}
+		return renderMemoryTypeLabelForDedup(item.MemoryType) + "::" + normalizeContentForHash(text)
+	})
+}
+
+func dedupByKey(items []memorymodel.ItemDTO, keyBuilder func(item memorymodel.ItemDTO) string) []memorymodel.ItemDTO { // keeps one item per non-empty key, preserving input order
+	if len(items) == 0 {
+		return nil
+	}
+
+	selectedIndex := make(map[string]int, len(items)) // key -> index of the item chosen to represent that key
+	for index, item := range items {
+		key := strings.TrimSpace(keyBuilder(item))
+		if key == "" {
+			continue
+		}
+		if previous, exists := selectedIndex[key]; exists {
+			if preferCurrentItem(items[previous], item) {
+				selectedIndex[key] = index
+			}
+			continue
+		}
+		selectedIndex[key] = index
+	}
+
+	result := make([]memorymodel.ItemDTO, 0, len(items))
+	for index, item := range items { // second pass: emit the winners in their original order
+		key := strings.TrimSpace(keyBuilder(item))
+		if key == "" { // items without a key are never treated as duplicates
+			result = append(result, item)
+			continue
+		}
+		if selectedIndex[key] == index {
+			result = append(result, item)
+		}
+	}
+	return result
+}
+
+func preferCurrentItem(previous memorymodel.ItemDTO, current memorymodel.ItemDTO) bool { // reports whether current should replace previous as the kept duplicate
+	if current.Importance != previous.Importance {
+		return current.Importance > previous.Importance
+	}
+	if current.Confidence != previous.Confidence {
+		return current.Confidence > previous.Confidence
+	}
+	return true // equal importance and confidence: the later item wins
+}
+
+// applyTypeBudget applies the four per-type memory budgets to an already ranked list.
+//
+// Notes:
+// 1. Each type is capped at its own budget, so facts cannot take the slots reserved for constraints;
+// 2. Trimming keeps the incoming order; nothing is re-scored here;
+// 3. The overall size is bounded by the sum of the four budgets, 18 items with the default limits.
+func applyTypeBudget(items []memorymodel.ItemDTO, cfg memorymodel.Config) []memorymodel.ItemDTO {
+	if len(items) == 0 {
+		return nil
+	}
+
+	budgetByType := map[string]int{
+		memorymodel.MemoryTypeConstraint: cfg.EffectiveReadConstraintLimit(),
+		memorymodel.MemoryTypePreference: cfg.EffectiveReadPreferenceLimit(),
+		memorymodel.MemoryTypeFact:       cfg.EffectiveReadFactLimit(),
+		memorymodel.MemoryTypeTodoHint:   cfg.EffectiveReadTodoHintLimit(),
+	}
+	usedByType := make(map[string]int, len(budgetByType))
+	result := make([]memorymodel.ItemDTO, 0, minInt(len(items), cfg.TotalReadBudget()))
+	for _, item := range items {
+		if len(result) >= cfg.TotalReadBudget() {
+			break
+		}
+
+		memoryType := resolveBudgetMemoryType(item.MemoryType)
+		if usedByType[memoryType] >= budgetByType[memoryType] { // a type missing from budgetByType has budget 0 and is always skipped
+			continue
+		}
+		usedByType[memoryType]++
+		result = append(result, item)
+	}
+	return result
+}
+
+func hybridSemanticTopK(cfg memorymodel.Config, limit int) int { // returns the larger of the total read budget and the requested limit
+	if cfg.TotalReadBudget() > limit {
+		return cfg.TotalReadBudget()
+	}
+	return limit
+}
+
+func resolveBudgetMemoryType(memoryType string) string { // picks the budget bucket an item is counted against
+	normalized := memorymodel.NormalizeMemoryType(memoryType)
+	if normalized == "" {
+		return memorymodel.MemoryTypeFact // types that normalize to "" are budgeted as fact
+	}
+	return normalized
+}
+
+func renderMemoryTypeLabelForDedup(memoryType string) string { // maps a memory type to the label used as the prefix of the text-dedup key
+	switch memorymodel.NormalizeMemoryType(memoryType) {
+	case memorymodel.MemoryTypePreference:
+		return "偏好"
+	case memorymodel.MemoryTypeConstraint:
+		return "约束"
+	case memorymodel.MemoryTypeTodoHint:
+		return "待办线索"
+	case memorymodel.MemoryTypeFact:
+		return "事实"
+	default: // any other normalized value shares one generic label
+		return "记忆"
+	}
+}
+
+func collectItemDTOIDs(items []memorymodel.ItemDTO) []int64 { // gathers the positive IDs of items, in order
+	if len(items) == 0 {
+		return nil
+	}
+
+	ids := make([]int64, 0, len(items))
+	for _, item := range items {
+		if item.ID <= 0 { // skip items that carry no usable ID
+			continue
+		}
+		ids = append(ids, item.ID)
+	}
+	return ids
+}
+
+func reverseItemDTOs(items []memorymodel.ItemDTO) { // reverses items in place
+	for left, right := 0, len(items)-1; left < right; left, right = left+1, right-1 {
+		items[left], items[right] = items[right], items[left]
+	}
+}
+
+func minInt(left, right int) int { // returns the smaller of the two ints
+	if left < right {
+		return left
+	}
+	return right
+}
--- a/backend/memory/service/retrieve_rank.go
+++ b/backend/memory/service/retrieve_rank.go
@@ -0,0 +1,78 @@
+package service
+
+import (
+	"sort"
+	"time"
+
+	memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
+)
+
+// RankItems re-ranks retrieved items with one shared scoring rule and returns a new sorted slice.
+//
+// Steps:
+// 1. A base score is built from importance / confidence / recency, staying close to the ordering intuition of the old path;
+// 2. Bonuses for explicit memories and for type priority are added, so constraints and preferences rank near the top more reliably;
+// 3. Equal scores are ordered by ID descending, which keeps the order stable in logs and tests.
+func RankItems(items []memorymodel.ItemDTO, now time.Time) []memorymodel.ItemDTO {
+	if len(items) == 0 {
+		return nil
+	}
+
+	ranked := make([]memorymodel.ItemDTO, len(items))
+	copy(ranked, items) // sort a copy so the caller's slice keeps its order
+	sort.SliceStable(ranked, func(i, j int) bool {
+		left := scoreRankedItem(ranked[i], now)
+		right := scoreRankedItem(ranked[j], now)
+		if left == right {
+			return ranked[i].ID > ranked[j].ID
+		}
+		return left > right
+	})
+	return ranked
+}
+
+// scoreRankedItem computes the ranking score used by the hybrid read path.
+//
+// Notes:
+// 1. The score depends only on the item's own attributes; there is no conversation_id bonus;
+// 2. The reason is that same-conversation content is already in the context window, so read-side memory should focus on cross-conversation recall;
+// 3. Type weighting is kept so that the business priority of constraint / preference applies consistently.
+func scoreRankedItem(item memorymodel.ItemDTO, now time.Time) float64 {
+	score := 0.35*clamp01(item.Importance) + 0.3*clamp01(item.Confidence) + 0.2*recencyScoreDTO(item, now)
+	if item.IsExplicit {
+		score += 0.1
+	}
+	switch memorymodel.NormalizeMemoryType(item.MemoryType) { // type bonus: constraint > preference > todo_hint; other types get none
+	case memorymodel.MemoryTypeConstraint:
+		score += 0.15
+	case memorymodel.MemoryTypePreference:
+		score += 0.10
+	case memorymodel.MemoryTypeTodoHint:
+		score += 0.05
+	}
+	return score
+}
+
+func recencyScoreDTO(item memorymodel.ItemDTO, now time.Time) float64 { // maps the item's age to a stepped recency score
+	base := item.UpdatedAt
+	if base == nil {
+		base = item.CreatedAt // no update time: measure age from creation instead
+	}
+	if base == nil || now.Before(*base) {
+		return 0.5 // no timestamp at all, or a timestamp later than now: use a middle score
+	}
+
+	age := now.Sub(*base)
+	switch {
+	case age <= 24*time.Hour:
+		return 1
+	case age <= 7*24*time.Hour:
+		return 0.85
+	case age <= 30*24*time.Hour:
+		return 0.65
+	case age <= 90*24*time.Hour:
+		return 0.45
+	default:
+		return 0.25
+	}
+}
--- a/backend/memory/第三步治理与观测落地计划.md
+++ b/backend/memory/第三步治理与观测落地计划.md
@@ -0,0 +1,521 @@
+# Memory 第三步治理与观测落地计划
+
+## 1. 这份文档解决什么问题
+
+这份文档只回答第三步要做什么，不再重复前两步已经完成的抽取、决策、召回细节。
+
+第三步的目标很简单：
+
+1. 把 memory 从“能跑”升级成“敢灰度、敢排障、敢清理、敢回滚”。
+2. 把“日志打在哪里、我怎么看、会不会给接口”说清楚。
+3. 把改动范围收敛在治理层，不再继续扩算法和能力边界。
+
+一句人话总结：
+
+前两步解决的是“有没有能力”，第三步解决的是“出了问题怎么查、怎么收、怎么退”。
+
+---
+
+## 2. 先说结论
+
+第三步我会分成两块做：
+
+1. 观测与切流
+2. 用户管理与清理
+
+为什么这么拆：
+
+1. 现在第二步最小闭环已经通了，最怕的不是“能力不够多”，而是“出了问题不知道卡在哪一层”。
+2. 如果没有统一日志、指标和开关，后面再继续加功能，只会让 memory 变成一个越来越难维护的黑箱。
+3. 历史重复脏数据不先治理，后面读链路和注入链路的数据噪音会越来越重。
+
+第三步不追求“更聪明”，追求“更稳、更可控”。
+
+---
+
+## 3. 你最关心的三个问题
+
+## 3.1 日志会打在哪里
+
+第三步不会把所有信息都塞进一个地方，而是分三层：
+
+### A. 运行日志
+
+运行日志打到后端服务本身的标准日志，也就是当前 `backend` 进程控制台 / 容器 stdout。
+
+这层主要看实时链路，适合排查：
+
+1. 这次写入为什么是 `ADD / UPDATE / DELETE / NONE`
+2. 这次召回为什么没命中
+3. 这次注入为什么降级到 `flat` 或 `legacy`
+4. 这次 worker 为什么走了 fallback
+
+这层的形态参考当前 RAG 轻量 Observer 的做法，不单独造一套散装日志方案。
+
+参考文件：
+
+1. `backend/cmd/start.go`
+2. `backend/infra/rag/core/observer.go`
+
+### B. 变更留痕
+
+变更留痕继续落库，不只打终端。
+
+当前已经有：
+
+1. `memory_audit_logs` 表
+2. `backend/model/memory.go`
+3. `backend/memory/repo/audit_repo.go`
+
+这层主要看“已经发生过的变更事实”，适合研发排查和后端自查：
+
+1. 哪条记忆被删了
+2. 删之前和删之后内容是什么
+3. 这次 dedup 清理保留了哪条，归档了哪条
+4. 某次 update / delete / restore 是谁触发的，原因是什么
+
+### C. 汇总指标
+
+第一版不先上完整 Prometheus / Grafana 平台，而是先把关键指标打稳，再视需要接统一观测平台。
+
+这层主要看趋势和健康度，适合回答：
+
+1. 最近写入成功率怎么样
+2. hybrid 召回到底有没有提升
+3. 去重到底丢了多少垃圾数据
+4. 是否频繁回滚到 legacy
+
+---
+
+## 3.2 我会怎么看
+
+开发和联调阶段，推荐分两种看法：
+
+### 看实时问题
+
+直接看后端运行日志。
+
+适合看：
+
+1. 单次请求链路
+2. 单次 worker 执行过程
+3. fallback / 降级 / 回滚是否发生
+
+### 看历史问题
+
+直接查数据库留痕表和主表。
+
+适合看：
+
+1. 某条 memory 历史上被怎么改过
+2. 某次清理动作具体处理了哪些记录
+3. 当前 active / archived / deleted 分布
+
+建议排查时优先查这几张表：
+
+1. `memory_jobs`
+2. `memory_items`
+3. `memory_audit_logs`
+
+第一版就够用了，不强依赖前端页面才能排查。
+
+---
+
+## 3.3 会不会提供接口
+
+会，但原则上只补“面向当前用户管理自己记忆”的接口，不补“原始运行日志接口”，也不把 `memory` 先做成全项目唯一完整的审计后台。
+
+原因很直接：
+
+1. 原始日志噪音很大，不适合直接给前端看。
+2. 原始日志字段会迭代，直接对外暴露会把内部实现绑死。
+3. 原始日志可能带内部 trace、错误细节，不适合直接外露。
+
+所以第三步对外提供的是“用户管理自己记忆”的接口，不是“把 stdout 原样吐给前端”，也不是“先给 memory 单独造一套管理后台接口”。
+
+第三步建议优先补这几类用户接口：
+
+### 第一组：当前用户查看自己的记忆
+
+1. `GET /api/v1/memory/items`
+   - 分页查看“我自己的记忆”
+2. `GET /api/v1/memory/items/:id`
+   - 查看“我自己的某条记忆”详情
+
+### 第二组：当前用户主动维护自己的记忆
+
+1. `POST /api/v1/memory/items`
+   - 手动新增一条记忆
+2. `PATCH /api/v1/memory/items/:id`
+   - 修改自己的一条记忆
+3. `DELETE /api/v1/memory/items/:id`
+   - 删除自己的一条记忆
+
+### 第三组：当前用户恢复误删内容
+
+1. `POST /api/v1/memory/items/:id/restore`
+   - 若底层采用软删或归档，可补恢复动作
+
+这些接口都默认只允许操作“当前登录用户自己的记忆”，不支持跨用户查询和跨用户修改。
+
+原则：
+
+1. 原始日志看后端 stdout
+2. 内部变更留痕优先给后端查表和排障使用，不急着做成前端正式能力
+3. 对外先开放用户真正会用到的“我的记忆”增删改查
+
+---
+
+## 4. 第三步到底要做什么
+
+## 4.1 观测与切流
+
+这是第三步的第一优先级。
+
+### 要做的事
+
+1. 给写入决策链路补统一结构化日志
+2. 给读侧召回链路补统一结构化日志
+3. 给注入渲染链路补统一结构化日志
+4. 给上述三条链路补关键计数指标
+5. 把现有配置字段整理成清晰的切流顺序和回滚手册
+
+### 为什么先做这个
+
+因为第三步如果先做 dedup 清理，但没有日志和切流能力，一旦清错了，排查成本会很高。
+
+---
+
+## 4.2 用户管理与清理
+
+这是第三步的第二优先级。
+
+### 要做的事
+
+1. 给“我的记忆”补完整增删改查语义
+2. 给历史重复数据补离线 dedup 工具
+3. 给关键变更动作补最小留痕
+4. 把 dedup 保持在后端内部治理流程，不急着做成前端接口
+
+### 为什么不一上来绑主 worker
+
+因为第一版 dedup 的目标是“可留痕、可回滚”，不是“全自动”，也不是先给 `memory` 单独造一个很重的治理后台。
+
+离线或手动触发更安全，出问题也更容易止血。
+
+---
+
+## 5. 具体改动计划
+
+## 5.1 第一轮：先把观测底座补起来
+
+### 目标
+
+先让系统“可看见”。
+
+### 预计改动
+
+新增：
+
+1. `backend/memory/observe/log_fields.go`
+
+修改：
+
+1. `backend/memory/worker/decision_flow.go`
+2. `backend/memory/worker/apply_actions.go`
+3. `backend/memory/service/read_service.go`
+4. `backend/memory/service/retrieve_merge.go`
+5. `backend/service/agentsvc/agent_memory.go`
+6. `backend/service/agentsvc/agent_memory_render.go`
+
+### 这一轮会补什么日志
+
+#### 写入决策日志
+
+至少记录这些字段：
+
+1. `trace_id`
+2. `user_id`
+3. `conversation_id`
+4. `job_id`
+5. `fact_type`
+6. `candidate_count`
+7. `final_action`
+8. `fallback_mode`
+9. `success`
+
+#### 读侧召回日志
+
+至少记录这些字段：
+
+1. `trace_id`
+2. `user_id`
+3. `read_mode`
+4. `query_len`
+5. `legacy_hit_count`
+6. `semantic_hit_count`
+7. `dedup_drop_count`
+8. `final_count`
+9. `degraded`
+
+#### 注入渲染日志
+
+至少记录这些字段：
+
+1. `trace_id`
+2. `user_id`
+3. `inject_mode`
+4. `input_count`
+5. `rendered_count`
+6. `token_budget`
+7. `fallback`
+
+---
+
+## 5.2 第二轮：补指标，不急着开 overview 接口
+
+### 目标
+
+先让系统“可量化”。
+
+### 第一版建议补的指标
+
+优先补这 8 个：
+
+1. `memory_job_success_rate`
+2. `memory_job_retry_rate`
+3. `memory_decision_distribution`
+4. `memory_decision_fallback_rate`
+5. `memory_retrieve_hit_count`
+6. `memory_retrieve_dedup_drop_count`
+7. `memory_inject_item_count`
+8. `memory_rag_fallback_rate`
+
+暂不强求第一版就补：
+
+1. `memory_wrong_mention_rate`
+2. `memory_user_correction_rate`
+
+因为这两个更依赖后续“用户纠错入口”。
+
+### 这一轮先不做什么
+
+这一轮先不单独新增 `GET /api/v1/memory/overview`。
+
+原因不是这个接口没价值，而是现在别的模块还没有统一的观测面板和汇总接口规范。`memory` 这一轮先把指标打稳，后续如果全项目一起做观测面板，再统一收口更对称。
+
+也就是说，这一轮优先把“数据先有”做出来，不急着把“看板接口先长出来”。
+
+---
+
+## 5.3 第三轮：补用户管理动作
+
+### 目标
+
+先让用户“能管理自己的记忆”。
+
+### 预计改动
+
+修改：
+
+1. `backend/memory/service/manage_service.go`
+2. `backend/memory/repo/item_repo.go`
+3. `backend/memory/utils/audit.go`
+4. `backend/memory/module.go`
+
+新增：
+
+1. `backend/api/memory.go`
+2. 路由注册文件中的 memory 接线
+
+### 要补的动作
+
+1. `list`
+2. `detail`
+3. `create`
+4. `update`
+5. `delete`
+6. 若底层保留软删语义，再补 `restore`
+
+### 接口建议
+
+新增：
+
+1. `GET /api/v1/memory/items`
+2. `GET /api/v1/memory/items/:id`
+3. `POST /api/v1/memory/items`
+4. `PATCH /api/v1/memory/items/:id`
+5. `DELETE /api/v1/memory/items/:id`
+6. `POST /api/v1/memory/items/:id/restore`
+   - 仅在底层采用软删或归档方案时开放
+
+### 设计要求
+
+1. 所有接口默认只作用于“当前登录用户自己的记忆”
+2. 后端仍保留最小变更留痕，但不把它包装成用户侧“审计接口”
+3. 接口返回给前端的是“人能看懂的记忆内容和操作结果”，不是底层日志
+
+---
+
+## 5.4 第四轮：做离线 dedup 治理
+
+### 目标
+
+先让系统“可清理”。
+
+### 预计新增
+
+1. `backend/memory/cleanup/dedup_runner.go`
+2. `backend/memory/cleanup/dedup_policy.go`
+
+### 第一版治理规则
+
+按以下维度扫描重复组：
+
+1. `user_id`
+2. `memory_type`
+3. `content_hash`
+4. `status = active`
+
+每组处理规则：
+
+1. 选一条主记录保留
+2. 优先保留最近更新的
+3. 若最近更新时间接近，则优先保留置信度更高的
+4. 其余记录改为 `archived`
+5. 每次治理动作都写最小变更留痕
+
+### 接口建议
+
+这一轮不对外新增 dedup 接口。
+
+dedup 先保留为后端内部治理能力，必要时通过离线任务、后台命令或内部 job 触发，避免 `memory` 先演化成一个比其他模块更重的专用治理后台。
+
+### 明确限制
+
+第一版不做：
+
+1. 直接危险 SQL 清表
+2. 自动定时常驻清理
+3. 无留痕的批量删除
+
+---
+
+## 6. 日志、留痕、接口分别给谁看
+
+这个地方一定要分清，不然第三步会越做越乱。
+
+### 运行日志
+
+给研发和排障看。
+
+特点：
+
+1. 实时
+2. 噪音大
+3. 字段多
+4. 不直接给前端
+
+### 变更留痕
+
+先给研发和后端排障使用。
+
+特点：
+
+1. 是持久化结果
+2. 适合看历史
+3. 这一轮不急着做成正式用户接口
+
+### 用户接口
+
+给用户和前端页面看。
+
+特点：
+
+1. 只暴露“我的记忆”内容和操作结果
+2. 不暴露内部 raw log
+3. 不承载平台级观测职责
+
+---
+
+## 7. 切流顺序
+
+第三步不允许一刀切。
+
+建议严格按下面顺序灰度：
+
+1. 阶段 A：决策层 shadow
+   - 真正写库仍然走 `legacy`
+   - 新决策层只记日志，不生效
+2. 阶段 B：决策层仅对显式记忆生效
+3. 阶段 C：决策层对全部写入生效
+4. 阶段 D：读侧切到 `hybrid`
+5. 阶段 E：注入切到 `typed_v2`
+6. 阶段 F：历史清理跑完，再考虑关闭 `legacy` 默认路径
+
+这里的配置基础已经存在，关键是把切流顺序写清、用清、能回退。
+
+参考文件：
+
+1. `backend/memory/model/config.go`
+2. `backend/memory/service/config_loader.go`
+
+---
+
+## 8. 回滚方案
+
+第三步的回滚不应影响前两步代码保留，只回切开关。
+
+### 最小回滚动作
+
+1. 写侧回到 `legacy`
+2. 读侧回到 `legacy`
+3. 注入回到 `flat`
+4. 停掉 dedup 清理任务
+
+### 回滚原则
+
+1. 先停治理动作，再回切主路径
+2. 不做破坏性 schema 回滚
+3. 不依赖人工热修逻辑判断
+
+---
+
+## 9. 第三步明确不做什么
+
+为了防止范围失控，这一轮明确不做：
+
+1. 不做图记忆
+2. 不做多 Provider 工厂化
+3. 不拆独立 memory 服务
+4. 不在这一轮给 `memory` 先单独做完整审计后台
+5. 不把 WebSearch 和 Memory 强行合并成一轮上线
+6. 不再扩新的召回算法分支
+
+---
+
+## 10. 完成标准
+
+满足以下条件，算第三步完成：
+
+1. 能从日志看清某条记忆为什么被判成 `ADD / UPDATE / DELETE / NONE`
+2. 能从指标看清召回命中、去重、降级、回滚情况
+3. 用户能通过接口管理自己的记忆
+4. 能对历史重复数据做可留痕清理
+5. 出异常时能通过开关在分钟级切回 `legacy`
+6. 文档和代码现状一致，不再靠口头传递
+
+---
+
+## 11. 如果只看一页，请看这个执行顺序
+
+第三步不要散着做，建议按这个顺序推进：
+
+1. 先补统一日志字段和结构化日志
+2. 再补指标，把观测数据打稳
+3. 再补“我的记忆”增删改查能力
+4. 最后做离线 dedup 和内部清理能力
+
+一句人话总结：
+
+先让系统“看得见”，再让系统“能管理”，最后再让系统“敢清理”。
--- a/backend/memory/记忆模块第二步计划.md
+++ b/backend/memory/记忆模块第二步计划.md
@@ -0,0 +1,363 @@
+# 第二步执行计划：读取与注入层升级
+
+## Context
+
+第一步（写入决策层）已完成，写侧已有"召回 → 比对 → ADD/UPDATE/DELETE/NONE"能力。
+但读侧仍是"查到就拼"，存在四个问题：
+
+1. RAG 和 legacy **互斥**，无法做到"MySQL 强约束 + RAG 语义补充"双路合并
+2. 去重仅 `seen[line]` 字符串级，无 `memory_id` / `content_hash` 级去重
+3. 所有类型平铺、limit=5 一刀切，constraint 可被 fact 挤掉
+4. `memory_context` 虽已写入 `PinnedBlocks`，但 Execute 阶段走自定义 `msg0~msg3` 骨架，当前并未消费这块内容
+
+---
+
+## 当前数据流（legacy）
+
+```
+用户发消息
+  │
+  ▼
+agent_newagent.go:114  injectMemoryContext()
+  │  调用 MemoryReader.Retrieve()
+  │  入参: userID, chatID, query=userMessage, limit=5
+  ▼
+ReadService.Retrieve()                          ← read_service.go:51
+  │  门控: 用户设置检查
+  │  分支: RAG成功→走RAG / 否则→走legacy
+  │  两路互斥，只走一条
+  ▼
+  ├── retrieveByRAG()                            ← read_service.go:132
+  │     ragRuntime.RetrieveMemory() → []RetrieveHit
+  │     转为 []ItemDTO, 用户设置过滤, 截断到 limit
+  │
+  └── retrieveByLegacy()                         ← read_service.go:84
+        itemRepo.FindByQuery(limit*3) → []MemoryItem
+        用户设置过滤 → scoreRetrievedItem排序 → 截断到 limit
+        toItemDTOs() 转换, TouchLastAccessAt
+  │
+  ▼ 返回 []ItemDTO（最多5条，无类型预算，无服务级去重）
+  │
+renderMemoryPinnedContent()                     ← agent_memory.go:105
+  │  遍历 items, 对每条生成 "[类型] 内容"
+  │  seen[line] 字符串级弱去重
+  ▼
+拼接为一段纯文本 → ConversationContext.UpsertPinnedBlock(key="memory_context")
+  │
+  ├── base.go:55  renderPinnedBlocks()
+  │     把所有 pinned blocks 拼成 system message
+  │     Chat / Plan / Deliver 等走通用 buildStageMessages 的节点可自动消费
+  │
+  └── execute_context.go:52  buildExecuteStageMessages()
+        Execute 走自定义 msg0~msg3 骨架
+        当前未渲染 memory_context，等价于 Execute 看不到这段记忆
+```
+
+---
+
+## 目标数据流（hybrid）
+
+```
+用户发消息
+  │
+  ▼
+agent_newagent.go:114  injectMemoryContext()     ← 不改触发点，改内部链路
+  │  调用 MemoryReader.Retrieve()
+  ▼
+ReadService.Retrieve()                            ← read_service.go
+  │  门控: 用户设置检查（不变）
+  │  分支: cfg.ReadMode == "hybrid" → 走新链路
+  │        否则 → 走旧链路（完全不变）
+  ▼ ══════════════════════════════════════════════
+ HybridRetrieve()                                 ← 新文件 retrieve_merge.go
+ ║                                                ← 整个混合链路收口在一个函数里
+ ║  ┌─────────────────────────────────────────┐
+ ║  │ 第一路：结构化强约束召回                    │
+ ║  │                                         │
+ ║  │ ItemRepo.FindPinnedByUser()             │ ← 新方法 item_repo.go
+ ║  │   → constraint: status=active, 全取     │
+ ║  │   → preference: confidence>=0.8,        │
+ ║  │     按 importance 降序取 limit 条        │
+ ║  │ 合并 → []MemoryItem → toItemDTOs()       │
+ ║  │ 结果 A                                   │
+ ║  └─────────────────────────────────────────┘
+ ║                    ↓
+ ║  ┌─────────────────────────────────────────┐
+ ║  │ 第二路：语义候选召回                       │
+ ║  │                                         │
+ ║  │ RAG 可用?                                │
+ ║  │   是 → ragRuntime.RetrieveMemory()       │ ← 复用现有 RAG 链路
+ ║  │        → []RetrieveHit                   │
+ ║  │        → buildMemoryDTOFromRetrieveHit() │ ← 复用 read_service.go 已有函数
+ ║  │        → 用户设置过滤                     │
+ ║  │   否 → itemRepo.FindByQuery()            │ ← 复用现有 FindByQuery
+ ║  │        → toItemDTOs()                    │
+ ║  │        → 用户设置过滤                     │
+ ║  │ 结果 B                                   │
+ ║  └─────────────────────────────────────────┘
+ ║                    ↓
+ ║          合并 A + B → []ItemDTO
+ ║                    ↓
+ ║  ┌─────────────────────────────────────────┐
+ ║  │ 三级去重                                  │
+ ║  │                                         │
+ ║  │ 1. dedupByID    — 按 memory_id 去重     │ ← 同 ID 只保留一条
+ ║  │                 后出现的覆盖先出现的      │
+ ║  │ 2. dedupByHash  — 按 content_hash 去重  │ ← 复用 HashContent 算法
+ ║  │                 hash 为空的跳过          │   (normalize_facts.go)
+ ║  │                 保留 importance 更高的    │
+ ║  │ 3. dedupByText  — 按渲染文本兜底去重     │ ← hash 缺失/空值兜底
+ ║  │                 用 localizeMemoryType +  │
+ ║  │                 Content 生成 key         │
+ ║  └─────────────────────────────────────────┘
+ ║                    ↓
+ ║  ┌─────────────────────────────────────────┐
+ ║  │ 排序                                      │
+ ║  │                                         │
+ ║  │ RankItems()                              │ ← 新文件 retrieve_rank.go
+ ║  │   类型优先级权重叠加原加权分:             │
+ ║  │   constraint +0.15                      │
+ ║  │   preference +0.10                      │
+ ║  │   todo_hint  +0.05                      │
+ ║  │   fact       +0                         │
+ ║  │   + 原 0.35*importance + 0.3*confidence │
+ ║  │   + 0.2*recency + 0.1*explicit          │
+ ║  │   + 0.08*同会话加分                     │
+ ║  │   同分按 ID 降序                         │
+ ║  └─────────────────────────────────────────┘
+ ║                    ↓
+ ║  ┌─────────────────────────────────────────┐
+ ║  │ 类型预算裁剪                              │
+ ║  │                                         │
+ ║  │ applyTypeBudget()                        │
+ ║  │   constraint:  最多 ConstraintLimit 条   │ ← 默认 5
+ ║  │   preference:  最多 PreferenceLimit 条   │ ← 默认 5
+ ║  │   todo_hint:   最多 TodoHintLimit 条     │ ← 默认 3
+ ║  │   fact:        最多 FactLimit 条         │ ← 默认 5
+ ║  │   类型内部保持 RankItems 排序结果        │
+ ║  │   总计最多 18 条（仍受 Execute 上下文预算约束）│
+ ║  └─────────────────────────────────────────┘
+ ║                    ↓
+ ║           返回 []ItemDTO（去重、排序、预算裁剪后的最终结果）
+ ══════════════════════════════════════════════
+  │
+  ▼ 返回到 injectMemoryContext()
+  │
+  │ cfg.InjectRenderMode == "typed_v2" ?
+  │
+  ├── typed_v2 → RenderTypedMemoryContent()    ← 新文件 agent_memory_render.go
+  │     按类型分组渲染:
+  │     ┌──────────────────────────────────┐
+  │     │ 以下是与当前对话相关的用户记忆，     │
+  │     │ 仅在确实有帮助时参考，不要机械复述。 │
+  │     │                                  │
+  │     │ 【必守约束】                       │
+  │     │ - 用户点外卖不要香菜。              │
+  │     │                                  │
+  │     │ 【用户偏好】                       │
+  │     │ - 用户偏爱黑咖啡。                 │
+  │     │                                  │
+  │     │ 【当前话题相关事实】                │
+  │     │ - 用户最近在准备周四的程序设计作业。  │
+  │     │                                  │
+  │     │ 【近期待办】                       │
+  │     │ - 周五前交英语作文。               │
+  │     └──────────────────────────────────┘
+  │     规则: 空段不输出, 段内 "- " 前缀
+  │
+  └── flat → RenderFlatMemoryContent()         ← 新文件 agent_memory_render.go
+        从 agent_memory.go 迁入现有 renderMemoryPinnedContent 逻辑，不变
+  │
+  ▼ 拼接为纯文本
+  │
+ConversationContext.UpsertPinnedBlock(key="memory_context")
+  │
+  ├── 通用阶段 → base.go:55  renderPinnedBlocks()        ← 不改
+  │     把所有 pinned blocks 拼成 system message
+  │     Chat / Plan / Deliver 等走通用组装的节点自动消费 memory_context
+  │
+  └── Execute 阶段 → buildExecuteMessage3()              ← 修改 execute_context.go
+        renderExecuteMemoryContext(ctx)                  ← 新文件 execute_pinned.go
+          → 只白名单读取 key="memory_context"
+          → 以“相关记忆”补充段拼入 msg3
+          → 不复用通用 renderPinnedBlocks，避免 execution_context/current_step 等块重复注入
+```
+
+---
+
+## 每个阶段对应的代码改动
+
+### 阶段 0：前置准备（配置 + DTO 补齐）
+
+改造开始前，先让配置和 DTO 能支撑后续链路。
+
+**改动 1：Config 新增读侧配置字段**
+- 文件：`backend/memory/model/config.go`
+- 新增 6 个字段：`ReadMode` / `ReadConstraintLimit` / `ReadPreferenceLimit` / `ReadFactLimit` / `ReadTodoHintLimit` / `InjectRenderMode`
+
+**改动 2：ConfigLoader 读取 + 默认值**
+- 文件：`backend/memory/service/config_loader.go`
+- 读取上述 6 个 viper key，默认值：ReadMode="legacy", ConstraintLimit=5, PreferenceLimit=5, FactLimit=5, TodoHintLimit=3, RenderMode="flat"
+
+**改动 3：ItemDTO 补齐 ContentHash**
+- 文件：`backend/memory/model/item.go` — ItemDTO 新增 `ContentHash string`
+- 文件：`backend/memory/service/common.go` — `toItemDTO` 补映射 `ContentHash: strValue(item.ContentHash)`
+- 原因：去重阶段需要 content_hash，当前 ItemDTO 没有这个字段
+
+### 阶段 1：第一路 — 结构化强约束召回
+
+**改动 4：ItemRepo 新增 FindPinnedByUser**
+- 文件：`backend/memory/repo/item_repo.go`
+- 两次查询合并：
+  - 查 1：`memory_type=constraint AND status=active AND user_id=? AND (未过期)`
+  - 查 2：`memory_type=preference AND confidence>=0.8 AND status=active AND user_id=? AND (未过期)` 按 importance DESC LIMIT preferenceLimit
+- 合并返回，约束在前偏好在后
+- 复用已有的 `applyScopedEquality` 模式构建 WHERE
+
+### 阶段 2：第二路 — 语义候选召回
+
+**无新文件**。直接在 HybridRetrieve 内部实现：
+- RAG 可用：调 `ragRuntime.RetrieveMemory()` → 复用 `buildMemoryDTOFromRetrieveHit()` 转 DTO
+- RAG 不可用：调 `itemRepo.FindByQuery()` → 复用 `toItemDTOs()` 转 DTO
+- 两路复用现有函数，不重写
+
+### 阶段 3：三级去重
+
+**新增文件：`backend/memory/service/retrieve_merge.go`**
+
+三个纯函数，输入 `[]ItemDTO` 输出 `[]ItemDTO`：
+
+1. `dedupByID` — map[int64]ItemDTO，后出现的覆盖先出现的
+2. `dedupByHash` — map[string]ItemDTO，保留 importance 更高的；hash 为空跳过
+3. `dedupByText` — map[string]ItemDTO，用 `localizeMemoryType + Content` 生成 key
+
+复用：`HashContent` 算法（来自 `normalize_facts.go`，已导出）
+
+### 阶段 4：排序
+
+**新增文件：`backend/memory/service/retrieve_rank.go`**
+
+- `RankItems(items, now, conversationID)` — 在原 `scoreRetrievedItem` 基础上叠加类型优先级权重
+- 原 `scoreRetrievedItem` 保留给 legacy 路径，不删除
+
+### 阶段 5：类型预算裁剪
+
+**同文件：`backend/memory/service/retrieve_merge.go`**
+
+- `applyTypeBudget(items, cfg)` — 按 4 个类型 limit 截断，类型内部保持排序结果
+
+### 阶段 6：ReadService 接入
+
+**改动 5：ReadService.Retrieve 新增 hybrid 分支**
+- 文件：`backend/memory/service/read_service.go`
+- 改动极小：在现有 Retrieve 方法中，门控通过后、limit 计算后，加一个 `if cfg.ReadMode == "hybrid"` 分支调 HybridRetrieve
+- 旧路径（RAG 优先 → legacy 兜底）完全不动
+
+### 阶段 7：渲染
+
+**新增文件：`backend/service/agentsvc/agent_memory_render.go`**
+
+- `RenderTypedMemoryContent(items)` — 按类型分组渲染，空段不输出
+- `RenderFlatMemoryContent(items)` — 迁入现有 `renderMemoryPinnedContent` 逻辑
+- 产物仍统一收口为 `ConversationContext.PinnedBlock(key="memory_context")`，后续 Execute 只消费这块内容，不再重复维护第二套 memory 渲染逻辑
+
+### 阶段 8：Execute 记忆消费补齐
+
+**新增文件：`backend/newAgent/prompt/execute_pinned.go`**
+- 新增 `renderExecuteMemoryContext(ctx)`：只白名单读取 `memory_context` 这一个 pinned block
+- 输出定位：作为 Execute `msg3` 的补充段，不进入 `msg1/msg2`，避免污染历史归档与 ReAct 窗口
+- 设计约束：**不**直接复用通用 `renderPinnedBlocks()`，避免 `execution_context` / `current_step` / `rough_build_done` 等 Execute 自有 pinned block 重复注入
+
+**改动 6：`execute_context.go` 接入 memory_context**
+- 文件：`backend/newAgent/prompt/execute_context.go`
+- 在 `buildExecuteMessage3()` 中拼接 `renderExecuteMemoryContext(ctx)` 的结果
+- 空记忆不输出；只追加“相关记忆”段，不改动 `msg0/msg1/msg2` 既有职责
+
+### 阶段 9：注入入口切换
+
+**改动 7：agent_memory.go 接入 renderMode**
+- 文件：`backend/service/agentsvc/agent.go` — AgentService 新增 `memoryCfg memorymodel.Config` 字段
+- 文件：`backend/service/agentsvc/agent_memory.go` — `SetMemoryReader` 签名增加 cfg 参数；`injectMemoryContext` 根据 cfg.InjectRenderMode 选渲染函数
+
+**改动 8：启动层传参**
+- 文件：`backend/cmd/start.go` — `SetMemoryReader(memoryModule)` → `SetMemoryReader(memoryModule, memoryCfg)`
+- memoryCfg 在同函数第 78 行已定义，无需额外引入
+
+---
+
+## 文件变更汇总
+
+| 文件 | 操作 | 对应阶段 |
+|---|---|---|
+| `backend/memory/model/config.go` | 修改 | 阶段 0 |
+| `backend/memory/service/config_loader.go` | 修改 | 阶段 0 |
+| `backend/memory/model/item.go` | 修改 | 阶段 0 |
+| `backend/memory/service/common.go` | 修改 | 阶段 0 |
+| `backend/memory/repo/item_repo.go` | 修改 | 阶段 1 |
+| `backend/memory/service/retrieve_merge.go` | **新增** | 阶段 2 + 3 + 5 |
+| `backend/memory/service/retrieve_rank.go` | **新增** | 阶段 4 |
+| `backend/memory/service/read_service.go` | 修改 | 阶段 6 |
+| `backend/service/agentsvc/agent_memory_render.go` | **新增** | 阶段 7 |
+| `backend/newAgent/prompt/execute_pinned.go` | **新增** | 阶段 8 |
+| `backend/newAgent/prompt/execute_context.go` | 修改 | 阶段 8 |
+| `backend/service/agentsvc/agent.go` | 修改 | 阶段 9 |
+| `backend/service/agentsvc/agent_memory.go` | 修改 | 阶段 9 |
+| `backend/cmd/start.go` | 修改 | 阶段 9 |
+
+---
+
+## 实施顺序（严格依赖链）
+
+```
+阶段 0（前置）:  config.go → config_loader.go → item.go + common.go
+    ↓
+阶段 1（Repo）:  item_repo.go (FindPinnedByUser)
+    ↓
+阶段 3+4（去重+排序）: retrieve_merge.go（去重函数）+ retrieve_rank.go（可并行）
+    ↓
+阶段 2+5（语义召回 + 预算）:  retrieve_merge.go（HybridRetrieve 入口，内含第二路语义召回 + applyTypeBudget）
+    ↓                   ↑ 合并阶段 1~5 为完整 HybridRetrieve 函数
+阶段 6（接入）:  read_service.go（hybrid 分支）
+    ↓
+阶段 7（渲染）:  agent_memory_render.go（可和阶段 6 并行）
+    ↓
+阶段 8（Execute 消费）: execute_pinned.go + execute_context.go
+    ↓
+阶段 9（集成）:  agent.go + agent_memory.go + start.go
+```
+
+---
+
+## 回滚策略
+
+全部配置开关回滚，不改代码：
+
+| 配置 | 回滚值 | 效果 |
+|---|---|---|
+| `memory.read.mode` | `legacy` | 读侧回到当前行为 |
+| `memory.inject.renderMode` | `flat` | 注入渲染回到当前行为 |
+
+---
+
+## 验证方式
+
+1. **默认启动不变**：不配置任何新参数，系统行为与当前完全一致
+2. **hybrid 双路召回**：设 `memory.read.mode=hybrid`，日志确认两路召回 + 合并 + 去重生效
+3. **constraint 优先**：写入 5 条 fact + 2 条 constraint，确认 constraint 不被挤出
+4. **去重生效**：同一用户多条同义记忆，注入只保留一条
+5. **RAG 降级**：关 Milvus，hybrid 模式仍通过 MySQL fallback 正常工作
+6. **typed_v2 渲染**：设 `memory.inject.renderMode=typed_v2`，pinned block 按段输出
+7. **Execute 可见记忆**：进入 Execute 节点时，送入 LLM 的 `msg3` 含“相关记忆”段，且内容来自 `memory_context`
+8. **Execute 无重复注入**：`execution_context` / `current_step` 等 Execute 自有 pinned block 不因 memory 接入被重复渲染
+9. **单元测试**：对去重/预算/排序/渲染 / Execute 记忆桥接编写测试，跑完删除
+
+---
+
+## 本轮明确不做
+
+1. 不把 memory 改造成工具调用
+2. 不改 newAgent 的图路由结构
+3. 不把 WebSearch 并进统一召回
+4. 不清理历史重复脏数据
+5. 不动写入决策层代码
+6. 不让 Execute 无差别复用通用 `renderPinnedBlocks()`，避免把全部 pinned block 一股脑塞进 `msg3`
--- a/backend/newAgent/node/chat.go
+++ b/backend/newAgent/node/chat.go
@@ -217,7 +217,7 @@ func streamAndDispatch(
 		)

 		flowState.AllowReorder = resolveAllowReorder(input.UserInput, decision.AllowReorder)
-		effectiveThinking := resolveEffectiveThinking(flowState.ThinkingMode, decision.Thinking)
+		effectiveThinking := resolveEffectiveThinking(flowState.ThinkingMode, decision.Route, decision.Thinking)

 		switch decision.Route {
 		case newagentmodel.ChatRouteDirectReply:
@@ -243,16 +243,22 @@ func streamAndDispatch(
 // resolveEffectiveThinking 根据前端 ThinkingMode 和路由决策合并出最终 thinking 状态。
 //
 // 规则：
-// - "true" 强制开启；
-// - "false" 强制关闭；
-// - "auto"/"" 交给路由决策的 decisionThinking。
-func resolveEffectiveThinking(mode string, decisionThinking bool) bool {
+// 1. "true" (after trimming and lower-casing mode): forced on by the frontend, for every route;
+// 2. "false": forced off by the frontend, for every route;
+// 3. "auto"/"" (and any other value): decided by the route;
+// 3.1 deep_answer always enables thinking, since the route itself means "complex answer with in-place deep thinking";
+// 3.2 every other route returns the router's decisionThinking unchanged;
+// 3.3 NOTE(review): the stated intent is "execute follows decisionThinking, remaining routes default to off", but the code below has no branch that forces non-execute routes off — confirm whether that branch is missing or the router never sets thinking for them.
+func resolveEffectiveThinking(mode string, route newagentmodel.ChatRoute, decisionThinking bool) bool {
 	switch strings.TrimSpace(strings.ToLower(mode)) {
 	case "true":
 		return true
 	case "false":
 		return false
 	default:
+		if route == newagentmodel.ChatRouteDeepAnswer {
+			return true
+		}
 		return decisionThinking
 	}
 }
--- a/backend/newAgent/prompt/chat.go
+++ b/backend/newAgent/prompt/chat.go
@@ -10,14 +10,21 @@ import (
 )

 const chatRoutingSystemPrompt = `
-你是 SmartFlow 的智能路由器。你的回复必须以路由控制码开头，控制码后紧跟用户可见的内容。
+你是 SmartMate 的聊天路由助手。SmartMate 是时伴（SmartMate）的中文 AI 排程伙伴，面向大学生提供陪伴式日程管理与日常协助；它擅长日程安排、任务管理与学习规划，但不只会做排程。你的回复必须以路由控制码开头，控制码后紧跟用户可见的内容。

 路由规则：
- direct_reply：纯闲聊、简单问答、打招呼、感谢等。控制码后直接输出完整回复。
+- direct_reply：纯闲聊、简单问答、轻量生活建议、打招呼、感谢等不需要工具、也不需要长链路思考的请求。控制码后直接输出完整回复。
 - execute：需要用工具处理的请求（查询日程、移动课程、排课等），但不需要先制定计划。控制码后输出简短确认。
- deep_answer：复杂问题但不需要工具（如分析建议、深度解释等），需要深度思考后回答。控制码后输出过渡语（如"让我想想"）。
+- deep_answer：复杂问题但不需要工具（如分析建议、知识解释、方案比较、深度讨论等），需要深度思考后回答。控制码后不要输出任何占位过渡语，后端会直接进入第二次正式回答。
 - plan：用户明确要求先制定计划，或涉及多阶段复杂规划。控制码后输出简短确认。

+通用回答约束：
+- 非日程、非任务类问题，只要不需要工具，也应当正常回答。
+- 不要因为用户的问题不涉及排程，就说自己“只能处理日程/任务安排”。
+- 不要把普通问答、生活建议、开放式讨论，硬拐成排程请求。
+- route=direct_reply 时，控制码后的可见内容应直接回应用户问题，而不是先讲能力边界。
+- route=deep_answer 时，只输出控制码即可，不要补“让我想想”“这是个好问题”之类的占位话术。
+
 粗排判断：当用户意图包含"批量安排/排课/把任务类排进日程"，且上下文中有任务类 ID 时，设置 rough_build=true。
 二次粗排约束（强约束）：
 - 若上下文已出现 rough_build_done，且用户未明确要求"重新粗排/从头重排"，必须设置 rough_build=false。
@@ -50,7 +57,7 @@ const chatRoutingSystemPrompt = `
 合法示例：

 <SMARTFLOW_ROUTE nonce="给定nonce" route="direct_reply"/>
-你好！我是 SmartFlow 助手，有什么可以帮你的？
+当然可以，我先直接回答你这个问题。

 <SMARTFLOW_ROUTE nonce="给定nonce" route="execute"/>
 好的，我来帮你看看今天的安排。
@@ -62,7 +69,6 @@ const chatRoutingSystemPrompt = `
 好的，我来帮你排课并按你的偏好做微调。

 <SMARTFLOW_ROUTE nonce="给定nonce" route="deep_answer"/>
-这是个好问题，让我仔细想想。

 <SMARTFLOW_ROUTE nonce="给定nonce" route="plan"/>
 明白，我来帮你制定一个完整的学习计划。
@@ -125,12 +131,13 @@ func BuildChatRoutingUserPrompt(ctx *newagentmodel.ConversationContext, userInpu
 // --- 深度回答 prompt ---

 const deepAnswerSystemPrompt = `
-你是 SmartFlow 的深度分析助手。用户提出了一个需要深入思考的问题，请认真分析后给出详细、有价值的回答。
+你是 SmartMate 的深度分析助手。SmartMate 是时伴（SmartMate）的中文 AI 排程伙伴；即使问题与日程、任务无关，只要不需要工具，你也应当认真分析后给出详细、有价值的回答。

 请遵守以下规则：
-1. 充分利用上下文中已有的信息（任务类约束、日程数据、历史对话等）。
-2. 如果缺少关键信息，在回答中说明需要哪些额外信息。
-3. 直接输出你的回答，不要输出 JSON。
+1. 优先回答用户真实问题，不要把普通问答硬拐回排程、任务或计划制定。
+2. 充分利用上下文中已有的信息（历史对话、记忆、任务类约束、日程数据等），但不要无关硬套。
+3. 如果缺少关键信息，在回答中说明需要哪些额外信息。
+4. 直接输出你的回答，不要输出 JSON。
 `

 // BuildDeepAnswerSystemPrompt 返回深度回答阶段的系统提示词。
--- a/backend/newAgent/prompt/deliver.go
+++ b/backend/newAgent/prompt/deliver.go
@@ -9,7 +9,7 @@ import (
 )

 const deliverSystemPrompt = `
-你是 SmartFlow NewAgent 的交付器。
+你是 SmartMate 的交付器。
 你的职责是基于原始计划和执行历史，生成一份简洁、诚实的任务完成总结。

 请遵守以下规则：
--- a/backend/newAgent/prompt/execute.go
+++ b/backend/newAgent/prompt/execute.go
@@ -9,7 +9,7 @@ import (
 )

 const executeSystemPromptWithPlan = `
-你是 SmartFlow NewAgent 的执行器。你需要在"当前 plan 步骤"约束下推进任务。
+你是 SmartMate 的执行器。你需要在"当前 plan 步骤"约束下推进任务。

 你可以做什么：
 1. 只围绕当前步骤推进，先读后写，逐步完成当前步骤。
@@ -45,7 +45,7 @@ const executeSystemPromptWithPlan = `
 7. 流程应正式终止时输出 action=abort。`

 const executeSystemPromptReAct = `
-你是 SmartFlow NewAgent 的执行器，当前处于自由执行模式（无预定义 plan 步骤）。
+你是 SmartMate 的执行器，当前处于自由执行模式（无预定义 plan 步骤）。

 阶段事实（强约束）：
 1. 若上下文给出"粗排已完成/rough_build_done"，表示目标任务类已经进入 suggested/existing，不是待排入状态。
--- a/backend/newAgent/prompt/execute_context.go
+++ b/backend/newAgent/prompt/execute_context.go
@@ -48,7 +48,7 @@ const executeMessage1MaxRunes = 1400
 // 1. message[0] 固定 prompt（规则 + 微调硬引导 + 输出约束 + 工具简表）
 // 2. message[1] 历史上下文（真实对话流 + 早期 ReAct 摘要）
 // 3. message[2] 当轮 ReAct Loop 窗口（thought/reason + tool_call + observation 绑定展示）
-// 4. message[3] 当前执行状态（轮次、模式、plan 步骤、任务类等）
+// 4. message[3] 当前执行状态（轮次、模式、plan 步骤、任务类、相关记忆等）
 func buildExecuteStageMessages(
 	stageSystemPrompt string,
 	state *newagentmodel.CommonState,
@@ -72,7 +72,7 @@ func buildExecuteStageMessages(
 func buildExecuteMessage0(stageSystemPrompt string, ctx *newagentmodel.ConversationContext) string {
 	base := strings.TrimSpace(mergeSystemPrompts(ctx, stageSystemPrompt))
 	if base == "" {
-		base = "你是 SmartFlow NewAgent 执行器，请继续 execute 阶段。"
+		base = "你是 SmartMate 执行器，请继续 execute 阶段。"
 	}

 	toolCatalog := renderExecuteToolCatalogCompact(ctx)
@@ -290,6 +290,10 @@ func buildExecuteMessage3(state *newagentmodel.CommonState, ctx *newagentmodel.C
 			lines = append(lines, "- 顺序策略：默认保持 suggested 相对顺序，禁止调用 min_context_switch。")
 		}
 	}
+	if memoryText := renderExecuteMemoryContext(ctx); memoryText != "" {
+		lines = append(lines, "相关记忆（仅在确有帮助时参考，不要机械复述）：")
+		lines = append(lines, memoryText)
+	}

 	// 兼容上层传入的执行指令；若为空则使用固定收口指令。
 	instruction := strings.TrimSpace(runtimeUserPrompt)
--- a/backend/newAgent/prompt/execute_pinned.go
+++ b/backend/newAgent/prompt/execute_pinned.go
@@ -0,0 +1,31 @@
+package newagentprompt
+
+import (
+	"strings"
+
+	newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
+)
+
+const executeMemoryContextKey = "memory_context"
+
+// renderExecuteMemoryContext returns the memory text that the Execute stage appends to msg3.
+//
+// Notes:
+// 1. Only the memory_context pinned block is read, so Execute-owned blocks such as execution_context / current_step are not injected a second time;
+// 2. A nil ctx, a missing block, or whitespace-only content yields "", so msg3 gets no empty section;
+// 3. The block content is returned trimmed and is not re-rendered here; it is expected to be the final text produced by agentsvc, keeping all stages consistent.
+func renderExecuteMemoryContext(ctx *newagentmodel.ConversationContext) string {
+	if ctx == nil {
+		return ""
+	}
+
+	block, ok := ctx.PinnedBlockByKey(executeMemoryContextKey)
+	if !ok {
+		return ""
+	}
+	content := strings.TrimSpace(block.Content)
+	if content == "" {
+		return ""
+	}
+	return content
+}
--- a/backend/newAgent/prompt/plan.go
+++ b/backend/newAgent/prompt/plan.go
@@ -10,7 +10,7 @@ import (
 )

 const planSystemPrompt = `
-你是 SmartFlow NewAgent 的规划器。
+你是 SmartMate 的规划器。
 你的职责不是直接执行任务，而是先把用户意图拆成一组清晰、稳定、可逐步执行的自然语言计划，并严格按后端约定的 JSON 协议输出。

 请遵守以下规则：
--- a/backend/service/agentsvc/agent.go
+++ b/backend/service/agentsvc/agent.go
@@ -15,6 +15,7 @@ import (
 	"github.com/LoveLosita/smartflow/backend/dao"
 	outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
 	"github.com/LoveLosita/smartflow/backend/inits"
+	memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
 	"github.com/LoveLosita/smartflow/backend/model"
 	newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
 	newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
@@ -58,6 +59,7 @@ type AgentService struct {
 	agentStateStore   newagentmodel.AgentStateStore
 	compactionStore   newagentmodel.CompactionStore
 	memoryReader      MemoryReader
+	memoryCfg         memorymodel.Config
 }

 // NewAgentService 构造 AgentService。
--- a/backend/service/agentsvc/agent_memory.go
+++ b/backend/service/agentsvc/agent_memory.go
@@ -2,7 +2,6 @@ package agentsvc

 import (
 	"context"
-	"fmt"
 	"log"
 	"strings"
 	"time"
@@ -28,9 +27,10 @@ type MemoryReader interface {
 	Retrieve(ctx context.Context, req memorymodel.RetrieveRequest) ([]memorymodel.ItemDTO, error)
 }

-// SetMemoryReader 注入 newAgent 主链路读取记忆所需的薄接口。
-func (s *AgentService) SetMemoryReader(reader MemoryReader) {
+// SetMemoryReader stores the thin memory reader used by the newAgent main flow together with a copy of the memory config; injectMemoryContext reads the inject render mode from that config.
+func (s *AgentService) SetMemoryReader(reader MemoryReader, cfg memorymodel.Config) {
 	s.memoryReader = reader
+	s.memoryCfg = cfg
 }

 // injectMemoryContext 在 graph 执行前，把本轮相关记忆写入 ConversationContext 的 pinned block。
@@ -68,7 +68,7 @@ func (s *AgentService) injectMemoryContext(
 		return
 	}

-	content := renderMemoryPinnedContent(items)
+	content := renderMemoryPinnedContentByMode(items, s.memoryCfg.EffectiveInjectRenderMode())
 	if content == "" {
 		conversationContext.RemovePinnedBlock(newAgentMemoryBlockKey)
 		return
@@ -100,62 +100,3 @@ func shouldInjectMemoryForInput(userMessage string) bool {
 		return true
 	}
 }
-
-// renderMemoryPinnedContent 把召回结果转成一段稳定、紧凑、适合 prompt 注入的自然语言文本。
-func renderMemoryPinnedContent(items []memorymodel.ItemDTO) string {
-	if len(items) == 0 {
-		return ""
-	}
-
-	var sb strings.Builder
-	sb.WriteString(newAgentMemoryIntroLine)
-
-	seen := make(map[string]struct{}, len(items))
-	written := 0
-	for _, item := range items {
-		line := buildMemoryPinnedLine(item)
-		if line == "" {
-			continue
-		}
-		if _, exists := seen[line]; exists {
-			continue
-		}
-		seen[line] = struct{}{}
-		sb.WriteString("\n- ")
-		sb.WriteString(line)
-		written++
-	}
-
-	if written == 0 {
-		return ""
-	}
-	return strings.TrimSpace(sb.String())
-}
-
-// buildMemoryPinnedLine 把单条记忆渲染成“[类型] 内容”的简洁格式。
-func buildMemoryPinnedLine(item memorymodel.ItemDTO) string {
-	text := strings.TrimSpace(item.Content)
-	if text == "" {
-		text = strings.TrimSpace(item.Title)
-	}
-	if text == "" {
-		return ""
-	}
-	return fmt.Sprintf("[%s] %s", localizeMemoryType(item.MemoryType), text)
-}
-
-// localizeMemoryType 把 memory 类型映射成 prompt 里更自然的中文标签。
-func localizeMemoryType(memoryType string) string {
-	switch strings.TrimSpace(memoryType) {
-	case memorymodel.MemoryTypePreference:
-		return "偏好"
-	case memorymodel.MemoryTypeConstraint:
-		return "约束"
-	case memorymodel.MemoryTypeTodoHint:
-		return "待办线索"
-	case memorymodel.MemoryTypeFact:
-		return "事实"
-	default:
-		return "记忆"
-	}
-}
--- a/backend/service/agentsvc/agent_memory_render.go
+++ b/backend/service/agentsvc/agent_memory_render.go
@@ -0,0 +1,159 @@
+package agentsvc
+
+import (
+	"fmt"
+	"strings"
+
+	memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
+)
+
+// renderMemoryPinnedContentByMode picks the memory renderer from the normalized render mode: the TypedV2 mode renders typed sections, every other value renders the flat list.
+func renderMemoryPinnedContentByMode(items []memorymodel.ItemDTO, renderMode string) string {
+	switch memorymodel.NormalizeInjectRenderMode(renderMode) {
+	case memorymodel.MemoryInjectRenderModeTypedV2:
+		return RenderTypedMemoryContent(items)
+	default:
+		return RenderFlatMemoryContent(items)
+	}
+}
+
+// RenderFlatMemoryContent renders the legacy-compatible flat memory text: the intro line followed by one "- [type] content" bullet per distinct item; it returns "" when no item is renderable.
+func RenderFlatMemoryContent(items []memorymodel.ItemDTO) string {
+	if len(items) == 0 {
+		return ""
+	}
+
+	var sb strings.Builder
+	sb.WriteString(newAgentMemoryIntroLine)
+
+	seen := make(map[string]struct{}, len(items)) // rendered lines already written, for text-level dedup
+	written := 0
+	for _, item := range items {
+		line := buildMemoryPinnedLine(item)
+		if line == "" { // item has neither content nor title
+			continue
+		}
+		if _, exists := seen[line]; exists {
+			continue
+		}
+		seen[line] = struct{}{}
+		sb.WriteString("\n- ")
+		sb.WriteString(line)
+		written++
+	}
+
+	if written == 0 { // avoid emitting the intro line on its own
+		return ""
+	}
+	return strings.TrimSpace(sb.String())
+}
+
+// RenderTypedMemoryContent renders memory items grouped into one titled section per memory type.
+//
+// Steps:
+// 1. Normalize each item's type (an empty normalized type is filed under fact) and group by it, so sections follow a fixed order;
+// 2. Drop repeated text within a section; the dedup key uses the normalized type so that it matches the grouping;
+// 3. Emit only non-empty sections after the intro line; returns "" when nothing is renderable.
+func RenderTypedMemoryContent(items []memorymodel.ItemDTO) string {
+	if len(items) == 0 {
+		return ""
+	}
+
+	type renderSection struct {
+		Title string
+		Items []string
+	}
+	orderedTypes := []string{
+		memorymodel.MemoryTypeConstraint,
+		memorymodel.MemoryTypePreference,
+		memorymodel.MemoryTypeFact,
+		memorymodel.MemoryTypeTodoHint,
+	}
+	sectionTitle := map[string]string{
+		memorymodel.MemoryTypeConstraint: "必守约束",
+		memorymodel.MemoryTypePreference: "用户偏好",
+		memorymodel.MemoryTypeFact:       "当前话题相关事实",
+		memorymodel.MemoryTypeTodoHint:   "近期待办",
+	}
+
+	grouped := make(map[string][]string, len(orderedTypes))
+	seen := make(map[string]struct{}, len(items))
+	for _, item := range items {
+		content := buildMemoryRenderContent(item)
+		if content == "" {
+			continue
+		}
+		memoryType := memorymodel.NormalizeMemoryType(item.MemoryType)
+		if memoryType == "" {
+			memoryType = memorymodel.MemoryTypeFact
+		}
+
+		dedupKey := memoryType + "::" + content // keyed by the section the item lands in, not by the raw type string
+		if _, exists := seen[dedupKey]; exists {
+			continue
+		}
+		seen[dedupKey] = struct{}{}
+		grouped[memoryType] = append(grouped[memoryType], content)
+	}
+
+	sections := make([]renderSection, 0, len(orderedTypes))
+	for _, memoryType := range orderedTypes {
+		contentList := grouped[memoryType]
+		if len(contentList) == 0 {
+			continue
+		}
+		sections = append(sections, renderSection{
+			Title: sectionTitle[memoryType],
+			Items: contentList,
+		})
+	}
+	if len(sections) == 0 {
+		return ""
+	}
+
+	var sb strings.Builder
+	sb.WriteString(newAgentMemoryIntroLine)
+	for _, section := range sections {
+		sb.WriteString("\n\n【")
+		sb.WriteString(section.Title)
+		sb.WriteString("】")
+		for _, line := range section.Items {
+			sb.WriteString("\n- ")
+			sb.WriteString(line)
+		}
+	}
+	return strings.TrimSpace(sb.String())
+}
+
+// buildMemoryPinnedLine renders one memory item as "[localized type] text", or "" when the item has no renderable text.
+func buildMemoryPinnedLine(item memorymodel.ItemDTO) string {
+	text := buildMemoryRenderContent(item)
+	if text == "" {
+		return ""
+	}
+	return fmt.Sprintf("[%s] %s", localizeMemoryType(item.MemoryType), text)
+}
+
+func buildMemoryRenderContent(item memorymodel.ItemDTO) string { // returns the trimmed Content, or the trimmed Title when Content is blank
+	text := strings.TrimSpace(item.Content)
+	if text == "" {
+		text = strings.TrimSpace(item.Title)
+	}
+	return text
+}
+
+// localizeMemoryType maps a memory type (after trimming whitespace) to the Chinese label used in prompts; unrecognized types get the generic label.
+func localizeMemoryType(memoryType string) string {
+	switch strings.TrimSpace(memoryType) {
+	case memorymodel.MemoryTypePreference:
+		return "偏好"
+	case memorymodel.MemoryTypeConstraint:
+		return "约束"
+	case memorymodel.MemoryTypeTodoHint:
+		return "待办线索"
+	case memorymodel.MemoryTypeFact:
+		return "事实"
+	default:
+		return "记忆"
+	}
+}
--- a/backend/service/agentsvc/agent_meta.go
+++ b/backend/service/agentsvc/agent_meta.go
@@ -38,7 +38,7 @@ const (
 	conversationTitleTokenAdjustReason = "conversation_title_async"
 )

-const conversationTitlePrompt = `你是 SmartFlow 的会话标题生成器。
+const conversationTitlePrompt = `你是 SmartMate 的会话标题生成器。
 请基于给定对话内容，生成一个简短中文标题。

 要求：
--- a/frontend/src/api/agent.ts
+++ b/frontend/src/api/agent.ts
@@ -1,6 +1,6 @@
 import http from '@/api/http'
 import type { ApiResponse } from '@/types/api'
-import type { ConversationListResponse, ConversationMeta } from '@/types/dashboard'
+import type { ConversationContextStats, ConversationListResponse, ConversationMeta } from '@/types/dashboard'
 import { extractErrorMessage } from '@/utils/http'

 const conversationHistoryPath = '/agent/conversation-history'
@@ -23,6 +23,63 @@ export interface ConversationListQuery {
  status?: 'active' | 'archived'
 }

+// Coerces a finite number to a non-negative integer (rounded, then clamped at 0).
+// Returns null for anything that is not a finite number (other types, NaN, Infinity).
+function normalizeNonNegativeInteger(value: unknown) {
+  if (typeof value !== 'number' || !Number.isFinite(value)) {
+    return null
+  }
+
+  return Math.max(0, Math.round(value))
+}
+
+// Normalizes a raw context-stats payload into ConversationContextStats, or null when it is unusable.
+function normalizeConversationContextStats(raw: unknown): ConversationContextStats | null {
+  // 1. The backend passes the JSON stored in the database straight through, so the frontend must handle object / null / empty-string responses.
+  // 2. If fields are missing during the backend's gradual rollout, back-fill total with the sum of the four message segments so compatibility logic does not spread into the view layer.
+  // 3. A missing budget means the stats are incomplete; return null so the UI consistently falls into its "no stats" state.
+  if (raw == null || raw === '') {
+    return null
+  }
+
+  let candidate: unknown = raw
+  if (typeof candidate === 'string') {
+    const trimmed = candidate.trim()
+    if (!trimmed) {
+      return null
+    }
+
+    // A string payload is treated as serialized JSON; unparsable text counts as "no stats".
+    try {
+      candidate = JSON.parse(trimmed) as unknown
+    } catch {
+      return null
+    }
+  }
+
+  if (!candidate || typeof candidate !== 'object') {
+    return null
+  }
+
+  const stats = candidate as Record<string, unknown>
+  // Missing or invalid segment values default to 0.
+  const msg0 = normalizeNonNegativeInteger(stats.msg0) ?? 0
+  const msg1 = normalizeNonNegativeInteger(stats.msg1) ?? 0
+  const msg2 = normalizeNonNegativeInteger(stats.msg2) ?? 0
+  const msg3 = normalizeNonNegativeInteger(stats.msg3) ?? 0
+  const fallbackTotal = msg0 + msg1 + msg2 + msg3
+  const total = normalizeNonNegativeInteger(stats.total) ?? fallbackTotal
+  const budget = normalizeNonNegativeInteger(stats.budget)
+
+  if (budget == null || budget <= 0) {
+    return null
+  }
+
+  return {
+    msg0,
+    msg1,
+    msg2,
+    msg3,
+    // Never report a total smaller than the sum of the four segments.
+    total: Math.max(total, fallbackTotal),
+    budget,
+  }
+}
+
 function normalizeConversationHistoryMessage(raw: unknown): ConversationHistoryMessage | null {
  if (!raw || typeof raw !== 'object') {
    return null
@@ -110,3 +167,16 @@ export async function getConversationHistory(conversationId: string) {
    throw new Error(extractErrorMessage(error, '会话消息加载失败，请稍后重试'))
  }
 }
+
+// Fetches context-window stats for a conversation from /agent/context-stats.
+// Resolves to normalized stats, or null when the payload is empty or unusable.
+// Throws an Error carrying a user-facing message when the request fails.
+export async function getContextStats(conversationId: string) {
+  try {
+    const response = await http.get<ApiResponse<unknown>>('/agent/context-stats', {
+      params: {
+        conversation_id: conversationId,
+      },
+    })
+    return normalizeConversationContextStats(response.data.data)
+  } catch (error) {
+    throw new Error(extractErrorMessage(error, '上下文窗口统计加载失败，请稍后重试'))
+  }
+}
--- a/frontend/src/components/assistant/ContextWindowMeter.vue
+++ b/frontend/src/components/assistant/ContextWindowMeter.vue
@@ -0,0 +1,229 @@
+<script setup lang="ts">
+import { computed } from 'vue'
+
+import type { ConversationContextStats } from '@/types/dashboard'
+
+// One rendered slice of the meter track, corresponding to a single msgN counter.
+interface ContextSegment {
+  key: 'msg0' | 'msg1' | 'msg2' | 'msg3'
+  label: string
+  value: number
+  // Width of the slice as a percentage (0-100) of the track.
+  widthPercent: number
+  // CSS background value applied to the slice.
+  color: string
+}
+
+// Component props: `stats` is the data to visualize (null/absent means no stats),
+// `loading` switches the track to the loading bar, `disabled` selects the muted styling and placeholder text.
+const props = withDefaults(
+  defineProps<{
+    stats?: ConversationContextStats | null
+    loading?: boolean
+    disabled?: boolean
+  }>(),
+  {
+    stats: null,
+    loading: false,
+    disabled: false,
+  },
+)
+
+// Collapses an undefined `stats` prop to null so the computeds below only deal with "stats or null".
+const safeStats = computed(() => props.stats ?? null)
+
+// total / budget as a rounded percentage; 0 when there are no stats or the budget is not positive.
+// The value is not capped, so it exceeds 100 when total is larger than budget.
+const usagePercent = computed(() => {
+  if (!safeStats.value || safeStats.value.budget <= 0) {
+    return 0
+  }
+  return Math.round((safeStats.value.total / safeStats.value.budget) * 100)
+})
+
+// True when total strictly exceeds budget; drives the "danger" styling in the template.
+const isOverBudget = computed(() => {
+  if (!safeStats.value) {
+    return false
+  }
+  return safeStats.value.total > safeStats.value.budget
+})
+
+// Builds the visible track slices from the four msgN counters; empty when there are no stats.
+const segments = computed<ContextSegment[]>(() => {
+  const stats = safeStats.value
+  if (!stats) {
+    return []
+  }
+
+  // 1. The bar is a fixed compact capsule, so widths are scaled against max(total, budget): under budget the remaining budget stays blank, over budget the bar fills up.
+  // 2. The four colors keep mapping to the backend's real msg0~msg3 semantics, so visual compression never scrambles what the numbers mean.
+  // 3. Zero-value segments are not rendered, reducing noise at narrow sizes so the small widget stays readable.
+  const base = Math.max(stats.total, stats.budget, 1)
+  const rawSegments = [
+    { key: 'msg0', label: '规则', value: stats.msg0, color: 'linear-gradient(90deg, #2556c7, #3b82f6)' },
+    { key: 'msg1', label: '历史', value: stats.msg1, color: 'linear-gradient(90deg, #0f766e, #14b8a6)' },
+    { key: 'msg2', label: '执行', value: stats.msg2, color: 'linear-gradient(90deg, #b45309, #f59e0b)' },
+    { key: 'msg3', label: '当前', value: stats.msg3, color: 'linear-gradient(90deg, #15803d, #22c55e)' },
+  ] as const
+
+  return rawSegments
+    .filter((segment) => segment.value > 0)
+    .map((segment) => ({
+      ...segment,
+      widthPercent: Math.max(0, Math.min(100, (segment.value / base) * 100)),
+    }))
+})
+
+// Short text shown at the right of the meter: a loading marker, a placeholder when
+// there are no stats (different for disabled vs. enabled), or the usage percentage.
+const usageText = computed(() => {
+  if (props.loading) {
+    return '...'
+  }
+
+  if (!safeStats.value) {
+    return props.disabled ? '--' : '空'
+  }
+
+  return `${usagePercent.value}%`
+})
+
+// Text for the root element's `title` attribute: a status hint while loading or without stats,
+// otherwise a total/budget summary followed by the non-zero segment values.
+const tooltipText = computed(() => {
+  if (props.loading) {
+    return '正在读取当前会话的上下文窗口统计'
+  }
+
+  if (!safeStats.value) {
+    return props.disabled ? '新会话发送首条消息后展示上下文窗口统计' : '当前会话暂无上下文窗口统计'
+  }
+
+  const segmentText = segments.value.map((segment) => `${segment.label} ${segment.value}`).join(' / ')
+  const usageSummary = `总计 ${safeStats.value.total} / 预算 ${safeStats.value.budget}（${usagePercent.value}%）`
+  return segmentText ? `${usageSummary}；${segmentText}` : usageSummary
+})
+</script>
+
+<template>
+  <div
+    class="assistant-context-meter"
+    :class="{
+      'assistant-context-meter--loading': loading,
+      'assistant-context-meter--disabled': disabled,
+      'assistant-context-meter--danger': isOverBudget,
+    }"
+    :title="tooltipText"
+  >
+    <span class="assistant-context-meter__label">窗口</span>
+
+    <div class="assistant-context-meter__track" aria-hidden="true">
+      <div v-if="loading" class="assistant-context-meter__loading-bar" />
+
+      <template v-else>
+        <div
+          v-for="segment in segments"
+          :key="segment.key"
+          class="assistant-context-meter__segment"
+          :style="{
+            width: `${segment.widthPercent}%`,
+            background: segment.color,
+          }"
+        />
+      </template>
+    </div>
+
+    <span class="assistant-context-meter__value">{{ usageText }}</span>
+  </div>
+</template>
+
+<style scoped>
+.assistant-context-meter {
+  width: 144px;
+  min-width: 144px;
+  max-width: 144px;
+  height: 32px;
+  padding: 0 9px 0 10px;
+  border: 1px solid rgba(15, 23, 42, 0.1);
+  border-radius: 999px;
+  background: #ffffff;
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  box-sizing: border-box;
+  color: #243042;
+  transition: border-color 0.15s ease, background-color 0.15s ease, box-shadow 0.15s ease;
+}
+
+.assistant-context-meter:hover {
+  border-color: rgba(58, 96, 195, 0.24);
+  background: #fbfcff;
+}
+
+.assistant-context-meter--disabled {
+  color: #6b7280;
+  background: #fbfcfd;
+}
+
+.assistant-context-meter--danger {
+  border-color: rgba(220, 38, 38, 0.22);
+  background: linear-gradient(180deg, rgba(255, 255, 255, 1), rgba(255, 246, 246, 1));
+}
+
+.assistant-context-meter__label,
+.assistant-context-meter__value {
+  flex: 0 0 auto;
+  font-size: 12px;
+  line-height: 1;
+  white-space: nowrap;
+}
+
+.assistant-context-meter__label {
+  color: #4b5563;
+  font-weight: 600;
+}
+
+.assistant-context-meter__value {
+  width: 28px;
+  min-width: 28px;
+  text-align: right;
+  color: #334155;
+  font-weight: 700;
+}
+
+.assistant-context-meter--disabled .assistant-context-meter__value {
+  color: #6b7280;
+}
+
+.assistant-context-meter--danger .assistant-context-meter__value {
+  color: #b42318;
+}
+
+.assistant-context-meter__track {
+  flex: 1 1 auto;
+  min-width: 0;
+  height: 7px;
+  overflow: hidden;
+  border-radius: 999px;
+  background:
+    linear-gradient(180deg, rgba(232, 238, 246, 0.95), rgba(243, 247, 251, 0.95)),
+    #edf2f7;
+  display: flex;
+}
+
+.assistant-context-meter--disabled .assistant-context-meter__track {
+  background:
+    linear-gradient(180deg, rgba(239, 243, 247, 0.95), rgba(245, 247, 250, 0.95)),
+    #eef2f7;
+}
+
+.assistant-context-meter__segment {
+  height: 100%;
+  flex: 0 0 auto;
+}
+
+.assistant-context-meter__loading-bar {
+  width: 100%;
+  height: 100%;
+  border-radius: inherit;
+  background: linear-gradient(90deg, rgba(221, 231, 244, 0.78), rgba(162, 188, 229, 0.95), rgba(221, 231, 244, 0.78));
+  background-size: 200% 100%;
+  animation: context-meter-loading 1.15s linear infinite;
+}
+
+@keyframes context-meter-loading {
+  0% {
+    background-position: 200% 0;
+  }
+  100% {
+    background-position: -200% 0;
+  }
+}
+</style>
--- a/frontend/src/components/assistant/TaskClassPlanningPicker.vue
+++ b/frontend/src/components/assistant/TaskClassPlanningPicker.vue
@@ -0,0 +1,484 @@
+<script setup lang="ts">
+import { computed, ref, watch } from 'vue'
+import { ElMessage } from 'element-plus'
+
+import { getTaskClassList } from '@/api/scheduleCenter'
+import type { TaskClassListItem } from '@/types/schedule'
+
+// Minimal id/name pair used to render the "selected task classes" tags.
+interface SelectedTaskClassSummary {
+  id: number
+  name: string
+}
+
+// Component props: `modelValue` holds the committed task-class ids (v-model);
+// `disabled` disables the trigger button and the tag/clear buttons.
+const props = withDefaults(
+  defineProps<{
+    modelValue: number[]
+    disabled?: boolean
+  }>(),
+  {
+    disabled: false,
+  },
+)
+
+// 'update:modelValue' carries the new committed ids; 'applied' is additionally emitted
+// only by the popover's apply/clear actions (not by the tag-level remove/clear buttons).
+const emit = defineEmits<{
+  'update:modelValue': [taskClassIds: number[]]
+  applied: [taskClassIds: number[]]
+}>()
+
+// Local picker state: popover visibility, list-loading flag, the loaded task classes,
+// the uncommitted (draft) selection, and whether the list has been loaded successfully once.
+const popoverVisible = ref(false)
+const taskClassLoading = ref(false)
+const taskClasses = ref<TaskClassListItem[]>([])
+const draftSelectedIds = ref<number[]>([])
+const taskClassListReady = ref(false)
+
+// Trigger button text: the default label when nothing is selected, otherwise a label with the selected count.
+const triggerLabel = computed(() => {
+  if (props.modelValue.length <= 0) {
+    return '智能编排'
+  }
+  return `编排 ${props.modelValue.length}`
+})
+
+// Resolves each committed id to a display name. Ids that are not in the loaded list
+// (or whose name is empty) fall back to a placeholder name built from the id.
+const selectedTaskClasses = computed<SelectedTaskClassSummary[]>(() => {
+  const lookup = new Map(taskClasses.value.map((item) => [item.id, item]))
+  return props.modelValue.map((taskClassId) => {
+    const taskClass = lookup.get(taskClassId)
+    return {
+      id: taskClassId,
+      name: taskClass?.name || `任务类 #${taskClassId}`,
+    }
+  })
+})
+
+// Mirror the committed value into the draft selection only while the popover is closed,
+// so an in-progress draft inside an open popover is not overwritten. Runs once immediately.
+watch(
+  () => props.modelValue,
+  (nextValue) => {
+    if (!popoverVisible.value) {
+      draftSelectedIds.value = [...nextValue]
+    }
+  },
+  { immediate: true },
+)
+
+// On every open: reset the draft to the committed value and make sure the task-class list is loaded.
+watch(popoverVisible, (visible) => {
+  if (!visible) {
+    return
+  }
+  draftSelectedIds.value = [...props.modelValue]
+  void ensureTaskClassListLoaded()
+})
+
+// Returns a new array containing only positive integer ids, de-duplicated, in first-seen order.
+function normalizeTaskClassIds(taskClassIds: number[]) {
+  const seen = new Set<number>()
+  const normalized: number[] = []
+
+  for (const taskClassId of taskClassIds) {
+    if (!Number.isInteger(taskClassId) || taskClassId <= 0 || seen.has(taskClassId)) {
+      continue
+    }
+    seen.add(taskClassId)
+    normalized.push(taskClassId)
+  }
+
+  return normalized
+}
+
+// Loads the task-class list at most once per successful fetch; no-op while a load is
+// in flight or after one has succeeded. On failure an error toast is shown and the
+// ready flag stays false, so a later call attempts the load again.
+async function ensureTaskClassListLoaded() {
+  if (taskClassLoading.value || taskClassListReady.value) {
+    return
+  }
+
+  taskClassLoading.value = true
+  try {
+    taskClasses.value = await getTaskClassList()
+    taskClassListReady.value = true
+  } catch (error) {
+    ElMessage.error(error instanceof Error ? error.message : '任务类列表加载失败')
+  } finally {
+    taskClassLoading.value = false
+  }
+}
+
+// Toggles an id in the draft selection, replacing the array rather than mutating it in place.
+function toggleDraftSelection(taskClassId: number) {
+  if (draftSelectedIds.value.includes(taskClassId)) {
+    draftSelectedIds.value = draftSelectedIds.value.filter((id) => id !== taskClassId)
+    return
+  }
+  draftSelectedIds.value = [...draftSelectedIds.value, taskClassId]
+}
+
+// Commits the draft selection to the parent and closes the popover.
+function applySelection() {
+  // 1. De-duplicate and filter out invalid values on the frontend first, so dirty ids are never sent to the backend as-is.
+  // 2. This only commits "the task-class context the next message should carry"; it does not trigger sending.
+  // 3. After committing, close the popover so the user returns to the input area to keep editing this round's prompt.
+  const normalizedTaskClassIds = normalizeTaskClassIds(draftSelectedIds.value)
+  emit('update:modelValue', normalizedTaskClassIds)
+  emit('applied', normalizedTaskClassIds)
+  popoverVisible.value = false
+}
+
+// Popover "clear" action: empties both the draft and the committed selection,
+// emits 'applied' with an empty list, and closes the popover.
+function clearSelectionFromPanel() {
+  draftSelectedIds.value = []
+  emit('update:modelValue', [])
+  emit('applied', [])
+  popoverVisible.value = false
+}
+
+// Tag-level removal: emits the committed selection without the given id ('applied' is not emitted).
+function removeSelectedTaskClass(taskClassId: number) {
+  emit(
+    'update:modelValue',
+    props.modelValue.filter((id) => id !== taskClassId),
+  )
+}
+
+// Tag-level "clear all": emits an empty committed selection ('applied' is not emitted).
+function clearSelectedTaskClasses() {
+  emit('update:modelValue', [])
+}
+
+// Formats a task class's start/end dates as "MM.DD - MM.DD"; returns a placeholder
+// text when either date cannot be formatted.
+function formatDateRange(taskClass: TaskClassListItem) {
+  const startDate = formatDateLabel(taskClass.start_date)
+  const endDate = formatDateLabel(taskClass.end_date)
+  if (!startDate || !endDate) {
+    return '时间范围待补充'
+  }
+  return `${startDate} - ${endDate}`
+}
+
+// Formats a date string as zero-padded "MM.DD" in local time; returns '' when the value does not parse.
+// NOTE(review): `new Date` parses date-only ISO strings (e.g. "2026-04-16") as UTC, while
+// getMonth/getDate read local time, so the label can shift by a day in negative-offset
+// time zones — confirm the format the backend sends for start_date/end_date.
+function formatDateLabel(value: string) {
+  const parsedDate = new Date(value)
+  if (Number.isNaN(parsedDate.getTime())) {
+    return ''
+  }
+  const month = `${parsedDate.getMonth() + 1}`.padStart(2, '0')
+  const day = `${parsedDate.getDate()}`.padStart(2, '0')
+  return `${month}.${day}`
+}
+</script>
+
+<template>
+  <div class="assistant-planning">
+    <el-popover
+      v-model:visible="popoverVisible"
+      placement="top-start"
+      trigger="click"
+      :width="360"
+      :teleported="true"
+      popper-class="assistant-planning-popover"
+    >
+      <template #reference>
+        <button
+          type="button"
+          class="assistant-planning__trigger"
+          :class="{ 'assistant-planning__trigger--active': modelValue.length > 0 }"
+          :disabled="disabled"
+        >
+          <span class="assistant-planning__trigger-icon" aria-hidden="true">
+            <svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg">
+              <path d="M7 1.25L12.25 4.375L7 7.5L1.75 4.375L7 1.25Z" fill="currentColor" />
+              <path d="M1.75 6.5625L7 9.6875L12.25 6.5625" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round" />
+              <path d="M1.75 8.75L7 11.875L12.25 8.75" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round" />
+            </svg>
+          </span>
+          <span class="assistant-planning__trigger-text">{{ triggerLabel }}</span>
+        </button>
+      </template>
+
+      <div class="assistant-planning__panel">
+        <div class="assistant-planning__panel-header">
+          <div>
+            <strong>选择任务类</strong>
+            <p>本次发送将把所选任务类作为智能编排上下文带给后端。</p>
+          </div>
+        </div>
+
+        <div v-if="taskClassLoading" class="assistant-planning__loading">
+          <div v-for="index in 4" :key="index" class="assistant-planning__loading-item" />
+        </div>
+
+        <div v-else-if="taskClasses.length" class="assistant-planning__list">
+          <button
+            v-for="taskClass in taskClasses"
+            :key="taskClass.id"
+            type="button"
+            class="assistant-planning__item"
+            :class="{ 'assistant-planning__item--selected': draftSelectedIds.includes(taskClass.id) }"
+            @click="toggleDraftSelection(taskClass.id)"
+          >
+            <span
+              class="assistant-planning__item-check"
+              :class="{ 'assistant-planning__item-check--selected': draftSelectedIds.includes(taskClass.id) }"
+              aria-hidden="true"
+            />
+            <span class="assistant-planning__item-body">
+              <strong>{{ taskClass.name }}</strong>
+              <small>{{ formatDateRange(taskClass) }}</small>
+            </span>
+            <span class="assistant-planning__item-slots">{{ taskClass.total_slots }} 节</span>
+          </button>
+        </div>
+
+        <div v-else class="assistant-planning__empty">
+          当前还没有可用于智能编排的任务类。
+        </div>
+
+        <div class="assistant-planning__panel-actions">
+          <button type="button" class="assistant-planning__panel-button assistant-planning__panel-button--ghost" @click="clearSelectionFromPanel">
+            清空
+          </button>
+          <button type="button" class="assistant-planning__panel-button assistant-planning__panel-button--primary" @click="applySelection">
+            应用选择
+          </button>
+        </div>
+      </div>
+    </el-popover>
+
+    <div v-if="selectedTaskClasses.length" class="assistant-planning__summary">
+      <span class="assistant-planning__summary-label">已选任务类</span>
+      <div class="assistant-planning__tags">
+        <button
+          v-for="taskClass in selectedTaskClasses"
+          :key="taskClass.id"
+          type="button"
+          class="assistant-planning__tag"
+          :disabled="disabled"
+          @click="removeSelectedTaskClass(taskClass.id)"
+        >
+          <span>{{ taskClass.name }}</span>
+          <span aria-hidden="true">×</span>
+        </button>
+        <button type="button" class="assistant-planning__clear" :disabled="disabled" @click="clearSelectedTaskClasses">
+          清空全部
+        </button>
+      </div>
+    </div>
+  </div>
+</template>
+
+<style scoped>
+.assistant-planning {
+  display: grid;
+  justify-items: start;
+  gap: 10px;
+  min-width: 0;
+  padding: 10px 12px 0;
+}
+
+.assistant-planning__trigger {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  gap: 6px;
+  width: 138px;
+  min-width: 138px;
+  max-width: 138px;
+  height: 32px;
+  padding: 0 10px;
+  box-sizing: border-box;
+  border: 1px solid rgba(15, 23, 42, 0.1);
+  border-radius: 999px;
+  background: #ffffff;
+  color: #1f2430;
+  font-size: 13px;
+  font-weight: 600;
+  transition: border-color 0.15s ease, background-color 0.15s ease, color 0.15s ease;
+}
+
+.assistant-planning__trigger:hover:not(:disabled) {
+  border-color: rgba(57, 86, 178, 0.26);
+  background: #f8fafc;
+}
+
+.assistant-planning__trigger:disabled {
+  cursor: not-allowed;
+  opacity: 0.58;
+}
+
+.assistant-planning__trigger--active {
+  border-color: rgba(57, 86, 178, 0.24);
+  background: #eef3ff;
+  color: #3357c2;
+}
+
+.assistant-planning__trigger-icon {
+  display: inline-flex;
+  width: 14px;
+  height: 14px;
+}
+
+.assistant-planning__trigger-text {
+  min-width: 0;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.assistant-planning__summary {
+  display: grid;
+  gap: 8px;
+}
+
+.assistant-planning__summary-label {
+  color: #5b6677;
+  font-size: 12px;
+  font-weight: 600;
+}
+
+.assistant-planning__tags {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+}
+
+.assistant-planning__tag,
+.assistant-planning__clear {
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  height: 28px;
+  padding: 0 10px;
+  border-radius: 999px;
+  border: 1px solid rgba(57, 86, 178, 0.16);
+  background: #f6f8ff;
+  color: #3559c3;
+  font-size: 12px;
+  font-weight: 600;
+}
+
+.assistant-planning__clear {
+  border-style: dashed;
+  background: #ffffff;
+  color: #64748b;
+}
+
+.assistant-planning__panel {
+  display: grid;
+  gap: 14px;
+}
+
+.assistant-planning__panel-header strong {
+  display: block;
+  color: #1f2937;
+  font-size: 15px;
+}
+
+.assistant-planning__panel-header p {
+  margin: 6px 0 0;
+  color: #6b7280;
+  font-size: 12px;
+  line-height: 1.5;
+}
+
+.assistant-planning__loading,
+.assistant-planning__list {
+  display: grid;
+  gap: 8px;
+  max-height: 260px;
+  overflow-y: auto;
+}
+
+.assistant-planning__loading-item {
+  height: 62px;
+  border-radius: 16px;
+  background: linear-gradient(90deg, rgba(241, 245, 249, 0.9), rgba(226, 232, 240, 0.72), rgba(241, 245, 249, 0.9));
+}
+
+.assistant-planning__item {
+  width: 100%;
+  display: grid;
+  grid-template-columns: auto minmax(0, 1fr) auto;
+  align-items: center;
+  gap: 12px;
+  padding: 12px 14px;
+  border: 1px solid rgba(15, 23, 42, 0.08);
+  border-radius: 16px;
+  background: #ffffff;
+  text-align: left;
+  transition: border-color 0.15s ease, background-color 0.15s ease, transform 0.15s ease;
+}
+
+.assistant-planning__item:hover {
+  border-color: rgba(57, 86, 178, 0.24);
+  background: #fafcff;
+}
+
+.assistant-planning__item--selected {
+  border-color: rgba(57, 86, 178, 0.28);
+  background: #f5f8ff;
+}
+
+.assistant-planning__item-check {
+  width: 16px;
+  height: 16px;
+  border-radius: 999px;
+  border: 1.5px solid rgba(148, 163, 184, 0.8);
+  background: #ffffff;
+}
+
+.assistant-planning__item-check--selected {
+  border-color: #3357c2;
+  background: radial-gradient(circle at center, #3357c2 0 45%, transparent 46%);
+}
+
+.assistant-planning__item-body {
+  min-width: 0;
+  display: grid;
+  gap: 4px;
+}
+
+.assistant-planning__item-body strong {
+  color: #1f2430;
+  font-size: 13px;
+}
+
+.assistant-planning__item-body small {
+  color: #64748b;
+  font-size: 12px;
+}
+
+.assistant-planning__item-slots {
+  color: #475569;
+  font-size: 12px;
+  font-weight: 600;
+}
+
+.assistant-planning__empty {
+  padding: 18px 16px;
+  border-radius: 16px;
+  background: #f8fafc;
+  color: #64748b;
+  font-size: 13px;
+  line-height: 1.6;
+}
+
+.assistant-planning__panel-actions {
+  display: flex;
+  justify-content: flex-end;
+  gap: 10px;
+}
+
+.assistant-planning__panel-button {
+  height: 34px;
+  padding: 0 14px;
+  border-radius: 999px;
+  border: 1px solid rgba(15, 23, 42, 0.1);
+  font-size: 13px;
+  font-weight: 600;
+}
+
+.assistant-planning__panel-button--ghost {
+  background: #ffffff;
+  color: #475569;
+}
+
+.assistant-planning__panel-button--primary {
+  border-color: transparent;
+  background: #3357c2;
+  color: #ffffff;
+}
+
+:global(.assistant-planning-popover) {
+  padding: 14px;
+  border-radius: 20px;
+  border: 1px solid rgba(203, 213, 225, 0.78);
+  box-shadow: 0 18px 44px rgba(15, 23, 42, 0.14);
+}
+</style>
--- a/frontend/src/components/dashboard/AssistantPanel.vue
+++ b/frontend/src/components/dashboard/AssistantPanel.vue
@@ -1,8 +1,11 @@
 <script setup lang="ts">
-import { computed, onBeforeUnmount, onMounted, reactive, ref, watch } from 'vue'
+import { computed, nextTick, onBeforeUnmount, onMounted, reactive, ref, watch } from 'vue'
 import { ElMessage } from 'element-plus'

+import ContextWindowMeter from '@/components/assistant/ContextWindowMeter.vue'
+import TaskClassPlanningPicker from '@/components/assistant/TaskClassPlanningPicker.vue'
 import {
+  getContextStats,
  getConversationHistory,
  getConversationList,
  getConversationMeta,
@@ -12,9 +15,12 @@ import { refreshToken } from '@/api/auth'
 import { useAuthStore } from '@/stores/auth'
 import type {
  AssistantMessage,
+  ChatRequestExtra,
  ChatStreamRequest,
+  ConversationContextStats,
  ConversationListItem,
  ConversationMeta,
+  ThinkingModeType,
 } from '@/types/dashboard'
 import { formatConversationTime, formatMessageTime } from '@/utils/date'
 import { renderMarkdown } from '@/utils/markdown'
@@ -73,6 +79,7 @@ const authStore = useAuthStore()

 const assistantBodyRef = ref<HTMLElement | null>(null)
 const messageViewportRef = ref<HTMLElement | null>(null)
+const historyContentRef = ref<HTMLElement | null>(null)

 const conversationLoading = ref(false)
 const conversationLoadingMore = ref(false)
@@ -80,13 +87,14 @@ const chatLoading = ref(false)
 const historyExpanded = ref(true)
 const selectedConversationId = ref('')
 const selectedModel = ref<ModelType>('worker')
-const thinkingEnabled = ref(false)
+const selectedThinkingMode = ref<ThinkingModeType>('auto')
 const messageInput = ref('')
 const historyPanelWidth = ref(props.initialHistoryWidth)
 const activeStreamingMessageId = ref('')
 const editingUserMessageId = ref('')
 const editingUserMessageDraft = ref('')
 const retryVisiblePageMap = reactive<Record<string, number>>({})
+const pendingPlanningTaskClassIds = ref<number[]>([])

 const conversationPage = ref(1)
 const conversationPageSize = 12
@@ -101,6 +109,9 @@ const thinkingMessageMap = reactive<Record<string, boolean>>({})
 const reasoningCollapsedMap = reactive<Record<string, boolean>>({})
 const reasoningStartedAtMap = reactive<Record<string, number>>({})
 const reasoningDurationMap = reactive<Record<string, number>>({})
+const conversationContextStatsMap = reactive<Record<string, ConversationContextStats | null>>({})
+const conversationContextStatsLoadingMap = reactive<Record<string, boolean>>({})
+const conversationContextStatsReadyMap = reactive<Record<string, boolean>>({})

 const quickActions = [
  '帮我梳理今天最重要的三件事',
@@ -110,6 +121,7 @@ const quickActions = [
 ]

 const MODEL_PREFERENCE_STORAGE_KEY = 'smartflow.assistant.model.byConversation.v1'
+const DEFAULT_PLANNING_PROMPT = '请基于这些任务类帮我做一版智能编排。'

 let messageScrollRaf = 0
 let messageScrollReleaseRaf = 0
@@ -304,6 +316,26 @@ const shouldShowHistoryFallback = computed(() => {
  )
 })

+// Cached context stats for the selected conversation; null when nothing is selected,
+// the selection is a draft conversation, or no stats are cached for it.
+const selectedConversationContextStats = computed(() => {
+  const conversationId = selectedConversationId.value
+  if (!conversationId || isDraftConversationId(conversationId)) {
+    return null
+  }
+  return conversationContextStatsMap[conversationId] ?? null
+})
+
+// True while a context-stats request for the selected conversation is in flight.
+const contextStatsLoading = computed(() => {
+  const conversationId = selectedConversationId.value
+  if (!conversationId) {
+    return false
+  }
+  return conversationContextStatsLoadingMap[conversationId] === true
+})
+
+// True when there is no selected conversation or the selection is a draft conversation.
+const contextStatsDisabled = computed(() => {
+  return !selectedConversationId.value || isDraftConversationId(selectedConversationId.value)
+})
+
 function isModelType(value: unknown): value is ModelType {
  return value === 'worker' || value === 'strategist'
 }
@@ -1054,6 +1086,8 @@ async function ensureSelectedConversationAfterListLoad() {
 // 2. reset=false 时只在还有更多数据且当前不在加载时继续拉下一页，避免重复请求。
 // 3. 接口失败时保留现有列表，不清空本地草稿会话，防止用户当前上下文丢失。
 async function loadConversationListData(reset = false) {
+  let loadSucceeded = false
+
  if (reset) {
    conversationPage.value = 1
    conversationHasMore.value = false
@@ -1082,12 +1116,38 @@ async function loadConversationListData(reset = false) {
    conversationPage.value += 1
    conversationListReady.value = true
    await ensureSelectedConversationAfterListLoad()
+    loadSucceeded = true
  } catch (error) {
    ElMessage.warning(error instanceof Error ? error.message : '会话列表加载失败，请稍后重试')
  } finally {
    conversationLoading.value = false
    conversationLoadingMore.value = false
  }
+
+  if (loadSucceeded) {
+    await ensureHistoryPanelCanScroll()
+  }
+}
+
+// ensureHistoryPanelCanScroll automatically pulls further pages when the first screen of the list is too short to produce a scrollbar.
+// Responsibility boundaries:
+// 1. It only deals with the scrollability of the left-side history list; it takes no part in business logic such as conversation selection or title computation.
+// 2. It loads the next page only when the container has finished rendering and the content height still does not exceed the visible height, avoiding pointless requests.
+// 3. If the list is exhausted, the container does not exist, or a load is in progress, it stops immediately so the recursive trigger cannot turn into a request storm.
+async function ensureHistoryPanelCanScroll() {
+  await nextTick()
+
+  const container = historyContentRef.value
+  if (!container || conversationLoading.value || conversationLoadingMore.value || !conversationHasMore.value) {
+    return
+  }
+
+  const canScroll = container.scrollHeight - container.clientHeight > 1
+  if (canScroll) {
+    return
+  }
+
+  await loadConversationListData(false)
 }

 function handleHistoryScroll(event: Event) {
@@ -1212,11 +1272,39 @@ async function ensureConversationMeta(conversationId: string) {
  }
 }

+// Loads context-window stats for a conversation into the local cache maps.
+// `forceReload` bypasses the "already loaded" short-circuit.
+async function loadConversationContextStats(conversationId: string, forceReload = false) {
+  // 1. A draft conversation has no stable chat_id yet, so requesting would only return a meaningless empty result; short-circuit early.
+  // 2. When stats were already read and this call does not force a refresh, reuse the local cache to avoid hitting the endpoint again when re-selecting the same conversation.
+  // 3. On failure, drop the cached entry (readers then see null) without showing an error, so this auxiliary information never becomes a frequent interruption when switching conversations.
+  if (!conversationId || isDraftConversationId(conversationId)) {
+    return
+  }
+
+  if (!forceReload && conversationContextStatsReadyMap[conversationId] === true) {
+    return
+  }
+
+  conversationContextStatsLoadingMap[conversationId] = true
+  try {
+    conversationContextStatsMap[conversationId] = await getContextStats(conversationId)
+    conversationContextStatsReadyMap[conversationId] = true
+  } catch {
+    delete conversationContextStatsMap[conversationId]
+    conversationContextStatsReadyMap[conversationId] = false
+  } finally {
+    conversationContextStatsLoadingMap[conversationId] = false
+  }
+}
+
 async function selectConversation(conversationId: string) {
  cancelEditUserMessage()
  selectedConversationId.value = conversationId
  applyPreferredModelForConversation(conversationId)
-  await Promise.allSettled([loadConversationMessages(conversationId), ensureConversationMeta(conversationId)])
+  await Promise.allSettled([
+    loadConversationMessages(conversationId),
+    ensureConversationMeta(conversationId),
+    loadConversationContextStats(conversationId),
+  ])
  scheduleScrollMessagesToBottom(false, true)
 }

@@ -1228,6 +1316,48 @@ function startNewConversation() {
  shouldAutoFollowMessages.value = true
 }

+// Identifiers describing a retry; buildChatRequestExtra maps them onto the snake_case retry_* request fields.
+interface RetryRequestExtra {
+  retryGroupId: string
+  retryFromUserMessageId: string | number
+  retryFromAssistantMessageId: string | number
+}
+
+// True only for the explicit 'true' mode; every other mode yields false.
+function isManualThinkingEnabled(mode: ThinkingModeType) {
+  return mode === 'true'
+}
+
+// Builds the `extra` payload for a chat request: retry fields when `retryExtra` is given,
+// otherwise a copy of the task-class ids, or undefined when there is nothing to send.
+function buildChatRequestExtra(
+  planningTaskClassIds: number[] = [],
+  retryExtra?: RetryRequestExtra,
+): ChatRequestExtra | undefined {
+  // 1. Retry and "a new round of smart planning" are mutually exclusive: a retry must point strictly at existing history messages and must not mix in new task-class context.
+  // 2. Therefore only the normal send path forwards task_class_ids, so a regenerate never carries the input area's temporary selection into a history retry.
+  // 3. If this round has no extra context at all, return undefined to keep the request body as lean as possible.
+  if (retryExtra) {
+    return {
+      request_mode: 'retry',
+      retry_group_id: retryExtra.retryGroupId,
+      retry_from_user_message_id: retryExtra.retryFromUserMessageId,
+      retry_from_assistant_message_id: retryExtra.retryFromAssistantMessageId,
+    }
+  }
+
+  if (planningTaskClassIds.length <= 0) {
+    return undefined
+  }
+
+  return {
+    task_class_ids: [...planningTaskClassIds],
+  }
+}
+
+// Pre-fills the input with the default planning prompt when a non-empty selection is
+// applied and the input is blank; otherwise the input is left untouched.
+function handlePlanningSelectionApplied(taskClassIds: number[]) {
+  if (taskClassIds.length <= 0 || messageInput.value.trim()) {
+    return
+  }
+  messageInput.value = DEFAULT_PLANNING_PROMPT
+}
+
 // fetchChatStream 负责以 fetch 方式发起聊天请求，并处理一次 refresh token 自动重试。
 // 职责边界：
 // 1. 只负责把请求发出去并返回原始 Response，不在这里解析 SSE 数据。
@@ -1279,7 +1409,7 @@ function prepareAssistantMessageForStreaming(message: AssistantMessage, createdA
  message.content = ''
  message.reasoning = ''
  message.createdAt = createdAt
-  thinkingMessageMap[message.id] = thinkingEnabled.value
+  thinkingMessageMap[message.id] = isManualThinkingEnabled(selectedThinkingMode.value)
  reasoningCollapsedMap[message.id] = false
  delete reasoningStartedAtMap[message.id]
  delete reasoningDurationMap[message.id]
@@ -1367,25 +1497,14 @@ async function streamAssistantReply(
  assistantMessage: AssistantMessage,
  createdAt: string,
  refreshPreview: boolean,
-  retryExtra?: {
-    retryGroupId: string
-    retryFromUserMessageId: string | number
-    retryFromAssistantMessageId: string | number
-  },
+  requestExtra?: ChatRequestExtra,
 ) : Promise<string> {
  const response = await fetchChatStream({
    conversation_id: isDraftConversationId(draftConversationId) ? undefined : draftConversationId,
    message: text,
    model: selectedModel.value,
-    thinking: thinkingEnabled.value,
-    extra: retryExtra
-      ? {
-          request_mode: 'retry',
-          retry_group_id: retryExtra.retryGroupId,
-          retry_from_user_message_id: retryExtra.retryFromUserMessageId,
-          retry_from_assistant_message_id: retryExtra.retryFromAssistantMessageId,
-        }
-      : undefined,
+    thinking: selectedThinkingMode.value,
+    extra: requestExtra,
  })

  const responseConversationId = response.headers.get('X-Conversation-ID')?.trim()
@@ -1449,8 +1568,13 @@ async function sendMessage(preset?: string) {

  chatLoading.value = true

-  const draftConversationId = selectedConversationId.value || createDraftConversationId()
-  if (!selectedConversationId.value) {
+  const planningTaskClassIdsForRequest = [...pendingPlanningTaskClassIds.value]
+  const shouldStartFreshPlanningConversation = planningTaskClassIdsForRequest.length > 0
+  const draftConversationId = shouldStartFreshPlanningConversation
+    ? createDraftConversationId()
+    : (selectedConversationId.value || createDraftConversationId())
+
+  if (!selectedConversationId.value || shouldStartFreshPlanningConversation) {
    selectedConversationId.value = draftConversationId
  }
  savePreferredModel(draftConversationId, selectedModel.value)
@@ -1474,7 +1598,7 @@ async function sendMessage(preset?: string) {
    reasoning: '',
  })

-  thinkingMessageMap[assistantMessage.id] = thinkingEnabled.value
+  thinkingMessageMap[assistantMessage.id] = isManualThinkingEnabled(selectedThinkingMode.value)
  reasoningCollapsedMap[assistantMessage.id] = false
  activeStreamingMessageId.value = assistantMessage.id

@@ -1483,8 +1607,21 @@ async function sendMessage(preset?: string) {
  scheduleScrollMessagesToBottom(false, true)

  try {
-    const actualConversationId = await streamAssistantReply(draftConversationId, text, assistantMessage, now, true)
-    await loadConversationMessages(actualConversationId, true)
+    const actualConversationId = await streamAssistantReply(
+      draftConversationId,
+      text,
+      assistantMessage,
+      now,
+      true,
+      buildChatRequestExtra(planningTaskClassIdsForRequest),
+    )
+    if (planningTaskClassIdsForRequest.length > 0) {
+      pendingPlanningTaskClassIds.value = []
+    }
+    await Promise.allSettled([
+      loadConversationMessages(actualConversationId, true),
+      loadConversationContextStats(actualConversationId, true),
+    ])
  } catch (error) {
    if (!assistantMessage.content.trim()) {
      assistantMessage.content = '本次回复已中断，请稍后重试。'
@@ -1562,12 +1699,22 @@ async function regenerateAssistantMessage(message: AssistantMessage) {
  scheduleScrollMessagesToBottom(false, true)

  try {
-    const actualConversationId = await streamAssistantReply(conversationId, text, retryAssistantMessage, now, true, {
-      retryGroupId,
-      retryFromUserMessageId: retrySource.persistedUserMessageId,
-      retryFromAssistantMessageId: retrySource.persistedAssistantMessageId,
-    })
-    await loadConversationMessages(actualConversationId, true)
+    const actualConversationId = await streamAssistantReply(
+      conversationId,
+      text,
+      retryAssistantMessage,
+      now,
+      true,
+      buildChatRequestExtra([], {
+        retryGroupId,
+        retryFromUserMessageId: retrySource.persistedUserMessageId,
+        retryFromAssistantMessageId: retrySource.persistedAssistantMessageId,
+      }),
+    )
+    await Promise.allSettled([
+      loadConversationMessages(actualConversationId, true),
+      loadConversationContextStats(actualConversationId, true),
+    ])
  } catch (error) {
    if (!retryAssistantMessage.content.trim()) {
      retryAssistantMessage.content = '重新生成失败，请稍后重试。'
@@ -1675,7 +1822,7 @@ onBeforeUnmount(() => {
            </button>
          </div>

-          <div class="assistant-history__content" @scroll="handleHistoryScroll">
+          <div ref="historyContentRef" class="assistant-history__content" @scroll="handleHistoryScroll">
            <button type="button" class="assistant-history__new" @click="startNewConversation">
              <span class="assistant-history__new-icon" aria-hidden="true">
                <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
@@ -1945,6 +2092,12 @@ onBeforeUnmount(() => {
          <div class="aaff8b8f">
            <div class="_77cefa5 _9996a53">
              <div class="_020ab5b">
+                <TaskClassPlanningPicker
+                  v-model="pendingPlanningTaskClassIds"
+                  :disabled="chatLoading"
+                  @applied="handlePlanningSelectionApplied"
+                />
+
                <div class="_24fad49">
                  <textarea
                    v-model="messageInput"
@@ -1957,20 +2110,21 @@ onBeforeUnmount(() => {
                </div>

                <div class="ec4f5d61">
-                  <button
-                    type="button"
-                    class="ds-atom-button f79352dc ds-toggle-button ds-toggle-button--md"
-                    :class="{ 'ds-toggle-button--selected': thinkingEnabled }"
-                    @click="thinkingEnabled = !thinkingEnabled"
-                  >
-                    <div class="ds-icon ds-atom-button__icon">
-                      <svg width="14" height="14" viewBox="0 0 14 14" fill="none" xmlns="http://www.w3.org/2000/svg">
-                        <path d="M7.06428 5.93342C7.6876 5.93342 8.19304 6.43904 8.19319 7.06233C8.19319 7.68573 7.68769 8.19123 7.06428 8.19123C6.44096 8.19113 5.93537 7.68567 5.93537 7.06233C5.93552 6.43911 6.44105 5.93353 7.06428 5.93342Z" fill="currentColor" />
-                        <path fill-rule="evenodd" clip-rule="evenodd" d="M8.68147 0.963693C10.1168 0.447019 11.6266 0.374829 12.5633 1.31135C13.5 2.24805 13.4276 3.75776 12.911 5.19319C12.7126 5.74431 12.4385 6.31796 12.0965 6.89729C12.4969 7.54638 12.8141 8.19018 13.036 8.80647C13.5527 10.2419 13.625 11.7516 12.6883 12.6883C11.7516 13.625 10.2419 13.5527 8.80647 13.036C8.19019 12.8141 7.54638 12.4969 6.89729 12.0965C6.31794 12.4386 5.74432 12.7125 5.19319 12.911C3.75774 13.4276 2.24807 13.5 1.31135 12.5633C0.374829 11.6266 0.447019 10.1168 0.963693 8.68147C1.17182 8.10338 1.46318 7.50063 1.82893 6.8924C1.52179 6.35711 1.27232 5.82825 1.08869 5.31819C0.572038 3.88278 0.499683 2.37306 1.43635 1.43635C2.37304 0.499655 3.88277 0.572044 5.31819 1.08869C5.82825 1.27232 6.35712 1.5218 6.8924 1.82893C7.50063 1.46318 8.10338 1.17181 8.68147 0.963693ZM11.3572 8.01154C10.9083 8.62253 10.3901 9.22873 9.8094 9.8094C9.22874 10.3901 8.62252 10.9083 8.01154 11.3572C8.42567 11.5841 8.82867 11.7688 9.21272 11.9071C10.5455 12.3868 11.4246 12.2547 11.8397 11.8397C12.2547 11.4246 12.3869 10.5456 11.9071 9.21272C11.7688 8.82866 11.5841 8.42568 11.3572 8.01154ZM2.56526 8.02912C2.3734 8.39322 2.21492 8.74796 2.0926 9.08772C1.61288 10.4204 1.74509 11.2995 2.15998 11.7147C2.57502 12.1297 3.45412 12.2618 4.78694 11.7821C5.11053 11.6656 5.44783 11.5164 5.79377 11.3367C5.24897 10.9223 4.70919 10.4533 4.19026 9.9344C3.57575 9.31987 3.03166 8.67633 2.56526 8.02912ZM6.90705 3.2469C6.24062 3.70479 5.56457 4.26321 4.91389 4.91389C4.26322 5.56456 3.70479 6.24063 3.2469 6.90705C3.72671 7.63325 4.32774 8.37459 5.03889 9.08576C5.6494 9.69627 6.2818 10.2265 6.90803 10.6678C7.59365 10.2025 8.29077 9.63076 8.96076 8.96076C9.63077 8.29075 10.2025 7.59366 10.6678 6.90803C10.2265 6.2818 9.69628 5.6494 9.08576 5.03889C8.37459 4.32773 7.63325 3.72672 6.90705 3.2469ZM11.7147 2.15998C11.2995 1.74509 10.4204 1.61288 9.08772 2.0926C8.74832 2.21479 8.39379 2.37271 8.0301 2.56428C8.67725 3.03065 9.31992 3.5758 9.9344 4.19026C10.4533 4.7092 10.9223 5.24896 11.3367 5.79377C11.5164 5.44785 11.6656 5.11052 11.7821 4.78694C12.2618 3.45416 12.1297 2.57502 11.7147 2.15998ZM4.91194 2.2176C3.57918 1.73788 2.70001 1.86995 2.28498 2.28498C1.86998 2.70003 1.73788 3.5792 2.2176 4.91194C2.31706 5.18822 2.44109 5.47427 2.58674 5.7674C3.01928 5.1887 3.51471 4.6158 4.06526 4.06526C4.61581 3.5147 5.18869 3.01928 5.7674 2.58674C5.47428 2.4411 5.18821 2.31706 4.91194 2.2176Z" fill="currentColor" />
-                      </svg>
-                    </div>
-                    <span><span class="_6dbc175">深度思考</span></span>
-                  </button>
+                  <div class="assistant-toolbar__pill assistant-toolbar__pill--select assistant-toolbar__pill--ds-thinking">
+                    <span class="assistant-toolbar__select-label">思考</span>
+                    <el-select
+                      v-model="selectedThinkingMode"
+                      class="assistant-toolbar__select-box assistant-toolbar__select-box--thinking"
+                      size="small"
+                      popper-class="assistant-thinking-select-panel"
+                      placement="top-start"
+                      :teleported="true"
+                    >
+                      <el-option value="auto" label="自动" />
+                      <el-option value="true" label="开启" />
+                      <el-option value="false" label="关闭" />
+                    </el-select>
+                  </div>

                  <div class="assistant-toolbar__pill assistant-toolbar__pill--select assistant-toolbar__pill--ds-model">
                    <span class="assistant-toolbar__select-label">模型</span>
@@ -1987,6 +2141,13 @@ onBeforeUnmount(() => {
                    </el-select>
                  </div>

+                  <ContextWindowMeter
+                    class="assistant-toolbar__context-meter"
+                    :stats="selectedConversationContextStats"
+                    :loading="contextStatsLoading"
+                    :disabled="contextStatsDisabled"
+                  />
+
                  <label class="f02f0e25 ds-icon-button ds-icon-button--l ds-icon-button--sizing-container" role="button" aria-disabled="false">
                    <div class="ds-icon-button__hover-bg" />
                    <div class="ds-icon">
@@ -2977,6 +3138,7 @@ onBeforeUnmount(() => {
  display: flex;
  align-items: center;
  gap: 8px;
+  min-width: 0;
  padding: 8px 10px 10px;
 }

@@ -3021,7 +3183,8 @@ onBeforeUnmount(() => {
  font-weight: 600;
 }

-.assistant-toolbar__pill--ds-model {
+.assistant-toolbar__pill--ds-model,
+.assistant-toolbar__pill--ds-thinking {
  height: 32px;
  padding: 0 8px 0 10px;
  border: 1px solid rgba(15, 23, 42, 0.1);
@@ -3030,11 +3193,24 @@ onBeforeUnmount(() => {
  display: inline-flex;
  align-items: center;
  gap: 8px;
-  margin-right: auto;
-  min-width: 144px;
  flex: 0 0 auto;
 }

+.assistant-toolbar__pill--ds-thinking {
+  min-width: 138px;
+}
+
+.assistant-toolbar__pill--ds-model {
+  min-width: 144px;
+}
+
+.assistant-toolbar__context-meter {
+  width: 144px;
+  min-width: 144px;
+  flex: 0 0 144px;
+  margin-right: auto;
+}
+
 .assistant-toolbar__select-label {
  color: #4b5563;
  font-weight: 600;
@@ -3051,6 +3227,11 @@ onBeforeUnmount(() => {
  flex: 0 0 96px;
 }

+.assistant-toolbar__select-box--thinking {
+  min-width: 86px;
+  flex: 0 0 86px;
+}
+
 .assistant-toolbar__select-box :deep(.el-select__wrapper) {
  min-height: 28px;
  padding: 0 6px 0 8px;
@@ -3188,6 +3369,18 @@ onBeforeUnmount(() => {
    padding-left: 18px;
    padding-right: 18px;
  }
+
+  .ec4f5d61 {
+    flex-wrap: wrap;
+  }
+
+  .assistant-toolbar__context-meter {
+    width: 144px;
+    min-width: 144px;
+    flex-basis: 144px;
+    margin-right: 0;
+    order: 3;
+  }
 }

@media (max-width: 1280px) {
--- a/frontend/src/types/dashboard.ts
+++ b/frontend/src/types/dashboard.ts
@@ -93,10 +93,32 @@ export interface AssistantMessage {
  retryTotal?: number
 }

+export type ThinkingModeType = 'auto' | 'true' | 'false'
+
+export interface ChatRequestExtra {
+  task_class_ids?: number[]
+  request_mode?: 'retry'
+  retry_group_id?: string
+  retry_from_user_message_id?: string | number
+  retry_from_assistant_message_id?: string | number
+  confirm_action?: string
+  always_execute?: boolean
+  resume?: Record<string, unknown>
+}
+
+export interface ConversationContextStats {
+  msg0: number
+  msg1: number
+  msg2: number
+  msg3: number
+  total: number
+  budget: number
+}
+
 export interface ChatStreamRequest {
  conversation_id?: string
  message: string
  model?: string
-  thinking?: boolean
-  extra?: Record<string, unknown>
+  thinking?: ThinkingModeType
+  extra?: ChatRequestExtra
 }