From ddb0d9cc17bb953c24840ee80e083a9c8fbc74f5 Mon Sep 17 00:00:00 2001
From: Losita <2810873701@qq.com>
Date: Thu, 26 Mar 2026 22:15:16 +0800
Subject: [PATCH] =?UTF-8?q?Version:=200.8.1.dev.260326=20=E5=90=8E?=
 =?UTF-8?q?=E7=AB=AF=EF=BC=9A=201.=E8=8E=B7=E5=8F=96agent=E8=81=8A?=
 =?UTF-8?q?=E5=A4=A9=E5=8E=86=E5=8F=B2=E8=AE=B0=E5=BD=95=E6=8E=A5=E5=8F=A3?=
 =?UTF-8?q?=E5=81=9A=E4=BA=86=E5=A6=82=E4=B8=8B=E6=9B=B4=E6=94=B9=EF=BC=9A?=
 =?UTF-8?q?=20(1)=E5=AF=B9reasoning=5Fcontent=E4=B9=9F=E5=81=9A=E4=BA=86?=
 =?UTF-8?q?=E5=AD=98=E5=82=A8=EF=BC=8C=E5=90=8C=E6=AD=A5=E6=9B=B4=E6=94=B9?=
 =?UTF-8?q?=E4=BA=86mysql=E5=92=8Credis=E7=BC=93=E5=AD=98=E7=9A=84?=
 =?UTF-8?q?=E8=AF=BB=E5=86=99=E9=80=BB=E8=BE=91=20(2)=E4=B8=BA=E4=BA=86?=
 =?UTF-8?q?=E6=89=BF=E6=8E=A5=E5=89=8D=E7=AB=AF=E7=9A=84=E9=87=8D=E8=AF=95?=
 =?UTF-8?q?/=E4=BF=AE=E6=94=B9=E6=B6=88=E6=81=AF=E7=9A=84=E9=80=BB?=
 =?UTF-8?q?=E8=BE=91=EF=BC=8C=E8=BF=9B=E8=A1=8C=E4=BA=86=E4=B8=80=E4=BA=9B?=
 =?UTF-8?q?=E4=BB=A3=E7=A0=81=E5=92=8C=E8=A1=A8=E5=8D=95=E4=B8=8A=E7=9A=84?=
 =?UTF-8?q?=E6=94=B9=E5=8A=A8=20=E5=89=8D=E7=AB=AF=EF=BC=9A=201.agent?=
 =?UTF-8?q?=E9=A1=B5=E9=9D=A2=E6=96=B0=E5=A2=9E=E4=BA=86=E5=BE=88=E5=A4=9A?=
 =?UTF-8?q?=E5=B0=8F=E7=BB=84=E4=BB=B6=EF=BC=8C=E6=94=B9=E5=96=84=E4=BA=A4?=
 =?UTF-8?q?=E4=BA=92=E4=BD=93=E9=AA=8C=202.=E6=96=B0=E5=A2=9E=E9=87=8D?=
 =?UTF-8?q?=E8=AF=95=E6=B6=88=E6=81=AF/=E4=BF=AE=E6=94=B9=E6=B6=88?=
 =?UTF-8?q?=E6=81=AF=E5=B9=B6=E9=87=8D=E6=96=B0=E5=8F=91=E9=80=81=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD=EF=BC=8C=E5=89=8D=E8=80=85=E6=9C=89bug=EF=BC=8C?=
 =?UTF-8?q?=E5=8F=AF=E8=83=BD=E5=89=8D=E5=90=8E=E7=AB=AF=E9=83=BD=E6=9C=89?=
 =?UTF-8?q?=E9=97=AE=E9=A2=98=EF=BC=8C=E5=BE=85=E4=BF=AE=E5=A4=8D=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/agent2/chat/stream.go                 |  44 +-
 backend/conv/agent.go                         |  46 +-
 backend/dao/agent-cache.go                    | 117 +++
 backend/dao/agent.go                          | 179 ++--
 backend/model/agent.go                        | 132 +--
 backend/service/agentsvc/agent.go             | 276 +++++-
 backend/service/agentsvc/agent_history.go     | 171 +++-
 backend/service/agentsvc/agent_quick_note.go  |  99 ++-
 .../service/events/chat_history_persist.go    |   6 +
 frontend/src/api/agent.ts                     |  50 +-
 .../components/dashboard/AssistantPanel.vue   | 812 ++++++++++++++++--
 frontend/src/types/dashboard.ts               |   3 +
 openapi.yaml                                  | 215 +++--
 13 files changed, 1828 insertions(+), 322 deletions(-)

diff --git a/backend/agent2/chat/stream.go b/backend/agent2/chat/stream.go
index beef18e..cd8831c 100644
--- a/backend/agent2/chat/stream.go
+++ b/backend/agent2/chat/stream.go
@@ -29,7 +29,8 @@ func StreamChat(
 	traceID string,
 	chatID string,
 	requestStart time.Time,
-) (string, *schema.TokenUsage, error) {
+	reasoningStartAt *time.Time,
+) (string, string, int, *schema.TokenUsage, error) {
 	/*callStart := time.Now()*/
 
 	messages := make([]*schema.Message, 0)
@@ -49,7 +50,7 @@ func StreamChat(
 	/*connectStart := time.Now()*/
 	reader, err := llm.Stream(ctx, messages, ark.WithThinking(thinking))
 	if err != nil {
-		return "", nil, err
+		return "", "", 0, nil, err
 	}
 	defer reader.Close()
 
@@ -61,6 +62,12 @@ func StreamChat(
 	firstChunk := true
 	chunkCount := 0
 	var tokenUsage *schema.TokenUsage
+	var localReasoningStartAt *time.Time
+	if reasoningStartAt != nil && !reasoningStartAt.IsZero() {
+		startCopy := reasoningStartAt.In(time.Local)
+		localReasoningStartAt = &startCopy
+	}
+	var reasoningEndAt *time.Time
 	/*streamRecvStart := time.Now()
 
 	log.Printf("打点|流连接建立|trace_id=%s|chat_id=%s|request_id=%s|本步耗时_ms=%d|请求累计_ms=%d|history_len=%d",
@@ -73,13 +80,14 @@ func StreamChat(
 	)*/
 
 	var fullText strings.Builder
+	var reasoningText strings.Builder
 	for {
 		chunk, err := reader.Recv()
 		if err == io.EOF {
 			break
 		}
 		if err != nil {
-			return "", nil, err
+			return "", "", 0, nil, err
 		}
 
 		// 优先记录模型真实 usage（通常在尾块返回，部分模型也可能中途返回）。
@@ -87,11 +95,22 @@ func StreamChat(
 			tokenUsage = agentllm.MergeUsage(tokenUsage, chunk.ResponseMeta.Usage)
 		}
 
-		fullText.WriteString(chunk.Content)
+		if chunk != nil {
+			if strings.TrimSpace(chunk.ReasoningContent) != "" && localReasoningStartAt == nil {
+				now := time.Now()
+				localReasoningStartAt = &now
+			}
+			if strings.TrimSpace(chunk.Content) != "" && localReasoningStartAt != nil && reasoningEndAt == nil {
+				now := time.Now()
+				reasoningEndAt = &now
+			}
+			fullText.WriteString(chunk.Content)
+			reasoningText.WriteString(chunk.ReasoningContent)
+		}
 
 		payload, err := agentstream.ToOpenAIStream(chunk, requestID, modelName, created, firstChunk)
 		if err != nil {
-			return "", nil, err
+			return "", "", 0, nil, err
 		}
 		if payload != "" {
 			outChan <- payload
@@ -112,7 +131,7 @@ func StreamChat(
 
 	finishChunk, err := agentstream.ToOpenAIFinishStream(requestID, modelName, created)
 	if err != nil {
-		return "", nil, err
+		return "", "", 0, nil, err
 	}
 	outChan <- finishChunk
 	outChan <- "[DONE]"
@@ -127,5 +146,16 @@ func StreamChat(
 		time.Since(requestStart).Milliseconds(),
 	)*/
 
-	return fullText.String(), tokenUsage, nil
+	reasoningDurationSeconds := 0
+	if localReasoningStartAt != nil {
+		if reasoningEndAt == nil {
+			now := time.Now()
+			reasoningEndAt = &now
+		}
+		if reasoningEndAt.After(*localReasoningStartAt) {
+			reasoningDurationSeconds = int(reasoningEndAt.Sub(*localReasoningStartAt) / time.Second)
+		}
+	}
+
+	return fullText.String(), reasoningText.String(), reasoningDurationSeconds, tokenUsage, nil
 }
diff --git a/backend/conv/agent.go b/backend/conv/agent.go
index 48c46b7..41c07a6 100644
--- a/backend/conv/agent.go
+++ b/backend/conv/agent.go
@@ -10,7 +10,7 @@ func ToEinoMessages(dbMsgs []model.ChatHistory) []*schema.Message {
 	res := make([]*schema.Message, 0)
 	for _, m := range dbMsgs {
 		var role schema.RoleType
-		switch *m.Role {
+		switch safeChatHistoryRole(m.Role) {
 		case "user":
 			role = schema.User
 		case "assistant":
@@ -18,10 +18,46 @@ func ToEinoMessages(dbMsgs []model.ChatHistory) []*schema.Message {
 		default:
 			role = schema.System
 		}
-		res = append(res, &schema.Message{
-			Role:    role,
-			Content: *m.MessageContent,
-		})
+		msg := &schema.Message{
+			Role:             role,
+			Content:          safeChatHistoryText(m.MessageContent),
+			ReasoningContent: safeChatHistoryText(m.ReasoningContent),
+		}
+		extra := make(map[string]any)
+		extra["history_id"] = m.ID
+		if m.ReasoningDurationSeconds > 0 {
+			extra["reasoning_duration_seconds"] = m.ReasoningDurationSeconds
+		}
+		if m.RetryGroupID != nil && *m.RetryGroupID != "" {
+			extra["retry_group_id"] = *m.RetryGroupID
+		}
+		if m.RetryIndex != nil && *m.RetryIndex > 0 {
+			extra["retry_index"] = *m.RetryIndex
+		}
+		if m.RetryFromUserMessageID != nil && *m.RetryFromUserMessageID > 0 {
+			extra["retry_from_user_message_id"] = *m.RetryFromUserMessageID
+		}
+		if m.RetryFromAssistantMessageID != nil && *m.RetryFromAssistantMessageID > 0 {
+			extra["retry_from_assistant_message_id"] = *m.RetryFromAssistantMessageID
+		}
+		if len(extra) > 0 {
+			msg.Extra = extra
+		}
+		res = append(res, msg)
 	}
 	return res
 }
+
+func safeChatHistoryRole(role *string) string {
+	if role == nil {
+		return ""
+	}
+	return *role
+}
+
+func safeChatHistoryText(text *string) string {
+	if text == nil {
+		return ""
+	}
+	return *text
+}
diff --git a/backend/dao/agent-cache.go b/backend/dao/agent-cache.go
index aad6d18..39edaa6 100644
--- a/backend/dao/agent-cache.go
+++ b/backend/dao/agent-cache.go
@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/cloudwego/eino/schema"
@@ -163,6 +164,79 @@ func (m *AgentCache) BackfillHistory(ctx context.Context, sessionID string, mess
 	return err
 }
 
+func (m *AgentCache) ApplyRetrySeed(ctx context.Context, sessionID, retryGroupID string, sourceUserMessageID, sourceAssistantMessageID int) error {
+	if m == nil || m.client == nil {
+		return nil
+	}
+	groupID := strings.TrimSpace(retryGroupID)
+	if groupID == "" {
+		return nil
+	}
+
+	vals, err := m.client.LRange(ctx, m.historyKey(sessionID), 0, -1).Result()
+	if err != nil {
+		return err
+	}
+	if len(vals) == 0 {
+		return nil
+	}
+
+	changed := false
+	targets := map[int]struct{}{}
+	if sourceUserMessageID > 0 {
+		targets[sourceUserMessageID] = struct{}{}
+	}
+	if sourceAssistantMessageID > 0 {
+		targets[sourceAssistantMessageID] = struct{}{}
+	}
+	if len(targets) == 0 {
+		return nil
+	}
+
+	indexOne := 1
+	for idx, raw := range vals {
+		var msg schema.Message
+		if err := json.Unmarshal([]byte(raw), &msg); err != nil {
+			return err
+		}
+		historyID := extractMessageHistoryID(&msg)
+		if historyID <= 0 {
+			continue
+		}
+		if _, ok := targets[historyID]; !ok {
+			continue
+		}
+		if msg.Extra == nil {
+			msg.Extra = make(map[string]any)
+		}
+		msg.Extra["retry_group_id"] = groupID
+		msg.Extra["retry_index"] = indexOne
+		updated, err := json.Marshal(&msg)
+		if err != nil {
+			return err
+		}
+		vals[idx] = string(updated)
+		changed = true
+	}
+
+	if !changed {
+		return nil
+	}
+
+	pipe := m.client.Pipeline()
+	key := m.historyKey(sessionID)
+	pipe.Del(ctx, key)
+	values := make([]interface{}, 0, len(vals))
+	for _, item := range vals {
+		values = append(values, item)
+	}
+	pipe.RPush(ctx, key, values...)
+	pipe.LTrim(ctx, key, 0, int64(len(vals)-1))
+	pipe.Expire(ctx, key, m.expiration)
+	_, err = pipe.Exec(ctx)
+	return err
+}
+
 func (m *AgentCache) ClearHistory(ctx context.Context, sessionID string) error {
 	historyKey := m.historyKey(sessionID)
 	windowKey := m.historyWindowKey(sessionID)
@@ -188,3 +262,46 @@ func (m *AgentCache) DeleteConversationStatus(ctx context.Context, sessionID str
 	key := fmt.Sprintf("smartflow:conversation_status:%s", sessionID)
 	return m.client.Del(ctx, key).Err()
 }
+
+func extractMessageHistoryID(msg *schema.Message) int {
+	if msg == nil || msg.Extra == nil {
+		return 0
+	}
+	raw, ok := msg.Extra["history_id"]
+	if !ok {
+		return 0
+	}
+	// 1. history_id 主要来自 DB 回填，正常情况下是 number。
+	// 2. 但 Redis 往返、灰度期数据修复或手工写入时，仍可能出现字符串数字。
+	// 3. 这里做一次宽松解析，避免重试分组补种时因为类型差异找不到源消息。
+	switch v := raw.(type) {
+	case int:
+		return v
+	case int32:
+		return int(v)
+	case int64:
+		return int(v)
+	case float64:
+		return int(v)
+	case json.Number:
+		if parsed, err := v.Int64(); err == nil {
+			return int(parsed)
+		}
+		if parsed, err := v.Float64(); err == nil {
+			return int(parsed)
+		}
+		return 0
+	case string:
+		trimmed := strings.TrimSpace(v)
+		if trimmed == "" {
+			return 0
+		}
+		parsed, err := strconv.Atoi(trimmed)
+		if err != nil {
+			return 0
+		}
+		return parsed
+	default:
+		return 0
+	}
+}
diff --git a/backend/dao/agent.go b/backend/dao/agent.go
index d497092..ec49054 100644
--- a/backend/dao/agent.go
+++ b/backend/dao/agent.go
@@ -23,38 +23,52 @@ func (r *AgentDAO) WithTx(tx *gorm.DB) *AgentDAO {
 	return &AgentDAO{db: tx}
 }
 
-// saveChatHistoryCore 是“聊天消息落库 + 会话统计更新”的核心实现。
+// saveChatHistoryCore 鏄€滆亰澶╂秷鎭惤搴?+ 浼氳瘽缁熻鏇存柊鈥濈殑鏍稿績瀹炵幇銆?
 //
-// 职责边界：
-// 1. 只执行当前 DAO 句柄上的数据库写入动作；
-// 2. 不主动开启事务（事务由调用方决定）；
-// 3. 保证 chat_histories 与 agent_chats.message_count 的一致性口径。
+// 鑱岃矗杈圭晫锛?
+// 1. 鍙墽琛屽綋鍓?DAO 鍙ユ焺涓婄殑鏁版嵁搴撳啓鍏ュ姩浣滐紱
+// 2. 涓嶄富鍔ㄥ紑鍚簨鍔★紙浜嬪姟鐢辫皟鐢ㄦ柟鍐冲畾锛夛紱
+// 3. 淇濊瘉 chat_histories 涓?agent_chats.message_count 鐨勪竴鑷存€у彛寰勩€?
 //
-// 失败处理：
-// 1. 任一步骤失败都返回 error；
-// 2. 若调用方处于事务中，返回 error 会触发事务回滚。
-func (a *AgentDAO) saveChatHistoryCore(ctx context.Context, userID int, conversationID string, role, message string, tokensConsumed int) error {
-	// 0. token 入库前兜底：负数统一归零，避免异常值污染累计统计。
+// 澶辫触澶勭悊锛?
+// 1. 浠讳竴姝ラ澶辫触閮借繑鍥?error锛?
+// 2. 鑻ヨ皟鐢ㄦ柟澶勪簬浜嬪姟涓紝杩斿洖 error 浼氳Е鍙戜簨鍔″洖婊氥€?
+func (a *AgentDAO) saveChatHistoryCore(ctx context.Context, userID int, conversationID string, role, message, reasoningContent string, reasoningDurationSeconds int, retryGroupID *string, retryIndex *int, retryFromUserMessageID *int, retryFromAssistantMessageID *int, tokensConsumed int) error {
+	// 0. token 鍏ュ簱鍓嶅厹搴曪細璐熸暟缁熶竴褰掗浂锛岄伩鍏嶅紓甯稿€兼薄鏌撶疮璁＄粺璁°€?
 	if tokensConsumed < 0 {
 		tokensConsumed = 0
 	}
+	reasoningContent = strings.TrimSpace(reasoningContent)
+	if reasoningDurationSeconds < 0 {
+		reasoningDurationSeconds = 0
+	}
 
-	// 1. 先写 chat_histories 原始消息。
+	// 1. 鍏堝啓 chat_histories 鍘熷娑堟伅銆?
+	var reasoningContentPtr *string
+	if reasoningContent != "" {
+		reasoningContentPtr = &reasoningContent
+	}
 	userChat := model.ChatHistory{
-		UserID:         userID,
-		MessageContent: &message,
-		Role:           &role,
-		ChatID:         conversationID,
-		TokensConsumed: tokensConsumed,
+		UserID:                      userID,
+		MessageContent:              &message,
+		ReasoningContent:            reasoningContentPtr,
+		ReasoningDurationSeconds:    reasoningDurationSeconds,
+		RetryGroupID:                retryGroupID,
+		RetryIndex:                  retryIndex,
+		RetryFromUserMessageID:      retryFromUserMessageID,
+		RetryFromAssistantMessageID: retryFromAssistantMessageID,
+		Role:                        &role,
+		ChatID:                      conversationID,
+		TokensConsumed:              tokensConsumed,
 	}
 	if err := a.db.WithContext(ctx).Create(&userChat).Error; err != nil {
 		return err
 	}
 
-	// 2. 再更新会话统计：
-	// 2.1 message_count +1，保持和 chat_histories 行数口径一致；
-	// 2.2 tokens_total 累加本条消息 token；
-	// 2.3 last_message_at 刷新为当前时间，供会话排序使用。
+	// 2. 鍐嶆洿鏂颁細璇濈粺璁★細
+	// 2.1 message_count +1锛屼繚鎸佸拰 chat_histories 琛屾暟鍙ｅ緞涓€鑷达紱
+	// 2.2 tokens_total 绱姞鏈潯娑堟伅 token锛?
+	// 2.3 last_message_at 鍒锋柊涓哄綋鍓嶆椂闂达紝渚涗細璇濇帓搴忎娇鐢ㄣ€?
 	now := time.Now()
 	updates := map[string]interface{}{
 		"message_count":   gorm.Expr("message_count + ?", 1),
@@ -68,14 +82,14 @@ func (a *AgentDAO) saveChatHistoryCore(ctx context.Context, userID int, conversa
 		return result.Error
 	}
 	if result.RowsAffected == 0 {
-		// 会话不存在时直接失败，避免出现“孤儿历史消息”。
+		// 浼氳瘽涓嶅瓨鍦ㄦ椂鐩存帴澶辫触锛岄伩鍏嶅嚭鐜扳€滃鍎垮巻鍙叉秷鎭€濄€?
 		return fmt.Errorf("conversation not found when updating stats: user_id=%d chat_id=%s", userID, conversationID)
 	}
 
-	// 3. 最后更新 users.token_usage（同一事务内）：
-	// 3.1 只在 tokensConsumed>0 时执行，避免无意义写入；
-	// 3.2 和 chat_histories/agent_chats 放在同一事务里，保证统计口径原子一致；
-	// 3.3 若用户行不存在则返回错误，触发事务回滚，防止出现“会话统计成功但用户统计丢失”。
+	// 3. 鏈€鍚庢洿鏂?users.token_usage锛堝悓涓€浜嬪姟鍐咃級锛?
+	// 3.1 鍙湪 tokensConsumed>0 鏃舵墽琛岋紝閬垮厤鏃犳剰涔夊啓鍏ワ紱
+	// 3.2 鍜?chat_histories/agent_chats 鏀惧湪鍚屼竴浜嬪姟閲岋紝淇濊瘉缁熻鍙ｅ緞鍘熷瓙涓€鑷达紱
+	// 3.3 鑻ョ敤鎴疯涓嶅瓨鍦ㄥ垯杩斿洖閿欒锛岃Е鍙戜簨鍔″洖婊氾紝闃叉鍑虹幇鈥滀細璇濈粺璁℃垚鍔熶絾鐢ㄦ埛缁熻涓㈠け鈥濄€?
 	if tokensConsumed > 0 {
 		userUpdate := a.db.WithContext(ctx).
 			Model(&model.User{}).
@@ -91,38 +105,38 @@ func (a *AgentDAO) saveChatHistoryCore(ctx context.Context, userID int, conversa
 	return nil
 }
 
-// SaveChatHistoryInTx 在调用方“已开启事务”的场景下写入聊天历史。
+// SaveChatHistoryInTx 鍦ㄨ皟鐢ㄦ柟鈥滃凡寮€鍚簨鍔♀€濈殑鍦烘櫙涓嬪啓鍏ヨ亰澶╁巻鍙层€?
 //
-// 设计目的：
-// 1. 给服务层组合多个 DAO 操作时复用，避免嵌套事务；
-// 2. 让 outbox 消费处理器可以和业务写入共享同一个 tx。
-func (a *AgentDAO) SaveChatHistoryInTx(ctx context.Context, userID int, conversationID string, role, message string, tokensConsumed int) error {
-	return a.saveChatHistoryCore(ctx, userID, conversationID, role, message, tokensConsumed)
+// 璁捐鐩殑锛?
+// 1. 缁欐湇鍔″眰缁勫悎澶氫釜 DAO 鎿嶄綔鏃跺鐢紝閬垮厤宓屽浜嬪姟锛?
+// 2. 璁?outbox 娑堣垂澶勭悊鍣ㄥ彲浠ュ拰涓氬姟鍐欏叆鍏变韩鍚屼竴涓?tx銆?
+func (a *AgentDAO) SaveChatHistoryInTx(ctx context.Context, userID int, conversationID string, role, message, reasoningContent string, reasoningDurationSeconds int, retryGroupID *string, retryIndex *int, retryFromUserMessageID *int, retryFromAssistantMessageID *int, tokensConsumed int) error {
+	return a.saveChatHistoryCore(ctx, userID, conversationID, role, message, reasoningContent, reasoningDurationSeconds, retryGroupID, retryIndex, retryFromUserMessageID, retryFromAssistantMessageID, tokensConsumed)
 }
 
-// SaveChatHistory 在同步直写路径下写入聊天历史。
+// SaveChatHistory 鍦ㄥ悓姝ョ洿鍐欒矾寰勪笅鍐欏叆鑱婂ぉ鍘嗗彶銆?
 //
-// 说明：
-// 1. 该方法会自行开启事务；
-// 2. 内部复用 saveChatHistoryCore，确保和 SaveChatHistoryInTx 的业务口径完全一致。
-func (a *AgentDAO) SaveChatHistory(ctx context.Context, userID int, conversationID string, role, message string, tokensConsumed int) error {
+// 璇存槑锛?
+// 1. 璇ユ柟娉曚細鑷寮€鍚簨鍔★紱
+// 2. 鍐呴儴澶嶇敤 saveChatHistoryCore锛岀‘淇濆拰 SaveChatHistoryInTx 鐨勪笟鍔″彛寰勫畬鍏ㄤ竴鑷淬€?
+func (a *AgentDAO) SaveChatHistory(ctx context.Context, userID int, conversationID string, role, message, reasoningContent string, reasoningDurationSeconds int, retryGroupID *string, retryIndex *int, retryFromUserMessageID *int, retryFromAssistantMessageID *int, tokensConsumed int) error {
 	return a.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
-		return a.WithTx(tx).saveChatHistoryCore(ctx, userID, conversationID, role, message, tokensConsumed)
+		return a.WithTx(tx).saveChatHistoryCore(ctx, userID, conversationID, role, message, reasoningContent, reasoningDurationSeconds, retryGroupID, retryIndex, retryFromUserMessageID, retryFromAssistantMessageID, tokensConsumed)
 	})
 }
 
-// adjustTokenUsageCore 在同一事务语义下做“会话+用户”token 账本增量调整。
+// adjustTokenUsageCore 鍦ㄥ悓涓€浜嬪姟璇箟涓嬪仛鈥滀細璇?鐢ㄦ埛鈥漷oken 璐︽湰澧為噺璋冩暣銆?
 //
-// 职责边界：
-// 1. 只更新 agent_chats.tokens_total 与 users.token_usage；
-// 2. 不写 chat_histories（消息落库由 SaveChatHistory* 路径负责）；
-// 3. deltaTokens<=0 时视为无操作，直接返回。
+// 鑱岃矗杈圭晫锛?
+// 1. 鍙洿鏂?agent_chats.tokens_total 涓?users.token_usage锛?
+// 2. 涓嶅啓 chat_histories锛堟秷鎭惤搴撶敱 SaveChatHistory* 璺緞璐熻矗锛夛紱
+// 3. deltaTokens<=0 鏃惰涓烘棤鎿嶄綔锛岀洿鎺ヨ繑鍥炪€?
 func (a *AgentDAO) adjustTokenUsageCore(ctx context.Context, userID int, conversationID string, deltaTokens int) error {
 	if deltaTokens <= 0 {
 		return nil
 	}
 
-	// 1. 先更新会话累计 token。
+	// 1. 鍏堟洿鏂颁細璇濈疮璁?token銆?
 	chatUpdate := a.db.WithContext(ctx).
 		Model(&model.AgentChat{}).
 		Where("user_id = ? AND chat_id = ?", userID, conversationID).
@@ -134,7 +148,7 @@ func (a *AgentDAO) adjustTokenUsageCore(ctx context.Context, userID int, convers
 		return fmt.Errorf("conversation not found when adjusting tokens: user_id=%d chat_id=%s", userID, conversationID)
 	}
 
-	// 2. 再更新用户累计 token。
+	// 2. 鍐嶆洿鏂扮敤鎴风疮璁?token銆?
 	userUpdate := a.db.WithContext(ctx).
 		Model(&model.User{}).
 		Where("id = ?", userID).
@@ -148,12 +162,12 @@ func (a *AgentDAO) adjustTokenUsageCore(ctx context.Context, userID int, convers
 	return nil
 }
 
-// AdjustTokenUsageInTx 在调用方已开启事务时执行 token 账本增量调整。
+// AdjustTokenUsageInTx 鍦ㄨ皟鐢ㄦ柟宸插紑鍚簨鍔℃椂鎵ц token 璐︽湰澧為噺璋冩暣銆?
 func (a *AgentDAO) AdjustTokenUsageInTx(ctx context.Context, userID int, conversationID string, deltaTokens int) error {
 	return a.adjustTokenUsageCore(ctx, userID, conversationID, deltaTokens)
 }
 
-// AdjustTokenUsage 在同步路径下执行 token 账本增量调整（内部自带事务）。
+// AdjustTokenUsage 鍦ㄥ悓姝ヨ矾寰勪笅鎵ц token 璐︽湰澧為噺璋冩暣锛堝唴閮ㄨ嚜甯︿簨鍔★級銆?
 func (a *AgentDAO) AdjustTokenUsage(ctx context.Context, userID int, conversationID string, deltaTokens int) error {
 	return a.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
 		return a.WithTx(tx).adjustTokenUsageCore(ctx, userID, conversationID, deltaTokens)
@@ -183,13 +197,58 @@ func (a *AgentDAO) GetUserChatHistories(ctx context.Context, userID, limit int,
 	if err != nil {
 		return nil, err
 	}
-	// 保留“最近 N 条”后，反转成时间正序，方便模型消费。
+	// 淇濈暀鈥滄渶杩?N 鏉♀€濆悗锛屽弽杞垚鏃堕棿姝ｅ簭锛屾柟渚挎ā鍨嬫秷璐广€?
 	for i, j := 0, len(histories)-1; i < j; i, j = i+1, j-1 {
 		histories[i], histories[j] = histories[j], histories[i]
 	}
 	return histories, nil
 }
 
+func (a *AgentDAO) EnsureRetryGroupSeed(ctx context.Context, userID int, chatID, retryGroupID string, sourceUserMessageID, sourceAssistantMessageID int) error {
+	normalizedGroupID := strings.TrimSpace(retryGroupID)
+	if normalizedGroupID == "" {
+		return nil
+	}
+
+	indexOne := 1
+	ids := make([]int, 0, 2)
+	if sourceUserMessageID > 0 {
+		ids = append(ids, sourceUserMessageID)
+	}
+	if sourceAssistantMessageID > 0 {
+		ids = append(ids, sourceAssistantMessageID)
+	}
+	if len(ids) == 0 {
+		return nil
+	}
+
+	return a.db.WithContext(ctx).
+		Model(&model.ChatHistory{}).
+		Where("user_id = ? AND chat_id = ? AND id IN ?", userID, chatID, ids).
+		Where("(retry_group_id IS NULL OR retry_group_id = '')").
+		Updates(map[string]any{
+			"retry_group_id": normalizedGroupID,
+			"retry_index":    indexOne,
+		}).Error
+}
+
+func (a *AgentDAO) GetRetryGroupNextIndex(ctx context.Context, userID int, chatID, retryGroupID string) (int, error) {
+	normalizedGroupID := strings.TrimSpace(retryGroupID)
+	if normalizedGroupID == "" {
+		return 0, errors.New("retry_group_id is empty")
+	}
+
+	var maxIndex int
+	if err := a.db.WithContext(ctx).
+		Model(&model.ChatHistory{}).
+		Where("user_id = ? AND chat_id = ? AND retry_group_id = ?", userID, chatID, normalizedGroupID).
+		Select("COALESCE(MAX(retry_index), 0)").
+		Scan(&maxIndex).Error; err != nil {
+		return 0, err
+	}
+	return maxIndex + 1, nil
+}
+
 func (a *AgentDAO) IfChatExists(ctx context.Context, userID int, chatID string) (bool, error) {
 	var chat model.AgentChat
 	err := a.db.WithContext(ctx).Where("user_id = ? AND chat_id = ?", userID, chatID).First(&chat).Error
@@ -202,7 +261,7 @@ func (a *AgentDAO) IfChatExists(ctx context.Context, userID int, chatID string)
 	return true, nil
 }
 
-// GetConversationMeta 查询单个会话元信息。
+// GetConversationMeta 鏌ヨ鍗曚釜浼氳瘽鍏冧俊鎭€?
 func (a *AgentDAO) GetConversationMeta(ctx context.Context, userID int, chatID string) (*model.AgentChat, error) {
 	var chat model.AgentChat
 	err := a.db.WithContext(ctx).
@@ -215,7 +274,7 @@ func (a *AgentDAO) GetConversationMeta(ctx context.Context, userID int, chatID s
 	return &chat, nil
 }
 
-// GetConversationTitle 读取当前会话标题。
+// GetConversationTitle 璇诲彇褰撳墠浼氳瘽鏍囬銆?
 func (a *AgentDAO) GetConversationTitle(ctx context.Context, userID int, chatID string) (title string, exists bool, err error) {
 	var chat model.AgentChat
 	queryErr := a.db.WithContext(ctx).
@@ -234,7 +293,7 @@ func (a *AgentDAO) GetConversationTitle(ctx context.Context, userID int, chatID
 	return strings.TrimSpace(*chat.Title), true, nil
 }
 
-// UpdateConversationTitleIfEmpty 仅在标题为空时更新会话标题。
+// UpdateConversationTitleIfEmpty 浠呭湪鏍囬涓虹┖鏃舵洿鏂颁細璇濇爣棰樸€?
 func (a *AgentDAO) UpdateConversationTitleIfEmpty(ctx context.Context, userID int, chatID, title string) error {
 	normalized := strings.TrimSpace(title)
 	if normalized == "" {
@@ -246,20 +305,20 @@ func (a *AgentDAO) UpdateConversationTitleIfEmpty(ctx context.Context, userID in
 		Update("title", normalized).Error
 }
 
-// GetConversationList 按分页查询指定用户的会话列表。
+// GetConversationList 鎸夊垎椤垫煡璇㈡寚瀹氱敤鎴风殑浼氳瘽鍒楄〃銆?
 //
-// 职责边界：
-// 1. 只负责读库，不负责缓存；
-// 2. 只负责 user_id 数据隔离，不负责参数合法性兜底（由 service 负责）；
-// 3. 返回总数 total 供上层计算 has_more。
+// 鑱岃矗杈圭晫锛?
+// 1. 鍙礋璐ｈ搴擄紝涓嶈礋璐ｇ紦瀛橈紱
+// 2. 鍙礋璐?user_id 鏁版嵁闅旂锛屼笉璐熻矗鍙傛暟鍚堟硶鎬у厹搴曪紙鐢?service 璐熻矗锛夛紱
+// 3. 杩斿洖鎬绘暟 total 渚涗笂灞傝绠?has_more銆?
 func (a *AgentDAO) GetConversationList(ctx context.Context, userID, page, pageSize int, status string) ([]model.AgentChat, int64, error) {
-	// 1. 先构造统一过滤条件，保证 total 与 list 的统计口径一致。
+	// 1. 鍏堟瀯閫犵粺涓€杩囨护鏉′欢锛屼繚璇?total 涓?list 鐨勭粺璁″彛寰勪竴鑷淬€?
 	baseQuery := a.db.WithContext(ctx).Model(&model.AgentChat{}).Where("user_id = ?", userID)
 	if strings.TrimSpace(status) != "" {
 		baseQuery = baseQuery.Where("status = ?", status)
 	}
 
-	// 2. 先查总条数，给前端分页器提供完整元信息。
+	// 2. 鍏堟煡鎬绘潯鏁帮紝缁欏墠绔垎椤靛櫒鎻愪緵瀹屾暣鍏冧俊鎭€?
 	var total int64
 	if err := baseQuery.Count(&total).Error; err != nil {
 		return nil, 0, err
@@ -268,9 +327,9 @@ func (a *AgentDAO) GetConversationList(ctx context.Context, userID, page, pageSi
 		return make([]model.AgentChat, 0), 0, nil
 	}
 
-	// 3. 再查当前页数据：
-	// 3.1 按最近消息时间倒序，保证“最近活跃”优先展示；
-	// 3.2 同时间戳下按 id 倒序，避免翻页时顺序抖动。
+	// 3. 鍐嶆煡褰撳墠椤垫暟鎹細
+	// 3.1 鎸夋渶杩戞秷鎭椂闂村€掑簭锛屼繚璇佲€滄渶杩戞椿璺冣€濅紭鍏堝睍绀猴紱
+	// 3.2 鍚屾椂闂存埑涓嬫寜 id 鍊掑簭锛岄伩鍏嶇炕椤垫椂椤哄簭鎶栧姩銆?
 	offset := (page - 1) * pageSize
 	var chats []model.AgentChat
 	query := a.db.WithContext(ctx).
diff --git a/backend/model/agent.go b/backend/model/agent.go
index e3223ed..84e31be 100644
--- a/backend/model/agent.go
+++ b/backend/model/agent.go
@@ -7,29 +7,23 @@ type UserSendMessageRequest struct {
 	Message        string         `json:"message" binding:"required"`
 	Model          string         `json:"model,omitempty"`
 	Thinking       bool           `json:"thinking,omitempty"`
-	Extra          map[string]any `json:"extra,omitempty"` // 附加参数（如 task_class_id），供 agent 分支链路使用
+	Extra          map[string]any `json:"extra,omitempty"`
 }
 
-// ChatHistoryPersistPayload 是“聊天消息持久化请求”业务 DTO。
-//
-// 职责边界：
-// 1. 只描述聊天业务需要落库的核心字段；
-// 2. 可被同步直写路径与异步事件路径复用；
-// 3. 不包含 outbox/kafka 协议字段（这些字段由 infra 层统一封装）。
 type ChatHistoryPersistPayload struct {
-	UserID         int    `json:"user_id"`
-	ConversationID string `json:"conversation_id"`
-	Role           string `json:"role"`
-	Message        string `json:"message"`
-	TokensConsumed int    `json:"tokens_consumed"`
+	UserID                      int     `json:"user_id"`
+	ConversationID              string  `json:"conversation_id"`
+	Role                        string  `json:"role"`
+	Message                     string  `json:"message"`
+	ReasoningContent            string  `json:"reasoning_content,omitempty"`
+	ReasoningDurationSeconds    int     `json:"reasoning_duration_seconds,omitempty"`
+	RetryGroupID                *string `json:"retry_group_id,omitempty"`
+	RetryIndex                  *int    `json:"retry_index,omitempty"`
+	RetryFromUserMessageID      *int    `json:"retry_from_user_message_id,omitempty"`
+	RetryFromAssistantMessageID *int    `json:"retry_from_assistant_message_id,omitempty"`
+	TokensConsumed              int     `json:"tokens_consumed"`
 }
 
-// ChatTokenUsageAdjustPayload 是“会话 token 账本增量调整”事件载荷。
-//
-// 职责边界：
-// 1. 只表达“对哪个用户/会话增加多少 token”；
-// 2. 不承载 chat_histories 落库语义（消息正文由聊天持久化事件负责）；
-// 3. 不包含 outbox/kafka 协议字段（由基础设施层统一封装）。
 type ChatTokenUsageAdjustPayload struct {
 	UserID         int       `json:"user_id"`
 	ConversationID string    `json:"conversation_id"`
@@ -38,11 +32,6 @@ type ChatTokenUsageAdjustPayload struct {
 	TriggeredAt    time.Time `json:"triggered_at"`
 }
 
-// GetConversationMetaResponse 是会话元信息查询接口的返回结构。
-// 说明：
-// 1) title 可能为空字符串（表示标题尚未生成）；
-// 2) has_title 便于前端快速判断是否需要展示默认占位文案；
-// 3) 保留 message_count/last_message_at，方便前端后续扩展会话列表排序或角标。
 type GetConversationMetaResponse struct {
 	ConversationID string     `json:"conversation_id"`
 	Title          string     `json:"title"`
@@ -52,12 +41,6 @@ type GetConversationMetaResponse struct {
 	Status         string     `json:"status"`
 }
 
-// GetConversationListItem 是“会话列表”中的单项数据。
-//
-// 职责边界：
-// 1. 仅承载列表展示所需的轻量字段，不承载具体消息正文；
-// 2. title 允许为空字符串，has_title 用于前端快速判断占位文案；
-// 3. message_count/last_message_at 用于排序展示与角标扩展。
 type GetConversationListItem struct {
 	ConversationID string     `json:"conversation_id"`
 	Title          string     `json:"title"`
@@ -68,12 +51,6 @@ type GetConversationListItem struct {
 	CreatedAt      *time.Time `json:"created_at,omitempty"`
 }
 
-// GetConversationListResponse 是“获取用户会话列表”接口的统一响应体。
-//
-// 职责边界：
-// 1. list 承载当前页数据；
-// 2. page/page_size/total/has_more 承载分页语义；
-// 3. 不负责返回会话正文明细（正文仍由聊天历史接口承担）。
 type GetConversationListResponse struct {
 	List     []GetConversationListItem `json:"list"`
 	Page     int                       `json:"page"`
@@ -83,58 +60,30 @@ type GetConversationListResponse struct {
 	HasMore  bool                      `json:"has_more"`
 }
 
-// GetConversationHistoryItem 是“按会话读取聊天历史”接口的单条消息响应。
-//
-// 职责边界：
-// 1. role/content：承载前端渲染消息气泡所需的核心字段；
-// 2. id/created_at：仅在回源 DB 时可稳定提供，命中 Redis 时允许为空；
-// 3. reasoning_content：兼容模型推理内容，缓存命中时可直接透传。
 type GetConversationHistoryItem struct {
-	ID               int        `json:"id,omitempty"`
-	Role             string     `json:"role"`
-	Content          string     `json:"content"`
-	CreatedAt        *time.Time `json:"created_at,omitempty"`
-	ReasoningContent string     `json:"reasoning_content,omitempty"`
+	ID                       int        `json:"id,omitempty"`
+	Role                     string     `json:"role"`
+	Content                  string     `json:"content"`
+	CreatedAt                *time.Time `json:"created_at,omitempty"`
+	ReasoningContent         string     `json:"reasoning_content,omitempty"`
+	ReasoningDurationSeconds int        `json:"reasoning_duration_seconds,omitempty"`
+	RetryGroupID             *string    `json:"retry_group_id"`
+	RetryIndex               *int       `json:"retry_index"`
+	RetryTotal               *int       `json:"retry_total"`
 }
 
-// SchedulePlanPreviewCache 是“排程预览”在 Redis 中的缓存结构。
-//
-// 职责边界：
-// 1. 负责承载排程完成后的结构化预览快照（summary + candidate_plans）；
-// 2. 通过 user_id 做查询归属校验，避免跨用户越权读取；
-// 3. 仅用于缓存层读写，不表示已落库或已应用到正式日程；
-// 4. 通过 trace_id 标识本次预览来源，便于排查链路问题。
 type SchedulePlanPreviewCache struct {
-	UserID         int                `json:"user_id"`
-	ConversationID string             `json:"conversation_id"`
-	TraceID        string             `json:"trace_id,omitempty"`
-	Summary        string             `json:"summary"`
-	CandidatePlans []UserWeekSchedule `json:"candidate_plans"`
-	// TaskClassIDs 记录本次预览对应的任务类集合。
-	// 作用：
-	// 1. 连续对话微调时，若本轮请求未显式传 task_class_ids，可用该字段兜底；
-	// 2. 仅用于会话内上下文承接，不表示用户最终确认后的持久化状态。
-	TaskClassIDs []int `json:"task_class_ids,omitempty"`
-	// HybridEntries 保存“可优化的混合日程底板”。
-	// 作用：
-	// 1. 连续对话微调时复用上轮结果作为起点，避免每轮都从粗排重算；
-	// 2. 仅缓存态，生命周期受 Redis TTL 控制。
-	HybridEntries []HybridScheduleEntry `json:"hybrid_entries,omitempty"`
-	// AllocatedItems 保存建议任务块的当前分配状态。
-	// 作用：
-	// 1. 保证 final_check 的数量核对口径在连续微调场景下可持续；
-	// 2. return_preview 节点可继续回填 embedded_time。
-	AllocatedItems []TaskClassItem `json:"allocated_items,omitempty"`
-	GeneratedAt    time.Time       `json:"generated_at"`
+	UserID         int                   `json:"user_id"`
+	ConversationID string                `json:"conversation_id"`
+	TraceID        string                `json:"trace_id,omitempty"`
+	Summary        string                `json:"summary"`
+	CandidatePlans []UserWeekSchedule    `json:"candidate_plans"`
+	TaskClassIDs   []int                 `json:"task_class_ids,omitempty"`
+	HybridEntries  []HybridScheduleEntry `json:"hybrid_entries,omitempty"`
+	AllocatedItems []TaskClassItem       `json:"allocated_items,omitempty"`
+	GeneratedAt    time.Time             `json:"generated_at"`
 }
 
-// GetSchedulePlanPreviewResponse 是“按会话查询排程预览”接口返回结构。
-//
-// 职责边界：
-// 1. conversation_id：标识该预览属于哪个会话；
-// 2. summary：给用户展示的终审自然语言总结；
-// 3. candidate_plans：给前端渲染课表/时间轴用的结构化 JSON；
-// 4. generated_at：预览生成时间，便于前端判断是否是最新结果。
 type GetSchedulePlanPreviewResponse struct {
 	ConversationID string             `json:"conversation_id"`
 	TraceID        string             `json:"trace_id,omitempty"`
@@ -174,15 +123,20 @@ type AgentChat struct {
 func (AgentChat) TableName() string { return "agent_chats" }
 
 type ChatHistory struct {
-	ID             int        `gorm:"column:id;primaryKey;autoIncrement"`
-	ChatID         string     `gorm:"column:chat_id;type:varchar(36);not null;index:idx_user_chat,priority:2;index:idx_chat_id;comment:会话UUID"`
-	UserID         int        `gorm:"column:user_id;not null;index:idx_user_chat,priority:1"`
-	MessageContent *string    `gorm:"column:message_content;type:text;comment:消息内容"`
-	Role           *string    `gorm:"column:role;type:varchar(32);comment:消息角色"`
-	TokensConsumed int        `gorm:"column:tokens_consumed;not null;default:0;comment:本轮消耗Token"`
-	CreatedAt      *time.Time `gorm:"column:created_at;autoCreateTime"`
+	ID                          int        `gorm:"column:id;primaryKey;autoIncrement"`
+	ChatID                      string     `gorm:"column:chat_id;type:varchar(36);not null;index:idx_user_chat,priority:2;index:idx_chat_id;comment:会话UUID"`
+	UserID                      int        `gorm:"column:user_id;not null;index:idx_user_chat,priority:1"`
+	MessageContent              *string    `gorm:"column:message_content;type:text;comment:消息内容"`
+	ReasoningContent            *string    `gorm:"column:reasoning_content;type:text;comment:deep reasoning text"`
+	ReasoningDurationSeconds    int        `gorm:"column:reasoning_duration_seconds;not null;default:0;comment:deep reasoning duration seconds"`
+	RetryGroupID                *string    `gorm:"column:retry_group_id;type:varchar(64);index:idx_retry_group;comment:retry group id"`
+	RetryIndex                  *int       `gorm:"column:retry_index;comment:retry page index"`
+	RetryFromUserMessageID      *int       `gorm:"column:retry_from_user_message_id;comment:source user message id"`
+	RetryFromAssistantMessageID *int       `gorm:"column:retry_from_assistant_message_id;comment:source assistant message id"`
+	Role                        *string    `gorm:"column:role;type:varchar(32);comment:消息角色"`
+	TokensConsumed              int        `gorm:"column:tokens_consumed;not null;default:0;comment:本轮消耗Token"`
+	CreatedAt                   *time.Time `gorm:"column:created_at;autoCreateTime"`
 
-	// 只保留从聊天记录到会话的单向关联，避免迁移时出现循环依赖。
 	Chat AgentChat `gorm:"foreignKey:ChatID;references:ChatID;constraint:OnUpdate:CASCADE,OnDelete:CASCADE"`
 }
 
diff --git a/backend/service/agentsvc/agent.go b/backend/service/agentsvc/agent.go
index 1836c62..ca23f68 100644
--- a/backend/service/agentsvc/agent.go
+++ b/backend/service/agentsvc/agent.go
@@ -2,7 +2,9 @@ package agentsvc
 
 import (
 	"context"
+	"encoding/json"
 	"log"
+	"strconv"
 	"strings"
 	"time"
 
@@ -101,7 +103,20 @@ func (s *AgentService) PersistChatHistory(ctx context.Context, payload model.Cha
 	// 1. 未注入事件发布器时（例如本地极简环境），直接同步写 DB。
 	//    这样可以保证功能不依赖 Kafka 也能跑通。
 	if s.eventPublisher == nil {
-		return s.repo.SaveChatHistory(ctx, payload.UserID, payload.ConversationID, payload.Role, payload.Message, payload.TokensConsumed)
+		return s.repo.SaveChatHistory(
+			ctx,
+			payload.UserID,
+			payload.ConversationID,
+			payload.Role,
+			payload.Message,
+			payload.ReasoningContent,
+			payload.ReasoningDurationSeconds,
+			payload.RetryGroupID,
+			payload.RetryIndex,
+			payload.RetryFromUserMessageID,
+			payload.RetryFromAssistantMessageID,
+			payload.TokensConsumed,
+		)
 	}
 	// 2. 已启用异步总线时，只发布“持久化请求事件”，不在请求路径阻塞 Kafka。
 	// 2.1 发布成功仅代表“事件安全入队”，实际落库由消费者异步完成。
@@ -114,6 +129,185 @@ func (s *AgentService) saveChatHistoryReliable(ctx context.Context, payload mode
 	return s.PersistChatHistory(ctx, payload)
 }
 
+func mergeAgentReasoningText(parts ...string) string {
+	merged := make([]string, 0, len(parts))
+	for _, part := range parts {
+		text := strings.TrimSpace(part)
+		if text == "" {
+			continue
+		}
+		merged = append(merged, text)
+	}
+	return strings.Join(merged, "\n\n")
+}
+
+type chatRetryMeta struct {
+	GroupID                string
+	Index                  int
+	FromUserMessageID      int
+	FromAssistantMessageID int
+}
+
+func (m *chatRetryMeta) GroupIDPtr() *string {
+	if m == nil || strings.TrimSpace(m.GroupID) == "" {
+		return nil
+	}
+	groupID := strings.TrimSpace(m.GroupID)
+	return &groupID
+}
+
+func (m *chatRetryMeta) IndexPtr() *int {
+	if m == nil || m.Index <= 0 {
+		return nil
+	}
+	index := m.Index
+	return &index
+}
+
+func (m *chatRetryMeta) FromUserMessageIDPtr() *int {
+	if m == nil || m.FromUserMessageID <= 0 {
+		return nil
+	}
+	id := m.FromUserMessageID
+	return &id
+}
+
+func (m *chatRetryMeta) FromAssistantMessageIDPtr() *int {
+	if m == nil || m.FromAssistantMessageID <= 0 {
+		return nil
+	}
+	id := m.FromAssistantMessageID
+	return &id
+}
+
+func (m *chatRetryMeta) CacheExtra() map[string]any {
+	if m == nil || strings.TrimSpace(m.GroupID) == "" || m.Index <= 0 {
+		return nil
+	}
+	extra := map[string]any{
+		"retry_group_id": m.GroupID,
+		"retry_index":    m.Index,
+	}
+	if m.FromUserMessageID > 0 {
+		extra["retry_from_user_message_id"] = m.FromUserMessageID
+	}
+	if m.FromAssistantMessageID > 0 {
+		extra["retry_from_assistant_message_id"] = m.FromAssistantMessageID
+	}
+	return extra
+}
+
+func (s *AgentService) buildChatRetryMeta(ctx context.Context, userID int, chatID string, extra map[string]any) (*chatRetryMeta, error) {
+	if len(extra) == 0 {
+		return nil, nil
+	}
+	requestMode := strings.ToLower(strings.TrimSpace(readAgentExtraString(extra, "request_mode")))
+	if requestMode != "retry" {
+		return nil, nil
+	}
+
+	groupID := strings.TrimSpace(readAgentExtraString(extra, "retry_group_id"))
+	if groupID == "" {
+		groupID = uuid.NewString()
+	}
+
+	sourceUserMessageID := readAgentExtraInt(extra, "retry_from_user_message_id")
+	sourceAssistantMessageID := readAgentExtraInt(extra, "retry_from_assistant_message_id")
+
+	if err := s.repo.EnsureRetryGroupSeed(ctx, userID, chatID, groupID, sourceUserMessageID, sourceAssistantMessageID); err != nil {
+		return nil, err
+	}
+	if s.agentCache != nil && (sourceUserMessageID > 0 || sourceAssistantMessageID > 0) {
+		if cacheErr := s.agentCache.ApplyRetrySeed(ctx, chatID, groupID, sourceUserMessageID, sourceAssistantMessageID); cacheErr != nil {
+			log.Printf("更新重试分组缓存失败 chat=%s group=%s err=%v", chatID, groupID, cacheErr)
+		}
+	}
+
+	nextIndex, err := s.repo.GetRetryGroupNextIndex(ctx, userID, chatID, groupID)
+	if err != nil {
+		return nil, err
+	}
+
+	return &chatRetryMeta{
+		GroupID:                groupID,
+		Index:                  nextIndex,
+		FromUserMessageID:      sourceUserMessageID,
+		FromAssistantMessageID: sourceAssistantMessageID,
+	}, nil
+}
+
+func readAgentExtraString(extra map[string]any, key string) string {
+	if len(extra) == 0 {
+		return ""
+	}
+	raw, ok := extra[key]
+	if !ok {
+		return ""
+	}
+	text, ok := raw.(string)
+	if !ok {
+		return ""
+	}
+	return strings.TrimSpace(text)
+}
+
+func readAgentExtraInt(extra map[string]any, key string) int {
+	if len(extra) == 0 {
+		return 0
+	}
+	raw, ok := extra[key]
+	if !ok {
+		return 0
+	}
+	// 1. 前端的历史消息 id 在本地态里可能是 string，也可能是 number。
+	// 2. 重试链路只要这里解析失败，父消息 id 就会退化成 0，后续写库自然会落成 NULL。
+	// 3. 因此这里统一做“宽松整型解析”，兼容 JSON number、前端字符串数字和常见整数类型。
+	value, ok := parseAgentLooseInt(raw)
+	if !ok || value <= 0 {
+		return 0
+	}
+	return value
+}
+
+// parseAgentLooseInt 负责把 extra 中的“弱类型数字”归一成 int。
+//
+// 职责边界：
+// 1. 负责兼容前端 JSON 解码后的常见数值类型，以及字符串形式的数字。
+// 2. 不负责业务语义校验；例如是否必须大于 0，由调用方自行决定。
+// 3. 解析失败时返回 ok=false，调用方可按各自场景走兜底逻辑。
+func parseAgentLooseInt(raw any) (value int, ok bool) {
+	switch v := raw.(type) {
+	case int:
+		return v, true
+	case int32:
+		return int(v), true
+	case int64:
+		return int(v), true
+	case float64:
+		return int(v), true
+	case json.Number:
+		if parsed, err := v.Int64(); err == nil {
+			return int(parsed), true
+		}
+		if parsed, err := v.Float64(); err == nil {
+			return int(parsed), true
+		}
+		return 0, false
+	case string:
+		trimmed := strings.TrimSpace(v)
+		if trimmed == "" {
+			return 0, false
+		}
+		parsed, err := strconv.Atoi(trimmed)
+		if err != nil {
+			return 0, false
+		}
+		return parsed, true
+	default:
+		return 0, false
+	}
+}
+
 // pushErrNonBlocking 向错误通道“尽力投递”错误。
 // 目的：
 // 1) 避免 goroutine 在 errChan 满时被阻塞导致泄漏；
@@ -135,6 +329,9 @@ func (s *AgentService) runNormalChatFlow(
 	selectedModel *ark.ChatModel,
 	resolvedModelName string,
 	userMessage string,
+	assistantReasoningPrefix string,
+	assistantReasoningStartedAt *time.Time,
+	retryMeta *chatRetryMeta,
 	ifThinking bool,
 	userID int,
 	chatID string,
@@ -192,11 +389,12 @@ func (s *AgentService) runNormalChatFlow(
 
 	// 6. 执行真正的流式聊天。
 	//    fullText 用于后续写 Redis/持久化，outChan 用于把流片段实时推给前端。
-	fullText, streamUsage, streamErr := agentchat.StreamChat(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan, traceID, chatID, requestStart)
+	fullText, reasoningText, reasoningDurationSeconds, streamUsage, streamErr := agentchat.StreamChat(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan, traceID, chatID, requestStart, assistantReasoningStartedAt)
 	if streamErr != nil {
 		pushErrNonBlocking(errChan, streamErr)
 		return
 	}
+	assistantReasoning := mergeAgentReasoningText(assistantReasoningPrefix, reasoningText)
 
 	// 6.1 流式 usage 并入请求级 token 统计器：
 	// 6.1.1 route/quicknote/taskquery 等 Generate 调用由 callback 自动累加；
@@ -212,15 +410,25 @@ func (s *AgentService) runNormalChatFlow(
 	// 7. 后置持久化（用户消息）：
 	//    7.1 先写 Redis，保证“最新会话上下文”可立即用于下一轮推理；
 	//    7.2 再走可靠持久化入口（outbox 或同步 DB）。
-	if err = s.agentCache.PushMessage(ctx, chatID, &schema.Message{Role: schema.User, Content: userMessage}); err != nil {
+	userMsg := &schema.Message{Role: schema.User, Content: userMessage}
+	if retryExtra := retryMeta.CacheExtra(); len(retryExtra) > 0 {
+		userMsg.Extra = retryExtra
+	}
+	if err = s.agentCache.PushMessage(ctx, chatID, userMsg); err != nil {
 		log.Printf("写入用户消息到 Redis 失败: %v", err)
 	}
 
 	if err = s.PersistChatHistory(ctx, model.ChatHistoryPersistPayload{
-		UserID:         userID,
-		ConversationID: chatID,
-		Role:           "user",
-		Message:        userMessage,
+		UserID:                      userID,
+		ConversationID:              chatID,
+		Role:                        "user",
+		Message:                     userMessage,
+		ReasoningContent:            "",
+		ReasoningDurationSeconds:    0,
+		RetryGroupID:                retryMeta.GroupIDPtr(),
+		RetryIndex:                  retryMeta.IndexPtr(),
+		RetryFromUserMessageID:      retryMeta.FromUserMessageIDPtr(),
+		RetryFromAssistantMessageID: retryMeta.FromAssistantMessageIDPtr(),
 		// 口径B：用户消息固定记 0；本轮总 token 统一记在助手消息。
 		TokensConsumed: 0,
 	}); err != nil {
@@ -233,15 +441,33 @@ func (s *AgentService) runNormalChatFlow(
 	// 8. 后置持久化（助手消息）：
 	//    8.1 先写 Redis，保证下一轮上下文可见；
 	//    8.2 再异步可靠落库，失败通过 errChan 回传给上层。
-	if err = s.agentCache.PushMessage(context.Background(), chatID, &schema.Message{Role: schema.Assistant, Content: fullText}); err != nil {
+	assistantMsg := &schema.Message{Role: schema.Assistant, Content: fullText, ReasoningContent: assistantReasoning}
+	if reasoningDurationSeconds > 0 {
+		assistantMsg.Extra = map[string]any{"reasoning_duration_seconds": reasoningDurationSeconds}
+	}
+	if retryExtra := retryMeta.CacheExtra(); len(retryExtra) > 0 {
+		if assistantMsg.Extra == nil {
+			assistantMsg.Extra = make(map[string]any, len(retryExtra))
+		}
+		for key, value := range retryExtra {
+			assistantMsg.Extra[key] = value
+		}
+	}
+	if err = s.agentCache.PushMessage(context.Background(), chatID, assistantMsg); err != nil {
 		log.Printf("写入助手消息到 Redis 失败: %v", err)
 	}
 
 	if saveErr := s.PersistChatHistory(context.Background(), model.ChatHistoryPersistPayload{
-		UserID:         userID,
-		ConversationID: chatID,
-		Role:           "assistant",
-		Message:        fullText,
+		UserID:                      userID,
+		ConversationID:              chatID,
+		Role:                        "assistant",
+		Message:                     fullText,
+		ReasoningContent:            assistantReasoning,
+		ReasoningDurationSeconds:    reasoningDurationSeconds,
+		RetryGroupID:                retryMeta.GroupIDPtr(),
+		RetryIndex:                  retryMeta.IndexPtr(),
+		RetryFromUserMessageID:      retryMeta.FromUserMessageIDPtr(),
+		RetryFromAssistantMessageID: retryMeta.FromAssistantMessageIDPtr(),
 		// 口径B：助手消息记录“本轮请求总 token”。
 		TokensConsumed: requestTotalTokens,
 	}); saveErr != nil {
@@ -303,6 +529,14 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
 		}
 	}
 
+	retryMeta, err := s.buildChatRetryMeta(requestCtx, userID, chatID, extra)
+	if err != nil {
+		errChan <- err
+		close(outChan)
+		close(errChan)
+		return outChan, errChan
+	}
+
 	// 3) 统一异步分流：
 	// 3.1 先走“通用控制码路由”决定 action（chat / quick_note_create / task_query）；
 	// 3.2 quick_note_create 进入随口记 graph；
@@ -322,7 +556,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
 
 		// 3.2 chat：直接走普通聊天主链路。
 		if routing.Action == agentrouter.ActionChat {
-			s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
+			s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, "", nil, retryMeta, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
 			return
 		}
 
@@ -358,7 +592,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
 
 				// 3.4.2 对随口记回复执行统一后置持久化（Redis + outbox/DB）。
 				requestTotalTokens := snapshotRequestTokenMeter(requestCtx).TotalTokens
-				s.persistChatAfterReply(requestCtx, userID, chatID, userMessage, quickReply, 0, requestTotalTokens, errChan)
+				s.persistChatAfterReply(requestCtx, userID, chatID, userMessage, quickReply, progress.HistoryText(), progress.DurationSeconds(time.Now()), retryMeta, 0, requestTotalTokens, errChan)
 				// 3.4.3 随口记链路同样异步生成会话标题（仅首次写入）。
 				s.ensureConversationTitleAsync(userID, chatID)
 				return
@@ -366,7 +600,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
 
 			// 3.4.4 路由误判或 graph 判定非随口记时，回落普通聊天，保证“能聊”。
 			progress.Emit("quick_note.fallback", "当前输入不是随口记请求，切换到普通对话。")
-			s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
+			s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, progress.HistoryText(), progress.StartedAt(), retryMeta, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
 			return
 		}
 
@@ -377,7 +611,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
 				// 3.5.1 任务查询失败时回退普通聊天，避免请求直接中断。
 				log.Printf("任务查询 tool-calling 执行失败，回退普通聊天 trace_id=%s chat_id=%s err=%v", traceID, chatID, queryErr)
 				progress.Emit("task_query.fallback", "任务查询暂不可用，先切回普通对话。")
-				s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
+				s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, progress.HistoryText(), progress.StartedAt(), retryMeta, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
 				return
 			}
 
@@ -387,7 +621,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
 				return
 			}
 			requestTotalTokens := snapshotRequestTokenMeter(requestCtx).TotalTokens
-			s.persistChatAfterReply(requestCtx, userID, chatID, userMessage, reply, 0, requestTotalTokens, errChan)
+			s.persistChatAfterReply(requestCtx, userID, chatID, userMessage, reply, progress.HistoryText(), progress.DurationSeconds(time.Now()), retryMeta, 0, requestTotalTokens, errChan)
 			s.ensureConversationTitleAsync(userID, chatID)
 			return
 		}
@@ -398,7 +632,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
 			if planErr != nil {
 				log.Printf("智能排程 graph 执行失败，回退普通聊天 trace_id=%s chat_id=%s err=%v", traceID, chatID, planErr)
 				progress.Emit("schedule_plan.fallback", "智能排程暂不可用，先切回普通对话。")
-				s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
+				s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, progress.HistoryText(), progress.StartedAt(), retryMeta, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
 				return
 			}
 
@@ -407,7 +641,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
 				return
 			}
 			requestTotalTokens := snapshotRequestTokenMeter(requestCtx).TotalTokens
-			s.persistChatAfterReply(requestCtx, userID, chatID, userMessage, reply, 0, requestTotalTokens, errChan)
+			s.persistChatAfterReply(requestCtx, userID, chatID, userMessage, reply, progress.HistoryText(), progress.DurationSeconds(time.Now()), retryMeta, 0, requestTotalTokens, errChan)
 			s.ensureConversationTitleAsync(userID, chatID)
 			return
 		}
@@ -426,13 +660,13 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
 				return
 			}
 			requestTotalTokens := snapshotRequestTokenMeter(requestCtx).TotalTokens
-			s.persistChatAfterReply(requestCtx, userID, chatID, userMessage, reply, 0, requestTotalTokens, errChan)
+			s.persistChatAfterReply(requestCtx, userID, chatID, userMessage, reply, progress.HistoryText(), progress.DurationSeconds(time.Now()), retryMeta, 0, requestTotalTokens, errChan)
 			s.ensureConversationTitleAsync(userID, chatID)
 			return
 		}
 
 		// 3.8 未知 action 兜底：走普通聊天，保证可用性。
-		s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
+		s.runNormalChatFlow(requestCtx, selectedModel, resolvedModelName, userMessage, progress.HistoryText(), progress.StartedAt(), retryMeta, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
 	}()
 
 	return outChan, errChan
diff --git a/backend/service/agentsvc/agent_history.go b/backend/service/agentsvc/agent_history.go
index fccbccf..637f500 100644
--- a/backend/service/agentsvc/agent_history.go
+++ b/backend/service/agentsvc/agent_history.go
@@ -45,7 +45,7 @@ func (s *AgentService) GetConversationHistory(ctx context.Context, userID int, c
 		history, cacheErr := s.agentCache.GetHistory(ctx, normalizedChatID)
 		if cacheErr != nil {
 			log.Printf("读取会话历史缓存失败 chat_id=%s: %v", normalizedChatID, cacheErr)
-		} else if history != nil {
+		} else if history != nil && !cacheConversationHistoryHasRetryMetadata(history) {
 			return buildConversationHistoryItemsFromCache(history), nil
 		}
 	}
@@ -81,12 +81,15 @@ func buildConversationHistoryItemsFromCache(messages []*schema.Message) []model.
 			continue
 		}
 		items = append(items, model.GetConversationHistoryItem{
-			Role:             normalizeConversationHistoryRole(string(msg.Role)),
-			Content:          strings.TrimSpace(msg.Content),
-			ReasoningContent: strings.TrimSpace(msg.ReasoningContent),
+			Role:                     normalizeConversationHistoryRole(string(msg.Role)),
+			Content:                  strings.TrimSpace(msg.Content),
+			ReasoningContent:         strings.TrimSpace(msg.ReasoningContent),
+			ReasoningDurationSeconds: extractConversationReasoningDurationSeconds(msg),
+			RetryGroupID:             extractConversationRetryGroupID(msg),
+			RetryIndex:               extractConversationRetryIndex(msg),
 		})
 	}
-	return items
+	return attachConversationRetryTotals(items)
 }
 
 // buildConversationHistoryItemsFromDB 把数据库聊天记录转换为接口响应。
@@ -109,15 +112,156 @@ func buildConversationHistoryItemsFromDB(histories []model.ChatHistory) []model.
 		}
 
 		items = append(items, model.GetConversationHistoryItem{
-			ID:        history.ID,
-			Role:      role,
-			Content:   content,
-			CreatedAt: history.CreatedAt,
+			ID:                       history.ID,
+			Role:                     role,
+			Content:                  content,
+			CreatedAt:                history.CreatedAt,
+			ReasoningContent:         strings.TrimSpace(derefConversationHistoryText(history.ReasoningContent)),
+			ReasoningDurationSeconds: history.ReasoningDurationSeconds,
+			RetryGroupID:             cloneConversationStringPointer(history.RetryGroupID),
+			RetryIndex:               cloneConversationIntPointer(history.RetryIndex),
 		})
 	}
+	return attachConversationRetryTotals(items)
+}
+
+func derefConversationHistoryText(text *string) string {
+	if text == nil {
+		return ""
+	}
+	return *text
+}
+
+func extractConversationReasoningDurationSeconds(msg *schema.Message) int {
+	if msg == nil || msg.Extra == nil {
+		return 0
+	}
+	raw, ok := msg.Extra["reasoning_duration_seconds"]
+	if !ok {
+		return 0
+	}
+	switch v := raw.(type) {
+	case int:
+		return v
+	case int32:
+		return int(v)
+	case int64:
+		return int(v)
+	case float64:
+		return int(v)
+	default:
+		return 0
+	}
+}
+
+func extractConversationRetryGroupID(msg *schema.Message) *string {
+	if msg == nil || msg.Extra == nil {
+		return nil
+	}
+	raw, ok := msg.Extra["retry_group_id"]
+	if !ok {
+		return nil
+	}
+	text, ok := raw.(string)
+	if !ok {
+		return nil
+	}
+	text = strings.TrimSpace(text)
+	if text == "" {
+		return nil
+	}
+	return &text
+}
+
+func extractConversationRetryIndex(msg *schema.Message) *int {
+	if msg == nil || msg.Extra == nil {
+		return nil
+	}
+	raw, ok := msg.Extra["retry_index"]
+	if !ok {
+		return nil
+	}
+	switch v := raw.(type) {
+	case int:
+		if v <= 0 {
+			return nil
+		}
+		return &v
+	case int32:
+		value := int(v)
+		if value <= 0 {
+			return nil
+		}
+		return &value
+	case int64:
+		value := int(v)
+		if value <= 0 {
+			return nil
+		}
+		return &value
+	case float64:
+		value := int(v)
+		if value <= 0 {
+			return nil
+		}
+		return &value
+	default:
+		return nil
+	}
+}
+
+func attachConversationRetryTotals(items []model.GetConversationHistoryItem) []model.GetConversationHistoryItem {
+	if len(items) == 0 {
+		return items
+	}
+	groupTotals := make(map[string]int)
+	for _, item := range items {
+		if item.RetryGroupID == nil || item.RetryIndex == nil {
+			continue
+		}
+		groupID := strings.TrimSpace(*item.RetryGroupID)
+		if groupID == "" {
+			continue
+		}
+		if *item.RetryIndex > groupTotals[groupID] {
+			groupTotals[groupID] = *item.RetryIndex
+		}
+	}
+	for idx := range items {
+		groupIDPtr := items[idx].RetryGroupID
+		if groupIDPtr == nil {
+			continue
+		}
+		groupID := strings.TrimSpace(*groupIDPtr)
+		total := groupTotals[groupID]
+		if total <= 0 {
+			continue
+		}
+		totalCopy := total
+		items[idx].RetryTotal = &totalCopy
+	}
 	return items
 }
 
+func cloneConversationStringPointer(src *string) *string {
+	if src == nil {
+		return nil
+	}
+	text := strings.TrimSpace(*src)
+	if text == "" {
+		return nil
+	}
+	return &text
+}
+
+func cloneConversationIntPointer(src *int) *int {
+	if src == nil || *src <= 0 {
+		return nil
+	}
+	value := *src
+	return &value
+}
+
 func normalizeConversationHistoryRole(role string) string {
 	switch strings.ToLower(strings.TrimSpace(role)) {
 	case "user":
@@ -128,3 +272,12 @@ func normalizeConversationHistoryRole(role string) string {
 		return "system"
 	}
 }
+
+func cacheConversationHistoryHasRetryMetadata(messages []*schema.Message) bool {
+	for _, msg := range messages {
+		if extractConversationRetryGroupID(msg) != nil {
+			return true
+		}
+	}
+	return false
+}
diff --git a/backend/service/agentsvc/agent_quick_note.go b/backend/service/agentsvc/agent_quick_note.go
index e5cd608..dda56d6 100644
--- a/backend/service/agentsvc/agent_quick_note.go
+++ b/backend/service/agentsvc/agent_quick_note.go
@@ -34,6 +34,8 @@ type quickNoteProgressEmitter struct {
 	requestID  string
 	created    int64
 	enablePush bool
+	reasoning  strings.Builder
+	startedAt  *time.Time
 }
 
 // newQuickNoteProgressEmitter 构造“阶段进度推送器”。
@@ -69,6 +71,23 @@ func (e *quickNoteProgressEmitter) Emit(stage, detail string) {
 	if stage == "" && detail == "" {
 		return
 	}
+	if e.startedAt == nil {
+		now := time.Now()
+		e.startedAt = &now
+	}
+	if e.reasoning.Len() > 0 {
+		e.reasoning.WriteString("\n\n")
+	}
+	if stage != "" {
+		e.reasoning.WriteString("阶段：")
+		e.reasoning.WriteString(stage)
+	}
+	if detail != "" {
+		if stage != "" {
+			e.reasoning.WriteString("\n")
+		}
+		e.reasoning.WriteString(detail)
+	}
 
 	// 3. 调用目的：阶段提示统一走 agent2/stream 的 reasoning chunk 包装，
 	//    避免 service 层继续自己拼 OpenAI 兼容 JSON。
@@ -83,6 +102,31 @@ func (e *quickNoteProgressEmitter) Emit(stage, detail string) {
 	}
 }
 
+func (e *quickNoteProgressEmitter) HistoryText() string {
+	if e == nil {
+		return ""
+	}
+	return strings.TrimSpace(e.reasoning.String())
+}
+
+func (e *quickNoteProgressEmitter) StartedAt() *time.Time {
+	if e == nil || e.startedAt == nil {
+		return nil
+	}
+	startCopy := *e.startedAt
+	return &startCopy
+}
+
+func (e *quickNoteProgressEmitter) DurationSeconds(end time.Time) int {
+	if e == nil || e.startedAt == nil {
+		return 0
+	}
+	if !end.After(*e.startedAt) {
+		return 0
+	}
+	return int(end.Sub(*e.startedAt) / time.Second)
+}
+
 // tryHandleQuickNoteWithGraph 尝试用“随口记 graph”处理本次用户输入。
 // 返回值语义：
 // 1) handled=true：本次请求已在随口记链路处理完成（成功/失败都会返回文案）；
@@ -268,39 +312,70 @@ func (s *AgentService) persistChatAfterReply(
 	chatID string,
 	userMessage string,
 	assistantReply string,
+	assistantReasoning string,
+	assistantReasoningDurationSeconds int,
+	retryMeta *chatRetryMeta,
 	userTokens int,
 	assistantTokens int,
 	errChan chan error,
 ) {
 	// 1. 先把用户消息写入 Redis，保证会话上下文“马上可见”。
-	if err := s.agentCache.PushMessage(ctx, chatID, &schema.Message{Role: schema.User, Content: userMessage}); err != nil {
+	userMsg := &schema.Message{Role: schema.User, Content: userMessage}
+	if retryExtra := retryMeta.CacheExtra(); len(retryExtra) > 0 {
+		userMsg.Extra = retryExtra
+	}
+	if err := s.agentCache.PushMessage(ctx, chatID, userMsg); err != nil {
 		log.Printf("写入用户消息到 Redis 失败: %v", err)
 	}
 
 	// 2. 再把用户消息写入可靠持久化通道（outbox 或同步 DB）。
 	if err := s.PersistChatHistory(ctx, model.ChatHistoryPersistPayload{
-		UserID:         userID,
-		ConversationID: chatID,
-		Role:           "user",
-		Message:        userMessage,
-		TokensConsumed: userTokens,
+		UserID:                      userID,
+		ConversationID:              chatID,
+		Role:                        "user",
+		Message:                     userMessage,
+		ReasoningContent:            "",
+		ReasoningDurationSeconds:    0,
+		RetryGroupID:                retryMeta.GroupIDPtr(),
+		RetryIndex:                  retryMeta.IndexPtr(),
+		RetryFromUserMessageID:      retryMeta.FromUserMessageIDPtr(),
+		RetryFromAssistantMessageID: retryMeta.FromAssistantMessageIDPtr(),
+		TokensConsumed:              userTokens,
 	}); err != nil {
 		pushErrNonBlocking(errChan, err)
 		return
 	}
 
 	// 3. 助手消息同样遵循“Redis 先行 + 可靠持久化补齐”策略。
-	if err := s.agentCache.PushMessage(context.Background(), chatID, &schema.Message{Role: schema.Assistant, Content: assistantReply}); err != nil {
+	assistantMsg := &schema.Message{Role: schema.Assistant, Content: assistantReply, ReasoningContent: assistantReasoning}
+	if assistantReasoningDurationSeconds > 0 {
+		assistantMsg.Extra = map[string]any{"reasoning_duration_seconds": assistantReasoningDurationSeconds}
+	}
+	if retryExtra := retryMeta.CacheExtra(); len(retryExtra) > 0 {
+		if assistantMsg.Extra == nil {
+			assistantMsg.Extra = make(map[string]any, len(retryExtra))
+		}
+		for key, value := range retryExtra {
+			assistantMsg.Extra[key] = value
+		}
+	}
+	if err := s.agentCache.PushMessage(context.Background(), chatID, assistantMsg); err != nil {
 		log.Printf("写入助手消息到 Redis 失败: %v", err)
 	}
 
 	// 4. 助手消息持久化失败不阻断主流程，通过 errChan 异步上报。
 	if err := s.PersistChatHistory(context.Background(), model.ChatHistoryPersistPayload{
-		UserID:         userID,
-		ConversationID: chatID,
-		Role:           "assistant",
-		Message:        assistantReply,
-		TokensConsumed: assistantTokens,
+		UserID:                      userID,
+		ConversationID:              chatID,
+		Role:                        "assistant",
+		Message:                     assistantReply,
+		ReasoningContent:            assistantReasoning,
+		ReasoningDurationSeconds:    assistantReasoningDurationSeconds,
+		RetryGroupID:                retryMeta.GroupIDPtr(),
+		RetryIndex:                  retryMeta.IndexPtr(),
+		RetryFromUserMessageID:      retryMeta.FromUserMessageIDPtr(),
+		RetryFromAssistantMessageID: retryMeta.FromAssistantMessageIDPtr(),
+		TokensConsumed:              assistantTokens,
 	}); err != nil {
 		pushErrNonBlocking(errChan, err)
 	}
diff --git a/backend/service/events/chat_history_persist.go b/backend/service/events/chat_history_persist.go
index 2964923..87fc89c 100644
--- a/backend/service/events/chat_history_persist.go
+++ b/backend/service/events/chat_history_persist.go
@@ -68,6 +68,12 @@ func RegisterChatHistoryPersistHandler(
 				payload.ConversationID,
 				payload.Role,
 				payload.Message,
+				payload.ReasoningContent,
+				payload.ReasoningDurationSeconds,
+				payload.RetryGroupID,
+				payload.RetryIndex,
+				payload.RetryFromUserMessageID,
+				payload.RetryFromAssistantMessageID,
 				payload.TokensConsumed,
 			)
 		})
diff --git a/frontend/src/api/agent.ts b/frontend/src/api/agent.ts
index 904aa9b..599df33 100644
--- a/frontend/src/api/agent.ts
+++ b/frontend/src/api/agent.ts
@@ -10,7 +10,11 @@ export interface ConversationHistoryMessage {
   role: 'user' | 'assistant' | 'system'
   content: string
   created_at?: string | null
-  reasoning_content?: string
+  reasoning_content?: string | null
+  reasoning_duration_seconds?: number | null
+  retry_group_id?: string | null
+  retry_index?: number | null
+  retry_total?: number | null
 }
 
 export interface ConversationListQuery {
@@ -19,9 +23,46 @@ export interface ConversationListQuery {
   status?: 'active' | 'archived'
 }
 
+function normalizeConversationHistoryMessage(raw: unknown): ConversationHistoryMessage | null {
+  if (!raw || typeof raw !== 'object') {
+    return null
+  }
+
+  const candidate = raw as Record<string, unknown>
+  const role = candidate.role
+  const content = candidate.content
+
+  if ((role !== 'user' && role !== 'assistant' && role !== 'system') || typeof content !== 'string') {
+    return null
+  }
+
+  // 1. 按 openapi 优先读取 reasoning_content，兼容后端历史接口新增的思考存储字段。
+  // 2. 若后端灰度期间仍返回 legacy reasoning 字段，这里也做一次前端兜底兼容。
+  // 3. 统一归一化成 string/null，避免页面层反复做类型分支判断。
+  const normalizedReasoning =
+    typeof candidate.reasoning_content === 'string'
+      ? candidate.reasoning_content
+      : typeof candidate.reasoning === 'string'
+        ? candidate.reasoning
+        : null
+
+  return {
+    id: typeof candidate.id === 'string' || typeof candidate.id === 'number' ? candidate.id : undefined,
+    role,
+    content,
+    created_at: typeof candidate.created_at === 'string' ? candidate.created_at : null,
+    reasoning_content: normalizedReasoning,
+    reasoning_duration_seconds:
+      typeof candidate.reasoning_duration_seconds === 'number' ? candidate.reasoning_duration_seconds : null,
+    retry_group_id: typeof candidate.retry_group_id === 'string' ? candidate.retry_group_id : null,
+    retry_index: typeof candidate.retry_index === 'number' ? candidate.retry_index : null,
+    retry_total: typeof candidate.retry_total === 'number' ? candidate.retry_total : null,
+  }
+}
+
 // getConversationList 负责按 openapi 约定读取会话列表分页。
 // 职责边界：
-// 1. 负责把前端分页参数映射为后端要求的 page/limit。
+// 1. 负责把前端分页参数映射为后端要求的 page/page_size。
 // 2. 不负责前端滚动懒加载时的合并、去重和选中逻辑。
 // 3. 接口失败时统一抛出中文错误，便于页面层直接提示。
 export async function getConversationList(options: ConversationListQuery = {}) {
@@ -31,6 +72,7 @@ export async function getConversationList(options: ConversationListQuery = {}) {
     const response = await http.get<ApiResponse<ConversationListResponse>>('/agent/conversation-list', {
       params: {
         page,
+        page_size: pageSize,
         limit: pageSize,
         status,
       },
@@ -61,7 +103,9 @@ export async function getConversationHistory(conversationId: string) {
         conversation_id: conversationId,
       },
     })
-    return response.data.data ?? []
+    return (response.data.data ?? [])
+      .map(normalizeConversationHistoryMessage)
+      .filter((message): message is ConversationHistoryMessage => Boolean(message))
   } catch (error) {
     throw new Error(extractErrorMessage(error, '会话消息加载失败，请稍后重试'))
   }
diff --git a/frontend/src/components/dashboard/AssistantPanel.vue b/frontend/src/components/dashboard/AssistantPanel.vue
index bb450ab..df2e249 100644
--- a/frontend/src/components/dashboard/AssistantPanel.vue
+++ b/frontend/src/components/dashboard/AssistantPanel.vue
@@ -50,6 +50,14 @@ interface ConversationGroup {
   items: ConversationListItem[]
 }
 
+interface RetryPageGroup {
+  groupId: string
+  total: number
+  latestIndex: number
+  visibleIndex: number
+  pages: Map<number, { user?: AssistantMessage; assistant?: AssistantMessage }>
+}
+
 const props = withDefaults(
   defineProps<{
     initialHistoryWidth?: number
@@ -76,6 +84,9 @@ const thinkingEnabled = ref(false)
 const messageInput = ref('')
 const historyPanelWidth = ref(props.initialHistoryWidth)
 const activeStreamingMessageId = ref('')
+const editingUserMessageId = ref('')
+const editingUserMessageDraft = ref('')
+const retryVisiblePageMap = reactive<Record<string, number>>({})
 
 const conversationPage = ref(1)
 const conversationPageSize = 12
@@ -101,10 +112,12 @@ const quickActions = [
 const MODEL_PREFERENCE_STORAGE_KEY = 'smartflow.assistant.model.byConversation.v1'
 
 let messageScrollRaf = 0
+let messageScrollReleaseRaf = 0
 let reasoningTicker = 0
 const reasoningDisplayNow = ref(Date.now())
 const shouldAutoFollowMessages = ref(true)
-const messageBottomTolerancePx = 6
+const messageBottomTolerancePx = 24
+const isProgrammaticMessageScroll = ref(false)
 
 const isStandaloneMode = computed(() => props.viewMode === 'standalone')
 
@@ -122,13 +135,84 @@ const selectedConversation = computed(() =>
   conversationList.value.find((item) => item.conversation_id === selectedConversationId.value),
 )
 
-const selectedMessages = computed(() => {
+const rawSelectedMessages = computed(() => {
   if (!selectedConversationId.value) {
     return []
   }
   return conversationMessagesMap[selectedConversationId.value] ?? []
 })
 
+const retryPageGroups = computed<Map<string, RetryPageGroup>>(() => {
+  const grouped = new Map<string, RetryPageGroup>()
+
+  for (const message of rawSelectedMessages.value) {
+    if (!message.retryGroupId || !message.retryIndex || !message.retryTotal || message.retryTotal <= 1) {
+      continue
+    }
+
+    const existed = grouped.get(message.retryGroupId) ?? {
+      groupId: message.retryGroupId,
+      total: message.retryTotal,
+      latestIndex: message.retryIndex,
+      visibleIndex: retryVisiblePageMap[message.retryGroupId] ?? message.retryTotal,
+      pages: new Map<number, { user?: AssistantMessage; assistant?: AssistantMessage }>(),
+    }
+
+    existed.total = Math.max(existed.total, message.retryTotal)
+    existed.latestIndex = Math.max(existed.latestIndex, message.retryIndex)
+    existed.visibleIndex = retryVisiblePageMap[message.retryGroupId] ?? existed.latestIndex
+
+    const page = existed.pages.get(message.retryIndex) ?? {}
+    if (message.role === 'user') {
+      page.user = message
+    }
+    if (message.role === 'assistant') {
+      page.assistant = message
+    }
+    existed.pages.set(message.retryIndex, page)
+    grouped.set(message.retryGroupId, existed)
+  }
+
+  return grouped
+})
+
+const selectedMessages = computed(() => {
+  const visible: AssistantMessage[] = []
+  const insertedRetryGroups = new Set<string>()
+
+  for (const message of rawSelectedMessages.value) {
+    if (!message.retryGroupId) {
+      visible.push(message)
+      continue
+    }
+
+    const retryGroup = retryPageGroups.value.get(message.retryGroupId)
+    if (!retryGroup || retryGroup.total <= 1 || !message.retryIndex) {
+      visible.push(message)
+      continue
+    }
+
+    if (insertedRetryGroups.has(message.retryGroupId)) {
+      continue
+    }
+
+    insertedRetryGroups.add(message.retryGroupId)
+    const nextPage =
+      retryGroup.pages.get(retryGroup.visibleIndex) ??
+      retryGroup.pages.get(retryGroup.latestIndex) ??
+      retryGroup.pages.get(1)
+
+    if (nextPage?.user) {
+      visible.push(nextPage.user)
+    }
+    if (nextPage?.assistant) {
+      visible.push(nextPage.assistant)
+    }
+  }
+
+  return visible
+})
+
 function resolveConversationGroupLabel(timeText?: string | null) {
   if (!timeText) {
     return '更早'
@@ -207,7 +291,7 @@ const selectedConversationSubtitle = computed(() => {
 
   const meta = conversationMetaMap[selectedConversationId.value]
   const current = selectedConversation.value
-  const messageCount = meta?.message_count ?? current?.message_count ?? selectedMessages.value.length
+  const messageCount = meta?.message_count ?? current?.message_count ?? rawSelectedMessages.value.length
   const lastMessageAt = meta?.last_message_at ?? current?.last_message_at
   return `消息 ${messageCount} 条 · 最近更新 ${formatConversationTime(lastMessageAt)}`
 })
@@ -219,7 +303,7 @@ const shouldShowHistoryFallback = computed(() => {
 
   return (
     unavailableHistoryMap[selectedConversationId.value] === true &&
-    selectedMessages.value.length === 0 &&
+    rawSelectedMessages.value.length === 0 &&
     (selectedConversation.value?.message_count ?? 0) > 0
   )
 })
@@ -446,19 +530,86 @@ function prependConversationPreview(conversationId: string, previewText: string,
 
 function normalizeHistoryMessage(message: ConversationHistoryMessage, index: number): AssistantMessage {
   const id = `${message.id ?? `${message.role}-${index}`}`
+  const reasoningText = typeof message.reasoning_content === 'string' ? message.reasoning_content : ''
   const normalized: AssistantMessage = {
     id,
     role: message.role,
     content: message.content,
     createdAt: message.created_at ?? new Date().toISOString(),
-    reasoning: message.reasoning_content,
+    reasoning: reasoningText || undefined,
+    retryGroupId: typeof message.retry_group_id === 'string' ? message.retry_group_id : undefined,
+    retryIndex: typeof message.retry_index === 'number' ? message.retry_index : undefined,
+    retryTotal: typeof message.retry_total === 'number' ? message.retry_total : undefined,
+  }
+
+  // 1. 历史消息优先使用后端持久化的思考时长，避免刷新后重新按“当前时间 - 创建时间”误算。
+  // 2. 若后端当前未返回有效时长，则清掉旧缓存，回退为“仅展示已思考文案”。
+  // 3. 同时清理 startedAt，防止历史消息误进入前端实时计时分支。
+  delete reasoningStartedAtMap[id]
+  if (typeof message.reasoning_duration_seconds === 'number' && message.reasoning_duration_seconds > 0) {
+    reasoningDurationMap[id] = Math.max(1, Math.round(message.reasoning_duration_seconds))
+  } else {
+    delete reasoningDurationMap[id]
   }
 
   thinkingMessageMap[id] = false
-  reasoningCollapsedMap[id] = Boolean(message.reasoning_content?.trim())
+  reasoningCollapsedMap[id] = Boolean(reasoningText.trim())
   return normalized
 }
 
+function resolveMessageTimestamp(message: AssistantMessage) {
+  const parsed = Date.parse(message.createdAt)
+  return Number.isFinite(parsed) ? parsed : 0
+}
+
+function isSameLogicalMessage(left: AssistantMessage, right: AssistantMessage) {
+  return (
+    left.role === right.role &&
+    left.content === right.content &&
+    (left.reasoning || '') === (right.reasoning || '') &&
+    (left.retryGroupId || '') === (right.retryGroupId || '') &&
+    (left.retryIndex || 0) === (right.retryIndex || 0)
+  )
+}
+
+function mergeServerHistoryWithLocalState(
+  conversationId: string,
+  history: ConversationHistoryMessage[],
+) {
+  const existingBucket = conversationMessagesMap[conversationId] ?? []
+  const normalizedHistory = history.map(normalizeHistoryMessage)
+  const existingById = new Map(existingBucket.map((message) => [message.id, message]))
+
+  const mergedHistory = normalizedHistory.map((serverMessage) => {
+    const localMessage = existingById.get(serverMessage.id)
+    if (!localMessage) {
+      return serverMessage
+    }
+
+    return {
+      ...serverMessage,
+      retryGroupId: serverMessage.retryGroupId ?? localMessage.retryGroupId,
+      retryIndex: serverMessage.retryIndex ?? localMessage.retryIndex,
+      retryTotal: serverMessage.retryTotal ?? localMessage.retryTotal,
+    }
+  })
+
+  const mergedIds = new Set(mergedHistory.map((message) => message.id))
+  const optimisticMessages = existingBucket.filter((message) => {
+    if (mergedIds.has(message.id)) {
+      return false
+    }
+
+    if (!isLocalEphemeralMessageId(message.id)) {
+      return true
+    }
+
+    return !mergedHistory.some((serverMessage) => isSameLogicalMessage(serverMessage, message))
+  })
+
+  return [...mergedHistory, ...optimisticMessages].sort((left, right) => resolveMessageTimestamp(left) - resolveMessageTimestamp(right))
+}
+
 function renderMessageMarkdown(content: string) {
   return renderMarkdown(content)
 }
@@ -467,10 +618,187 @@ function isStreamingMessage(message: AssistantMessage) {
   return message.id === activeStreamingMessageId.value
 }
 
+function isEditingUserMessage(messageId: string) {
+  return editingUserMessageId.value === messageId
+}
+
 function isThinkingMessage(message: AssistantMessage) {
   return thinkingMessageMap[message.id] === true
 }
 
+function findMessageIndex(messageId: string) {
+  return selectedMessages.value.findIndex((message) => message.id === messageId)
+}
+
+function isLatestAssistantMessage(messageId: string) {
+  const lastAssistant = [...selectedMessages.value].reverse().find((message) => message.role === 'assistant')
+  return lastAssistant?.id === messageId
+}
+
+function resolveRetryPageGroup(message: AssistantMessage) {
+  if (!message.retryGroupId) {
+    return null
+  }
+  return retryPageGroups.value.get(message.retryGroupId) ?? null
+}
+
+function shouldShowRetryPager(message: AssistantMessage) {
+  if (message.role !== 'assistant') {
+    return false
+  }
+
+  const retryGroup = resolveRetryPageGroup(message)
+  return Boolean(retryGroup && retryGroup.total > 1)
+}
+
+function changeRetryPage(message: AssistantMessage, delta: number) {
+  const retryGroup = resolveRetryPageGroup(message)
+  if (!retryGroup) {
+    return
+  }
+
+  const nextPage = Math.min(Math.max(1, retryGroup.visibleIndex + delta), retryGroup.total)
+  if (nextPage === retryGroup.visibleIndex) {
+    return
+  }
+
+  retryVisiblePageMap[retryGroup.groupId] = nextPage
+}
+
+function resolveVisibleUserMessageBeforeAssistant(messageId: string) {
+  const index = findMessageIndex(messageId)
+  if (index <= 0) {
+    return null
+  }
+
+  for (let current = index - 1; current >= 0; current -= 1) {
+    const candidate = selectedMessages.value[current]
+    if (candidate?.role === 'user') {
+      return candidate
+    }
+  }
+
+  return null
+}
+
+function isLocalEphemeralMessageId(id: string) {
+  return /^(user|assistant|system)-\d{13}-[a-z0-9]+$/i.test(id)
+}
+
+function resolvePersistedMessageId(message: AssistantMessage | null) {
+  if (!message) {
+    return null
+  }
+
+  if (isLocalEphemeralMessageId(message.id)) {
+    return null
+  }
+
+  if (/^\d+$/.test(message.id)) {
+    return Number(message.id)
+  }
+
+  return message.id
+}
+
+function createRetryGroupId() {
+  if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') {
+    return `retry-${crypto.randomUUID()}`
+  }
+
+  return `retry-${Date.now()}-${Math.random().toString(16).slice(2)}`
+}
+
+function applyRetryGroupToExistingMessages(groupId: string, total: number, userMessageId: string, assistantMessageId: string) {
+  const conversationId = selectedConversationId.value
+  if (!conversationId) {
+    return
+  }
+
+  const bucket = conversationMessagesMap[conversationId] ?? []
+  for (const message of bucket) {
+    if (message.id === userMessageId || message.id === assistantMessageId || message.retryGroupId === groupId) {
+      message.retryGroupId = groupId
+      message.retryTotal = total
+      if (message.id === userMessageId || (message.retryGroupId === groupId && message.role === 'user' && !message.retryIndex)) {
+        message.retryIndex = 1
+      }
+      if (message.id === assistantMessageId || (message.retryGroupId === groupId && message.role === 'assistant' && !message.retryIndex)) {
+        message.retryIndex = 1
+      }
+    }
+  }
+
+  retryVisiblePageMap[groupId] = total
+}
+
+function resolvePromptBeforeAssistantMessage(messageId: string) {
+  const index = findMessageIndex(messageId)
+  if (index <= 0) {
+    return ''
+  }
+
+  for (let current = index - 1; current >= 0; current -= 1) {
+    const candidate = selectedMessages.value[current]
+    if (candidate?.role === 'user' && candidate.content.trim()) {
+      return candidate.content
+    }
+  }
+
+  return ''
+}
+
+async function copyText(text: string, successMessage: string) {
+  try {
+    if (navigator?.clipboard?.writeText) {
+      await navigator.clipboard.writeText(text)
+    } else {
+      const textarea = document.createElement('textarea')
+      textarea.value = text
+      textarea.style.position = 'fixed'
+      textarea.style.opacity = '0'
+      document.body.appendChild(textarea)
+      textarea.focus()
+      textarea.select()
+      document.execCommand('copy')
+      document.body.removeChild(textarea)
+    }
+
+    ElMessage.success(successMessage)
+  } catch {
+    ElMessage.error('复制失败，请稍后重试')
+  }
+}
+
+function startEditUserMessage(message: AssistantMessage) {
+  editingUserMessageId.value = message.id
+  editingUserMessageDraft.value = message.content
+}
+
+function cancelEditUserMessage() {
+  editingUserMessageId.value = ''
+  editingUserMessageDraft.value = ''
+}
+
+function submitEditedUserMessage(message: AssistantMessage) {
+  const nextContent = editingUserMessageDraft.value.trim()
+  if (!nextContent) {
+    ElMessage.warning('消息内容不能为空')
+    return
+  }
+
+  if (chatLoading.value) {
+    ElMessage.info('当前正在生成回复，请稍后再发送修改后的消息')
+    return
+  }
+
+  // 1. “修改消息”当前按产品定义等价于“复制原消息到输入区后，编辑并重新发送一条新消息”。
+  // 2. 因此这里不改写历史里的旧消息，只关闭编辑态并走现有 sendMessage 主链路。
+  // 3. 这样无需后端新增接口，也能和普通发送保持完全一致的会话语义。
+  cancelEditUserMessage()
+  void sendMessage(nextContent)
+}
+
 function markReasoningStart(message: AssistantMessage) {
   if (reasoningStartedAtMap[message.id]) {
     return
@@ -560,6 +888,11 @@ function handleMessageViewportScroll(event: Event) {
     return
   }
 
+  if (isProgrammaticMessageScroll.value) {
+    shouldAutoFollowMessages.value = true
+    return
+  }
+
   // 1. 若滚动到底部（最后一行完整露出），恢复自动跟随。
   // 2. 只要离底部有距离，就维持“手动阅读模式”，防止流式输出打断阅读。
   // 3. 该状态会影响后续 scheduleScrollMessagesToBottom，形成可控的跟随策略。
@@ -578,6 +911,9 @@ function scheduleScrollMessagesToBottom(smooth = false, force = false) {
   if (messageScrollRaf) {
     cancelAnimationFrame(messageScrollRaf)
   }
+  if (messageScrollReleaseRaf) {
+    cancelAnimationFrame(messageScrollReleaseRaf)
+  }
 
   messageScrollRaf = window.requestAnimationFrame(() => {
     if (!force && !shouldAutoFollowMessages.value) {
@@ -591,11 +927,26 @@ function scheduleScrollMessagesToBottom(smooth = false, force = false) {
       return
     }
 
+    // 1. 先标记为程序触发滚动，避免 scroll 事件把自动跟随错误关闭。
+    // 2. 采用双 requestAnimationFrame，等待本轮文本增量和布局波动稳定后再落到底部。
+    // 3. 下一帧统一释放程序滚动标记，恢复用户主动滚动的判断能力。
+    isProgrammaticMessageScroll.value = true
     viewport.scrollTo({
       top: viewport.scrollHeight,
       behavior: smooth ? 'smooth' : 'auto',
     })
-    messageScrollRaf = 0
+    messageScrollRaf = window.requestAnimationFrame(() => {
+      viewport.scrollTo({
+        top: viewport.scrollHeight,
+        behavior: 'auto',
+      })
+      messageScrollRaf = 0
+      messageScrollReleaseRaf = window.requestAnimationFrame(() => {
+        isProgrammaticMessageScroll.value = false
+        shouldAutoFollowMessages.value = isMessageViewportAtBottom(viewport)
+        messageScrollReleaseRaf = 0
+      })
+    })
   })
 }
 
@@ -698,18 +1049,18 @@ function toggleHistoryPanel() {
   historyExpanded.value = !historyExpanded.value
 }
 
-async function loadConversationMessages(conversationId: string) {
+async function loadConversationMessages(conversationId: string, forceReload = false) {
   if (!conversationId) {
     return
   }
 
-  if (conversationMessagesMap[conversationId] && unavailableHistoryMap[conversationId] !== true) {
+  if (!forceReload && conversationMessagesMap[conversationId] && unavailableHistoryMap[conversationId] !== true) {
     return
   }
 
   try {
     const history = await getConversationHistory(conversationId)
-    conversationMessagesMap[conversationId] = history.map(normalizeHistoryMessage)
+    conversationMessagesMap[conversationId] = mergeServerHistoryWithLocalState(conversationId, history)
     unavailableHistoryMap[conversationId] = false
   } catch {
     unavailableHistoryMap[conversationId] = true
@@ -733,6 +1084,7 @@ async function ensureConversationMeta(conversationId: string) {
 }
 
 async function selectConversation(conversationId: string) {
+  cancelEditUserMessage()
   selectedConversationId.value = conversationId
   applyPreferredModelForConversation(conversationId)
   await Promise.allSettled([loadConversationMessages(conversationId), ensureConversationMeta(conversationId)])
@@ -740,6 +1092,7 @@ async function selectConversation(conversationId: string) {
 }
 
 function startNewConversation() {
+  cancelEditUserMessage()
   selectedConversationId.value = ''
   messageInput.value = ''
   activeStreamingMessageId.value = ''
@@ -793,6 +1146,16 @@ async function fetchChatStream(body: ChatStreamRequest, attempt = 0): Promise<Re
   return response
 }
 
+function prepareAssistantMessageForStreaming(message: AssistantMessage, createdAt: string) {
+  message.content = ''
+  message.reasoning = ''
+  message.createdAt = createdAt
+  thinkingMessageMap[message.id] = thinkingEnabled.value
+  reasoningCollapsedMap[message.id] = false
+  delete reasoningStartedAtMap[message.id]
+  delete reasoningDurationMap[message.id]
+}
+
 // processSseBlock 负责解析单个 SSE block，并把增量内容落到当前 assistant message 上。
 // 职责边界：
 // 1. 会把同一个 block 里的多行 data: 合并后再解析，兼容标准 SSE 多行数据格式。
@@ -869,6 +1232,81 @@ function processSseBlock(block: string, assistantMessage: AssistantMessage) {
   scheduleScrollMessagesToBottom(false)
 }
 
+async function streamAssistantReply(
+  draftConversationId: string,
+  text: string,
+  assistantMessage: AssistantMessage,
+  createdAt: string,
+  refreshPreview: boolean,
+  retryExtra?: {
+    retryGroupId: string
+    retryFromUserMessageId: string | number
+    retryFromAssistantMessageId: string | number
+  },
+) : Promise<string> {
+  const response = await fetchChatStream({
+    conversation_id: isDraftConversationId(draftConversationId) ? undefined : draftConversationId,
+    message: text,
+    model: selectedModel.value,
+    thinking: thinkingEnabled.value,
+    extra: retryExtra
+      ? {
+          request_mode: 'retry',
+          retry_group_id: retryExtra.retryGroupId,
+          retry_from_user_message_id: retryExtra.retryFromUserMessageId,
+          retry_from_assistant_message_id: retryExtra.retryFromAssistantMessageId,
+        }
+      : undefined,
+  })
+
+  const responseConversationId = response.headers.get('X-Conversation-ID')?.trim()
+  const actualConversationId = responseConversationId || draftConversationId
+
+  if (actualConversationId !== draftConversationId) {
+    migrateConversationState(draftConversationId, actualConversationId)
+    if (refreshPreview) {
+      prependConversationPreview(actualConversationId, text, createdAt)
+    }
+  }
+
+  const reader = response.body!.getReader()
+  const decoder = new TextDecoder('utf-8')
+  let buffer = ''
+
+  while (true) {
+    const { done, value } = await reader.read()
+    if (done) {
+      break
+    }
+
+    buffer += decoder.decode(value, { stream: true })
+    const blocks = buffer.split(/\r?\n\r?\n/)
+    buffer = blocks.pop() ?? ''
+
+    for (const block of blocks) {
+      processSseBlock(block, assistantMessage)
+    }
+  }
+
+  buffer += decoder.decode()
+  if (buffer.trim()) {
+    processSseBlock(buffer, assistantMessage)
+  }
+
+  if (!assistantMessage.content.trim()) {
+    assistantMessage.content = assistantMessage.reasoning?.trim()
+      ? '已完成深度思考，但当前响应未返回正文内容。'
+      : '暂未收到回复正文，请稍后重试。'
+  }
+
+  if (refreshPreview) {
+    await loadConversationListData(true)
+    await ensureConversationMeta(actualConversationId)
+  }
+
+  return actualConversationId
+}
+
 // sendMessage 负责执行“本地先上屏，再异步接流”的发送链路。
 // 职责边界：
 // 1. 先创建用户消息和 assistant 占位消息，让发送动作立即反馈到界面，等待建连过程无感化。
@@ -916,53 +1354,8 @@ async function sendMessage(preset?: string) {
   scheduleScrollMessagesToBottom(false, true)
 
   try {
-    const response = await fetchChatStream({
-      conversation_id: isDraftConversationId(draftConversationId) ? undefined : draftConversationId,
-      message: text,
-      model: selectedModel.value,
-      thinking: thinkingEnabled.value,
-    })
-
-    const responseConversationId = response.headers.get('X-Conversation-ID')?.trim()
-    const actualConversationId = responseConversationId || draftConversationId
-
-    if (actualConversationId !== draftConversationId) {
-      migrateConversationState(draftConversationId, actualConversationId)
-      prependConversationPreview(actualConversationId, text, now)
-    }
-
-    const reader = response.body!.getReader()
-    const decoder = new TextDecoder('utf-8')
-    let buffer = ''
-
-    while (true) {
-      const { done, value } = await reader.read()
-      if (done) {
-        break
-      }
-
-      buffer += decoder.decode(value, { stream: true })
-      const blocks = buffer.split(/\r?\n\r?\n/)
-      buffer = blocks.pop() ?? ''
-
-      for (const block of blocks) {
-        processSseBlock(block, assistantMessage)
-      }
-    }
-
-    buffer += decoder.decode()
-    if (buffer.trim()) {
-      processSseBlock(buffer, assistantMessage)
-    }
-
-    if (!assistantMessage.content.trim()) {
-      assistantMessage.content = assistantMessage.reasoning?.trim()
-        ? '已完成深度思考，但当前响应未返回正文内容。'
-        : '暂未收到回复正文，请稍后重试。'
-    }
-
-    await loadConversationListData(true)
-    await ensureConversationMeta(actualConversationId)
+    const actualConversationId = await streamAssistantReply(draftConversationId, text, assistantMessage, now, true)
+    await loadConversationMessages(actualConversationId, true)
   } catch (error) {
     if (!assistantMessage.content.trim()) {
       assistantMessage.content = '本次回复已中断，请稍后重试。'
@@ -975,6 +1368,85 @@ async function sendMessage(preset?: string) {
   }
 }
 
+async function regenerateAssistantMessage(message: AssistantMessage) {
+  if (chatLoading.value) {
+    return
+  }
+
+  const sourceUserMessage = resolveVisibleUserMessageBeforeAssistant(message.id)
+  const text = sourceUserMessage?.content.trim() || ''
+  const conversationId = selectedConversationId.value
+  const persistedUserMessageId = resolvePersistedMessageId(sourceUserMessage)
+  const persistedAssistantMessageId = resolvePersistedMessageId(message)
+  if (!text || !conversationId || !sourceUserMessage) {
+    ElMessage.warning('没有找到可用于重试的用户消息')
+    return
+  }
+
+  if (!persistedUserMessageId) {
+    ElMessage.info('当前消息仍在本地态，稍后刷新完成后再试重试')
+    return
+  }
+
+  if (!persistedAssistantMessageId) {
+    ElMessage.info('当前回复仍在本地态，稍后刷新完成后再试重试')
+    return
+  }
+
+  chatLoading.value = true
+  cancelEditUserMessage()
+
+  const retryGroup = resolveRetryPageGroup(message)
+  const retryGroupId = retryGroup?.groupId || createRetryGroupId()
+  const nextRetryIndex = (retryGroup?.total ?? 1) + 1
+  applyRetryGroupToExistingMessages(retryGroupId, nextRetryIndex, sourceUserMessage.id, message.id)
+
+  const now = new Date().toISOString()
+  appendConversationMessage(conversationId, {
+    id: createMessageId('user'),
+    role: 'user',
+    content: text,
+    createdAt: now,
+    retryGroupId,
+    retryIndex: nextRetryIndex,
+    retryTotal: nextRetryIndex,
+  })
+  const retryAssistantMessage = appendConversationMessage(conversationId, {
+    id: createMessageId('assistant'),
+    role: 'assistant',
+    content: '',
+    createdAt: now,
+    reasoning: '',
+    retryGroupId,
+    retryIndex: nextRetryIndex,
+    retryTotal: nextRetryIndex,
+  })
+
+  retryVisiblePageMap[retryGroupId] = nextRetryIndex
+  prependConversationPreview(conversationId, text, now)
+  prepareAssistantMessageForStreaming(retryAssistantMessage, now)
+  activeStreamingMessageId.value = retryAssistantMessage.id
+  scheduleScrollMessagesToBottom(false, true)
+
+  try {
+    const actualConversationId = await streamAssistantReply(conversationId, text, retryAssistantMessage, now, true, {
+      retryGroupId,
+      retryFromUserMessageId: persistedUserMessageId,
+      retryFromAssistantMessageId: persistedAssistantMessageId,
+    })
+    await loadConversationMessages(actualConversationId, true)
+  } catch (error) {
+    if (!retryAssistantMessage.content.trim()) {
+      retryAssistantMessage.content = '重新生成失败，请稍后重试。'
+    }
+    reasoningCollapsedMap[retryAssistantMessage.id] = false
+    ElMessage.error(error instanceof Error ? error.message : '重新生成失败，请稍后重试')
+  } finally {
+    activeStreamingMessageId.value = ''
+    chatLoading.value = false
+  }
+}
+
 watch(
   () => selectedMessages.value.length,
   () => {
@@ -1004,6 +1476,9 @@ onBeforeUnmount(() => {
   if (messageScrollRaf) {
     cancelAnimationFrame(messageScrollRaf)
   }
+  if (messageScrollReleaseRaf) {
+    cancelAnimationFrame(messageScrollReleaseRaf)
+  }
   if (reasoningTicker) {
     window.clearInterval(reasoningTicker)
     reasoningTicker = 0
@@ -1142,7 +1617,48 @@ onBeforeUnmount(() => {
           >
             <div v-if="message.role === 'user'" class="chat-message__user-row">
               <div class="chat-message__user-bubble">
-                <div class="chat-message__markdown" v-html="renderMessageMarkdown(message.content)" />
+                <template v-if="isEditingUserMessage(message.id)">
+                  <div class="chat-message__editor">
+                    <textarea
+                      v-model="editingUserMessageDraft"
+                      class="chat-message__editor-textarea"
+                      rows="3"
+                    />
+                    <div class="chat-message__editor-actions">
+                      <button type="button" class="chat-message__editor-button chat-message__editor-button--ghost" @click="cancelEditUserMessage()">
+                        取消
+                      </button>
+                      <button type="button" class="chat-message__editor-button chat-message__editor-button--primary" @click="submitEditedUserMessage(message)">
+                        发送
+                      </button>
+                    </div>
+                  </div>
+                </template>
+                <div v-else class="chat-message__markdown" v-html="renderMessageMarkdown(message.content)" />
+              </div>
+              <div v-if="!isEditingUserMessage(message.id)" class="chat-message__action-bar chat-message__action-bar--user">
+                <button
+                  type="button"
+                  class="chat-message__icon-button"
+                  aria-label="复制消息"
+                  @click="copyText(message.content, '已复制用户消息')"
+                >
+                  <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
+                    <path d="M6.14923 4.02032C7.11191 4.02032 7.87977 4.02017 8.49591 4.07599C9.12122 4.1327 9.65786 4.25188 10.1414 4.53107C10.7201 4.8653 11.2008 5.34591 11.535 5.92462C11.8142 6.40818 11.9333 6.94482 11.9901 7.57013C12.0459 8.18625 12.0457 8.9542 12.0457 9.91681C12.0457 10.8795 12.0459 11.6474 11.9901 12.2635C11.9333 12.8888 11.8142 13.4254 11.535 13.909C11.2008 14.4877 10.7201 14.9683 10.1414 15.3026C9.65786 15.5817 9.12122 15.7009 8.49591 15.7576C7.87977 15.8134 7.1119 15.8133 6.14923 15.8133C5.18661 15.8133 4.41868 15.8134 3.80255 15.7576C3.17724 15.7009 2.6406 15.5817 2.15704 15.3026C1.57834 14.9684 1.09772 14.4877 0.763489 13.909C0.484305 13.4254 0.365123 12.8888 0.308411 12.2635C0.252587 11.6474 0.252747 10.8795 0.252747 9.91681C0.252747 8.95419 0.252603 8.18625 0.308411 7.57013C0.365123 6.94482 0.484305 6.40818 0.763489 5.92462C1.09771 5.3459 1.57833 4.86529 2.15704 4.53107C2.6406 4.25188 3.17724 4.1327 3.80255 4.07599C4.41868 4.02018 5.1866 4.02032 6.14923 4.02032Z" fill="currentColor" />
+                    <path d="M9.80157 0.367981C10.7637 0.367981 11.5313 0.367886 12.1473 0.423645C12.7725 0.480313 13.3093 0.598765 13.7928 0.877747C14.3716 1.21192 14.852 1.69355 15.1863 2.27228C15.4655 2.75575 15.5857 3.29165 15.6424 3.91681C15.6982 4.53301 15.6971 5.30161 15.6971 6.26447V7.8299C15.6971 8.29265 15.6989 8.58994 15.6649 8.84845C15.4667 10.3525 14.4009 11.5738 12.9832 11.9988V10.5467C13.6973 10.1903 14.2104 9.49662 14.3192 8.67169C14.3387 8.52348 14.3406 8.3358 14.3406 7.8299V6.26447C14.3406 5.27707 14.3398 4.58149 14.2908 4.04083C14.2427 3.50969 14.1526 3.19373 14.0125 2.95099C13.7974 2.5785 13.4875 2.2687 13.1151 2.05353C12.8723 1.91347 12.5563 1.82237 12.0252 1.77423C11.4846 1.72528 10.7888 1.7254 9.80157 1.7254H7.71466C6.75614 1.72559 5.92659 2.27697 5.52325 3.07892H4.07013C4.54215 1.51132 5.99314 0.368192 7.71466 0.367981H9.80157Z" fill="currentColor" />
+                  </svg>
+                </button>
+                <button
+                  type="button"
+                  class="chat-message__icon-button"
+                  aria-label="修改消息"
+                  @click="startEditUserMessage(message)"
+                >
+                  <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
+                    <path d="M9.94073 1.34942C10.7047 0.902314 11.6503 0.902418 12.4143 1.34942C12.706 1.52016 12.9687 1.79118 13.3104 2.13284C13.652 2.47448 13.9231 2.73721 14.0938 3.02894C14.5408 3.79295 14.5409 4.73856 14.0938 5.50251C13.9231 5.79415 13.652 6.05704 13.3104 6.39861L6.65929 13.0497C6.28065 13.4284 6.00692 13.7108 5.6654 13.9097C5.32388 14.1085 4.94312 14.2074 4.42702 14.3498L3.24391 14.6761C2.77524 14.8054 2.34535 14.9262 2.00128 14.9684C1.65193 15.0112 1.17961 15.0013 0.810733 14.6325C0.44189 14.2637 0.432076 13.7913 0.474829 13.442C0.517004 13.0979 0.63787 12.668 0.767151 12.1993L1.09349 11.0162C1.23585 10.5001 1.33478 10.1194 1.53356 9.77785C1.73246 9.43633 2.01487 9.1626 2.39352 8.78395L9.04463 2.13284C9.38622 1.79126 9.64908 1.52017 9.94073 1.34942Z" fill="currentColor" />
+                    <path d="M15.5427 14.8398H7.5522L8.96704 13.425H15.5427V14.8398Z" fill="currentColor" />
+                  </svg>
+                </button>
               </div>
               <span class="chat-message__time chat-message__time--user">{{ formatMessageTime(message.createdAt) }}</span>
             </div>
@@ -1226,6 +1742,51 @@ onBeforeUnmount(() => {
                 </div>
               </div>
 
+              <div v-if="message.content" class="chat-message__action-bar">
+                <button
+                  type="button"
+                  class="chat-message__icon-button"
+                  aria-label="复制回复"
+                  @click="copyText(message.content, '已复制回复内容')"
+                >
+                  <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
+                    <path d="M6.14923 4.02032C7.11191 4.02032 7.87977 4.02017 8.49591 4.07599C9.12122 4.1327 9.65786 4.25188 10.1414 4.53107C10.7201 4.8653 11.2008 5.34591 11.535 5.92462C11.8142 6.40818 11.9333 6.94482 11.9901 7.57013C12.0459 8.18625 12.0457 8.9542 12.0457 9.91681C12.0457 10.8795 12.0459 11.6474 11.9901 12.2635C11.9333 12.8888 11.8142 13.4254 11.535 13.909C11.2008 14.4877 10.7201 14.9683 10.1414 15.3026C9.65786 15.5817 9.12122 15.7009 8.49591 15.7576C7.87977 15.8134 7.1119 15.8133 6.14923 15.8133C5.18661 15.8133 4.41868 15.8134 3.80255 15.7576C3.17724 15.7009 2.6406 15.5817 2.15704 15.3026C1.57834 14.9684 1.09772 14.4877 0.763489 13.909C0.484305 13.4254 0.365123 12.8888 0.308411 12.2635C0.252587 11.6474 0.252747 10.8795 0.252747 9.91681C0.252747 8.95419 0.252603 8.18625 0.308411 7.57013C0.365123 6.94482 0.484305 6.40818 0.763489 5.92462C1.09771 5.3459 1.57833 4.86529 2.15704 4.53107C2.6406 4.25188 3.17724 4.1327 3.80255 4.07599C4.41868 4.02018 5.1866 4.02032 6.14923 4.02032Z" fill="currentColor" />
+                    <path d="M9.80157 0.367981C10.7637 0.367981 11.5313 0.367886 12.1473 0.423645C12.7725 0.480313 13.3093 0.598765 13.7928 0.877747C14.3716 1.21192 14.852 1.69355 15.1863 2.27228C15.4655 2.75575 15.5857 3.29165 15.6424 3.91681C15.6982 4.53301 15.6971 5.30161 15.6971 6.26447V7.8299C15.6971 8.29265 15.6989 8.58994 15.6649 8.84845C15.4667 10.3525 14.4009 11.5738 12.9832 11.9988V10.5467C13.6973 10.1903 14.2104 9.49662 14.3192 8.67169C14.3387 8.52348 14.3406 8.3358 14.3406 7.8299V6.26447C14.3406 5.27707 14.3398 4.58149 14.2908 4.04083C14.2427 3.50969 14.1526 3.19373 14.0125 2.95099C13.7974 2.5785 13.4875 2.2687 13.1151 2.05353C12.8723 1.91347 12.5563 1.82237 12.0252 1.77423C11.4846 1.72528 10.7888 1.7254 9.80157 1.7254H7.71466C6.75614 1.72559 5.92659 2.27697 5.52325 3.07892H4.07013C4.54215 1.51132 5.99314 0.368192 7.71466 0.367981H9.80157Z" fill="currentColor" />
+                  </svg>
+                </button>
+                <button
+                  type="button"
+                  class="chat-message__icon-button"
+                  aria-label="重新生成"
+                  :disabled="chatLoading"
+                  @click="regenerateAssistantMessage(message)"
+                >
+                  <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
+                    <path d="M7.92139 0.349152C10.3744 0.349234 12.5564 1.5052 13.9558 3.29894L15.1281 2.12759C15.3304 1.92546 15.6767 2.06943 15.6767 2.35538V5.53923C15.6766 5.71626 15.5329 5.85976 15.3559 5.86002H12.171C11.8855 5.8597 11.7426 5.51465 11.9443 5.31249L12.9641 4.29056C11.8237 2.74305 9.98911 1.74106 7.92139 1.74097C4.46439 1.74097 1.66236 4.543 1.66236 8C1.66236 11.457 4.46439 14.259 7.92139 14.259C11.3783 14.2589 14.1804 11.4569 14.1804 8H15.5722C15.5722 12.2251 12.1465 15.6507 7.92139 15.6508C3.69617 15.6508 0.270538 12.2252 0.270538 8C0.270538 3.77478 3.69617 0.349152 7.92139 0.349152Z" fill="currentColor" />
+                  </svg>
+                </button>
+                <div v-if="shouldShowRetryPager(message)" class="chat-message__retry-pager">
+                  <button
+                    type="button"
+                    class="chat-message__retry-pager-button"
+                    :disabled="resolveRetryPageGroup(message)?.visibleIndex === 1"
+                    @click="changeRetryPage(message, -1)"
+                  >
+                    &lt;
+                  </button>
+                  <span class="chat-message__retry-pager-label">
+                    {{ resolveRetryPageGroup(message)?.visibleIndex }}/{{ resolveRetryPageGroup(message)?.total }}
+                  </span>
+                  <button
+                    type="button"
+                    class="chat-message__retry-pager-button"
+                    :disabled="resolveRetryPageGroup(message)?.visibleIndex === resolveRetryPageGroup(message)?.total"
+                    @click="changeRetryPage(message, 1)"
+                  >
+                    &gt;
+                  </button>
+                </div>
+              </div>
               <span class="chat-message__time">{{ formatMessageTime(message.createdAt) }}</span>
             </div>
           </article>
@@ -1251,7 +1812,7 @@ onBeforeUnmount(() => {
                   <textarea
                     v-model="messageInput"
                     class="_27c9245 ds-scroll-area ds-scroll-area--show-on-focus-within d96f2d2a"
-                    placeholder="给 DeepSeek 发送消息 "
+                    placeholder="输入消息，按 Enter 发送"
                     rows="2"
                     @keydown.enter.exact.prevent="sendMessage()"
                   />
@@ -1807,6 +2368,139 @@ onBeforeUnmount(() => {
   padding-right: 10px;
 }
 
+.chat-message__action-bar {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+  flex-wrap: wrap;
+}
+
+.chat-message__action-bar--user {
+  justify-content: flex-end;
+}
+
+.chat-message__icon-button {
+  width: 30px;
+  height: 30px;
+  border: none;
+  border-radius: 999px;
+  background: transparent;
+  color: #7b8798;
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  cursor: pointer;
+  transition: background-color 0.15s ease, color 0.15s ease;
+}
+
+.chat-message__icon-button:hover {
+  background: rgba(79, 118, 234, 0.1);
+  color: #3f69d3;
+}
+
+.chat-message__icon-button:disabled {
+  opacity: 0.38;
+  cursor: not-allowed;
+}
+
+.chat-message__retry-pager {
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  margin-left: 2px;
+}
+
+.chat-message__retry-pager-button {
+  width: 24px;
+  height: 24px;
+  border: none;
+  border-radius: 999px;
+  background: transparent;
+  color: #7b8798;
+  font-size: 14px;
+  line-height: 1;
+  cursor: pointer;
+  transition: background-color 0.15s ease, color 0.15s ease;
+}
+
+.chat-message__retry-pager-button:hover {
+  background: rgba(79, 118, 234, 0.1);
+  color: #3f69d3;
+}
+
+.chat-message__retry-pager-button:disabled {
+  opacity: 0.3;
+  cursor: not-allowed;
+}
+
+.chat-message__retry-pager-label {
+  min-width: 34px;
+  text-align: center;
+  color: #6f7b8e;
+  font-size: 12px;
+  font-weight: 600;
+}
+
+.chat-message__editor {
+  width: min(100%, 640px);
+  border: 1px solid rgba(77, 107, 254, 0.22);
+  border-radius: 22px;
+  background: #ffffff;
+  box-shadow: 0 10px 22px rgba(15, 23, 42, 0.05);
+}
+
+.chat-message__editor-textarea {
+  width: 100%;
+  min-height: 82px;
+  border: none;
+  outline: none;
+  resize: vertical;
+  background: transparent;
+  padding: 12px 16px 0;
+  font: inherit;
+  font-size: 16px;
+  line-height: 24px;
+  color: #1f2b3f;
+  box-sizing: border-box;
+}
+
+.chat-message__editor-actions {
+  display: flex;
+  justify-content: flex-end;
+  gap: 12px;
+  padding: 6px 10px 10px;
+}
+
+.chat-message__editor-button {
+  height: 32px;
+  padding: 0 14px;
+  border-radius: 12px;
+  border: 1px solid transparent;
+  font-size: 13px;
+  line-height: 20px;
+  cursor: pointer;
+  transition: border-color 0.15s ease, background-color 0.15s ease, color 0.15s ease;
+}
+
+.chat-message__editor-button--ghost {
+  border-color: rgba(15, 23, 42, 0.12);
+  background: #ffffff;
+  color: #475569;
+}
+
+.chat-message__editor-button--ghost:hover {
+  background: #f8fafc;
+}
+
+.chat-message__editor-button--primary {
+  background: #235ff1;
+  color: #ffffff;
+}
+
+.chat-message__editor-button--primary:hover {
+  background: #1b53d7;
+}
+
 .chat-message__reasoning {
   padding: 2px 0 0;
   border: none;
diff --git a/frontend/src/types/dashboard.ts b/frontend/src/types/dashboard.ts
index e7c926e..d965f1c 100644
--- a/frontend/src/types/dashboard.ts
+++ b/frontend/src/types/dashboard.ts
@@ -88,6 +88,9 @@ export interface AssistantMessage {
   content: string
   createdAt: string
   reasoning?: string
+  retryGroupId?: string
+  retryIndex?: number
+  retryTotal?: number
 }
 
 export interface ChatStreamRequest {
diff --git a/openapi.yaml b/openapi.yaml
index 1e08cfb..da5d09a 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -1,26 +1,74 @@
 openapi: 3.0.3
 info:
-  title: SmartFlow Agent Query API
-  version: 0.8.0
-  description: Agent 会话列表与聊天历史查询接口。
+  title: SmartFlow Agent History API
+  version: 0.8.1
+  description: Latest conversation history API spec, including persisted reasoning content.
+
 servers:
   - url: http://localhost:8080
+
 paths:
+  /api/v1/agent/chat:
+    post:
+      tags:
+        - Agent
+      summary: Stream chat reply
+      description: |
+        Starts an Agent chat request.
+        Retry uses the same endpoint and passes retry metadata in `extra`.
+      security:
+        - BearerAuth: []
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/AgentChatRequest'
+            examples:
+              normal:
+                value:
+                  conversation_id: 8df59142-29a2-4bf6-85b6-c5e3f4e9cb89
+                  message: Help me review tomorrow's tasks.
+                  model: worker
+                  thinking: true
+                  extra: {}
+              retry:
+                value:
+                  conversation_id: 8df59142-29a2-4bf6-85b6-c5e3f4e9cb89
+                  message: Help me review tomorrow's tasks.
+                  model: worker
+                  thinking: true
+                  extra:
+                    request_mode: retry
+                    retry_group_id: retry-group-001
+                    retry_from_user_message_id: 123
+                    retry_from_assistant_message_id: 456
+      responses:
+        '200':
+          description: SSE stream response
+        '400':
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorResponse'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorResponse'
+
   /api/v1/agent/conversation-list:
     get:
       tags:
         - Agent
-      summary: 获取 Agent 会话列表
-      description: |
-        获取当前登录用户的 Agent 会话列表。
-        支持历史分页参数 `page/page_size`，并兼容懒加载参数 `limit`。
-        当同时传入 `page_size` 与 `limit` 时，以 `limit` 为准。
+      summary: Get conversation list
       security:
         - BearerAuth: []
       parameters:
         - name: page
           in: query
-          description: 页码，默认 1。
           required: false
           schema:
             type: integer
@@ -28,7 +76,6 @@ paths:
             example: 1
         - name: page_size
           in: query
-          description: 历史分页页大小，默认 20。
           required: false
           schema:
             type: integer
@@ -37,8 +84,8 @@ paths:
             example: 20
         - name: limit
           in: query
-          description: 懒加载条数，作为 `page_size` 的别名使用。
           required: false
+          description: Alias of page_size.
           schema:
             type: integer
             minimum: 1
@@ -46,67 +93,47 @@ paths:
             example: 20
         - name: status
           in: query
-          description: 会话状态过滤，仅支持 `active` 或 `archived`。
           required: false
           schema:
             type: string
-            enum:
-              - active
-              - archived
+            enum: [active, archived]
             example: active
       responses:
         '200':
-          description: 查询成功
+          description: Success
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ConversationListEnvelope'
-              examples:
-                success:
-                  value:
-                    status: "10000"
-                    info: success
-                    data:
-                      list:
-                        - conversation_id: 8df59142-29a2-4bf6-85b6-c5e3f4e9cb89
-                          title: 明天课程与任务安排
-                          has_title: true
-                          message_count: 14
-                          last_message_at: "2026-03-24T22:18:45+08:00"
-                          status: active
-                          created_at: "2026-03-24T20:01:12+08:00"
-                      page: 1
-                      page_size: 20
-                      limit: 20
-                      total: 1
-                      has_more: false
         '400':
-          description: 参数错误或会话状态非法
+          description: Invalid request
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorResponse'
         '401':
-          description: 未登录或 token 无效
+          description: Unauthorized
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorResponse'
+
   /api/v1/agent/conversation-history:
     get:
       tags:
         - Agent
-      summary: 获取 Agent 会话聊天历史
+      summary: Get conversation history
       description: |
-        获取指定会话的聊天历史。
-        服务端会先校验会话是否属于当前用户，再优先读取 Redis，
-        Redis 未命中时回源数据库，并把结果回填到 Redis。
+        Returns chat history for a conversation.
+        The service validates ownership first, then prefers Redis and falls back to MySQL.
+        `reasoning_content` is now returned from both cache hits and DB-backed history when available.
+        `reasoning_duration_seconds` is server-computed and persisted in seconds.
+        Retry groups are returned via `retry_group_id`, `retry_index`, and `retry_total`.
       security:
         - BearerAuth: []
       parameters:
         - name: conversation_id
           in: query
-          description: 会话 ID。
           required: true
           schema:
             type: string
@@ -114,7 +141,7 @@ paths:
             example: 8df59142-29a2-4bf6-85b6-c5e3f4e9cb89
       responses:
         '200':
-          description: 查询成功
+          description: Success
           content:
             application/json:
               schema:
@@ -127,31 +154,85 @@ paths:
                     data:
                       - id: 101
                         role: user
-                        content: 帮我看下明天最重要的任务
+                        content: Help me review tomorrow's most important task.
                         created_at: "2026-03-24T22:15:01+08:00"
+                        reasoning_content: ""
+                        reasoning_duration_seconds: 0
+                        retry_group_id: retry-group-001
+                        retry_index: 1
+                        retry_total: 2
                       - id: 102
                         role: assistant
-                        content: 明天优先处理数据库联调和接口验收。
+                        content: Tomorrow you should prioritize the DB integration and API validation.
                         created_at: "2026-03-24T22:15:06+08:00"
+                        reasoning_content: "Stage: request.accepted\nUser request accepted.\n\nStage: task_query.reflecting\nComparing the retrieved tasks and selecting the tightest deadlines."
+                        reasoning_duration_seconds: 5
+                        retry_group_id: retry-group-001
+                        retry_index: 2
+                        retry_total: 2
         '400':
-          description: 缺少参数或会话不存在
+          description: Missing parameter or conversation not found
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorResponse'
         '401':
-          description: 未登录或 token 无效
+          description: Unauthorized
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorResponse'
+
 components:
   securitySchemes:
     BearerAuth:
       type: http
       scheme: bearer
       bearerFormat: JWT
+
   schemas:
+    AgentChatRequest:
+      type: object
+      required:
+        - message
+      properties:
+        conversation_id:
+          type: string
+          format: uuid
+          nullable: true
+        message:
+          type: string
+        model:
+          type: string
+          nullable: true
+          example: worker
+        thinking:
+          type: boolean
+          example: true
+        extra:
+          $ref: '#/components/schemas/AgentChatExtra'
+
+    AgentChatExtra:
+      type: object
+      additionalProperties: true
+      properties:
+        request_mode:
+          type: string
+          description: Use `retry` when regenerating a previous answer version.
+          example: retry
+        retry_group_id:
+          type: string
+          nullable: true
+          example: retry-group-001
+        retry_from_user_message_id:
+          type: integer
+          nullable: true
+          example: 123
+        retry_from_assistant_message_id:
+          type: integer
+          nullable: true
+          example: 456
+
     ErrorResponse:
       type: object
       required:
@@ -164,6 +245,7 @@ components:
         info:
           type: string
           example: wrong param type
+
     ConversationListEnvelope:
       type: object
       required:
@@ -179,6 +261,7 @@ components:
           example: success
         data:
           $ref: '#/components/schemas/ConversationListResponse'
+
     ConversationListResponse:
       type: object
       required:
@@ -201,7 +284,6 @@ components:
           example: 20
         limit:
           type: integer
-          description: 当前实际使用的懒加载条数。
           example: 20
         total:
           type: integer
@@ -210,6 +292,7 @@ components:
         has_more:
           type: boolean
           example: true
+
     ConversationListItem:
       type: object
       required:
@@ -224,7 +307,7 @@ components:
           format: uuid
         title:
           type: string
-          example: 明天课程与任务安排
+          example: Tomorrow task review
         has_title:
           type: boolean
           example: true
@@ -242,6 +325,7 @@ components:
           type: string
           format: date-time
           nullable: true
+
     ConversationHistoryEnvelope:
       type: object
       required:
@@ -259,6 +343,7 @@ components:
           type: array
           items:
             $ref: '#/components/schemas/ConversationHistoryItem'
+
     ConversationHistoryItem:
       type: object
       required:
@@ -267,23 +352,39 @@ components:
       properties:
         id:
           type: integer
-          description: 数据库主键；命中 Redis 时可能为空。
+          description: Database primary key. May be omitted on pure cache hits.
           example: 102
         role:
           type: string
-          enum:
-            - user
-            - assistant
-            - system
+          enum: [user, assistant, system]
           example: assistant
         content:
           type: string
-          example: 明天优先处理数据库联调和接口验收。
+          example: Tomorrow you should prioritize the DB integration and API validation.
         created_at:
           type: string
           format: date-time
           nullable: true
         reasoning_content:
           type: string
-          description: 推理内容；命中数据库历史时通常为空。
-          example: ""
+          description: Aggregated reasoning text, including streamed model reasoning or stage-level thinking text.
+          example: "Stage: request.accepted\nUser request accepted."
+        reasoning_duration_seconds:
+          type: integer
+          description: Server-computed reasoning duration in seconds.
+          example: 5
+        retry_group_id:
+          type: string
+          nullable: true
+          description: Retry group identifier shared by the original answer and all retry versions.
+          example: retry-group-001
+        retry_index:
+          type: integer
+          nullable: true
+          description: 1-based version index within the retry group.
+          example: 2
+        retry_total:
+          type: integer
+          nullable: true
+          description: Total number of versions currently in the retry group.
+          example: 2