Version: 0.9.53.dev.260429
后端: 1. 流式思考链路从 raw reasoning_content 切到 `thinking_summary` 摘要协议,补齐摘要 prompt、digestor 与 Lite 压缩链路,plan / execute / fallback 统一改为“只出摘要、不透原始推理”,正文开始后自动关停摘要流。 2. thinking_summary 打通 timeline / SSE / outbox 持久化闭环,只落 detail_summary 与必要 metadata,并补强 seq 自检、冲突幂等识别与补 seq 回填,提升重放恢复稳定性。 3. 会话历史口径继续收紧,assistant 正文与时间线不再回写 raw reasoning_content,仅保留正文与思考耗时,避免刷新恢复时再次暴露内部推理文本。 前端: 4. 助手页开始接入 thinking_summary 实时流与历史恢复,补齐短摘要状态、长摘要折叠区、正文开流后自动收口,并增加调试入口用于协议联调与验收。 5. 当前前端助手页仍是残次过渡态,本版先以 thinking_summary 协议接通和基础渲染为主,样式、交互与细节体验暂未收平,下一版集中修复。 仓库: 6. 补充 thinking_summary 对接说明,明确 SSE 协议、timeline 恢复口径与 short/detail summary 的使用边界。
This commit is contained in:
@@ -135,6 +135,9 @@ func Start() {
|
||||
if err = eventsvc.RegisterAgentStateSnapshotHandler(eventBus, outboxRepo, manager); err != nil {
|
||||
log.Fatalf("Failed to register agent state snapshot event handler: %v", err)
|
||||
}
|
||||
if err = eventsvc.RegisterAgentTimelinePersistHandler(eventBus, outboxRepo, agentRepo, cacheRepo); err != nil {
|
||||
log.Fatalf("Failed to register agent timeline persist event handler: %v", err)
|
||||
}
|
||||
if err = eventsvc.RegisterMemoryExtractRequestedHandler(eventBus, outboxRepo, memoryModule); err != nil {
|
||||
log.Fatalf("Failed to register memory extract event handler: %v", err)
|
||||
}
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
package model
|
||||
|
||||
import "time"
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// AgentTimelineKind 定义会话时间线事件类型。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 这些类型面向前端渲染,要求语义稳定,不随节点内部实现细节频繁变化;
|
||||
// 2. 文本消息和卡片事件共用一条时间线,前端只按 seq 顺序渲染;
|
||||
// 3. token 统计仍以 chat_histories / agent_chats 为准,时间线只做展示顺序与结构承载。
|
||||
// 3. token 统计仍以 chat_histories / agent_chats 为准,时间线只负责展示顺序与结构承载。
|
||||
const (
|
||||
AgentTimelineKindUserText = "user_text"
|
||||
AgentTimelineKindAssistantText = "assistant_text"
|
||||
@@ -16,6 +19,7 @@ const (
|
||||
AgentTimelineKindConfirmRequest = "confirm_request"
|
||||
AgentTimelineKindBusinessCard = "business_card"
|
||||
AgentTimelineKindScheduleCompleted = "schedule_completed"
|
||||
AgentTimelineKindThinkingSummary = "thinking_summary"
|
||||
)
|
||||
|
||||
// AgentTimelineEvent 表示会话里“可展示事件”的统一持久化记录。
|
||||
@@ -40,6 +44,11 @@ type AgentTimelineEvent struct {
|
||||
func (AgentTimelineEvent) TableName() string { return "agent_timeline_events" }
|
||||
|
||||
// ChatTimelinePersistPayload 定义时间线单条事件落库输入。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只表达一次“写入 agent_timeline_events”的最小字段集合;
|
||||
// 2. Content 面向纯文本类事件,结构化事件更多依赖 PayloadJSON;
|
||||
// 3. thinking_summary 事件要求 PayloadJSON 内只保留 detail_summary 与必要 metadata。
|
||||
type ChatTimelinePersistPayload struct {
|
||||
UserID int `json:"user_id"`
|
||||
ConversationID string `json:"conversation_id"`
|
||||
@@ -51,6 +60,73 @@ type ChatTimelinePersistPayload struct {
|
||||
TokensConsumed int `json:"tokens_consumed"`
|
||||
}
|
||||
|
||||
// Normalize 负责收敛时间线持久化载荷的基础口径。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只做字符串 trim 和非负数兜底;
|
||||
// 2. 不负责 thinking_summary 的业务裁剪;
|
||||
// 3. 返回副本,避免调用方意外修改原对象。
|
||||
func (p ChatTimelinePersistPayload) Normalize() ChatTimelinePersistPayload {
|
||||
p.ConversationID = strings.TrimSpace(p.ConversationID)
|
||||
p.Kind = strings.TrimSpace(p.Kind)
|
||||
p.Role = strings.TrimSpace(p.Role)
|
||||
p.Content = strings.TrimSpace(p.Content)
|
||||
p.PayloadJSON = strings.TrimSpace(p.PayloadJSON)
|
||||
if p.Seq < 0 {
|
||||
p.Seq = 0
|
||||
}
|
||||
if p.TokensConsumed < 0 {
|
||||
p.TokensConsumed = 0
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// HasValidIdentity 判断 payload 是否具备最小可持久化主键语义。
|
||||
func (p ChatTimelinePersistPayload) HasValidIdentity() bool {
|
||||
normalized := p.Normalize()
|
||||
return normalized.UserID > 0 &&
|
||||
normalized.ConversationID != "" &&
|
||||
normalized.Seq > 0 &&
|
||||
normalized.Kind != ""
|
||||
}
|
||||
|
||||
// MatchesStoredEvent 判断 payload 与库中事件是否可视为“同一条业务事件”。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 主要用于 outbox 重放时识别“唯一键冲突但其实已经成功落库”的场景;
|
||||
// 2. 只比较持久化字段,不比较 created_at / id 这类存储侧派生值;
|
||||
// 3. 返回 true 时,上层可以把 seq 冲突视为幂等成功。
|
||||
func (p ChatTimelinePersistPayload) MatchesStoredEvent(event AgentTimelineEvent) bool {
|
||||
normalized := p.Normalize()
|
||||
return event.UserID == normalized.UserID &&
|
||||
strings.TrimSpace(event.ChatID) == normalized.ConversationID &&
|
||||
event.Seq == normalized.Seq &&
|
||||
strings.TrimSpace(event.Kind) == normalized.Kind &&
|
||||
trimTimelinePointerString(event.Role) == normalized.Role &&
|
||||
trimTimelinePointerString(event.Content) == normalized.Content &&
|
||||
trimTimelinePointerString(event.Payload) == normalized.PayloadJSON &&
|
||||
event.TokensConsumed == normalized.TokensConsumed
|
||||
}
|
||||
|
||||
// IsTimelineSeqConflictError reports whether err looks like a timeline seq
// unique-key violation.
//
// Notes:
//  1. MySQL / PostgreSQL / SQLite phrase duplicate-key errors differently, so
//     this uses loose, lowercase substring matching;
//  2. It only decides whether to enter the idempotent / seq-backfill branch;
//     it is not a precise error classifier;
//  3. If a shared database error-code adapter is introduced later, replace
//     this function rather than copying the matching logic elsewhere.
func IsTimelineSeqConflictError(err error) bool {
	if err == nil {
		return false
	}

	message := strings.ToLower(err.Error())
	markers := []string{
		"duplicate entry",
		"duplicate key",
		"unique constraint",
		"unique violation",
		"error 1062",
		"uk_timeline_user_chat_seq",
	}
	for _, marker := range markers {
		if strings.Contains(message, marker) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// GetConversationTimelineItem 定义前端读取时间线接口的单条返回项。
|
||||
type GetConversationTimelineItem struct {
|
||||
ID int64 `json:"id,omitempty"`
|
||||
@@ -62,3 +138,10 @@ type GetConversationTimelineItem struct {
|
||||
TokensConsumed int `json:"tokens_consumed,omitempty"`
|
||||
CreatedAt *time.Time `json:"created_at,omitempty"`
|
||||
}
|
||||
|
||||
// trimTimelinePointerString dereferences an optional string column and trims
// surrounding whitespace; a nil pointer is treated as the empty string.
func trimTimelinePointerString(value *string) string {
	text := ""
	if value != nil {
		text = *value
	}
	return strings.TrimSpace(text)
}
|
||||
|
||||
@@ -56,6 +56,15 @@ func collectExecuteDecisionFromLLM(
|
||||
parser := newagentrouter.NewStreamDecisionParser()
|
||||
output := &executeDecisionStreamOutput{firstChunk: true}
|
||||
var fullText strings.Builder
|
||||
reasoningDigestor, digestorErr := emitter.NewReasoningDigestor(ctx, executeSpeakBlockID, executeStageName)
|
||||
if digestorErr != nil {
|
||||
return nil, fmt.Errorf("执行 thinking 摘要器初始化失败: %w", digestorErr)
|
||||
}
|
||||
defer func() {
|
||||
if reasoningDigestor != nil {
|
||||
_ = reasoningDigestor.Close(ctx)
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
chunk, recvErr := reader.Recv()
|
||||
@@ -68,15 +77,9 @@ func collectExecuteDecisionFromLLM(
|
||||
}
|
||||
|
||||
if chunk != nil && strings.TrimSpace(chunk.ReasoningContent) != "" {
|
||||
if emitErr := emitter.EmitReasoningText(
|
||||
executeSpeakBlockID,
|
||||
executeStageName,
|
||||
chunk.ReasoningContent,
|
||||
output.firstChunk,
|
||||
); emitErr != nil {
|
||||
return nil, fmt.Errorf("执行 thinking 推送失败: %w", emitErr)
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.Append(chunk.ReasoningContent)
|
||||
}
|
||||
output.firstChunk = false
|
||||
}
|
||||
|
||||
content := ""
|
||||
@@ -148,6 +151,9 @@ func collectExecuteDecisionFromLLM(
|
||||
output.decision = decision
|
||||
|
||||
if visible != "" {
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := emitter.EmitAssistantText(
|
||||
executeSpeakBlockID,
|
||||
executeStageName,
|
||||
@@ -174,9 +180,14 @@ func collectExecuteDecisionFromLLM(
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(chunk2.ReasoningContent) != "" {
|
||||
_ = emitter.EmitReasoningText(executeSpeakBlockID, executeStageName, chunk2.ReasoningContent, false)
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.Append(chunk2.ReasoningContent)
|
||||
}
|
||||
}
|
||||
if chunk2.Content != "" {
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := emitter.EmitAssistantText(
|
||||
executeSpeakBlockID,
|
||||
executeStageName,
|
||||
|
||||
@@ -106,6 +106,15 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
parser := newagentrouter.NewStreamDecisionParser()
|
||||
firstChunk := true
|
||||
speakStreamed := false
|
||||
reasoningDigestor, digestorErr := emitter.NewReasoningDigestor(ctx, planSpeakBlockID, planStageName)
|
||||
if digestorErr != nil {
|
||||
return fmt.Errorf("规划 thinking 摘要器初始化失败: %w", digestorErr)
|
||||
}
|
||||
defer func() {
|
||||
if reasoningDigestor != nil {
|
||||
_ = reasoningDigestor.Close(ctx)
|
||||
}
|
||||
}()
|
||||
|
||||
// 3.1 阶段一:解析决策标签。
|
||||
for {
|
||||
@@ -118,12 +127,11 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
break
|
||||
}
|
||||
|
||||
// thinking 内容独立推流。
|
||||
// thinking 内容只进入摘要器,不再把 raw reasoning_content 透传给前端。
|
||||
if chunk != nil && strings.TrimSpace(chunk.ReasoningContent) != "" {
|
||||
if emitErr := emitter.EmitReasoningText(planSpeakBlockID, planStageName, chunk.ReasoningContent, firstChunk); emitErr != nil {
|
||||
return fmt.Errorf("规划 thinking 推送失败: %w", emitErr)
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.Append(chunk.ReasoningContent)
|
||||
}
|
||||
firstChunk = false
|
||||
}
|
||||
|
||||
content := ""
|
||||
@@ -152,6 +160,9 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
// 3.2 阶段二:流式推送 speak(同一 reader 继续读取)。
|
||||
var fullText strings.Builder
|
||||
if visible != "" {
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := emitter.EmitAssistantText(planSpeakBlockID, planStageName, visible, firstChunk); emitErr != nil {
|
||||
return fmt.Errorf("规划文案推送失败: %w", emitErr)
|
||||
}
|
||||
@@ -172,9 +183,14 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(chunk2.ReasoningContent) != "" {
|
||||
_ = emitter.EmitReasoningText(planSpeakBlockID, planStageName, chunk2.ReasoningContent, false)
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.Append(chunk2.ReasoningContent)
|
||||
}
|
||||
}
|
||||
if chunk2.Content != "" {
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := emitter.EmitAssistantText(planSpeakBlockID, planStageName, chunk2.Content, firstChunk); emitErr != nil {
|
||||
return fmt.Errorf("规划文案推送失败: %w", emitErr)
|
||||
}
|
||||
|
||||
128
backend/newAgent/prompt/reasoning_summary.go
Normal file
128
backend/newAgent/prompt/reasoning_summary.go
Normal file
@@ -0,0 +1,128 @@
|
||||
package newagentprompt
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
const (
|
||||
reasoningSummaryMaxFullRunes = 6000
|
||||
reasoningSummaryMaxDeltaRunes = 1800
|
||||
)
|
||||
|
||||
// ReasoningSummaryPromptInput describes the minimal input needed for one
// "thinking summary" model call.
//
// Responsibilities:
//  1. It only carries the text and runtime state the summary model must see;
//     it does not bind to the stream package's DTOs, so the prompt layer
//     never depends back on the output protocol;
//  2. FullReasoning keeps only its tail when the prompt is built, so a long
//     thinking run cannot blow up the cheap model's context window;
//  3. PreviousSummary is continuity reference only; the model is not required
//     to inherit it verbatim.
type ReasoningSummaryPromptInput struct {
	FullReasoning   string
	DeltaReasoning  string
	PreviousSummary string
	CandidateSeq    int
	Final           bool
	DurationSeconds float64
}

// reasoningSummaryPromptPayload is the JSON body embedded in the user prompt.
// The json tag names (e.g. max_detail_summary_runes) are referenced by the
// system prompt text, so renaming a tag changes what the model is told.
type reasoningSummaryPromptPayload struct {
	CandidateSeq          int     `json:"candidate_seq"`
	Final                 bool    `json:"final"`
	DurationSeconds      float64 `json:"duration_seconds"`
	PreviousSummary       string  `json:"previous_summary,omitempty"`
	RecentReasoning       string  `json:"recent_reasoning,omitempty"`
	DeltaReasoning        string  `json:"delta_reasoning,omitempty"`
	SourceTextRunes       int     `json:"source_text_runes,omitempty"`
	MaxDetailSummaryRunes int     `json:"max_detail_summary_runes,omitempty"`
}
|
||||
|
||||
// BuildReasoningSummaryMessages 构造思考摘要模型调用的 messages。
|
||||
//
|
||||
// 步骤说明:
|
||||
// 1. system prompt 明确“只做用户可见摘要”,禁止复述原始思考链和内部推理细节;
|
||||
// 2. user prompt 使用 JSON 承载输入,便于后续扩展字段且减少模型误读;
|
||||
// 3. 长文本只保留尾部窗口,保证异步摘要请求稳定、便宜、可控。
|
||||
func BuildReasoningSummaryMessages(input ReasoningSummaryPromptInput) []*schema.Message {
|
||||
recentReasoning := trimRunesFromEnd(input.FullReasoning, reasoningSummaryMaxFullRunes)
|
||||
deltaReasoning := trimRunesFromEnd(input.DeltaReasoning, reasoningSummaryMaxDeltaRunes)
|
||||
payload := reasoningSummaryPromptPayload{
|
||||
CandidateSeq: input.CandidateSeq,
|
||||
Final: input.Final,
|
||||
DurationSeconds: input.DurationSeconds,
|
||||
PreviousSummary: strings.TrimSpace(input.PreviousSummary),
|
||||
RecentReasoning: recentReasoning,
|
||||
DeltaReasoning: deltaReasoning,
|
||||
SourceTextRunes: reasoningSummarySourceRunes(recentReasoning, deltaReasoning),
|
||||
MaxDetailSummaryRunes: ReasoningSummaryDetailRuneLimit(input.FullReasoning, input.DeltaReasoning),
|
||||
}
|
||||
|
||||
raw, err := json.MarshalIndent(payload, "", " ")
|
||||
if err != nil {
|
||||
raw = []byte(fmt.Sprintf(`{"recent_reasoning":%q}`, trimRunesFromEnd(input.FullReasoning, reasoningSummaryMaxFullRunes)))
|
||||
}
|
||||
|
||||
return []*schema.Message{
|
||||
schema.SystemMessage(buildReasoningSummarySystemPrompt()),
|
||||
schema.UserMessage("请把下面的模型思考内容整理成用户可见的进度摘要。\n输入:\n" + string(raw)),
|
||||
}
|
||||
}
|
||||
|
||||
// buildReasoningSummarySystemPrompt returns the fixed system prompt for the
// thinking-summary model. It pins a strict-JSON output contract with
// short_summary / detail_summary fields and forbids leaking raw reasoning.
func buildReasoningSummarySystemPrompt() string {
	const prompt = `你是 SmartMate 的“思考摘要器”。你的任务是把模型内部 reasoning 整理成用户可见的进度摘要。

输出必须是严格 JSON 对象:
{
"short_summary": "8到18个汉字的短摘要",
"detail_summary": "不超过 max_detail_summary_runes 个字的展开摘要"
}

规则:
1. 只描述“正在做什么”和“目前推进到哪一步”,不要复述、引用或暴露原始思考链。
2. 不输出 markdown,不输出代码块,不解释 JSON 以外的内容。
3. short_summary 要短、稳定、适合前端几秒刷新一次。
4. detail_summary 不按固定句数限制,而按输入长度控制:字数必须小于等于 max_detail_summary_runes;不需要凑满上限,信息密度优先。
5. detail_summary 仍然面向用户,不写内部推理细节、隐含假设链、逐步演算。
6. 若输入为空或噪声较多,用保守摘要,例如“正在整理思路”“正在核对可用信息”。
7. final=true 时,detail_summary 用完成态语气,说明思考已收拢到下一步答复或动作。`
	return strings.TrimSpace(prompt)
}
|
||||
|
||||
// ReasoningSummaryDetailRuneLimit 返回 detail_summary 的最大字数。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 与 BuildReasoningSummaryMessages 使用同一套输入窗口,避免 prompt 提示和服务端兜底口径不一致;
|
||||
// 2. 上限取“提供给摘要模型的主要文本段”的一半,并向上取整,适配极短文本;
|
||||
// 3. 返回 0 表示没有有效输入文本,调用方不应做硬裁剪。
|
||||
func ReasoningSummaryDetailRuneLimit(fullReasoning, deltaReasoning string) int {
|
||||
recentReasoning := trimRunesFromEnd(fullReasoning, reasoningSummaryMaxFullRunes)
|
||||
delta := trimRunesFromEnd(deltaReasoning, reasoningSummaryMaxDeltaRunes)
|
||||
sourceRunes := reasoningSummarySourceRunes(recentReasoning, delta)
|
||||
if sourceRunes <= 0 {
|
||||
return 0
|
||||
}
|
||||
return (sourceRunes + 1) / 2
|
||||
}
|
||||
|
||||
// reasoningSummarySourceRunes counts the runes of the primary text segment:
// the recent-reasoning window when it is non-blank, otherwise the delta window.
func reasoningSummarySourceRunes(recentReasoning, deltaReasoning string) int {
	if trimmed := strings.TrimSpace(recentReasoning); trimmed != "" {
		return utf8.RuneCountInString(trimmed)
	}
	return utf8.RuneCountInString(strings.TrimSpace(deltaReasoning))
}
|
||||
|
||||
// trimRunesFromEnd trims surrounding whitespace and keeps at most the last
// maxRunes runes of text. Blank text or a non-positive cap yields "".
func trimRunesFromEnd(text string, maxRunes int) string {
	trimmed := strings.TrimSpace(text)
	if maxRunes <= 0 || trimmed == "" {
		return ""
	}

	runes := []rune(trimmed)
	if overflow := len(runes) - maxRunes; overflow > 0 {
		return string(runes[overflow:])
	}
	return trimmed
}
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
@@ -62,6 +63,18 @@ type ChunkEmitter struct {
|
||||
RequestID string
|
||||
ModelName string
|
||||
Created int64
|
||||
|
||||
// thinkingGateMu 是“正文门卫”的轻量保护。
|
||||
// 1. 它只保护 thinking_summary 是否还能发,不串行化全部 SSE;
|
||||
// 2. 正文一旦开始,对应 block 的门会被关闭,后续同 block 摘要直接丢弃;
|
||||
// 3. 这样既避免摘要 goroutine 在正文之后补发旧思考,又不误杀后续节点的新一轮思考。
|
||||
thinkingGateMu sync.Mutex
|
||||
thinkingClosedBlocks map[string]bool
|
||||
// reasoningSummaryFunc 用于把原始 reasoning 压成用户可见摘要。
|
||||
// 1. 该函数由 service 层注入,stream 包只负责调度,不负责选择模型;
|
||||
// 2. 未注入时模型 reasoning 只会被静默丢弃,不再回退成 raw reasoning_content;
|
||||
// 3. 正文一旦开始,ReasoningDigestor 和 ChunkEmitter 会同时关门,迟到结果不会再发给前端。
|
||||
reasoningSummaryFunc ReasoningSummaryFunc
|
||||
// extraEventHook 用于把关键结构化事件同步给上层做持久化。
|
||||
// 1. hook 失败不能影响 SSE 主链路;
|
||||
// 2. hook 只接收 extra 结构,避免 emitter 反向依赖业务层;
|
||||
@@ -122,6 +135,40 @@ func (e *ChunkEmitter) SetExtraEventHook(hook func(extra *OpenAIChunkExtra)) {
|
||||
e.extraEventHook = hook
|
||||
}
|
||||
|
||||
// SetReasoningSummaryFunc 设置 reasoning 摘要模型调用函数。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 这里只保存函数引用,不立即调用模型;
|
||||
// 2. 摘要触发频率、单飞、正文闸门由 ReasoningDigestor 负责;
|
||||
// 3. 传 nil 表示关闭摘要能力,后续 reasoning chunk 会被静默丢弃。
|
||||
func (e *ChunkEmitter) SetReasoningSummaryFunc(fn ReasoningSummaryFunc) {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
e.reasoningSummaryFunc = fn
|
||||
}
|
||||
|
||||
// NewReasoningDigestor 为当前 block 创建一个 reasoning 摘要器。
|
||||
//
|
||||
// 步骤说明:
|
||||
// 1. 若未注入摘要函数,返回 nil,调用方只需跳过 raw reasoning 推送;
|
||||
// 2. 摘要结果先经过 ChunkEmitter 的正文门卫,再走统一 extra/hook 链路;
|
||||
// 3. Digestor 自身仍负责单飞、水位线和正文开始后的 in-flight 结果丢弃。
|
||||
func (e *ChunkEmitter) NewReasoningDigestor(ctx context.Context, blockID, stage string) (*ReasoningDigestor, error) {
|
||||
if e == nil || e.reasoningSummaryFunc == nil {
|
||||
return nil, nil
|
||||
}
|
||||
e.openThinkingSummaryGate(blockID, stage)
|
||||
return NewReasoningDigestor(ReasoningDigestorOptions{
|
||||
SummaryFunc: e.reasoningSummaryFunc,
|
||||
SummarySink: func(summary StreamThinkingSummaryExtra) {
|
||||
_ = e.EmitThinkingSummary(blockID, stage, summary)
|
||||
},
|
||||
BaseContext: ctx,
|
||||
SummaryTimeout: 8 * time.Second,
|
||||
})
|
||||
}
|
||||
|
||||
// EmitReasoningText 输出一段 reasoning 文字,并附带 reasoning_text extra。
|
||||
func (e *ChunkEmitter) EmitReasoningText(blockID, stage, text string, includeRole bool) error {
|
||||
if e == nil || e.emit == nil {
|
||||
@@ -160,6 +207,7 @@ func (e *ChunkEmitter) EmitAssistantText(blockID, stage, text string, includeRol
|
||||
if text == "" {
|
||||
return nil
|
||||
}
|
||||
e.closeThinkingSummaryGate(blockID, stage)
|
||||
|
||||
payload, err := ToOpenAIAssistantChunkWithExtra(
|
||||
e.RequestID,
|
||||
@@ -178,6 +226,66 @@ func (e *ChunkEmitter) EmitAssistantText(blockID, stage, text string, includeRol
|
||||
return e.emit(payload)
|
||||
}
|
||||
|
||||
// EmitThinkingSummary 输出一次“流式思考摘要”事件。
|
||||
//
|
||||
// 协议约束:
|
||||
// 1. 该事件只走 extra.thinking_summary,不回写 delta.content / delta.reasoning_content;
|
||||
// 2. 仍复用现有 extra hook,让上层在不依赖 emitter 细节的前提下同步持久化;
|
||||
// 3. includeRole 不再需要,因为 thinking_summary 本身就是纯结构化事件。
|
||||
func (e *ChunkEmitter) EmitThinkingSummary(blockID, stage string, summary StreamThinkingSummaryExtra) error {
|
||||
if e == nil || e.emit == nil {
|
||||
return nil
|
||||
}
|
||||
if e.isThinkingSummaryGateClosed(blockID, stage) {
|
||||
return nil
|
||||
}
|
||||
return e.emitExtraOnly(NewThinkingSummaryExtra(blockID, stage, summary))
|
||||
}
|
||||
|
||||
func (e *ChunkEmitter) openThinkingSummaryGate(blockID, stage string) {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
e.thinkingGateMu.Lock()
|
||||
if e.thinkingClosedBlocks != nil {
|
||||
delete(e.thinkingClosedBlocks, thinkingSummaryGateKey(blockID, stage))
|
||||
}
|
||||
e.thinkingGateMu.Unlock()
|
||||
}
|
||||
|
||||
func (e *ChunkEmitter) closeThinkingSummaryGate(blockID, stage string) {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
e.thinkingGateMu.Lock()
|
||||
if e.thinkingClosedBlocks == nil {
|
||||
e.thinkingClosedBlocks = make(map[string]bool)
|
||||
}
|
||||
e.thinkingClosedBlocks[thinkingSummaryGateKey(blockID, stage)] = true
|
||||
e.thinkingGateMu.Unlock()
|
||||
}
|
||||
|
||||
func (e *ChunkEmitter) isThinkingSummaryGateClosed(blockID, stage string) bool {
|
||||
if e == nil {
|
||||
return true
|
||||
}
|
||||
e.thinkingGateMu.Lock()
|
||||
defer e.thinkingGateMu.Unlock()
|
||||
return e.thinkingClosedBlocks[thinkingSummaryGateKey(blockID, stage)]
|
||||
}
|
||||
|
||||
// thinkingSummaryGateKey derives the gate-map key for a block: the trimmed
// block id when present, otherwise the trimmed stage, otherwise a shared
// default key.
func thinkingSummaryGateKey(blockID, stage string) string {
	for _, candidate := range []string{blockID, stage} {
		if trimmed := strings.TrimSpace(candidate); trimmed != "" {
			return trimmed
		}
	}
	return "__default__"
}
|
||||
|
||||
// EmitPseudoReasoningText 把整段 reasoning 文本按伪流式方式逐块推出。
|
||||
func (e *ChunkEmitter) EmitPseudoReasoningText(ctx context.Context, blockID, stage, text string, options PseudoStreamOptions) error {
|
||||
return e.emitPseudoText(
|
||||
@@ -304,6 +412,9 @@ func (e *ChunkEmitter) EmitConfirmRequest(ctx context.Context, blockID, stage, i
|
||||
text := buildConfirmAssistantText(title, summary)
|
||||
extra := NewConfirmRequestExtra(blockID, stage, interactionID, title, summary)
|
||||
e.emitExtraEventHook(extra)
|
||||
if strings.TrimSpace(text) != "" {
|
||||
e.closeThinkingSummaryGate(blockID, stage)
|
||||
}
|
||||
return e.emitPseudoText(
|
||||
ctx,
|
||||
text,
|
||||
@@ -341,6 +452,9 @@ func (e *ChunkEmitter) EmitInterruptMessage(ctx context.Context, blockID, stage,
|
||||
|
||||
text := buildInterruptAssistantText(interactionType, summary)
|
||||
extra := NewInterruptExtra(blockID, stage, interactionID, interactionType, summary)
|
||||
if strings.TrimSpace(text) != "" {
|
||||
e.closeThinkingSummaryGate(blockID, stage)
|
||||
}
|
||||
return e.emitPseudoText(
|
||||
ctx,
|
||||
text,
|
||||
@@ -435,6 +549,15 @@ func (e *ChunkEmitter) EmitStreamAssistantText(
|
||||
|
||||
var fullText strings.Builder
|
||||
firstChunk := true
|
||||
digestor, digestorErr := e.NewReasoningDigestor(ctx, blockID, stage)
|
||||
if digestorErr != nil {
|
||||
return "", digestorErr
|
||||
}
|
||||
defer func() {
|
||||
if digestor != nil {
|
||||
_ = digestor.Close(ctx)
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
chunk, err := reader.Recv()
|
||||
@@ -445,16 +568,19 @@ func (e *ChunkEmitter) EmitStreamAssistantText(
|
||||
return fullText.String(), err
|
||||
}
|
||||
|
||||
// 推送 reasoning content。
|
||||
// 1. reasoning content 只喂给摘要器,不再透传给前端。
|
||||
// 2. 未注入摘要能力时直接丢弃,避免 raw reasoning_content 泄漏到 SSE。
|
||||
if chunk != nil && strings.TrimSpace(chunk.ReasoningContent) != "" {
|
||||
if emitErr := e.EmitReasoningText(blockID, stage, chunk.ReasoningContent, firstChunk); emitErr != nil {
|
||||
return fullText.String(), emitErr
|
||||
if digestor != nil {
|
||||
digestor.Append(chunk.ReasoningContent)
|
||||
}
|
||||
firstChunk = false
|
||||
}
|
||||
|
||||
// 推送 assistant 正文。
|
||||
if chunk != nil && chunk.Content != "" {
|
||||
if digestor != nil {
|
||||
digestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := e.EmitAssistantText(blockID, stage, chunk.Content, firstChunk); emitErr != nil {
|
||||
return fullText.String(), emitErr
|
||||
}
|
||||
@@ -466,9 +592,9 @@ func (e *ChunkEmitter) EmitStreamAssistantText(
|
||||
return fullText.String(), nil
|
||||
}
|
||||
|
||||
// EmitStreamReasoningText 从 StreamReader 逐 chunk 读取并实时推送 reasoning 文字。
|
||||
// EmitStreamReasoningText 从 StreamReader 逐 chunk 读取 reasoning,并转成低频 thinking_summary。
|
||||
//
|
||||
// 与 EmitStreamAssistantText 结构相同,但只推送 ReasoningContent,不推送 Content。
|
||||
// 与 EmitStreamAssistantText 结构相同,但不再输出 raw ReasoningContent。
|
||||
// 用于只需展示思考过程而无需展示正文的场景。
|
||||
func (e *ChunkEmitter) EmitStreamReasoningText(
|
||||
ctx context.Context,
|
||||
@@ -480,7 +606,15 @@ func (e *ChunkEmitter) EmitStreamReasoningText(
|
||||
}
|
||||
|
||||
var fullText strings.Builder
|
||||
firstChunk := true
|
||||
digestor, digestorErr := e.NewReasoningDigestor(ctx, blockID, stage)
|
||||
if digestorErr != nil {
|
||||
return "", digestorErr
|
||||
}
|
||||
defer func() {
|
||||
if digestor != nil {
|
||||
_ = digestor.Close(ctx)
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
chunk, err := reader.Recv()
|
||||
@@ -492,11 +626,10 @@ func (e *ChunkEmitter) EmitStreamReasoningText(
|
||||
}
|
||||
|
||||
if chunk != nil && strings.TrimSpace(chunk.ReasoningContent) != "" {
|
||||
if emitErr := e.EmitReasoningText(blockID, stage, chunk.ReasoningContent, firstChunk); emitErr != nil {
|
||||
return fullText.String(), emitErr
|
||||
if digestor != nil {
|
||||
digestor.Append(chunk.ReasoningContent)
|
||||
}
|
||||
fullText.WriteString(chunk.ReasoningContent)
|
||||
firstChunk = false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -40,6 +40,7 @@ type StreamExtraKind string
|
||||
|
||||
const (
|
||||
StreamExtraKindReasoningText StreamExtraKind = "reasoning_text"
|
||||
StreamExtraKindThinkingSummary StreamExtraKind = "thinking_summary"
|
||||
StreamExtraKindAssistantText StreamExtraKind = "assistant_text"
|
||||
StreamExtraKindStatus StreamExtraKind = "status"
|
||||
StreamExtraKindToolCall StreamExtraKind = "tool_call"
|
||||
@@ -71,6 +72,7 @@ type OpenAIChunkExtra struct {
|
||||
BlockID string `json:"block_id,omitempty"`
|
||||
Stage string `json:"stage,omitempty"`
|
||||
DisplayMode StreamDisplayMode `json:"display_mode,omitempty"`
|
||||
ThinkingSummary *StreamThinkingSummaryExtra `json:"thinking_summary,omitempty"`
|
||||
Status *StreamStatusExtra `json:"status,omitempty"`
|
||||
Tool *StreamToolExtra `json:"tool,omitempty"`
|
||||
Confirm *StreamConfirmExtra `json:"confirm,omitempty"`
|
||||
@@ -79,6 +81,20 @@ type OpenAIChunkExtra struct {
|
||||
Meta map[string]any `json:"meta,omitempty"`
|
||||
}
|
||||
|
||||
// StreamThinkingSummaryExtra 表示“流式思考摘要”事件。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. short_summary 仅用于 SSE 端快速展示短句,不要求与持久化内容完全一致;
|
||||
// 2. detail_summary 作为更完整的摘要正文,后续持久化层可直接复用;
|
||||
// 3. summary_seq / final / duration_seconds 由摘要调度层补充运行态信息,前端可据此去重和排序。
|
||||
type StreamThinkingSummaryExtra struct {
|
||||
SummarySeq int `json:"summary_seq,omitempty"`
|
||||
ShortSummary string `json:"short_summary,omitempty"`
|
||||
DetailSummary string `json:"detail_summary,omitempty"`
|
||||
Final bool `json:"final,omitempty"`
|
||||
DurationSeconds float64 `json:"duration_seconds,omitempty"`
|
||||
}
|
||||
|
||||
// StreamStatusExtra 表示普通阶段状态或提示性事件。
|
||||
type StreamStatusExtra struct {
|
||||
Code string `json:"code,omitempty"`
|
||||
@@ -195,6 +211,17 @@ func NewReasoningTextExtra(blockID, stage string) *OpenAIChunkExtra {
|
||||
}
|
||||
}
|
||||
|
||||
// NewThinkingSummaryExtra 创建“流式思考摘要”事件的 extra。
|
||||
func NewThinkingSummaryExtra(blockID, stage string, summary StreamThinkingSummaryExtra) *OpenAIChunkExtra {
|
||||
return &OpenAIChunkExtra{
|
||||
Kind: StreamExtraKindThinkingSummary,
|
||||
BlockID: blockID,
|
||||
Stage: stage,
|
||||
DisplayMode: StreamDisplayModeAppend,
|
||||
ThinkingSummary: &summary,
|
||||
}
|
||||
}
|
||||
|
||||
// NewAssistantTextExtra 创建“正文文字”事件的 extra。
|
||||
func NewAssistantTextExtra(blockID, stage string) *OpenAIChunkExtra {
|
||||
return &OpenAIChunkExtra{
|
||||
@@ -367,6 +394,7 @@ func hasStreamExtra(extra *OpenAIChunkExtra) bool {
|
||||
extra.BlockID != "" ||
|
||||
extra.Stage != "" ||
|
||||
extra.DisplayMode != "" ||
|
||||
extra.ThinkingSummary != nil ||
|
||||
extra.Status != nil ||
|
||||
extra.Tool != nil ||
|
||||
extra.Confirm != nil ||
|
||||
|
||||
599
backend/newAgent/stream/reasoning_digestor.go
Normal file
599
backend/newAgent/stream/reasoning_digestor.go
Normal file
@@ -0,0 +1,599 @@
|
||||
package newagentstream
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Default scheduling watermarks: a new summary call is only considered once
// enough new reasoning accumulated and the minimum interval elapsed.
const (
	defaultReasoningDigestMinNewRunes  = 120
	defaultReasoningDigestMinNewTokens = 80
	defaultReasoningDigestMinInterval  = 3 * time.Second
)

// ReasoningSummaryFunc performs the actual summary-model call.
//
// Responsibilities:
//  1. It only turns the given input into one summary result; scheduling,
//     throttling, the content gate and result dropping live elsewhere;
//  2. short/detail fields in the returned value are filled by the model or
//     its adapter layer;
//  3. summary_seq / final / duration_seconds are filled in uniformly by
//     ReasoningDigestor so upper layers never duplicate runtime state.
type ReasoningSummaryFunc func(ctx context.Context, input ReasoningSummaryInput) (StreamThinkingSummaryExtra, error)

// ReasoningSummarySink consumes one summary result that already passed the
// gate checks.
//
// Responsibilities:
//  1. The common implementation hands the result to
//     ChunkEmitter.EmitThinkingSummary;
//  2. The callback takes no part in single-flight, retries or watermark
//     decisions;
//  3. With a nil sink the digestor still maintains LatestSummary so callers
//     can pull the newest summary on demand.
type ReasoningSummarySink func(summary StreamThinkingSummaryExtra)

// ReasoningSummaryInput is the unified input injected into the summary caller.
//
// Responsibilities:
//  1. FullReasoning provides the complete reasoning buffer, suited for a
//     full re-summarization;
//  2. DeltaReasoning plus PreviousSummary provide incremental context, suited
//     for continuing an earlier summary;
//  3. CandidateSeq / Final / DurationSeconds express scheduler intent only;
//     the model is not required to echo them back.
type ReasoningSummaryInput struct {
	FullReasoning   string                      `json:"full_reasoning,omitempty"`
	DeltaReasoning  string                      `json:"delta_reasoning,omitempty"`
	PreviousSummary *StreamThinkingSummaryExtra `json:"previous_summary,omitempty"`
	CandidateSeq    int                         `json:"candidate_seq,omitempty"`
	Final           bool                        `json:"final,omitempty"`
	DurationSeconds float64                     `json:"duration_seconds,omitempty"`
}

// ReasoningDigestorOptions describes the scheduling parameters of the
// reasoning digestor. SummaryFunc is the only mandatory field; the
// constructor backfills defaults for the watermarks, interval, context and
// clock.
type ReasoningDigestorOptions struct {
	SummaryFunc    ReasoningSummaryFunc
	SummarySink    ReasoningSummarySink
	BaseContext    context.Context
	MinNewRunes    int
	MinNewTokens   int
	MinInterval    time.Duration
	SummaryTimeout time.Duration
	Now            func() time.Time
}
|
||||
|
||||
// ReasoningDigestor turns streaming reasoning text into low-frequency
// summary events.
//
// Responsibilities:
//  1. It owns buffering, single-flight, watermarks, the content gate and
//     Flush/Close; it never depends on AgentService directly;
//  2. Model calls and result consumption arrive only through the injected
//     SummaryFunc / SummarySink — no model selection happens here;
//  3. Once body text starts or the gate is closed explicitly, even successful
//     late summary results must be dropped so neither the frontend nor
//     persistence ever sees out-of-band data.
type ReasoningDigestor struct {
	summaryFunc    ReasoningSummaryFunc
	summarySink    ReasoningSummarySink
	baseContext    context.Context
	minNewRunes    int
	minNewTokens   int
	minInterval    time.Duration
	summaryTimeout time.Duration
	now            func() time.Time

	mu             sync.Mutex
	cond           *sync.Cond      // signals state changes; guarded by mu
	buffer         strings.Builder // full reasoning text accumulated so far
	deltaBuffer    strings.Builder // reasoning appended since the last summary request
	startedAt      time.Time       // set on the first Append; zero until reasoning begins
	lastRequestAt  time.Time
	pendingRunes   int // rune watermark accumulated toward the next summary
	pendingTokens  int // token watermark accumulated toward the next summary
	summarySeq     int
	latestSummary  *StreamThinkingSummaryExtra
	finalEmitted   bool
	inFlight       bool // single-flight flag: one summary request at a time
	gateClosed     bool // explicit close; no further summaries may be emitted
	contentStarted bool // body text started; late results must be dropped
	closed         bool // digestor shut down; Append becomes a no-op
	timer          *time.Timer
	timerArmed     bool
	currentCancel  context.CancelFunc
}

// reasoningDigestCall is the per-request snapshot handed to one background
// summary invocation: its context, cancel function, model input and whether
// this is the closing (final) summary.
type reasoningDigestCall struct {
	ctx   context.Context
	stop  context.CancelFunc
	input ReasoningSummaryInput
	final bool
}
|
||||
|
||||
// NewReasoningDigestor 创建一个只关注“流式思考摘要调度”的核心对象。
|
||||
//
|
||||
// 步骤说明:
|
||||
// 1. 先校验 SummaryFunc;它是唯一必填项,因为 Digestor 不在本文件里选择模型;
|
||||
// 2. 再补齐默认水位线和最小时间间隔,让调用方即使只传核心依赖也能启动;
|
||||
// 3. 最后只初始化并发控制原语,不在构造阶段启动常驻主循环,避免引入额外 goroutine 生命周期负担。
|
||||
func NewReasoningDigestor(options ReasoningDigestorOptions) (*ReasoningDigestor, error) {
|
||||
if options.SummaryFunc == nil {
|
||||
return nil, errors.New("reasoning digestor: SummaryFunc 不能为空")
|
||||
}
|
||||
|
||||
if options.MinNewRunes < 0 {
|
||||
options.MinNewRunes = 0
|
||||
}
|
||||
if options.MinNewTokens < 0 {
|
||||
options.MinNewTokens = 0
|
||||
}
|
||||
if options.MinNewRunes == 0 && options.MinNewTokens == 0 {
|
||||
options.MinNewRunes = defaultReasoningDigestMinNewRunes
|
||||
options.MinNewTokens = defaultReasoningDigestMinNewTokens
|
||||
}
|
||||
if options.MinInterval <= 0 {
|
||||
options.MinInterval = defaultReasoningDigestMinInterval
|
||||
}
|
||||
if options.BaseContext == nil {
|
||||
options.BaseContext = context.Background()
|
||||
}
|
||||
if options.Now == nil {
|
||||
options.Now = time.Now
|
||||
}
|
||||
|
||||
digestor := &ReasoningDigestor{
|
||||
summaryFunc: options.SummaryFunc,
|
||||
summarySink: options.SummarySink,
|
||||
baseContext: options.BaseContext,
|
||||
minNewRunes: options.MinNewRunes,
|
||||
minNewTokens: options.MinNewTokens,
|
||||
minInterval: options.MinInterval,
|
||||
summaryTimeout: options.SummaryTimeout,
|
||||
now: options.Now,
|
||||
}
|
||||
digestor.cond = sync.NewCond(&digestor.mu)
|
||||
return digestor, nil
|
||||
}
|
||||
|
||||
// Append 追加一段 reasoning chunk,并按水位线决定是否后台触发摘要。
|
||||
//
|
||||
// 步骤说明:
|
||||
// 1. 先把原始 reasoning 文本写入 full buffer,保证 Flush/Close 可以拿到全量上下文;
|
||||
// 2. 再把本轮新增文本记入 deltaBuffer 与 rune/token 水位线,用于“最小新增量”判断;
|
||||
// 3. 若正文闸门已关闭,则只保留缓冲快照,不再调度摘要;
|
||||
// 4. 若当前已有摘要请求在飞,则只更新 dirty/latest,不排队第二个请求,等单飞请求返回后再决定是否补一次。
|
||||
func (d *ReasoningDigestor) Append(reasoning string) {
|
||||
if d == nil || reasoning == "" {
|
||||
return
|
||||
}
|
||||
|
||||
var call reasoningDigestCall
|
||||
var shouldStart bool
|
||||
|
||||
d.mu.Lock()
|
||||
if d.closed {
|
||||
d.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
if d.startedAt.IsZero() {
|
||||
d.startedAt = d.now()
|
||||
}
|
||||
d.buffer.WriteString(reasoning)
|
||||
|
||||
if d.gateClosed || d.contentStarted {
|
||||
d.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
d.deltaBuffer.WriteString(reasoning)
|
||||
d.pendingRunes += utf8.RuneCountInString(reasoning)
|
||||
d.pendingTokens += estimateReasoningTokens(reasoning)
|
||||
d.finalEmitted = false
|
||||
|
||||
call, shouldStart = d.prepareSummaryLocked(d.baseContext, false, false)
|
||||
d.mu.Unlock()
|
||||
|
||||
if shouldStart {
|
||||
go d.runSummary(call)
|
||||
}
|
||||
}
|
||||
|
||||
// MarkContentStarted 标记“正文已经开始输出”。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 该方法会直接关闭摘要闸门;
|
||||
// 2. 它不回收旧摘要结果,但会丢弃后续任何尚未完成的摘要调用;
|
||||
// 3. 调用后即使继续 Append reasoning,也只保留缓冲,不再触发新摘要。
|
||||
func (d *ReasoningDigestor) MarkContentStarted() {
|
||||
d.closeGate(true)
|
||||
}
|
||||
|
||||
// CloseGate 显式关闭摘要闸门,但不额外声明正文已经开始。
|
||||
func (d *ReasoningDigestor) CloseGate() {
|
||||
d.closeGate(false)
|
||||
}
|
||||
|
||||
// Flush 在正文尚未开始时尝试补发最后一次摘要。
|
||||
//
|
||||
// 步骤说明:
|
||||
// 1. 先等待当前单飞请求结束,避免 Flush 与后台自动摘要并发跑两次;
|
||||
// 2. 若正文已经开始或闸门已关,则直接返回,不再补摘要;
|
||||
// 3. 若此前已经发过 final 且没有新增 reasoning,则跳过,避免重复 final 事件;
|
||||
// 4. 其余场景会强制走一次摘要,即使新增量还没达到自动触发水位线。
|
||||
func (d *ReasoningDigestor) Flush(ctx context.Context) error {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
call, shouldStart := d.prepareFlushCall(ctx)
|
||||
if !shouldStart {
|
||||
return nil
|
||||
}
|
||||
return d.runSummary(call)
|
||||
}
|
||||
|
||||
// Close 结束摘要器生命周期。
|
||||
//
|
||||
// 步骤说明:
|
||||
// 1. 若正文还未开始,先尝试 Flush 一次 final 摘要;
|
||||
// 2. 再关闭闸门、停止等待中的定时器,并取消正在进行的摘要调用;
|
||||
// 3. 最后等待单飞调用完全退出,避免遗留后台 goroutine 持续写结果。
|
||||
func (d *ReasoningDigestor) Close(ctx context.Context) error {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := d.Flush(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
d.mu.Lock()
|
||||
if d.closed {
|
||||
d.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
d.closed = true
|
||||
d.gateClosed = true
|
||||
d.stopTimerLocked()
|
||||
if d.currentCancel != nil {
|
||||
d.currentCancel()
|
||||
}
|
||||
for d.inFlight {
|
||||
d.cond.Wait()
|
||||
}
|
||||
d.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// LatestSummary 返回最近一次通过闸门校验并成功发布的摘要。
|
||||
func (d *ReasoningDigestor) LatestSummary() (StreamThinkingSummaryExtra, bool) {
|
||||
if d == nil {
|
||||
return StreamThinkingSummaryExtra{}, false
|
||||
}
|
||||
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
|
||||
if d.latestSummary == nil {
|
||||
return StreamThinkingSummaryExtra{}, false
|
||||
}
|
||||
return *cloneThinkingSummaryExtra(d.latestSummary), true
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) closeGate(markContentStarted bool) {
|
||||
if d == nil {
|
||||
return
|
||||
}
|
||||
|
||||
d.mu.Lock()
|
||||
if markContentStarted {
|
||||
d.contentStarted = true
|
||||
}
|
||||
d.gateClosed = true
|
||||
d.pendingRunes = 0
|
||||
d.pendingTokens = 0
|
||||
d.deltaBuffer.Reset()
|
||||
d.stopTimerLocked()
|
||||
if d.currentCancel != nil {
|
||||
d.currentCancel()
|
||||
}
|
||||
d.mu.Unlock()
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) prepareFlushCall(ctx context.Context) (reasoningDigestCall, bool) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
|
||||
if d.closed || d.gateClosed || d.contentStarted {
|
||||
return reasoningDigestCall{}, false
|
||||
}
|
||||
|
||||
d.stopTimerLocked()
|
||||
for d.inFlight {
|
||||
d.cond.Wait()
|
||||
if d.closed || d.gateClosed || d.contentStarted {
|
||||
return reasoningDigestCall{}, false
|
||||
}
|
||||
}
|
||||
|
||||
if strings.TrimSpace(d.buffer.String()) == "" {
|
||||
return reasoningDigestCall{}, false
|
||||
}
|
||||
if d.finalEmitted && d.pendingRunes == 0 && d.pendingTokens == 0 {
|
||||
return reasoningDigestCall{}, false
|
||||
}
|
||||
return d.prepareSummaryLocked(ctx, true, true)
|
||||
}
|
||||
|
||||
// prepareSummaryLocked decides, with d.mu held, whether a summary call should
// launch now, and if so builds the immutable call material.
//
// force skips the watermark/interval checks (used by Flush); final marks the
// resulting summary as the last one. It returns (call, true) only when the
// caller must actually start runSummary.
func (d *ReasoningDigestor) prepareSummaryLocked(parent context.Context, force bool, final bool) (reasoningDigestCall, bool) {
	if d.closed || d.gateClosed || d.contentStarted || d.inFlight {
		return reasoningDigestCall{}, false
	}

	fullReasoning := d.buffer.String()
	if strings.TrimSpace(fullReasoning) == "" {
		return reasoningDigestCall{}, false
	}

	// 1. Automatic summaries require both the "new content" watermark and the
	//    minimum time interval.
	// 2. If the new content is insufficient, just wait for further Append
	//    calls instead of issuing an empty request.
	// 3. If the interval has not elapsed, arm a single fallback timer rather
	//    than queueing multiple requests.
	if !force {
		if !d.reachedWatermarkLocked() {
			return reasoningDigestCall{}, false
		}
		wait := d.nextAllowedIntervalLocked()
		if wait > 0 {
			d.armTimerLocked(wait)
			return reasoningDigestCall{}, false
		}
	}

	callCtx, stop := d.newCallContext(parent)
	call := reasoningDigestCall{
		ctx:  callCtx,
		stop: stop,
		input: ReasoningSummaryInput{
			FullReasoning:   strings.Clone(fullReasoning),
			DeltaReasoning:  strings.Clone(d.deltaBuffer.String()),
			PreviousSummary: cloneThinkingSummaryExtra(d.latestSummary),
			CandidateSeq:    d.summarySeq + 1,
			Final:           final,
			DurationSeconds: d.durationSecondsLocked(),
		},
		final: final,
	}

	// Dirty state is reset only after the immutable snapshot was taken above;
	// a failed call will simply be retried later with fresh deltas.
	d.stopTimerLocked()
	d.inFlight = true
	d.lastRequestAt = d.now()
	d.pendingRunes = 0
	d.pendingTokens = 0
	d.deltaBuffer.Reset()
	d.currentCancel = stop

	return call, true
}
func (d *ReasoningDigestor) runSummary(call reasoningDigestCall) error {
|
||||
if call.stop == nil {
|
||||
return nil
|
||||
}
|
||||
defer call.stop()
|
||||
|
||||
summary, err := d.summaryFunc(call.ctx, call.input)
|
||||
if err != nil {
|
||||
// 1. 摘要失败时不把错误扩散回主流式链路,避免 reasoning 展示被摘要能力反向拖垮。
|
||||
// 2. 若失败期间又追加了新 reasoning,则仍按单飞规则尝试补下一次;否则等待后续 Append/Flush 兜底。
|
||||
_, _, nextCall, shouldStart := d.finishSummary(call.final, nil)
|
||||
if shouldStart {
|
||||
go d.runSummary(nextCall)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
normalized := normalizeThinkingSummary(summary, call.input.Final, call.input.DurationSeconds)
|
||||
emittedSummary, sink, nextCall, shouldStart := d.finishSummary(call.final, &normalized)
|
||||
if emittedSummary != nil && sink != nil {
|
||||
sink(*emittedSummary)
|
||||
}
|
||||
if shouldStart {
|
||||
go d.runSummary(nextCall)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// finishSummary clears the in-flight state under d.mu, publishes the summary
// if the gate is still open, and decides whether a follow-up call starts.
//
// It returns the cloned summary to emit (nil when dropped), the sink to hand
// it to, and an optional next single-flight call.
func (d *ReasoningDigestor) finishSummary(final bool, summary *StreamThinkingSummaryExtra) (*StreamThinkingSummaryExtra, ReasoningSummarySink, reasoningDigestCall, bool) {
	d.mu.Lock()
	defer d.mu.Unlock()

	d.inFlight = false
	d.currentCancel = nil
	d.cond.Broadcast()

	var emittedSummary *StreamThinkingSummaryExtra
	var sink ReasoningSummarySink

	// 1. Check the content gate first; once the main content starts, every
	//    late result must be dropped.
	// 2. Then fill in summary_seq/final/duration and cache LatestSummary for
	//    upper layers to read.
	// 3. If new reasoning accumulated during this request, start exactly one
	//    follow-up single-flight summary instead of queueing several.
	if summary != nil && !d.closed && !d.gateClosed && !d.contentStarted {
		normalized := *summary
		d.summarySeq++
		normalized.SummarySeq = d.summarySeq
		normalized.Final = final
		if normalized.DurationSeconds <= 0 {
			normalized.DurationSeconds = d.durationSecondsLocked()
		}
		d.latestSummary = cloneThinkingSummaryExtra(&normalized)
		d.finalEmitted = final
		emittedSummary = cloneThinkingSummaryExtra(&normalized)
		sink = d.summarySink
	}

	if d.closed || d.gateClosed || d.contentStarted || final {
		return emittedSummary, sink, reasoningDigestCall{}, false
	}

	nextCall, shouldStart := d.prepareSummaryLocked(d.baseContext, false, false)
	return emittedSummary, sink, nextCall, shouldStart
}
func (d *ReasoningDigestor) reachedWatermarkLocked() bool {
|
||||
return reachedReasoningWatermark(d.pendingRunes, d.pendingTokens, d.minNewRunes, d.minNewTokens)
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) nextAllowedIntervalLocked() time.Duration {
|
||||
if d.lastRequestAt.IsZero() {
|
||||
return 0
|
||||
}
|
||||
wait := d.minInterval - d.now().Sub(d.lastRequestAt)
|
||||
if wait < 0 {
|
||||
return 0
|
||||
}
|
||||
return wait
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) armTimerLocked(wait time.Duration) {
|
||||
if wait <= 0 || d.closed || d.gateClosed || d.contentStarted {
|
||||
return
|
||||
}
|
||||
if d.timer == nil {
|
||||
d.timer = time.AfterFunc(wait, d.onTimer)
|
||||
d.timerArmed = true
|
||||
return
|
||||
}
|
||||
if d.timerArmed {
|
||||
d.timer.Reset(wait)
|
||||
return
|
||||
}
|
||||
d.timer.Reset(wait)
|
||||
d.timerArmed = true
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) stopTimerLocked() {
|
||||
if d.timer == nil {
|
||||
return
|
||||
}
|
||||
if d.timer.Stop() {
|
||||
d.timerArmed = false
|
||||
return
|
||||
}
|
||||
d.timerArmed = false
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) onTimer() {
|
||||
if d == nil {
|
||||
return
|
||||
}
|
||||
|
||||
var call reasoningDigestCall
|
||||
var shouldStart bool
|
||||
|
||||
d.mu.Lock()
|
||||
d.timerArmed = false
|
||||
call, shouldStart = d.prepareSummaryLocked(d.baseContext, false, false)
|
||||
d.mu.Unlock()
|
||||
|
||||
if shouldStart {
|
||||
go d.runSummary(call)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) newCallContext(parent context.Context) (context.Context, context.CancelFunc) {
|
||||
if parent == nil {
|
||||
parent = d.baseContext
|
||||
}
|
||||
if parent == nil {
|
||||
parent = context.Background()
|
||||
}
|
||||
|
||||
baseCtx, baseCancel := context.WithCancel(parent)
|
||||
if d.summaryTimeout <= 0 {
|
||||
return baseCtx, baseCancel
|
||||
}
|
||||
|
||||
timeoutCtx, timeoutCancel := context.WithTimeout(baseCtx, d.summaryTimeout)
|
||||
return timeoutCtx, func() {
|
||||
timeoutCancel()
|
||||
baseCancel()
|
||||
}
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) durationSecondsLocked() float64 {
|
||||
if d.startedAt.IsZero() {
|
||||
return 0
|
||||
}
|
||||
duration := d.now().Sub(d.startedAt)
|
||||
if duration <= 0 {
|
||||
return 0
|
||||
}
|
||||
return float64(duration.Milliseconds()) / 1000
|
||||
}
|
||||
|
||||
// reachedReasoningWatermark reports whether either enabled watermark (runes
// or estimated tokens) has been met. A watermark of zero or less is disabled.
func reachedReasoningWatermark(pendingRunes, pendingTokens, minRunes, minTokens int) bool {
	runesHit := minRunes > 0 && pendingRunes >= minRunes
	tokensHit := minTokens > 0 && pendingTokens >= minTokens
	return runesHit || tokensHit
}
func normalizeThinkingSummary(summary StreamThinkingSummaryExtra, final bool, durationSeconds float64) StreamThinkingSummaryExtra {
|
||||
summary.ShortSummary = strings.TrimSpace(summary.ShortSummary)
|
||||
summary.DetailSummary = strings.TrimSpace(summary.DetailSummary)
|
||||
|
||||
// 1. 短摘要只是实时展示兜底,允许从长摘要压一个默认值。
|
||||
// 2. 反过来不能把短摘要补成 detail_summary,否则会绕过“短摘要不持久化”的产品语义。
|
||||
// 3. 若模型没有给 detail_summary,timeline 层会跳过持久化,仅保留本次 SSE 展示。
|
||||
if summary.ShortSummary == "" {
|
||||
summary.ShortSummary = summary.DetailSummary
|
||||
}
|
||||
summary.Final = final
|
||||
if summary.DurationSeconds <= 0 {
|
||||
summary.DurationSeconds = durationSeconds
|
||||
}
|
||||
return summary
|
||||
}
|
||||
|
||||
func cloneThinkingSummaryExtra(src *StreamThinkingSummaryExtra) *StreamThinkingSummaryExtra {
|
||||
if src == nil {
|
||||
return nil
|
||||
}
|
||||
clone := *src
|
||||
return &clone
|
||||
}
|
||||
|
||||
func estimateReasoningTokens(text string) int {
|
||||
text = strings.TrimSpace(text)
|
||||
if text == "" {
|
||||
return 0
|
||||
}
|
||||
|
||||
asciiRunes := 0
|
||||
totalTokens := 0
|
||||
for _, r := range text {
|
||||
switch {
|
||||
case unicode.IsSpace(r):
|
||||
if asciiRunes > 0 {
|
||||
totalTokens += compactASCIITokens(asciiRunes)
|
||||
asciiRunes = 0
|
||||
}
|
||||
case r <= unicode.MaxASCII && (unicode.IsLetter(r) || unicode.IsDigit(r)):
|
||||
asciiRunes++
|
||||
default:
|
||||
if asciiRunes > 0 {
|
||||
totalTokens += compactASCIITokens(asciiRunes)
|
||||
asciiRunes = 0
|
||||
}
|
||||
totalTokens++
|
||||
}
|
||||
}
|
||||
if asciiRunes > 0 {
|
||||
totalTokens += compactASCIITokens(asciiRunes)
|
||||
}
|
||||
return totalTokens
|
||||
}
|
||||
|
||||
// compactASCIITokens maps a run of ASCII letters/digits onto an estimated
// token count: roughly one token per four runes, never fewer than one for a
// non-empty run.
func compactASCIITokens(asciiRunes int) int {
	if asciiRunes <= 0 {
		return 0
	}
	tokens := (asciiRunes + 3) / 4
	if tokens < 1 {
		tokens = 1
	}
	return tokens
}
@@ -353,12 +353,11 @@ func (s *AgentService) runNormalChatFlow(
|
||||
|
||||
// 6. 执行真正的流式聊天。
|
||||
// fullText 用于后续写 Redis/持久化,outChan 用于把流片段实时推给前端。
|
||||
fullText, reasoningText, reasoningDurationSeconds, streamUsage, streamErr := s.streamChatFallback(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan, assistantReasoningStartedAt)
|
||||
fullText, _, reasoningDurationSeconds, streamUsage, streamErr := s.streamChatFallback(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan, assistantReasoningStartedAt, userID, chatID)
|
||||
if streamErr != nil {
|
||||
pushErrNonBlocking(errChan, streamErr)
|
||||
return
|
||||
}
|
||||
assistantReasoning := mergeAgentReasoningText(assistantReasoningPrefix, reasoningText)
|
||||
|
||||
// 6.1 流式 usage 并入请求级 token 统计器:
|
||||
// 6.1.1 route/quicknote/taskquery 等 Generate 调用由 callback 自动累加;
|
||||
@@ -413,7 +412,7 @@ func (s *AgentService) runNormalChatFlow(
|
||||
// 8. 后置持久化(助手消息):
|
||||
// 8.1 先写 Redis,保证下一轮上下文可见;
|
||||
// 8.2 再异步可靠落库,失败通过 errChan 回传给上层。
|
||||
assistantMsg := &schema.Message{Role: schema.Assistant, Content: fullText, ReasoningContent: assistantReasoning}
|
||||
assistantMsg := &schema.Message{Role: schema.Assistant, Content: fullText}
|
||||
if reasoningDurationSeconds > 0 {
|
||||
assistantMsg.Extra = map[string]any{"reasoning_duration_seconds": reasoningDurationSeconds}
|
||||
}
|
||||
@@ -426,7 +425,7 @@ func (s *AgentService) runNormalChatFlow(
|
||||
ConversationID: chatID,
|
||||
Role: "assistant",
|
||||
Message: fullText,
|
||||
ReasoningContent: assistantReasoning,
|
||||
ReasoningContent: "",
|
||||
ReasoningDurationSeconds: reasoningDurationSeconds,
|
||||
// 口径B:助手消息记录“本轮请求总 token”。
|
||||
TokensConsumed: requestTotalTokens,
|
||||
@@ -434,9 +433,6 @@ func (s *AgentService) runNormalChatFlow(
|
||||
pushErrNonBlocking(errChan, saveErr)
|
||||
} else {
|
||||
assistantTimelinePayload := map[string]any{}
|
||||
if strings.TrimSpace(assistantReasoning) != "" {
|
||||
assistantTimelinePayload["reasoning_content"] = strings.TrimSpace(assistantReasoning)
|
||||
}
|
||||
if reasoningDurationSeconds > 0 {
|
||||
assistantTimelinePayload["reasoning_duration_seconds"] = reasoningDurationSeconds
|
||||
}
|
||||
|
||||
@@ -182,10 +182,12 @@ func (s *AgentService) runNewAgentGraph(
|
||||
planClient := infrallm.WrapArkClient(s.AIHub.Max)
|
||||
executeClient := infrallm.WrapArkClient(s.AIHub.Max)
|
||||
deliverClient := infrallm.WrapArkClient(s.AIHub.Pro)
|
||||
summaryClient := infrallm.WrapArkClient(s.AIHub.Lite)
|
||||
|
||||
// 8. 适配 SSE emitter。
|
||||
sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan)
|
||||
chunkEmitter := newagentstream.NewChunkEmitter(sseEmitter, traceID, resolvedModelName, requestStart.Unix())
|
||||
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(summaryClient))
|
||||
// 关键卡片事件走统一时间线持久化,保证刷新后可重建。
|
||||
chunkEmitter.SetExtraEventHook(func(extra *newagentstream.OpenAIChunkExtra) {
|
||||
s.persistNewAgentTimelineExtraEvent(context.Background(), userID, chatID, extra)
|
||||
@@ -451,7 +453,9 @@ func (s *AgentService) persistNewAgentConversationMessage(
|
||||
persistMsg := &schema.Message{
|
||||
Role: msg.Role,
|
||||
Content: content,
|
||||
ReasoningContent: strings.TrimSpace(msg.ReasoningContent),
|
||||
// 可见消息持久化只保存正文;模型 raw reasoning 改由 thinking_summary 生成用户可见摘要,
|
||||
// 避免历史接口或时间线刷新时重新暴露内部思考文本。
|
||||
ReasoningContent: "",
|
||||
}
|
||||
if len(msg.Extra) > 0 {
|
||||
persistMsg.Extra = make(map[string]any, len(msg.Extra))
|
||||
@@ -498,9 +502,6 @@ func (s *AgentService) persistNewAgentConversationMessage(
|
||||
timelineKind = model.AgentTimelineKindAssistantText
|
||||
}
|
||||
timelinePayload := map[string]any{}
|
||||
if persistPayload.ReasoningContent != "" {
|
||||
timelinePayload["reasoning_content"] = persistPayload.ReasoningContent
|
||||
}
|
||||
if reasoningDurationSeconds > 0 {
|
||||
timelinePayload["reasoning_duration_seconds"] = reasoningDurationSeconds
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
"github.com/cloudwego/eino-ext/components/model/ark"
|
||||
@@ -25,6 +26,8 @@ func (s *AgentService) streamChatFallback(
|
||||
chatHistory []*schema.Message,
|
||||
outChan chan<- string,
|
||||
reasoningStartAt *time.Time,
|
||||
userID int,
|
||||
chatID string,
|
||||
) (string, string, int, *schema.TokenUsage, error) {
|
||||
messages := make([]*schema.Message, 0, len(chatHistory)+2)
|
||||
messages = append(messages, schema.SystemMessage(newagentprompt.SystemPrompt))
|
||||
@@ -46,6 +49,24 @@ func (s *AgentService) streamChatFallback(
|
||||
requestID := "chatcmpl-" + uuid.NewString()
|
||||
created := time.Now().Unix()
|
||||
firstChunk := true
|
||||
chunkEmitter := newagentstream.NewChunkEmitter(newagentstream.NewSSEPayloadEmitter(outChan), requestID, modelName, created)
|
||||
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(infrallm.WrapArkClient(s.AIHub.Lite)))
|
||||
chunkEmitter.SetExtraEventHook(func(extra *newagentstream.OpenAIChunkExtra) {
|
||||
s.persistNewAgentTimelineExtraEvent(context.Background(), userID, chatID, extra)
|
||||
})
|
||||
reasoningDigestor, digestorErr := chunkEmitter.NewReasoningDigestor(ctx, "fallback.speak", "fallback")
|
||||
if digestorErr != nil {
|
||||
return "", "", 0, nil, digestorErr
|
||||
}
|
||||
digestorClosed := false
|
||||
closeDigestor := func() {
|
||||
if reasoningDigestor == nil || digestorClosed {
|
||||
return
|
||||
}
|
||||
digestorClosed = true
|
||||
_ = reasoningDigestor.Close(ctx)
|
||||
}
|
||||
defer closeDigestor()
|
||||
|
||||
var localReasoningStartAt *time.Time
|
||||
if reasoningStartAt != nil && !reasoningStartAt.IsZero() {
|
||||
@@ -61,7 +82,6 @@ func (s *AgentService) streamChatFallback(
|
||||
defer reader.Close()
|
||||
|
||||
var fullText strings.Builder
|
||||
var reasoningText strings.Builder
|
||||
var tokenUsage *schema.TokenUsage
|
||||
for {
|
||||
chunk, recvErr := reader.Recv()
|
||||
@@ -85,26 +105,31 @@ func (s *AgentService) streamChatFallback(
|
||||
now := time.Now()
|
||||
reasoningEndAt = &now
|
||||
}
|
||||
// 1. fallback 链路同样不能透传 raw reasoning_content;
|
||||
// 2. 只把 reasoning 喂给摘要器,正文出现时立即关门丢弃后续摘要。
|
||||
if strings.TrimSpace(chunk.ReasoningContent) != "" && reasoningDigestor != nil {
|
||||
reasoningDigestor.Append(chunk.ReasoningContent)
|
||||
}
|
||||
if chunk.Content != "" {
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := chunkEmitter.EmitAssistantText("fallback.speak", "fallback", chunk.Content, firstChunk); emitErr != nil {
|
||||
return "", "", 0, nil, emitErr
|
||||
}
|
||||
fullText.WriteString(chunk.Content)
|
||||
reasoningText.WriteString(chunk.ReasoningContent)
|
||||
}
|
||||
|
||||
payload, payloadErr := newagentstream.ToOpenAIStream(chunk, requestID, modelName, created, firstChunk)
|
||||
if payloadErr != nil {
|
||||
return "", "", 0, nil, payloadErr
|
||||
}
|
||||
if payload != "" {
|
||||
outChan <- payload
|
||||
firstChunk = false
|
||||
}
|
||||
}
|
||||
}
|
||||
closeDigestor()
|
||||
|
||||
finishChunk, finishErr := newagentstream.ToOpenAIFinishStream(requestID, modelName, created)
|
||||
if finishErr != nil {
|
||||
if finishErr := chunkEmitter.EmitFinish("fallback.speak", "fallback"); finishErr != nil {
|
||||
return "", "", 0, nil, finishErr
|
||||
}
|
||||
outChan <- finishChunk
|
||||
outChan <- "[DONE]"
|
||||
if doneErr := chunkEmitter.EmitDone(); doneErr != nil {
|
||||
return "", "", 0, nil, doneErr
|
||||
}
|
||||
|
||||
reasoningDurationSeconds := 0
|
||||
if localReasoningStartAt != nil {
|
||||
@@ -117,5 +142,5 @@ func (s *AgentService) streamChatFallback(
|
||||
}
|
||||
}
|
||||
|
||||
return fullText.String(), reasoningText.String(), reasoningDurationSeconds, tokenUsage, nil
|
||||
return fullText.String(), "", reasoningDurationSeconds, tokenUsage, nil
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@@ -63,13 +64,13 @@ func (s *AgentService) GetConversationTimeline(ctx context.Context, userID int,
|
||||
return normalizeConversationTimelineItems(items), nil
|
||||
}
|
||||
|
||||
// appendConversationTimelineEvent 统一追加单条时间线事件到 Redis + MySQL。
|
||||
// appendConversationTimelineEvent 统一追加单条时间线事件到 Redis + outbox。
|
||||
//
|
||||
// 步骤化说明:
|
||||
// 1. 先从 Redis INCR 分配 seq,若 Redis 异常则回退 DB MAX(seq)+1;
|
||||
// 2. 再写 MySQL,保证刷新时至少有权威持久化;
|
||||
// 3. 最后追加 Redis 时间线列表,失败只记日志,不影响主链路返回;
|
||||
// 4. 返回分配到的 seq,便于后续扩展在 SSE meta 回传顺序号。
|
||||
// 1. 先分配同会话内单调递增的 seq,优先走 Redis,Redis 不可用时回退 DB;
|
||||
// 2. 再把事件同步追加到 Redis timeline cache,保证刷新前的用户体验连续;
|
||||
// 3. 最后发布 outbox 事件异步落 MySQL,与 chat history 的可靠落库方式对齐;
|
||||
// 4. 未注入 eventPublisher 时走同步 MySQL fallback,方便本地极简环境启动。
|
||||
func (s *AgentService) appendConversationTimelineEvent(
|
||||
ctx context.Context,
|
||||
userID int,
|
||||
@@ -95,86 +96,260 @@ func (s *AgentService) appendConversationTimelineEvent(
|
||||
return 0, errors.New("invalid timeline event identity")
|
||||
}
|
||||
|
||||
normalizedContent, normalizedPayload, shouldPersist := normalizeConversationTimelinePersistMaterial(normalizedKind, normalizedContent, payload)
|
||||
if !shouldPersist {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
seq, err := s.nextConversationTimelineSeq(ctx, userID, normalizedChatID)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
payloadJSON := marshalTimelinePayloadJSON(payload)
|
||||
persistPayload := model.ChatTimelinePersistPayload{
|
||||
persistPayload := (model.ChatTimelinePersistPayload{
|
||||
UserID: userID,
|
||||
ConversationID: normalizedChatID,
|
||||
Seq: seq,
|
||||
Kind: normalizedKind,
|
||||
Role: normalizedRole,
|
||||
Content: normalizedContent,
|
||||
PayloadJSON: payloadJSON,
|
||||
PayloadJSON: marshalTimelinePayloadJSON(normalizedPayload),
|
||||
TokensConsumed: tokensConsumed,
|
||||
}
|
||||
eventID, eventCreatedAt, err := s.repo.SaveConversationTimelineEvent(ctx, persistPayload)
|
||||
if err != nil {
|
||||
// 1. 并发极端场景下(例如 Redis seq 分配失败后 DB 兜底)可能产生重复 seq;
|
||||
// 2. 这里做一次“读取最新 MAX(seq)+1”的重试,避免主链路直接失败;
|
||||
// 3. 重试仍失败则返回错误,让调用方感知真实落库失败。
|
||||
if !isTimelineSeqConflictError(err) {
|
||||
return 0, err
|
||||
}
|
||||
maxSeq, seqErr := s.repo.GetConversationTimelineMaxSeq(ctx, userID, normalizedChatID)
|
||||
if seqErr != nil {
|
||||
return 0, err
|
||||
}
|
||||
persistPayload.Seq = maxSeq + 1
|
||||
var retryErr error
|
||||
eventID, eventCreatedAt, retryErr = s.repo.SaveConversationTimelineEvent(ctx, persistPayload)
|
||||
if retryErr != nil {
|
||||
return 0, retryErr
|
||||
}
|
||||
seq = persistPayload.Seq
|
||||
if s.cacheDAO != nil {
|
||||
if setErr := s.cacheDAO.SetConversationTimelineSeq(ctx, userID, normalizedChatID, seq); setErr != nil {
|
||||
log.Printf("时间线 seq 冲突重试后回写 Redis 失败 user=%d chat=%s seq=%d err=%v", userID, normalizedChatID, seq, setErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if s.cacheDAO != nil {
|
||||
}).Normalize()
|
||||
if s.eventPublisher != nil {
|
||||
now := time.Now()
|
||||
item := model.GetConversationTimelineItem{
|
||||
ID: eventID,
|
||||
Seq: seq,
|
||||
Kind: normalizedKind,
|
||||
Role: normalizedRole,
|
||||
Content: normalizedContent,
|
||||
Payload: cloneTimelinePayload(payload),
|
||||
TokensConsumed: tokensConsumed,
|
||||
}
|
||||
if eventCreatedAt != nil {
|
||||
item.CreatedAt = eventCreatedAt
|
||||
} else {
|
||||
item.CreatedAt = &now
|
||||
}
|
||||
if err := s.cacheDAO.AppendConversationTimelineEventToCache(ctx, userID, normalizedChatID, item); err != nil {
|
||||
log.Printf("追加会话时间线缓存失败 user=%d chat=%s seq=%d kind=%s err=%v", userID, normalizedChatID, seq, normalizedKind, err)
|
||||
}
|
||||
|
||||
// 1. 先写 Redis timeline cache,让刷新前的本地态和下一轮上下文都能立即看到这条事件。
|
||||
// 2. 再发布 outbox 事件,与 chat history 保持相同的“入队成功即返回”语义。
|
||||
// 3. 若 outbox 发布失败,这里返回 error 交给上层处理,不在本方法里偷偷回退成同步写库。
|
||||
s.appendConversationTimelineCacheNonBlocking(
|
||||
ctx,
|
||||
userID,
|
||||
normalizedChatID,
|
||||
buildConversationTimelineCacheItem(0, seq, normalizedKind, normalizedRole, normalizedContent, normalizedPayload, tokensConsumed, &now),
|
||||
)
|
||||
if err := eventsvc.PublishAgentTimelinePersistRequested(ctx, s.eventPublisher, persistPayload); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return seq, nil
|
||||
}
|
||||
|
||||
func isTimelineSeqConflictError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
text := strings.ToLower(err.Error())
|
||||
return strings.Contains(text, "duplicate") && strings.Contains(text, "uk_timeline_user_chat_seq")
|
||||
return s.appendConversationTimelineEventSync(ctx, userID, normalizedChatID, persistPayload, normalizedPayload)
|
||||
}
|
||||
|
||||
// persistNewAgentTimelineExtraEvent 把 SSE extra 卡片事件写入时间线。
|
||||
// appendConversationTimelineEventSync 在未启用 outbox 时同步写 MySQL。
|
||||
//
|
||||
// 步骤化说明:
|
||||
// 1. 本方法只作为 eventPublisher 为空时的降级路径,保证本地环境不依赖总线;
|
||||
// 2. 若 seq 唯一键冲突,读取 DB 最大 seq 后补一个新序号,语义与 outbox 消费者保持一致;
|
||||
// 3. MySQL 写入成功后再追加 Redis cache,让缓存拿到数据库生成的 id/created_at。
|
||||
func (s *AgentService) appendConversationTimelineEventSync(
|
||||
ctx context.Context,
|
||||
userID int,
|
||||
chatID string,
|
||||
persistPayload model.ChatTimelinePersistPayload,
|
||||
payload map[string]any,
|
||||
) (int64, error) {
|
||||
eventID, eventCreatedAt, err := s.repo.SaveConversationTimelineEvent(ctx, persistPayload)
|
||||
if err != nil {
|
||||
// 1. 这里的冲突通常来自 Redis seq key 过期或落后于 DB。
|
||||
// 2. 由于当前是同步写库链路,可以直接读取 DB 当前最大 seq 并补一个新序号。
|
||||
// 3. 若重试后仍失败,则把数据库错误原样抛给上层,避免悄悄吞掉真实问题。
|
||||
if !model.IsTimelineSeqConflictError(err) {
|
||||
return 0, err
|
||||
}
|
||||
maxSeq, seqErr := s.repo.GetConversationTimelineMaxSeq(ctx, userID, chatID)
|
||||
if seqErr != nil {
|
||||
return 0, seqErr
|
||||
}
|
||||
persistPayload.Seq = maxSeq + 1
|
||||
eventID, eventCreatedAt, err = s.repo.SaveConversationTimelineEvent(ctx, persistPayload)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if s.cacheDAO != nil {
|
||||
if setErr := s.cacheDAO.SetConversationTimelineSeq(ctx, userID, chatID, persistPayload.Seq); setErr != nil {
|
||||
log.Printf("回填时间线 seq 到 Redis 失败 user=%d chat=%s seq=%d err=%v", userID, chatID, persistPayload.Seq, setErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.appendConversationTimelineCacheNonBlocking(
|
||||
ctx,
|
||||
userID,
|
||||
chatID,
|
||||
buildConversationTimelineCacheItem(
|
||||
eventID,
|
||||
persistPayload.Seq,
|
||||
persistPayload.Kind,
|
||||
persistPayload.Role,
|
||||
persistPayload.Content,
|
||||
payload,
|
||||
persistPayload.TokensConsumed,
|
||||
eventCreatedAt,
|
||||
),
|
||||
)
|
||||
return persistPayload.Seq, nil
|
||||
}
|
||||
|
||||
// appendConversationTimelineCacheNonBlocking 尽力把单条 timeline 事件追加到 Redis。
|
||||
//
|
||||
// 步骤化说明:
|
||||
// 1. 缓存失败不能反向影响主链路,因为 MySQL/outbox 才是最终可靠写入;
|
||||
// 2. 这里统一记录错误日志,方便排查 Redis 不可用或 payload 序列化问题;
|
||||
// 3. item 由调用方提前标准化,本方法不再二次裁剪业务字段。
|
||||
func (s *AgentService) appendConversationTimelineCacheNonBlocking(
|
||||
ctx context.Context,
|
||||
userID int,
|
||||
chatID string,
|
||||
item model.GetConversationTimelineItem,
|
||||
) {
|
||||
if s.cacheDAO == nil {
|
||||
return
|
||||
}
|
||||
if err := s.cacheDAO.AppendConversationTimelineEventToCache(ctx, userID, chatID, item); err != nil {
|
||||
log.Printf("追加时间线缓存失败 user=%d chat=%s seq=%d kind=%s err=%v", userID, chatID, item.Seq, item.Kind, err)
|
||||
}
|
||||
}
|
||||
|
||||
// nextConversationTimelineSeq 负责分配一条新的 timeline seq。
|
||||
//
|
||||
// 步骤化说明:
|
||||
// 1. 优先走 Redis INCR,避免所有事件都串行依赖 MySQL;
|
||||
// 2. 再用 DB MAX(seq) 做一次自检,尽量把“Redis key 过期/落后”在写入前提前修正;
|
||||
// 3. 若 Redis 不可用,则直接回退到 DB MAX(seq)+1,并把结果尽力回填回 Redis。
|
||||
func (s *AgentService) nextConversationTimelineSeq(ctx context.Context, userID int, chatID string) (int64, error) {
|
||||
if s == nil || s.repo == nil {
|
||||
return 0, errors.New("agent service is not initialized")
|
||||
}
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
|
||||
normalizedChatID := strings.TrimSpace(chatID)
|
||||
if userID <= 0 || normalizedChatID == "" {
|
||||
return 0, errors.New("invalid timeline seq identity")
|
||||
}
|
||||
|
||||
if s.cacheDAO == nil {
|
||||
return s.nextConversationTimelineSeqFromDB(ctx, userID, normalizedChatID)
|
||||
}
|
||||
|
||||
candidateSeq, err := s.cacheDAO.IncrConversationTimelineSeq(ctx, userID, normalizedChatID)
|
||||
if err != nil {
|
||||
log.Printf("分配时间线 seq 时 Redis INCR 失败,回退 DB user=%d chat=%s err=%v", userID, normalizedChatID, err)
|
||||
return s.nextConversationTimelineSeqFromDB(ctx, userID, normalizedChatID)
|
||||
}
|
||||
|
||||
// 1. Redis key 缺失时,INCR 常会从 1 重新开始,容易和已有 DB 记录撞 seq。
|
||||
// 2. 这里额外对照一次 DB 最大 seq,把明显落后的顺序号提前修正,降低 outbox 消费时的补 seq 概率。
|
||||
// 3. 该自检不会看到“尚未消费到 MySQL 的新 outbox 事件”,因此真正的极端并发兜底仍由消费者承担。
|
||||
maxSeq, err := s.repo.GetConversationTimelineMaxSeq(ctx, userID, normalizedChatID)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if candidateSeq > maxSeq {
|
||||
return candidateSeq, nil
|
||||
}
|
||||
|
||||
repairedSeq := maxSeq + 1
|
||||
if err = s.cacheDAO.SetConversationTimelineSeq(ctx, userID, normalizedChatID, repairedSeq); err != nil {
|
||||
log.Printf("修正时间线 seq 到 Redis 失败 user=%d chat=%s seq=%d err=%v", userID, normalizedChatID, repairedSeq, err)
|
||||
}
|
||||
return repairedSeq, nil
|
||||
}
|
||||
|
||||
func (s *AgentService) nextConversationTimelineSeqFromDB(ctx context.Context, userID int, chatID string) (int64, error) {
|
||||
maxSeq, err := s.repo.GetConversationTimelineMaxSeq(ctx, userID, chatID)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
nextSeq := maxSeq + 1
|
||||
if s.cacheDAO != nil {
|
||||
if setErr := s.cacheDAO.SetConversationTimelineSeq(ctx, userID, chatID, nextSeq); setErr != nil {
|
||||
log.Printf("回填时间线 seq 到 Redis 失败 user=%d chat=%s seq=%d err=%v", userID, chatID, nextSeq, setErr)
|
||||
}
|
||||
}
|
||||
return nextSeq, nil
|
||||
}
|
||||
|
||||
// normalizeConversationTimelinePersistMaterial 负责把 timeline 原始输入收敛成“可缓存 + 可持久化”的口径。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 对普通事件只做浅拷贝,避免调用方后续继续改 map 影响已入队 payload;
|
||||
// 2. 对 thinking_summary 只保留 detail_summary 与必要 metadata,明确剔除 short_summary;
|
||||
// 3. 若 thinking_summary 最终没有 detail_summary,则返回 shouldPersist=false,仅保留实时 SSE 展示,不进入 timeline。
|
||||
func normalizeConversationTimelinePersistMaterial(kind string, content string, payload map[string]any) (string, map[string]any, bool) {
|
||||
normalizedKind := strings.ToLower(strings.TrimSpace(kind))
|
||||
normalizedContent := strings.TrimSpace(content)
|
||||
if normalizedKind != model.AgentTimelineKindThinkingSummary {
|
||||
return normalizedContent, cloneTimelinePayload(payload), true
|
||||
}
|
||||
return sanitizeThinkingSummaryPersistMaterial(normalizedContent, payload)
|
||||
}
|
||||
|
||||
func sanitizeThinkingSummaryPersistMaterial(content string, payload map[string]any) (string, map[string]any, bool) {
|
||||
detailSummary := readTimelinePayloadString(payload, "detail_summary")
|
||||
if detailSummary == "" {
|
||||
detailSummary = strings.TrimSpace(content)
|
||||
}
|
||||
if detailSummary == "" {
|
||||
return "", nil, false
|
||||
}
|
||||
|
||||
sanitized := make(map[string]any)
|
||||
copyTrimmedTimelinePayloadField(payload, sanitized, "stage")
|
||||
copyTrimmedTimelinePayloadField(payload, sanitized, "block_id")
|
||||
copyTrimmedTimelinePayloadField(payload, sanitized, "display_mode")
|
||||
copyTimelinePayloadFieldIfPresent(payload, sanitized, "summary_seq")
|
||||
copyTimelinePayloadFieldIfPresent(payload, sanitized, "final")
|
||||
copyTimelinePayloadFieldIfPresent(payload, sanitized, "duration_seconds")
|
||||
sanitized["detail_summary"] = detailSummary
|
||||
|
||||
return detailSummary, sanitized, true
|
||||
}
|
||||
|
||||
func copyTrimmedTimelinePayloadField(src map[string]any, dst map[string]any, key string) {
|
||||
if len(src) == 0 || dst == nil {
|
||||
return
|
||||
}
|
||||
value, ok := src[key]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
text, ok := value.(string)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
trimmed := strings.TrimSpace(text)
|
||||
if trimmed == "" {
|
||||
return
|
||||
}
|
||||
dst[key] = trimmed
|
||||
}
|
||||
|
||||
func copyTimelinePayloadFieldIfPresent(src map[string]any, dst map[string]any, key string) {
|
||||
if len(src) == 0 || dst == nil {
|
||||
return
|
||||
}
|
||||
value, ok := src[key]
|
||||
if !ok || value == nil {
|
||||
return
|
||||
}
|
||||
dst[key] = value
|
||||
}
|
||||
|
||||
// persistNewAgentTimelineExtraEvent 把 SSE extra 里的结构化事件写入时间线。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 只持久化真正需要刷新后重建的卡片事件;
|
||||
// 2. status/reasoning/finish 等临时过程信号不落时间线;
|
||||
// 3. 失败只记日志,不中断当前 SSE 输出。
|
||||
func (s *AgentService) persistNewAgentTimelineExtraEvent(ctx context.Context, userID int, chatID string, extra *newagentstream.OpenAIChunkExtra) {
|
||||
// 1. 只持久化刷新后仍需重建的业务事件;
|
||||
// 2. short_summary 这类临时展示信息会在 appendConversationTimelineEvent 内被过滤掉;
|
||||
// 3. 失败只记日志,不反向打断当前 SSE 输出。
|
||||
func (s *AgentService) persistNewAgentTimelineExtraEvent(
|
||||
ctx context.Context,
|
||||
userID int,
|
||||
chatID string,
|
||||
extra *newagentstream.OpenAIChunkExtra,
|
||||
) {
|
||||
kind, ok := mapTimelineKindFromStreamExtra(extra)
|
||||
if !ok {
|
||||
return
|
||||
@@ -193,30 +368,33 @@ func (s *AgentService) persistNewAgentTimelineExtraEvent(ctx context.Context, us
|
||||
buildTimelinePayloadFromStreamExtra(extra),
|
||||
0,
|
||||
); err != nil {
|
||||
log.Printf("写入 newAgent 卡片时间线失败 user=%d chat=%s kind=%s err=%v", userID, chatID, kind, err)
|
||||
log.Printf("写入 newAgent 时间线事件失败 user=%d chat=%s kind=%s err=%v", userID, chatID, kind, err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *AgentService) nextConversationTimelineSeq(ctx context.Context, userID int, chatID string) (int64, error) {
|
||||
if s.cacheDAO != nil {
|
||||
seq, err := s.cacheDAO.IncrConversationTimelineSeq(ctx, userID, chatID)
|
||||
if err == nil {
|
||||
return seq, nil
|
||||
func buildConversationTimelineCacheItem(
|
||||
eventID int64,
|
||||
seq int64,
|
||||
kind string,
|
||||
role string,
|
||||
content string,
|
||||
payload map[string]any,
|
||||
tokensConsumed int,
|
||||
createdAt *time.Time,
|
||||
) model.GetConversationTimelineItem {
|
||||
item := model.GetConversationTimelineItem{
|
||||
ID: eventID,
|
||||
Seq: seq,
|
||||
Kind: kind,
|
||||
Role: role,
|
||||
Content: content,
|
||||
Payload: cloneTimelinePayload(payload),
|
||||
TokensConsumed: tokensConsumed,
|
||||
}
|
||||
log.Printf("会话时间线 seq Redis 分配失败,回退 DB user=%d chat=%s err=%v", userID, chatID, err)
|
||||
if createdAt != nil {
|
||||
item.CreatedAt = createdAt
|
||||
}
|
||||
|
||||
maxSeq, err := s.repo.GetConversationTimelineMaxSeq(ctx, userID, chatID)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
seq := maxSeq + 1
|
||||
if s.cacheDAO != nil {
|
||||
if err := s.cacheDAO.SetConversationTimelineSeq(ctx, userID, chatID, seq); err != nil {
|
||||
log.Printf("会话时间线 seq 回填 Redis 失败 user=%d chat=%s seq=%d err=%v", userID, chatID, seq, err)
|
||||
}
|
||||
}
|
||||
return seq, nil
|
||||
return item
|
||||
}
|
||||
|
||||
func buildConversationTimelineItemsFromDB(events []model.AgentTimelineEvent) []model.GetConversationTimelineItem {
|
||||
@@ -296,7 +474,8 @@ func canonicalizeTimelineKind(kind string, role string) string {
|
||||
model.AgentTimelineKindToolResult,
|
||||
model.AgentTimelineKindConfirmRequest,
|
||||
model.AgentTimelineKindBusinessCard,
|
||||
model.AgentTimelineKindScheduleCompleted:
|
||||
model.AgentTimelineKindScheduleCompleted,
|
||||
model.AgentTimelineKindThinkingSummary:
|
||||
return normalizedKind
|
||||
case "text", "message", "query":
|
||||
if normalizedRole == "user" {
|
||||
@@ -337,6 +516,9 @@ func mapTimelineKindFromStreamExtra(extra *newagentstream.OpenAIChunkExtra) (str
|
||||
if extra == nil {
|
||||
return "", false
|
||||
}
|
||||
if isThinkingSummaryStreamExtra(extra) {
|
||||
return model.AgentTimelineKindThinkingSummary, true
|
||||
}
|
||||
switch extra.Kind {
|
||||
case newagentstream.StreamExtraKindToolCall:
|
||||
return model.AgentTimelineKindToolCall, true
|
||||
@@ -357,6 +539,9 @@ func buildTimelinePayloadFromStreamExtra(extra *newagentstream.OpenAIChunkExtra)
|
||||
if extra == nil {
|
||||
return nil
|
||||
}
|
||||
if isThinkingSummaryStreamExtra(extra) {
|
||||
return buildThinkingSummaryTimelinePayload(extra)
|
||||
}
|
||||
payload := map[string]any{
|
||||
"stage": strings.TrimSpace(extra.Stage),
|
||||
"block_id": strings.TrimSpace(extra.BlockID),
|
||||
@@ -400,6 +585,67 @@ func buildTimelinePayloadFromStreamExtra(extra *newagentstream.OpenAIChunkExtra)
|
||||
return payload
|
||||
}
|
||||
|
||||
func isThinkingSummaryStreamExtra(extra *newagentstream.OpenAIChunkExtra) bool {
|
||||
if extra == nil {
|
||||
return false
|
||||
}
|
||||
return strings.EqualFold(strings.TrimSpace(string(extra.Kind)), model.AgentTimelineKindThinkingSummary)
|
||||
}
|
||||
|
||||
func buildThinkingSummaryTimelinePayload(extra *newagentstream.OpenAIChunkExtra) map[string]any {
|
||||
payload := map[string]any{
|
||||
"stage": strings.TrimSpace(extra.Stage),
|
||||
"block_id": strings.TrimSpace(extra.BlockID),
|
||||
"display_mode": string(extra.DisplayMode),
|
||||
}
|
||||
|
||||
if extra.ThinkingSummary != nil {
|
||||
summary := extra.ThinkingSummary
|
||||
payload["summary_seq"] = summary.SummarySeq
|
||||
payload["final"] = summary.Final
|
||||
payload["duration_seconds"] = summary.DurationSeconds
|
||||
if detailSummary := strings.TrimSpace(summary.DetailSummary); detailSummary != "" {
|
||||
payload["detail_summary"] = detailSummary
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
if detailSummary := readTimelineExtraMetaString(extra.Meta, "detail_summary"); detailSummary != "" {
|
||||
payload["detail_summary"] = detailSummary
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
func readTimelineExtraMetaString(meta map[string]any, key string) string {
|
||||
if len(meta) == 0 {
|
||||
return ""
|
||||
}
|
||||
raw, ok := meta[key]
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
text, ok := raw.(string)
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(text)
|
||||
}
|
||||
|
||||
func readTimelinePayloadString(payload map[string]any, key string) string {
|
||||
if len(payload) == 0 {
|
||||
return ""
|
||||
}
|
||||
raw, ok := payload[key]
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
text, ok := raw.(string)
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(text)
|
||||
}
|
||||
|
||||
func cloneStreamBusinessCard(card *newagentstream.StreamBusinessCardExtra) map[string]any {
|
||||
if card == nil {
|
||||
return nil
|
||||
|
||||
112
backend/service/agentsvc/reasoning_summary.go
Normal file
112
backend/service/agentsvc/reasoning_summary.go
Normal file
@@ -0,0 +1,112 @@
|
||||
package agentsvc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
)
|
||||
|
||||
const reasoningSummaryMaxTokens = 700
|
||||
|
||||
type reasoningSummaryLLMResponse struct {
|
||||
ShortSummary string `json:"short_summary"`
|
||||
DetailSummary string `json:"detail_summary"`
|
||||
}
|
||||
|
||||
// makeReasoningSummaryFunc 把便宜模型封装成 stream 层可注入的摘要函数。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. service 层负责选择模型与 prompt,stream 层只负责调度和闸门;
|
||||
// 2. 这里不持久化摘要,持久化统一走 ChunkEmitter 的 extra hook;
|
||||
// 3. 摘要失败时返回 error,由 ReasoningDigestor 吞掉并等待下一次水位线/Flush 兜底。
|
||||
func (s *AgentService) makeReasoningSummaryFunc(client *infrallm.Client) newagentstream.ReasoningSummaryFunc {
|
||||
if client == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return func(ctx context.Context, input newagentstream.ReasoningSummaryInput) (newagentstream.StreamThinkingSummaryExtra, error) {
|
||||
previousSummary := ""
|
||||
if input.PreviousSummary != nil {
|
||||
previousSummary = input.PreviousSummary.DetailSummary
|
||||
if strings.TrimSpace(previousSummary) == "" {
|
||||
previousSummary = input.PreviousSummary.ShortSummary
|
||||
}
|
||||
}
|
||||
|
||||
messages := newagentprompt.BuildReasoningSummaryMessages(newagentprompt.ReasoningSummaryPromptInput{
|
||||
FullReasoning: input.FullReasoning,
|
||||
DeltaReasoning: input.DeltaReasoning,
|
||||
PreviousSummary: previousSummary,
|
||||
CandidateSeq: input.CandidateSeq,
|
||||
Final: input.Final,
|
||||
DurationSeconds: input.DurationSeconds,
|
||||
})
|
||||
|
||||
resp, rawResult, err := infrallm.GenerateJSON[reasoningSummaryLLMResponse](
|
||||
ctx,
|
||||
client,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
Temperature: 0.1,
|
||||
MaxTokens: reasoningSummaryMaxTokens,
|
||||
Thinking: infrallm.ThinkingModeDisabled,
|
||||
Metadata: map[string]any{
|
||||
"stage": "reasoning_summary",
|
||||
"candidate_seq": input.CandidateSeq,
|
||||
"final": input.Final,
|
||||
},
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("[WARN] reasoning 摘要模型调用失败 seq=%d final=%v err=%v raw=%s",
|
||||
input.CandidateSeq,
|
||||
input.Final,
|
||||
err,
|
||||
truncateReasoningSummaryRaw(rawResult),
|
||||
)
|
||||
return newagentstream.StreamThinkingSummaryExtra{}, err
|
||||
}
|
||||
|
||||
summary := newagentstream.StreamThinkingSummaryExtra{
|
||||
ShortSummary: strings.TrimSpace(resp.ShortSummary),
|
||||
DetailSummary: limitReasoningDetailSummary(
|
||||
resp.DetailSummary,
|
||||
newagentprompt.ReasoningSummaryDetailRuneLimit(input.FullReasoning, input.DeltaReasoning),
|
||||
),
|
||||
}
|
||||
if summary.ShortSummary == "" && summary.DetailSummary == "" {
|
||||
return newagentstream.StreamThinkingSummaryExtra{}, errors.New("reasoning 摘要模型返回空摘要")
|
||||
}
|
||||
return summary, nil
|
||||
}
|
||||
}
|
||||
|
||||
func limitReasoningDetailSummary(text string, maxRunes int) string {
|
||||
text = strings.TrimSpace(text)
|
||||
if text == "" || maxRunes <= 0 {
|
||||
return text
|
||||
}
|
||||
|
||||
runes := []rune(text)
|
||||
if len(runes) <= maxRunes {
|
||||
return text
|
||||
}
|
||||
return string(runes[:maxRunes])
|
||||
}
|
||||
|
||||
func truncateReasoningSummaryRaw(raw *infrallm.TextResult) string {
|
||||
if raw == nil {
|
||||
return ""
|
||||
}
|
||||
text := strings.TrimSpace(raw.Text)
|
||||
runes := []rune(text)
|
||||
if len(runes) <= 200 {
|
||||
return text
|
||||
}
|
||||
return string(runes[:200]) + "..."
|
||||
}
|
||||
326
backend/service/events/agent_timeline_persist.go
Normal file
326
backend/service/events/agent_timeline_persist.go
Normal file
@@ -0,0 +1,326 @@
|
||||
package events
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/dao"
|
||||
kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka"
|
||||
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
const EventTypeAgentTimelinePersistRequested = "agent.timeline.persist.requested"
|
||||
|
||||
// RegisterAgentTimelinePersistHandler 注册“会话时间线持久化”消费者处理器。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只负责 timeline 事件,不处理 chat_history 等其他业务消息;
|
||||
// 2. 只负责注册 handler,不负责总线启停;
|
||||
// 3. 通过 outbox 通用消费事务,把“时间线写库 + consumed 推进”放进同一事务;
|
||||
// 4. 若遇到 seq 唯一键冲突,会先判定是否属于重放幂等,再决定是否补新 seq 并回填 Redis。
|
||||
func RegisterAgentTimelinePersistHandler(
|
||||
bus *outboxinfra.EventBus,
|
||||
outboxRepo *outboxinfra.Repository,
|
||||
agentRepo *dao.AgentDAO,
|
||||
cacheDAO *dao.CacheDAO,
|
||||
) error {
|
||||
// 1. 依赖校验:缺少任一关键依赖都无法安全消费消息。
|
||||
if bus == nil {
|
||||
return errors.New("event bus is nil")
|
||||
}
|
||||
if outboxRepo == nil {
|
||||
return errors.New("outbox repository is nil")
|
||||
}
|
||||
if agentRepo == nil {
|
||||
return errors.New("agent repo is nil")
|
||||
}
|
||||
|
||||
handler := func(ctx context.Context, envelope kafkabus.Envelope) error {
|
||||
var payload model.ChatTimelinePersistPayload
|
||||
if unmarshalErr := json.Unmarshal(envelope.Payload, &payload); unmarshalErr != nil {
|
||||
// 1. payload 无法反序列化属于不可恢复错误,直接标 dead,避免无意义重试。
|
||||
_ = outboxRepo.MarkDead(ctx, envelope.OutboxID, "解析时间线持久化载荷失败: "+unmarshalErr.Error())
|
||||
return nil
|
||||
}
|
||||
|
||||
payload = payload.Normalize()
|
||||
if !payload.HasValidIdentity() {
|
||||
// 2. 这里只校验“能否唯一定位一条 timeline 记录”的最小字段集合。
|
||||
// 3. content / payload_json 是否为空由事件类型自行决定,不在这里一刀切限制。
|
||||
_ = outboxRepo.MarkDead(ctx, envelope.OutboxID, "时间线持久化载荷非法: user_id/conversation_id/seq/kind 非法")
|
||||
return nil
|
||||
}
|
||||
|
||||
refreshCache := false
|
||||
finalSeq := payload.Seq
|
||||
|
||||
// 4. 统一走 outbox 消费事务入口,保证“业务写入成功 -> consumed”原子一致。
|
||||
err := outboxRepo.ConsumeAndMarkConsumed(ctx, envelope.OutboxID, func(tx *gorm.DB) error {
|
||||
finalPayload, repaired, persistErr := persistConversationTimelineEventInTx(ctx, tx, agentRepo.WithTx(tx), payload)
|
||||
if persistErr != nil {
|
||||
return persistErr
|
||||
}
|
||||
refreshCache = repaired
|
||||
finalSeq = finalPayload.Seq
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 5. 只有发生“seq 冲突且补了新 seq”时,才需要重建 Redis timeline。
|
||||
// 5.1 原因:主链路已经先写过 Redis,常规成功无需重复回写。
|
||||
// 5.2 若发生补 seq,不重建会留下旧 seq 的缓存残影,刷新后顺序会错乱。
|
||||
// 5.3 缓存重建失败只记日志,不能反向把已 consumed 的 outbox 回滚。
|
||||
if refreshCache {
|
||||
if refreshErr := rebuildConversationTimelineCache(ctx, agentRepo, cacheDAO, payload.UserID, payload.ConversationID, finalSeq); refreshErr != nil {
|
||||
log.Printf("重建时间线缓存失败 user=%d chat=%s seq=%d err=%v", payload.UserID, payload.ConversationID, finalSeq, refreshErr)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return bus.RegisterEventHandler(EventTypeAgentTimelinePersistRequested, handler)
|
||||
}
|
||||
|
||||
// PublishAgentTimelinePersistRequested 发布“会话时间线持久化请求”事件。
|
||||
//
|
||||
// 设计目的:
|
||||
// 1. 让业务层只传 DTO,不重复拼事件元数据;
|
||||
// 2. 统一以 conversation_id 作为 MessageKey / AggregateID,尽量降低同会话乱序概率;
|
||||
// 3. 发布失败显式返回 error,由调用方决定是否中断主链路。
|
||||
func PublishAgentTimelinePersistRequested(
|
||||
ctx context.Context,
|
||||
publisher outboxinfra.EventPublisher,
|
||||
payload model.ChatTimelinePersistPayload,
|
||||
) error {
|
||||
if publisher == nil {
|
||||
return errors.New("event publisher is nil")
|
||||
}
|
||||
|
||||
payload = payload.Normalize()
|
||||
if !payload.HasValidIdentity() {
|
||||
return errors.New("invalid timeline persist payload")
|
||||
}
|
||||
|
||||
return publisher.Publish(ctx, outboxinfra.PublishRequest{
|
||||
EventType: EventTypeAgentTimelinePersistRequested,
|
||||
EventVersion: outboxinfra.DefaultEventVersion,
|
||||
MessageKey: payload.ConversationID,
|
||||
AggregateID: payload.ConversationID,
|
||||
Payload: payload,
|
||||
})
|
||||
}
|
||||
|
||||
// persistConversationTimelineEventInTx 负责在单个事务里完成 timeline 事件写库。
|
||||
//
|
||||
// 步骤化说明:
|
||||
// 1. 先按 payload 原始 seq 尝试写入;
|
||||
// 2. 若命中 seq 唯一键冲突,先查询同 seq 记录,判断是否属于“重放同一事件”;
|
||||
// 3. 若不是重放,而是 Redis seq 漂移导致的新旧事件撞 seq,则用 max(seq)+1 重新分配;
|
||||
// 4. 最多修复 3 次,避免异常数据把消费者拖进无限循环。
|
||||
func persistConversationTimelineEventInTx(
|
||||
ctx context.Context,
|
||||
tx *gorm.DB,
|
||||
agentRepo *dao.AgentDAO,
|
||||
payload model.ChatTimelinePersistPayload,
|
||||
) (model.ChatTimelinePersistPayload, bool, error) {
|
||||
if tx == nil {
|
||||
return payload, false, errors.New("transaction is nil")
|
||||
}
|
||||
if agentRepo == nil {
|
||||
return payload, false, errors.New("agent repo is nil")
|
||||
}
|
||||
|
||||
working := payload.Normalize()
|
||||
repaired := false
|
||||
|
||||
for attempt := 0; attempt < 3; attempt++ {
|
||||
if _, _, err := agentRepo.SaveConversationTimelineEvent(ctx, working); err == nil {
|
||||
return working, repaired, nil
|
||||
} else if !model.IsTimelineSeqConflictError(err) {
|
||||
return working, repaired, err
|
||||
}
|
||||
|
||||
// 1. 先判断是否属于“同一条事件被重复消费”。
|
||||
// 2. 若库里已有记录且字段完全一致,说明前一次其实已经成功落库,本次可视为幂等成功。
|
||||
// 3. 若字段不一致,再进入“补新 seq”分支,避免把真正的新事件吞掉。
|
||||
existing, findErr := findConversationTimelineEventBySeq(ctx, tx, working.UserID, working.ConversationID, working.Seq)
|
||||
if findErr == nil && working.MatchesStoredEvent(existing) {
|
||||
return working, repaired, nil
|
||||
}
|
||||
if findErr != nil && !errors.Is(findErr, gorm.ErrRecordNotFound) {
|
||||
return working, repaired, findErr
|
||||
}
|
||||
|
||||
maxSeq, maxErr := loadConversationTimelineMaxSeq(ctx, tx, working.UserID, working.ConversationID)
|
||||
if maxErr != nil {
|
||||
return working, repaired, maxErr
|
||||
}
|
||||
working.Seq = maxSeq + 1
|
||||
repaired = true
|
||||
}
|
||||
|
||||
return working, repaired, fmt.Errorf("timeline seq repair exceeded limit user=%d chat=%s", working.UserID, working.ConversationID)
|
||||
}
|
||||
|
||||
func findConversationTimelineEventBySeq(
|
||||
ctx context.Context,
|
||||
tx *gorm.DB,
|
||||
userID int,
|
||||
conversationID string,
|
||||
seq int64,
|
||||
) (model.AgentTimelineEvent, error) {
|
||||
var event model.AgentTimelineEvent
|
||||
err := tx.WithContext(ctx).
|
||||
Where("user_id = ? AND chat_id = ? AND seq = ?", userID, strings.TrimSpace(conversationID), seq).
|
||||
Take(&event).Error
|
||||
return event, err
|
||||
}
|
||||
|
||||
func loadConversationTimelineMaxSeq(
|
||||
ctx context.Context,
|
||||
tx *gorm.DB,
|
||||
userID int,
|
||||
conversationID string,
|
||||
) (int64, error) {
|
||||
var maxSeq int64
|
||||
err := tx.WithContext(ctx).
|
||||
Model(&model.AgentTimelineEvent{}).
|
||||
Where("user_id = ? AND chat_id = ?", userID, strings.TrimSpace(conversationID)).
|
||||
Select("COALESCE(MAX(seq), 0)").
|
||||
Scan(&maxSeq).Error
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return maxSeq, nil
|
||||
}
|
||||
|
||||
// rebuildConversationTimelineCache 在“补新 seq”后重建 Redis timeline 缓存。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 这里只在缓存存在时执行;未接 Redis 的环境直接跳过即可;
|
||||
// 2. 需要整表重建而不是只 append 一条,因为旧缓存里已经存在错误 seq 的事件;
|
||||
// 3. 这里不抽到 agentsvc 复用,是因为 events 不能反向依赖 service,否则会形成循环依赖。
|
||||
func rebuildConversationTimelineCache(
|
||||
ctx context.Context,
|
||||
agentRepo *dao.AgentDAO,
|
||||
cacheDAO *dao.CacheDAO,
|
||||
userID int,
|
||||
conversationID string,
|
||||
finalSeq int64,
|
||||
) error {
|
||||
if cacheDAO == nil || agentRepo == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
events, err := agentRepo.ListConversationTimelineEvents(ctx, userID, conversationID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
items := buildConversationTimelineCacheItems(events)
|
||||
if err = cacheDAO.SetConversationTimelineToCache(ctx, userID, conversationID, items); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(items) > 0 {
|
||||
finalSeq = items[len(items)-1].Seq
|
||||
}
|
||||
return cacheDAO.SetConversationTimelineSeq(ctx, userID, conversationID, finalSeq)
|
||||
}
|
||||
|
||||
func buildConversationTimelineCacheItems(events []model.AgentTimelineEvent) []model.GetConversationTimelineItem {
|
||||
if len(events) == 0 {
|
||||
return make([]model.GetConversationTimelineItem, 0)
|
||||
}
|
||||
|
||||
items := make([]model.GetConversationTimelineItem, 0, len(events))
|
||||
for _, event := range events {
|
||||
item := model.GetConversationTimelineItem{
|
||||
ID: event.ID,
|
||||
Seq: event.Seq,
|
||||
Kind: strings.TrimSpace(event.Kind),
|
||||
TokensConsumed: event.TokensConsumed,
|
||||
CreatedAt: event.CreatedAt,
|
||||
}
|
||||
if event.Role != nil {
|
||||
item.Role = strings.TrimSpace(*event.Role)
|
||||
}
|
||||
if event.Content != nil {
|
||||
item.Content = strings.TrimSpace(*event.Content)
|
||||
}
|
||||
if event.Payload != nil {
|
||||
var payload map[string]any
|
||||
if err := json.Unmarshal([]byte(strings.TrimSpace(*event.Payload)), &payload); err == nil && len(payload) > 0 {
|
||||
item.Payload = payload
|
||||
}
|
||||
}
|
||||
items = append(items, item)
|
||||
}
|
||||
return normalizeConversationTimelineCacheItems(items)
|
||||
}
|
||||
|
||||
func normalizeConversationTimelineCacheItems(items []model.GetConversationTimelineItem) []model.GetConversationTimelineItem {
|
||||
if len(items) == 0 {
|
||||
return make([]model.GetConversationTimelineItem, 0)
|
||||
}
|
||||
|
||||
normalized := make([]model.GetConversationTimelineItem, 0, len(items))
|
||||
for _, item := range items {
|
||||
role := strings.ToLower(strings.TrimSpace(item.Role))
|
||||
kind := canonicalizeConversationTimelineKind(item.Kind, role)
|
||||
|
||||
if kind == "" {
|
||||
switch role {
|
||||
case "user":
|
||||
kind = model.AgentTimelineKindUserText
|
||||
case "assistant":
|
||||
kind = model.AgentTimelineKindAssistantText
|
||||
}
|
||||
}
|
||||
if role == "" {
|
||||
switch kind {
|
||||
case model.AgentTimelineKindUserText:
|
||||
role = "user"
|
||||
case model.AgentTimelineKindAssistantText:
|
||||
role = "assistant"
|
||||
}
|
||||
}
|
||||
|
||||
item.Kind = kind
|
||||
item.Role = role
|
||||
normalized = append(normalized, item)
|
||||
}
|
||||
return normalized
|
||||
}
|
||||
|
||||
func canonicalizeConversationTimelineKind(kind string, role string) string {
|
||||
normalizedKind := strings.ToLower(strings.TrimSpace(kind))
|
||||
normalizedRole := strings.ToLower(strings.TrimSpace(role))
|
||||
|
||||
switch normalizedKind {
|
||||
case model.AgentTimelineKindUserText,
|
||||
model.AgentTimelineKindAssistantText,
|
||||
model.AgentTimelineKindToolCall,
|
||||
model.AgentTimelineKindToolResult,
|
||||
model.AgentTimelineKindConfirmRequest,
|
||||
model.AgentTimelineKindBusinessCard,
|
||||
model.AgentTimelineKindScheduleCompleted,
|
||||
model.AgentTimelineKindThinkingSummary:
|
||||
return normalizedKind
|
||||
case "text", "message", "query":
|
||||
if normalizedRole == "user" {
|
||||
return model.AgentTimelineKindUserText
|
||||
}
|
||||
if normalizedRole == "assistant" {
|
||||
return model.AgentTimelineKindAssistantText
|
||||
}
|
||||
}
|
||||
return normalizedKind
|
||||
}
|
||||
389
docs/frontend/newagent_thinking_summary_对接说明.md
Normal file
389
docs/frontend/newagent_thinking_summary_对接说明.md
Normal file
@@ -0,0 +1,389 @@
|
||||
# NewAgent 思考摘要前端对接说明
|
||||
|
||||
## 背景
|
||||
|
||||
后端已经不再把模型原始 `reasoning_content` 直接透传给前端。新的展示入口是 SSE 顶层 `extra.kind = "thinking_summary"` 事件。
|
||||
|
||||
目标体验:
|
||||
|
||||
- 用户等待模型深度思考时,前端每隔几秒收到一条短摘要,作为当前思考状态的轻量提示。
|
||||
- 展开后展示稍长的 `detail_summary`,多条按时间追加。
|
||||
- 模型开始输出正文后,当前思考摘要停止更新。
|
||||
- 刷新会话后,只恢复长摘要,不恢复短摘要。
|
||||
|
||||
## 实时 SSE 协议
|
||||
|
||||
聊天接口仍然是:
|
||||
|
||||
```http
|
||||
POST /api/v1/agent/chat
|
||||
Content-Type: application/json
|
||||
Accept: text/event-stream
|
||||
```
|
||||
|
||||
SSE 每个业务包仍是标准格式:
|
||||
|
||||
```text
|
||||
data: {json}
|
||||
|
||||
data: [DONE]
|
||||
```
|
||||
|
||||
后端保活心跳是 SSE 注释行:
|
||||
|
||||
```text
|
||||
: ping
|
||||
```
|
||||
|
||||
前端按现有逻辑忽略不能 JSON.parse 的块即可。
|
||||
|
||||
## thinking_summary 事件
|
||||
|
||||
实时思考摘要事件没有 `delta.content`,也没有 `delta.reasoning_content`。前端应从顶层 `extra.thinking_summary` 读取。
|
||||
|
||||
示例:
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "trace-id",
|
||||
"object": "chat.completion.chunk",
|
||||
"created": 1777399000,
|
||||
"model": "pro",
|
||||
"extra": {
|
||||
"kind": "thinking_summary",
|
||||
"block_id": "plan.speak",
|
||||
"stage": "plan",
|
||||
"display_mode": "append",
|
||||
"thinking_summary": {
|
||||
"summary_seq": 1,
|
||||
"short_summary": "正在梳理计划",
|
||||
"detail_summary": "正在把用户目标拆成可执行步骤,并检查是否需要补充约束。",
|
||||
"duration_seconds": 3.214
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
字段说明:
|
||||
|
||||
| 字段 | 说明 |
|
||||
| --- | --- |
|
||||
| `extra.kind` | 固定为 `thinking_summary`。 |
|
||||
| `extra.block_id` | 当前摘要所属展示块,例如 `plan.speak`、`execute.speak`、`fallback.speak`。建议作为分组 key 的一部分。 |
|
||||
| `extra.stage` | 当前节点阶段,例如 `plan`、`execute`、`fallback`。 |
|
||||
| `extra.display_mode` | 当前固定为 `append`,表示长摘要按条追加。 |
|
||||
| `thinking_summary.summary_seq` | 同一个摘要器内递增,用于忽略重复或乱序摘要。不要当作全局 timeline seq。 |
|
||||
| `thinking_summary.short_summary` | 实时短摘要,只用于当前流式展示,不持久化。 |
|
||||
| `thinking_summary.detail_summary` | 展开态长摘要,按 append 语义追加;刷新后也只恢复这个字段。 |
|
||||
| `thinking_summary.duration_seconds` | 从首次收到 reasoning 到生成该摘要的耗时秒数,可能是小数。 |
|
||||
| `thinking_summary.final` | 可选。若出现 `true`,表示该摘要器在没有正文打断的情况下自然收口。不要依赖它一定出现。 |
|
||||
|
||||
已删除字段:
|
||||
|
||||
- `state` 已从协议、prompt、timeline 持久化里删除,前端不要再依赖或展示。
|
||||
|
||||
## 前端处理建议
|
||||
|
||||
建议把思考摘要作为 assistant 消息内的一个子结构,而不是普通正文。
|
||||
|
||||
推荐 key:
|
||||
|
||||
```ts
|
||||
const key = extra.block_id || extra.stage || 'thinking'
|
||||
```
|
||||
|
||||
推荐类型:
|
||||
|
||||
```ts
|
||||
export interface ThinkingSummaryPayload {
|
||||
summary_seq?: number
|
||||
short_summary?: string
|
||||
detail_summary?: string
|
||||
final?: boolean
|
||||
duration_seconds?: number
|
||||
}
|
||||
|
||||
export interface ThinkingSummaryBlock {
|
||||
key: string
|
||||
stage?: string
|
||||
blockId?: string
|
||||
latestSeq: number
|
||||
latestShort: string
|
||||
details: Array<{
|
||||
seq: number
|
||||
text: string
|
||||
durationSeconds?: number
|
||||
final?: boolean
|
||||
}>
|
||||
active: boolean
|
||||
collapsed: boolean
|
||||
}
|
||||
```
|
||||
|
||||
实时处理伪代码:
|
||||
|
||||
```ts
|
||||
function handleThinkingSummary(extra: StreamExtra, message: AssistantMessage) {
|
||||
if (extra.kind !== 'thinking_summary') return false
|
||||
|
||||
const summary = extra.thinking_summary
|
||||
if (!summary) return true
|
||||
|
||||
const key = extra.block_id || extra.stage || 'thinking'
|
||||
const block = ensureThinkingSummaryBlock(message, key, {
|
||||
stage: extra.stage,
|
||||
blockId: extra.block_id,
|
||||
})
|
||||
|
||||
const seq = summary.summary_seq ?? block.latestSeq + 1
|
||||
if (seq <= block.latestSeq) return true
|
||||
|
||||
block.latestSeq = seq
|
||||
block.active = summary.final !== true
|
||||
|
||||
if (summary.short_summary?.trim()) {
|
||||
block.latestShort = summary.short_summary.trim()
|
||||
}
|
||||
|
||||
if (summary.detail_summary?.trim()) {
|
||||
block.details.push({
|
||||
seq,
|
||||
text: summary.detail_summary.trim(),
|
||||
durationSeconds: summary.duration_seconds,
|
||||
final: summary.final,
|
||||
})
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
```
|
||||
|
||||
正文开始时的处理:
|
||||
|
||||
```ts
|
||||
function handleAssistantContentStart(message: AssistantMessage) {
|
||||
// 后端正文一出现就会停止当前 block 的摘要;
|
||||
// 前端这里也可以把活跃思考块收口,避免动效继续闪。
|
||||
message.thinkingSummaryBlocks?.forEach(block => {
|
||||
block.active = false
|
||||
})
|
||||
}
|
||||
```
|
||||
|
||||
注意:
|
||||
|
||||
- 收到 `thinking_summary` 时,不要追加到 `assistantMessage.content`。
|
||||
- 收到 `thinking_summary` 时,不要写入旧的 `assistantMessage.reasoning`。
|
||||
- 若仍收到旧链路 `delta.reasoning_content`,可以保留兼容,但新样式应优先使用 `thinking_summary`。
|
||||
- `summary_seq` 只在同一个 `block_id/stage` 下去重;不同 block 不要互相比较。
|
||||
|
||||
## 展示语义
|
||||
|
||||
短摘要:
|
||||
|
||||
- 展示最新一条 `short_summary`。
|
||||
- 适合放在折叠态标题、胶囊、加载条旁边。
|
||||
- 不要持久化到本地历史,也不要在刷新恢复后强行补出来。
|
||||
|
||||
长摘要:
|
||||
|
||||
- 每次收到非空 `detail_summary` 就追加一条。
|
||||
- 展开态展示 `details` 列表。
|
||||
- 如果你想做得更像 Gemini/豆包,可以折叠态只露最新短摘要,展开态按时间展示长摘要列表。
|
||||
|
||||
收口条件:
|
||||
|
||||
- 收到第一段 `delta.content`:关闭当前 assistant 消息里的活跃思考态。
|
||||
- 收到 `finish_reason` 或 `[DONE]`:关闭所有活跃思考态。
|
||||
- 收到 `thinking_summary.final === true`:可以关闭对应 block,但不要依赖它总会出现。
|
||||
|
||||
## 历史 timeline 恢复
|
||||
|
||||
刷新会话时读取:
|
||||
|
||||
```http
|
||||
GET /api/v1/agent/conversation-timeline?conversation_id={conversation_id}
|
||||
```
|
||||
|
||||
统一响应仍是:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "0",
|
||||
"info": "success",
|
||||
"data": []
|
||||
}
|
||||
```
|
||||
|
||||
`thinking_summary` timeline item 示例:
|
||||
|
||||
```json
|
||||
{
|
||||
"id": 123,
|
||||
"seq": 8,
|
||||
"kind": "thinking_summary",
|
||||
"content": "正在把用户目标拆成可执行步骤,并检查是否需要补充约束。",
|
||||
"payload": {
|
||||
"stage": "plan",
|
||||
"block_id": "plan.speak",
|
||||
"display_mode": "append",
|
||||
"summary_seq": 1,
|
||||
"detail_summary": "正在把用户目标拆成可执行步骤,并检查是否需要补充约束。",
|
||||
"duration_seconds": 3.214
|
||||
},
|
||||
"created_at": "2026-04-28T21:00:00+08:00"
|
||||
}
|
||||
```
|
||||
|
||||
历史恢复规则:
|
||||
|
||||
- 只恢复 `detail_summary`,没有 `short_summary`。
|
||||
- 按 timeline item 的 `seq` 排序渲染即可,后端已升序返回。
|
||||
- 可用 `payload.block_id || payload.stage || "thinking"` 归组到对应 assistant 消息附近。
|
||||
- 如果当前前端还没做跨事件归组,可以先把它渲染为 assistant 消息里的“思考摘要条目”,位置按 timeline 顺序插入。
|
||||
|
||||
建议更新现有前端类型:
|
||||
|
||||
```ts
|
||||
export interface TimelineThinkingSummaryPayload {
|
||||
stage?: string
|
||||
block_id?: string
|
||||
display_mode?: 'append'
|
||||
summary_seq?: number
|
||||
detail_summary?: string
|
||||
duration_seconds?: number
|
||||
final?: boolean
|
||||
}
|
||||
|
||||
export interface TimelineEvent {
|
||||
id: number
|
||||
seq: number
|
||||
kind:
|
||||
| 'user_text'
|
||||
| 'assistant_text'
|
||||
| 'tool_call'
|
||||
| 'tool_result'
|
||||
| 'confirm_request'
|
||||
| 'schedule_completed'
|
||||
| 'business_card'
|
||||
| 'thinking_summary'
|
||||
role?: 'user' | 'assistant'
|
||||
content?: string
|
||||
payload?: {
|
||||
stage?: string
|
||||
block_id?: string
|
||||
display_mode?: 'append' | 'replace' | 'card'
|
||||
thinking_summary?: never
|
||||
detail_summary?: string
|
||||
summary_seq?: number
|
||||
duration_seconds?: number
|
||||
final?: boolean
|
||||
tool?: TimelineToolPayload
|
||||
confirm?: TimelineConfirmPayload
|
||||
business_card?: TimelineBusinessCardPayload
|
||||
}
|
||||
tokens_consumed?: number
|
||||
created_at?: string
|
||||
}
|
||||
```
|
||||
|
||||
## 与正文/工具卡片的关系
|
||||
|
||||
同一轮流里可能出现:
|
||||
|
||||
1. `thinking_summary`
|
||||
2. `tool_call` / `tool_result`
|
||||
3. `assistant_text` 或 `delta.content`
|
||||
4. `finish`
|
||||
5. `[DONE]`
|
||||
|
||||
前端建议:
|
||||
|
||||
- `thinking_summary` 是“等待过程”组件。
|
||||
- `tool_call` / `tool_result` 继续走现有工具卡片。
|
||||
- `delta.content` 继续追加到 assistant 正文。
|
||||
- `finish` / `[DONE]` 只负责收尾,不需要生成可见消息。
|
||||
|
||||
## 测试用例
|
||||
|
||||
### 1. 只有摘要,还没正文
|
||||
|
||||
输入事件:
|
||||
|
||||
```json
|
||||
{
|
||||
"extra": {
|
||||
"kind": "thinking_summary",
|
||||
"block_id": "plan.speak",
|
||||
"stage": "plan",
|
||||
"display_mode": "append",
|
||||
"thinking_summary": {
|
||||
"summary_seq": 1,
|
||||
"short_summary": "正在理解需求",
|
||||
"detail_summary": "正在识别用户的目标、约束和需要补充的信息。",
|
||||
"duration_seconds": 2.1
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
预期:
|
||||
|
||||
- 折叠态显示“正在理解需求”。
|
||||
- 展开态新增一条 detail。
|
||||
- 正文区域不新增文字。
|
||||
|
||||
### 2. 多条摘要追加
|
||||
|
||||
输入 `summary_seq=1,2,3`。
|
||||
|
||||
预期:
|
||||
|
||||
- `latestShort` 使用第 3 条短摘要。
|
||||
- `details` 有 3 条,按收到顺序或 seq 升序展示。
|
||||
|
||||
### 3. 乱序或重复摘要
|
||||
|
||||
已处理到 `summary_seq=3` 后,又收到 `summary_seq=2`。
|
||||
|
||||
预期:
|
||||
|
||||
- 忽略旧事件,不回退短摘要,不追加 detail。
|
||||
|
||||
### 4. 正文开始
|
||||
|
||||
收到:
|
||||
|
||||
```json
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"delta": { "content": "我整理好了,下面是建议:" }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
预期:
|
||||
|
||||
- 当前活跃思考块停止 loading 动效。
|
||||
- 正文正常追加。
|
||||
- 后续若仍意外收到同 block 摘要,可按 seq 处理,但 UI 上建议不再重新激活。
|
||||
|
||||
### 5. 历史恢复
|
||||
|
||||
timeline 返回 `kind=thinking_summary`。
|
||||
|
||||
预期:
|
||||
|
||||
- 只展示 `payload.detail_summary || content`。
|
||||
- 不展示短摘要占位。
|
||||
- 不需要显示 `state`,协议里已经没有这个字段。
|
||||
|
||||
## 最小改动清单
|
||||
|
||||
1. `StreamEventPayload.extra` 增加 `thinking_summary` 字段。
|
||||
2. `TimelineEvent.kind` 增加 `thinking_summary`。
|
||||
3. SSE 解析里在 `handleStreamExtraEvent` 增加 `extra.kind === "thinking_summary"` 分支。
|
||||
4. 收到正文 `delta.content` 时,把当前思考摘要块置为非活跃。
|
||||
5. 历史 timeline 恢复时支持 `kind === "thinking_summary"`,只恢复长摘要。
|
||||
@@ -71,6 +71,16 @@ export interface TaskRecordCardData {
|
||||
export type BusinessCardType = 'task_query' | 'task_record'
|
||||
export type TaskRecordSource = 'quick_note' | 'create_task'
|
||||
|
||||
export interface TimelineThinkingSummaryPayload {
|
||||
stage?: string
|
||||
block_id?: string
|
||||
display_mode?: 'append'
|
||||
summary_seq?: number
|
||||
detail_summary?: string
|
||||
duration_seconds?: number
|
||||
final?: boolean
|
||||
}
|
||||
|
||||
export interface TimelineBusinessCardPayload {
|
||||
card_type: BusinessCardType
|
||||
title?: string
|
||||
@@ -92,16 +102,21 @@ export interface TimelineEvent {
|
||||
| 'interrupt'
|
||||
| 'status'
|
||||
| 'business_card'
|
||||
| 'thinking_summary'
|
||||
role?: 'user' | 'assistant'
|
||||
content?: string
|
||||
payload?: {
|
||||
reasoning_content?: string
|
||||
stage?: string
|
||||
block_id?: string
|
||||
display_mode?: 'card'
|
||||
display_mode?: 'card' | 'append'
|
||||
tool?: TimelineToolPayload
|
||||
confirm?: TimelineConfirmPayload
|
||||
business_card?: TimelineBusinessCardPayload
|
||||
summary_seq?: number
|
||||
detail_summary?: string
|
||||
duration_seconds?: number
|
||||
final?: boolean
|
||||
}
|
||||
tokens_consumed?: number
|
||||
created_at: string
|
||||
|
||||
@@ -256,6 +256,7 @@ const unavailableHistoryMap = reactive<Record<string, boolean>>({})
|
||||
const thinkingMessageMap = reactive<Record<string, boolean>>({})
|
||||
const reasoningCollapsedMap = reactive<Record<string, boolean>>({})
|
||||
const reasoningStartedAtMap = reactive<Record<string, number>>({})
|
||||
const reasoningCurrentShortSummaryMap = reactive<Record<string, string>>({})
|
||||
const reasoningDurationMap = reactive<Record<string, number>>({})
|
||||
const confirmOnlyStreamMap = reactive<Record<string, boolean>>({})
|
||||
const confirmVisiblePrefixMap = reactive<Record<string, boolean>>({})
|
||||
@@ -931,7 +932,7 @@ function appendAssistantReasoningChunk(messageId: string, chunk: string) {
|
||||
|
||||
// 记录块级别的起始时间和初始折叠状态
|
||||
reasoningStartedAtMap[blockId] = Date.now()
|
||||
reasoningCollapsedMap[blockId] = false
|
||||
reasoningCollapsedMap[blockId] = true
|
||||
|
||||
assistantTimelineLastKindMap[messageId] = 'reasoning'
|
||||
}
|
||||
@@ -1130,6 +1131,7 @@ function cleanupHiddenAssistantMessageState(messageId: string) {
|
||||
delete thinkingMessageMap[messageId]
|
||||
delete reasoningCollapsedMap[messageId]
|
||||
delete reasoningStartedAtMap[messageId]
|
||||
delete reasoningCurrentShortSummaryMap[messageId]
|
||||
delete reasoningDurationMap[messageId]
|
||||
delete confirmOnlyStreamMap[messageId]
|
||||
delete confirmVisiblePrefixMap[messageId]
|
||||
@@ -1318,8 +1320,24 @@ function syncConversationListItemFromMeta(
|
||||
}
|
||||
}
|
||||
|
||||
function renderMessageMarkdown(content: string) {
|
||||
return renderMarkdown(content)
|
||||
function renderMessageMarkdown(content: string, isStreaming = false) {
|
||||
let html = renderMarkdown(content)
|
||||
if (isStreaming) {
|
||||
const dotHtml = '<span class="thinking-dot-inline"></span>'
|
||||
// 1. 找到最后一个能容纳行内文本的闭合标签(如 </p>、</li>、</code> 等),
|
||||
// 并在该标签之前插入圆点,这样圆点就始终位于文字流的末端。
|
||||
// 2. 需要从后往前搜索,避免匹配到中间段落的闭合标签。
|
||||
// 3. 如果找不到(纯文本无标签),则直接追加到末尾。
|
||||
// 4. code 属于行内文本容器,pre 属于外层包裹容器,保证代码块场景下圆点深入到代码内部。
|
||||
const inlineContainerPattern = /<\/(p|li|td|th|h[1-6]|code)>\s*(<\/(ol|ul|table|div|blockquote|pre)>\s*)*$/i
|
||||
const match = html.match(inlineContainerPattern)
|
||||
if (match && match.index !== undefined) {
|
||||
html = html.substring(0, match.index) + dotHtml + html.substring(match.index)
|
||||
} else {
|
||||
html += dotHtml
|
||||
}
|
||||
}
|
||||
return html
|
||||
}
|
||||
|
||||
function isStreamingMessage(message: AssistantMessage) {
|
||||
@@ -1431,6 +1449,11 @@ function markReasoningFinished(blockId: string, messageId: string) {
|
||||
reasoningDurationMap[blockId] = Math.max(1, Math.round((Date.now() - startedAt) / 1000))
|
||||
}
|
||||
thinkingMessageMap[messageId] = false
|
||||
|
||||
// 若被展开,则思考完毕后自动闭合
|
||||
if (reasoningCollapsedMap[blockId] === false) {
|
||||
reasoningCollapsedMap[blockId] = true
|
||||
}
|
||||
}
|
||||
|
||||
function getReasoningDurationSeconds(blockId: string) {
|
||||
@@ -1448,13 +1471,15 @@ function getReasoningDurationSeconds(blockId: string) {
|
||||
}
|
||||
|
||||
function getReasoningStatusLabel(block: DisplayAssistantBlock) {
|
||||
const durationSeconds = getReasoningDurationSeconds(block.id)
|
||||
if (durationSeconds > 0) {
|
||||
return `已思考(用时 ${durationSeconds} 秒)`
|
||||
const isThinking = block.sourceId === activeStreamingMessageId.value && thinkingMessageMap[block.sourceId]
|
||||
|
||||
if (isThinking) {
|
||||
// 状态栏显示当前阶段的短摘要
|
||||
return reasoningCurrentShortSummaryMap[block.id] || '正在思考...'
|
||||
}
|
||||
|
||||
const isThinking = block.sourceId === activeStreamingMessageId.value && thinkingMessageMap[block.sourceId]
|
||||
return isThinking ? '思考中' : '已思考'
|
||||
// 思考结束后,状态栏显示固定文案
|
||||
return '已完成深度思考'
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1635,16 +1660,25 @@ function getDisplayAssistantBlocks(dm: DisplayMessage): DisplayAssistantBlock[]
|
||||
})
|
||||
}
|
||||
|
||||
if (shouldShowDisplayAnsweringIndicator(dm)) {
|
||||
if (shouldShowDisplayAnsweringIndicator(dm) && blocks.length === 0) {
|
||||
const maxSeq = blocks.length > 0 ? Math.max(...blocks.map((item) => item.seq)) : 0
|
||||
blocks.push({
|
||||
id: `${dm.id}:content-indicator`,
|
||||
type: 'content_indicator',
|
||||
seq: maxSeq + 1,
|
||||
})
|
||||
} as any)
|
||||
}
|
||||
|
||||
return blocks.sort((left, right) => left.seq - right.seq)
|
||||
const sortedBlocks = blocks.sort((left, right) => left.seq - right.seq)
|
||||
|
||||
// 核心修复:确保全消息流中只有一个点。
|
||||
// 只有当整个 DisplayMessage 处于流式状态,且当前块是最后一块时,才标记为 isStreaming。
|
||||
if (isDisplayStreaming(dm) && sortedBlocks.length > 0) {
|
||||
const lastBlock = sortedBlocks[sortedBlocks.length - 1] as any
|
||||
lastBlock.isStreaming = true
|
||||
}
|
||||
|
||||
return sortedBlocks
|
||||
}
|
||||
|
||||
function getToolTraceStateLabel(state: ToolTraceState): string {
|
||||
@@ -1661,9 +1695,8 @@ function getToolTraceStateLabel(state: ToolTraceState): string {
|
||||
}
|
||||
|
||||
function shouldShowDisplayAnsweringIndicator(dm: DisplayMessage): boolean {
|
||||
return isDisplayStreaming(dm) &&
|
||||
dm.sources.every(m => thinkingMessageMap[m.id] !== true) &&
|
||||
!dm.content.trim()
|
||||
// 基础判断:处于流式,且还没有任何实质性内容(包括推理和正文)
|
||||
return isDisplayStreaming(dm) && !dm.content.trim()
|
||||
}
|
||||
|
||||
function getDisplayReasoningStatusLabel(dm: DisplayMessage): string {
|
||||
@@ -2534,7 +2567,7 @@ function prepareAssistantMessageForStreaming(message: AssistantMessage, createdA
|
||||
message.reasoning = ''
|
||||
message.createdAt = createdAt
|
||||
thinkingMessageMap[message.id] = isManualThinkingEnabled(selectedThinkingMode.value)
|
||||
reasoningCollapsedMap[message.id] = false
|
||||
reasoningCollapsedMap[message.id] = true
|
||||
delete reasoningStartedAtMap[message.id]
|
||||
delete reasoningDurationMap[message.id]
|
||||
clearToolTraceState(message.id)
|
||||
@@ -2914,7 +2947,7 @@ async function sendMessageInternal(options: SendMessageOptions = {}) {
|
||||
})
|
||||
|
||||
thinkingMessageMap[assistantMessage.id] = isManualThinkingEnabled(selectedThinkingMode.value)
|
||||
reasoningCollapsedMap[assistantMessage.id] = false
|
||||
reasoningCollapsedMap[assistantMessage.id] = true
|
||||
activeStreamingMessageId.value = assistantMessage.id
|
||||
|
||||
messageInput.value = ''
|
||||
@@ -2966,7 +2999,7 @@ async function sendMessageInternal(options: SendMessageOptions = {}) {
|
||||
}
|
||||
ElMessage.error(error instanceof Error ? error.message : '发送消息失败,请稍后重试')
|
||||
}
|
||||
reasoningCollapsedMap[assistantMessage.id] = false
|
||||
reasoningCollapsedMap[assistantMessage.id] = true
|
||||
} finally {
|
||||
streamAbortController.value = null
|
||||
activeStreamingMessageId.value = ''
|
||||
@@ -3181,7 +3214,7 @@ onBeforeUnmount(() => {
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
<div v-else class="chat-message__markdown" v-html="renderMessageMarkdown(dm.content)" />
|
||||
<div v-else class="chat-message__markdown" v-html="renderMessageMarkdown(dm.content, false)" />
|
||||
</div>
|
||||
<div v-if="!isEditingUserMessage(dm.id)" class="chat-message__action-bar chat-message__action-bar--user">
|
||||
<button
|
||||
@@ -3207,7 +3240,7 @@ onBeforeUnmount(() => {
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
<span class="chat-message__time chat-message__time--user">{{ formatMessageTime(dm.createdAt) }}</span>
|
||||
|
||||
</div>
|
||||
|
||||
<div v-else class="chat-message__assistant-flow">
|
||||
@@ -3245,7 +3278,10 @@ onBeforeUnmount(() => {
|
||||
|
||||
<div v-else-if="block.type === 'reasoning'" class="chat-message__reasoning">
|
||||
<div class="chat-message__reasoning-head">
|
||||
<div class="chat-message__reasoning-title">
|
||||
<div
|
||||
class="chat-message__reasoning-title"
|
||||
:class="{ 'chat-message__reasoning-title--shimmering': activeStreamingMessageId === block.sourceId && thinkingMessageMap[block.sourceId] }"
|
||||
>
|
||||
<span class="chat-message__reasoning-icon">
|
||||
<svg
|
||||
class="chat-message__reasoning-icon-svg"
|
||||
@@ -3267,7 +3303,6 @@ onBeforeUnmount(() => {
|
||||
</svg>
|
||||
</span>
|
||||
<span class="chat-message__reasoning-status">{{ getReasoningStatusLabel(block) }}</span>
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
class="chat-message__reasoning-toggle"
|
||||
@@ -3293,27 +3328,36 @@ onBeforeUnmount(() => {
|
||||
</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Transition name="reasoning-bounce">
|
||||
<div v-if="isReasoningCollapsed(block.id) === false" class="chat-message__reasoning-body">
|
||||
<div
|
||||
v-if="block.text"
|
||||
class="chat-message__markdown chat-message__markdown--reasoning"
|
||||
v-html="renderMessageMarkdown(block.text || '')"
|
||||
:class="{ 'chat-message__markdown--streaming': (block as any).isStreaming }"
|
||||
v-html="renderMessageMarkdown(block.text || '', (block as any).isStreaming)"
|
||||
/>
|
||||
<div v-else class="chat-message__streaming chat-message__streaming--reasoning">
|
||||
<div class="thinking-indicator">
|
||||
<span class="thinking-indicator__text">正在思考</span>
|
||||
</div>
|
||||
<div class="thinking-dot"></div>
|
||||
</div>
|
||||
</div>
|
||||
</Transition>
|
||||
</div>
|
||||
|
||||
<div v-else-if="block.type === 'business_card' && block.businessCard" class="chat-message__business-card">
|
||||
<BusinessCardRenderer :payload="block.businessCard" />
|
||||
</div>
|
||||
|
||||
<div v-else-if="block.type === 'content'" class="chat-message__assistant-content">
|
||||
<div class="chat-message__markdown chat-message__markdown--assistant" v-html="renderMessageMarkdown(block.text || '')" />
|
||||
<div
|
||||
v-else-if="block.type === 'content'"
|
||||
class="chat-message__assistant-content"
|
||||
>
|
||||
<div
|
||||
class="chat-message__markdown chat-message__markdown--assistant"
|
||||
:class="{ 'chat-message__markdown--streaming': (block as any).isStreaming }"
|
||||
v-html="renderMessageMarkdown(block.text || '', (block as any).isStreaming)"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<template v-else-if="block.type === 'schedule_card' && block.schedulePreview">
|
||||
@@ -3324,9 +3368,7 @@ onBeforeUnmount(() => {
|
||||
</template>
|
||||
|
||||
<div v-else-if="block.type === 'content_indicator'" class="assistant-timeline__answering-indicator">
|
||||
<div class="thinking-indicator">
|
||||
<span class="thinking-indicator__text">正在思考</span>
|
||||
</div>
|
||||
<div class="thinking-dot"></div>
|
||||
</div>
|
||||
</div>
|
||||
</TransitionGroup>
|
||||
@@ -3344,7 +3386,7 @@ onBeforeUnmount(() => {
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
<span class="chat-message__time">{{ formatMessageTime(dm.createdAt) }}</span>
|
||||
|
||||
</div>
|
||||
</article>
|
||||
</TransitionGroup>
|
||||
@@ -3727,6 +3769,7 @@ onBeforeUnmount(() => {
|
||||
filter: blur(8px);
|
||||
}
|
||||
|
||||
|
||||
.assistant-shell {
|
||||
height: 100%;
|
||||
min-height: 0;
|
||||
@@ -4891,6 +4934,7 @@ onBeforeUnmount(() => {
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
color: #5a6577;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
/* --- Tooling & Selector Beautification --- */
|
||||
@@ -4966,8 +5010,8 @@ onBeforeUnmount(() => {
|
||||
}
|
||||
|
||||
.chat-message__reasoning-status {
|
||||
font-size: 13px;
|
||||
font-weight: 600;
|
||||
font-size: 15px;
|
||||
font-weight: 500;
|
||||
line-height: 1.35;
|
||||
}
|
||||
|
||||
@@ -4975,7 +5019,7 @@ onBeforeUnmount(() => {
|
||||
width: 16px;
|
||||
height: 16px;
|
||||
display: inline-flex;
|
||||
color: #4f76ea;
|
||||
color: #94a3b8;
|
||||
}
|
||||
|
||||
.chat-message__reasoning-icon-svg {
|
||||
@@ -4998,8 +5042,8 @@ onBeforeUnmount(() => {
|
||||
}
|
||||
|
||||
.chat-message__reasoning-toggle:hover {
|
||||
background: rgba(79, 118, 234, 0.1);
|
||||
color: #4f76ea;
|
||||
background: rgba(148, 163, 184, 0.1);
|
||||
color: #64748b;
|
||||
}
|
||||
|
||||
.chat-message__reasoning-chevron {
|
||||
@@ -5020,7 +5064,7 @@ onBeforeUnmount(() => {
|
||||
.chat-message__reasoning-body {
|
||||
margin: 10px 0 10px 7px;
|
||||
padding-left: 16px;
|
||||
border-left: 2px dashed rgba(59, 130, 246, 0.3); /* 改为虚线,更具“思考中”的科技感 */
|
||||
border-left: 2px dashed rgba(148, 163, 184, 0.4); /* 灰色虚线(同 debug 页) */
|
||||
font-style: italic;
|
||||
color: #64748b;
|
||||
}
|
||||
@@ -5171,6 +5215,19 @@ onBeforeUnmount(() => {
|
||||
font-size: 11px;
|
||||
}
|
||||
|
||||
/* 消息流式输出时的右侧呼吸圆点(直接嵌入 HTML) */
|
||||
:deep(.thinking-dot-inline) {
|
||||
display: inline-block;
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
background-color: #94a3b8;
|
||||
border-radius: 50%;
|
||||
margin-left: 8px;
|
||||
vertical-align: middle;
|
||||
animation: thinking-pulse 1.5s infinite ease-in-out;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.assistant-actions {
|
||||
flex-wrap: wrap;
|
||||
gap: 8px;
|
||||
@@ -5368,28 +5425,19 @@ onBeforeUnmount(() => {
|
||||
border-color: rgba(15, 23, 42, 0.08);
|
||||
}
|
||||
|
||||
.thinking-indicator {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
.thinking-dot {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
background-color: #94a3b8;
|
||||
border-radius: 50%;
|
||||
margin: 0;
|
||||
animation: thinking-pulse 1.5s infinite ease-in-out;
|
||||
}
|
||||
|
||||
.thinking-indicator__text {
|
||||
font-size: 15px;
|
||||
font-weight: 600;
|
||||
color: #64748b;
|
||||
background: linear-gradient(
|
||||
90deg,
|
||||
#64748b 0%,
|
||||
#64748b 25%,
|
||||
#e2e8f0 50%,
|
||||
#64748b 75%,
|
||||
#64748b 100%
|
||||
);
|
||||
background-size: 200% 100%;
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
animation: thinking-shimmer 2s infinite linear;
|
||||
@keyframes thinking-pulse {
|
||||
0% { transform: scale(0.8); opacity: 0.5; }
|
||||
50% { transform: scale(1.2); opacity: 1; }
|
||||
100% { transform: scale(0.8); opacity: 0.5; }
|
||||
}
|
||||
|
||||
@keyframes thinking-shimmer {
|
||||
@@ -5543,6 +5591,51 @@ onBeforeUnmount(() => {
|
||||
max-height: 260px;
|
||||
}
|
||||
}
|
||||
|
||||
/* 扫光动效:位于标题上的白色光线从左到右划过 */
|
||||
.chat-message__reasoning-title--shimmering {
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.chat-message__reasoning-title--shimmering::after {
|
||||
content: "";
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: -100%;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
background: linear-gradient(
|
||||
90deg,
|
||||
transparent,
|
||||
rgba(255, 255, 255, 0.9),
|
||||
transparent
|
||||
);
|
||||
transform: skewX(-20deg);
|
||||
animation: shimmer-sweep 1.2s infinite linear;
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
@keyframes shimmer-sweep {
|
||||
from { left: -150%; }
|
||||
to { left: 150%; }
|
||||
}
|
||||
|
||||
/* 推理框展开收起弹性动效 */
|
||||
.reasoning-bounce-enter-active {
|
||||
transition: all 0.4s cubic-bezier(0.34, 1.56, 0.64, 1);
|
||||
transform-origin: top center;
|
||||
}
|
||||
|
||||
.reasoning-bounce-leave-active {
|
||||
transition: all 0.2s ease;
|
||||
transform-origin: top center;
|
||||
}
|
||||
|
||||
.reasoning-bounce-enter-from,
|
||||
.reasoning-bounce-leave-to {
|
||||
opacity: 0;
|
||||
transform: translateY(-15px);
|
||||
}
|
||||
</style>
|
||||
<style>
|
||||
/* --- AI 助手确认卡片 & 弹窗高级样式 --- */
|
||||
|
||||
@@ -5,6 +5,7 @@ import AuthView from '@/views/AuthView.vue'
|
||||
import AssistantView from '@/views/AssistantView.vue'
|
||||
import DashboardView from '@/views/DashboardView.vue'
|
||||
import ScheduleView from '@/views/ScheduleView.vue'
|
||||
import AssistantReasoningDebug from '@/views/debug/AssistantReasoningDebug.vue'
|
||||
|
||||
const router = createRouter({
|
||||
history: createWebHistory(),
|
||||
@@ -55,6 +56,11 @@ const router = createRouter({
|
||||
name: 'debug-tool-cards',
|
||||
component: () => import('@/views/debug/ToolCardMockPage.vue'),
|
||||
},
|
||||
{
|
||||
path: '/debug/assistant/:id?',
|
||||
name: 'debug-assistant',
|
||||
component: AssistantReasoningDebug,
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
|
||||
@@ -97,6 +97,31 @@ export interface ConversationMeta {
|
||||
status: string
|
||||
}
|
||||
|
||||
export interface ThinkingSummaryPayload {
|
||||
summary_seq?: number
|
||||
short_summary?: string
|
||||
detail_summary?: string
|
||||
final?: boolean
|
||||
duration_seconds?: number
|
||||
}
|
||||
|
||||
export interface ThinkingSummaryBlock {
|
||||
key: string
|
||||
stage?: string
|
||||
blockId?: string
|
||||
latestSeq: number
|
||||
globalSeq: number
|
||||
latestShort: string
|
||||
details: Array<{
|
||||
seq: number
|
||||
text: string
|
||||
durationSeconds?: number
|
||||
final?: boolean
|
||||
}>
|
||||
active: boolean
|
||||
collapsed: boolean
|
||||
}
|
||||
|
||||
export interface AssistantMessage {
|
||||
id: string
|
||||
role: 'user' | 'assistant' | 'system'
|
||||
@@ -104,6 +129,7 @@ export interface AssistantMessage {
|
||||
createdAt: string
|
||||
reasoning?: string
|
||||
extra?: any
|
||||
thinkingSummaryBlocks?: ThinkingSummaryBlock[]
|
||||
}
|
||||
|
||||
export type ThinkingModeType = 'auto' | 'true' | 'false'
|
||||
@@ -130,6 +156,7 @@ export interface ChatStreamRequest {
|
||||
model?: string
|
||||
thinking?: ThinkingModeType
|
||||
extra?: ChatRequestExtra
|
||||
thinking_summary?: ThinkingSummaryPayload
|
||||
}
|
||||
|
||||
export interface HybridScheduleEntry {
|
||||
|
||||
frontend/src/views/debug/AssistantPanelDebug.vue — new file, 6075 lines (file diff suppressed because it is too large; use "Load Diff" to view)
frontend/src/views/debug/AssistantReasoningDebug.vue — new file, 25 lines
@@ -0,0 +1,25 @@
|
||||
<script setup lang="ts">
|
||||
import AssistantPanelDebug from './AssistantPanelDebug.vue'
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<div class="assistant-debug-page">
|
||||
<AssistantPanelDebug class="assistant-debug-panel" view-mode="standalone" />
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<style scoped>
|
||||
.assistant-debug-page {
|
||||
width: 100vw;
|
||||
height: 100vh;
|
||||
background: #f0f2f5;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.assistant-debug-panel {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
</style>
|
||||
Reference in New Issue
Block a user