Version: 0.9.53.dev.260429
后端: 1. 流式思考链路从 raw reasoning_content 切到 `thinking_summary` 摘要协议,补齐摘要 prompt、digestor 与 Lite 压缩链路,plan / execute / fallback 统一改为“只出摘要、不透原始推理”,正文开始后自动关停摘要流。 2. thinking_summary 打通 timeline / SSE / outbox 持久化闭环,只落 detail_summary 与必要 metadata,并补强 seq 自检、冲突幂等识别与补 seq 回填,提升重放恢复稳定性。 3. 会话历史口径继续收紧,assistant 正文与时间线不再回写 raw reasoning_content,仅保留正文与思考耗时,避免刷新恢复时再次暴露内部推理文本。 前端: 4. 助手页开始接入 thinking_summary 实时流与历史恢复,补齐短摘要状态、长摘要折叠区、正文开流后自动收口,并增加调试入口用于协议联调与验收。 5. 当前前端助手页仍是残次过渡态,本版先以 thinking_summary 协议接通和基础渲染为主,样式、交互与细节体验暂未收平,下一版集中修复。 仓库: 6. 补充 thinking_summary 对接说明,明确 SSE 协议、timeline 恢复口径与 short/detail summary 的使用边界。
This commit is contained in:
@@ -56,6 +56,15 @@ func collectExecuteDecisionFromLLM(
|
||||
parser := newagentrouter.NewStreamDecisionParser()
|
||||
output := &executeDecisionStreamOutput{firstChunk: true}
|
||||
var fullText strings.Builder
|
||||
reasoningDigestor, digestorErr := emitter.NewReasoningDigestor(ctx, executeSpeakBlockID, executeStageName)
|
||||
if digestorErr != nil {
|
||||
return nil, fmt.Errorf("执行 thinking 摘要器初始化失败: %w", digestorErr)
|
||||
}
|
||||
defer func() {
|
||||
if reasoningDigestor != nil {
|
||||
_ = reasoningDigestor.Close(ctx)
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
chunk, recvErr := reader.Recv()
|
||||
@@ -68,15 +77,9 @@ func collectExecuteDecisionFromLLM(
|
||||
}
|
||||
|
||||
if chunk != nil && strings.TrimSpace(chunk.ReasoningContent) != "" {
|
||||
if emitErr := emitter.EmitReasoningText(
|
||||
executeSpeakBlockID,
|
||||
executeStageName,
|
||||
chunk.ReasoningContent,
|
||||
output.firstChunk,
|
||||
); emitErr != nil {
|
||||
return nil, fmt.Errorf("执行 thinking 推送失败: %w", emitErr)
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.Append(chunk.ReasoningContent)
|
||||
}
|
||||
output.firstChunk = false
|
||||
}
|
||||
|
||||
content := ""
|
||||
@@ -148,6 +151,9 @@ func collectExecuteDecisionFromLLM(
|
||||
output.decision = decision
|
||||
|
||||
if visible != "" {
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := emitter.EmitAssistantText(
|
||||
executeSpeakBlockID,
|
||||
executeStageName,
|
||||
@@ -174,9 +180,14 @@ func collectExecuteDecisionFromLLM(
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(chunk2.ReasoningContent) != "" {
|
||||
_ = emitter.EmitReasoningText(executeSpeakBlockID, executeStageName, chunk2.ReasoningContent, false)
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.Append(chunk2.ReasoningContent)
|
||||
}
|
||||
}
|
||||
if chunk2.Content != "" {
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := emitter.EmitAssistantText(
|
||||
executeSpeakBlockID,
|
||||
executeStageName,
|
||||
|
||||
@@ -106,6 +106,15 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
parser := newagentrouter.NewStreamDecisionParser()
|
||||
firstChunk := true
|
||||
speakStreamed := false
|
||||
reasoningDigestor, digestorErr := emitter.NewReasoningDigestor(ctx, planSpeakBlockID, planStageName)
|
||||
if digestorErr != nil {
|
||||
return fmt.Errorf("规划 thinking 摘要器初始化失败: %w", digestorErr)
|
||||
}
|
||||
defer func() {
|
||||
if reasoningDigestor != nil {
|
||||
_ = reasoningDigestor.Close(ctx)
|
||||
}
|
||||
}()
|
||||
|
||||
// 3.1 阶段一:解析决策标签。
|
||||
for {
|
||||
@@ -118,12 +127,11 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
break
|
||||
}
|
||||
|
||||
// thinking 内容独立推流。
|
||||
// thinking 内容只进入摘要器,不再把 raw reasoning_content 透传给前端。
|
||||
if chunk != nil && strings.TrimSpace(chunk.ReasoningContent) != "" {
|
||||
if emitErr := emitter.EmitReasoningText(planSpeakBlockID, planStageName, chunk.ReasoningContent, firstChunk); emitErr != nil {
|
||||
return fmt.Errorf("规划 thinking 推送失败: %w", emitErr)
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.Append(chunk.ReasoningContent)
|
||||
}
|
||||
firstChunk = false
|
||||
}
|
||||
|
||||
content := ""
|
||||
@@ -152,6 +160,9 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
// 3.2 阶段二:流式推送 speak(同一 reader 继续读取)。
|
||||
var fullText strings.Builder
|
||||
if visible != "" {
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := emitter.EmitAssistantText(planSpeakBlockID, planStageName, visible, firstChunk); emitErr != nil {
|
||||
return fmt.Errorf("规划文案推送失败: %w", emitErr)
|
||||
}
|
||||
@@ -172,9 +183,14 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(chunk2.ReasoningContent) != "" {
|
||||
_ = emitter.EmitReasoningText(planSpeakBlockID, planStageName, chunk2.ReasoningContent, false)
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.Append(chunk2.ReasoningContent)
|
||||
}
|
||||
}
|
||||
if chunk2.Content != "" {
|
||||
if reasoningDigestor != nil {
|
||||
reasoningDigestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := emitter.EmitAssistantText(planSpeakBlockID, planStageName, chunk2.Content, firstChunk); emitErr != nil {
|
||||
return fmt.Errorf("规划文案推送失败: %w", emitErr)
|
||||
}
|
||||
|
||||
128
backend/newAgent/prompt/reasoning_summary.go
Normal file
128
backend/newAgent/prompt/reasoning_summary.go
Normal file
@@ -0,0 +1,128 @@
|
||||
package newagentprompt
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
const (
|
||||
reasoningSummaryMaxFullRunes = 6000
|
||||
reasoningSummaryMaxDeltaRunes = 1800
|
||||
)
|
||||
|
||||
// ReasoningSummaryPromptInput describes the minimal input required for one
// "thinking summary" model call.
//
// Responsibility boundaries:
//  1. it carries only the text and runtime state the summary model needs to
//     see, and does not bind to the stream package's DTOs, so the prompt layer
//     never depends back on the output protocol;
//  2. FullReasoning is reduced to its tail while the prompt is built, so a
//     long thinking session cannot blow up a cheap model's context window;
//  3. PreviousSummary is reference material for continuity only; the model is
//     not required to inherit it verbatim.
type ReasoningSummaryPromptInput struct {
	FullReasoning   string  // complete buffered reasoning text (tail-trimmed at build time)
	DeltaReasoning  string  // reasoning text added since the previous summary call
	PreviousSummary string  // most recently published summary, empty when none exists
	CandidateSeq    int     // sequence number the scheduler intends for this summary
	Final           bool    // true when this is the closing summary for the block
	DurationSeconds float64 // thinking duration so far, in seconds
}
||||
// reasoningSummaryPromptPayload is the JSON body embedded in the user prompt.
// The field names are part of the prompt contract the summary model is asked
// to read (e.g. max_detail_summary_runes is referenced by the system prompt),
// so keep them stable.
type reasoningSummaryPromptPayload struct {
	CandidateSeq          int     `json:"candidate_seq"`
	Final                 bool    `json:"final"`
	DurationSeconds       float64 `json:"duration_seconds"`
	PreviousSummary       string  `json:"previous_summary,omitempty"`
	RecentReasoning       string  `json:"recent_reasoning,omitempty"`
	DeltaReasoning        string  `json:"delta_reasoning,omitempty"`
	SourceTextRunes       int     `json:"source_text_runes,omitempty"`
	MaxDetailSummaryRunes int     `json:"max_detail_summary_runes,omitempty"`
}
||||
|
||||
// BuildReasoningSummaryMessages builds the chat messages for a
// thinking-summary model call.
//
// Steps:
//  1. the system prompt pins the job to "user-visible summary only" and
//     forbids replaying the raw chain of thought or internal reasoning detail;
//  2. the user prompt carries its input as JSON, which keeps the payload
//     extensible and reduces model misreads;
//  3. long reasoning text is cut down to a tail window so the async summary
//     request stays stable, cheap, and bounded.
func BuildReasoningSummaryMessages(input ReasoningSummaryPromptInput) []*schema.Message {
	recentReasoning := trimRunesFromEnd(input.FullReasoning, reasoningSummaryMaxFullRunes)
	deltaReasoning := trimRunesFromEnd(input.DeltaReasoning, reasoningSummaryMaxDeltaRunes)
	payload := reasoningSummaryPromptPayload{
		CandidateSeq:          input.CandidateSeq,
		Final:                 input.Final,
		DurationSeconds:       input.DurationSeconds,
		PreviousSummary:       strings.TrimSpace(input.PreviousSummary),
		RecentReasoning:       recentReasoning,
		DeltaReasoning:        deltaReasoning,
		SourceTextRunes:       reasoningSummarySourceRunes(recentReasoning, deltaReasoning),
		MaxDetailSummaryRunes: ReasoningSummaryDetailRuneLimit(input.FullReasoning, input.DeltaReasoning),
	}

	raw, err := json.MarshalIndent(payload, "", " ")
	if err != nil {
		// Fallback: marshalling this plain struct should never fail, but keep
		// a minimal hand-built payload carrying only the recent reasoning.
		raw = []byte(fmt.Sprintf(`{"recent_reasoning":%q}`, trimRunesFromEnd(input.FullReasoning, reasoningSummaryMaxFullRunes)))
	}

	return []*schema.Message{
		schema.SystemMessage(buildReasoningSummarySystemPrompt()),
		schema.UserMessage("请把下面的模型思考内容整理成用户可见的进度摘要。\n输入:\n" + string(raw)),
	}
}
||||
|
||||
// buildReasoningSummarySystemPrompt returns the fixed system prompt for the
// thinking-summary model. The prompt (in Chinese, which the target model is
// expected to follow) demands a strict JSON object with short_summary and
// detail_summary fields and forbids exposing the raw chain of thought.
// NOTE(review): interior indentation of the raw string could not be recovered
// from the diff view — confirm against the committed file.
func buildReasoningSummarySystemPrompt() string {
	return strings.TrimSpace(`你是 SmartMate 的“思考摘要器”。你的任务是把模型内部 reasoning 整理成用户可见的进度摘要。

输出必须是严格 JSON 对象:
{
"short_summary": "8到18个汉字的短摘要",
"detail_summary": "不超过 max_detail_summary_runes 个字的展开摘要"
}

规则:
1. 只描述“正在做什么”和“目前推进到哪一步”,不要复述、引用或暴露原始思考链。
2. 不输出 markdown,不输出代码块,不解释 JSON 以外的内容。
3. short_summary 要短、稳定、适合前端几秒刷新一次。
4. detail_summary 不按固定句数限制,而按输入长度控制:字数必须小于等于 max_detail_summary_runes;不需要凑满上限,信息密度优先。
5. detail_summary 仍然面向用户,不写内部推理细节、隐含假设链、逐步演算。
6. 若输入为空或噪声较多,用保守摘要,例如“正在整理思路”“正在核对可用信息”。
7. final=true 时,detail_summary 用完成态语气,说明思考已收拢到下一步答复或动作。`)
}
||||
|
||||
// ReasoningSummaryDetailRuneLimit 返回 detail_summary 的最大字数。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 与 BuildReasoningSummaryMessages 使用同一套输入窗口,避免 prompt 提示和服务端兜底口径不一致;
|
||||
// 2. 上限取“提供给摘要模型的主要文本段”的一半,并向上取整,适配极短文本;
|
||||
// 3. 返回 0 表示没有有效输入文本,调用方不应做硬裁剪。
|
||||
func ReasoningSummaryDetailRuneLimit(fullReasoning, deltaReasoning string) int {
|
||||
recentReasoning := trimRunesFromEnd(fullReasoning, reasoningSummaryMaxFullRunes)
|
||||
delta := trimRunesFromEnd(deltaReasoning, reasoningSummaryMaxDeltaRunes)
|
||||
sourceRunes := reasoningSummarySourceRunes(recentReasoning, delta)
|
||||
if sourceRunes <= 0 {
|
||||
return 0
|
||||
}
|
||||
return (sourceRunes + 1) / 2
|
||||
}
|
||||
|
||||
// reasoningSummarySourceRunes reports how many runes of source text the
// summary model will effectively see: the trimmed recent window when it is
// non-empty, otherwise the trimmed delta window.
func reasoningSummarySourceRunes(recentReasoning, deltaReasoning string) int {
	if recent := strings.TrimSpace(recentReasoning); recent != "" {
		return utf8.RuneCountInString(recent)
	}
	return utf8.RuneCountInString(strings.TrimSpace(deltaReasoning))
}
||||
|
||||
// trimRunesFromEnd keeps at most maxRunes runes from the tail of text.
//
// The input is whitespace-trimmed first; an empty result or a non-positive
// limit yields "". When the trimmed text already fits within the limit it is
// returned unchanged, otherwise only the trailing rune window survives.
func trimRunesFromEnd(text string, maxRunes int) string {
	trimmed := strings.TrimSpace(text)
	if maxRunes <= 0 || trimmed == "" {
		return ""
	}

	all := []rune(trimmed)
	excess := len(all) - maxRunes
	if excess <= 0 {
		return trimmed
	}
	return string(all[excess:])
}
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
@@ -62,6 +63,18 @@ type ChunkEmitter struct {
|
||||
RequestID string
|
||||
ModelName string
|
||||
Created int64
|
||||
|
||||
// thinkingGateMu 是“正文门卫”的轻量保护。
|
||||
// 1. 它只保护 thinking_summary 是否还能发,不串行化全部 SSE;
|
||||
// 2. 正文一旦开始,对应 block 的门会被关闭,后续同 block 摘要直接丢弃;
|
||||
// 3. 这样既避免摘要 goroutine 在正文之后补发旧思考,又不误杀后续节点的新一轮思考。
|
||||
thinkingGateMu sync.Mutex
|
||||
thinkingClosedBlocks map[string]bool
|
||||
// reasoningSummaryFunc 用于把原始 reasoning 压成用户可见摘要。
|
||||
// 1. 该函数由 service 层注入,stream 包只负责调度,不负责选择模型;
|
||||
// 2. 未注入时模型 reasoning 只会被静默丢弃,不再回退成 raw reasoning_content;
|
||||
// 3. 正文一旦开始,ReasoningDigestor 和 ChunkEmitter 会同时关门,迟到结果不会再发给前端。
|
||||
reasoningSummaryFunc ReasoningSummaryFunc
|
||||
// extraEventHook 用于把关键结构化事件同步给上层做持久化。
|
||||
// 1. hook 失败不能影响 SSE 主链路;
|
||||
// 2. hook 只接收 extra 结构,避免 emitter 反向依赖业务层;
|
||||
@@ -122,6 +135,40 @@ func (e *ChunkEmitter) SetExtraEventHook(hook func(extra *OpenAIChunkExtra)) {
|
||||
e.extraEventHook = hook
|
||||
}
|
||||
|
||||
// SetReasoningSummaryFunc 设置 reasoning 摘要模型调用函数。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 这里只保存函数引用,不立即调用模型;
|
||||
// 2. 摘要触发频率、单飞、正文闸门由 ReasoningDigestor 负责;
|
||||
// 3. 传 nil 表示关闭摘要能力,后续 reasoning chunk 会被静默丢弃。
|
||||
func (e *ChunkEmitter) SetReasoningSummaryFunc(fn ReasoningSummaryFunc) {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
e.reasoningSummaryFunc = fn
|
||||
}
|
||||
|
||||
// NewReasoningDigestor creates a reasoning digestor for the current block.
//
// Steps:
//  1. when no summary function was injected it returns (nil, nil), and the
//     caller simply skips raw reasoning output;
//  2. summary results pass through this emitter's content gate first, then
//     the shared extra/hook pipeline;
//  3. the digestor itself still owns single-flight, watermarks, and dropping
//     in-flight results after assistant text has started.
func (e *ChunkEmitter) NewReasoningDigestor(ctx context.Context, blockID, stage string) (*ReasoningDigestor, error) {
	if e == nil || e.reasoningSummaryFunc == nil {
		return nil, nil
	}
	// Re-open the gate so a new node's thinking round can emit again even if
	// an earlier round for the same block closed it.
	e.openThinkingSummaryGate(blockID, stage)
	return NewReasoningDigestor(ReasoningDigestorOptions{
		SummaryFunc: e.reasoningSummaryFunc,
		SummarySink: func(summary StreamThinkingSummaryExtra) {
			// Best effort: a failed emit must not break the digestor's
			// scheduling loop.
			_ = e.EmitThinkingSummary(blockID, stage, summary)
		},
		BaseContext:    ctx,
		SummaryTimeout: 8 * time.Second,
	})
}
|
||||
|
||||
// EmitReasoningText 输出一段 reasoning 文字,并附带 reasoning_text extra。
|
||||
func (e *ChunkEmitter) EmitReasoningText(blockID, stage, text string, includeRole bool) error {
|
||||
if e == nil || e.emit == nil {
|
||||
@@ -160,6 +207,7 @@ func (e *ChunkEmitter) EmitAssistantText(blockID, stage, text string, includeRol
|
||||
if text == "" {
|
||||
return nil
|
||||
}
|
||||
e.closeThinkingSummaryGate(blockID, stage)
|
||||
|
||||
payload, err := ToOpenAIAssistantChunkWithExtra(
|
||||
e.RequestID,
|
||||
@@ -178,6 +226,66 @@ func (e *ChunkEmitter) EmitAssistantText(blockID, stage, text string, includeRol
|
||||
return e.emit(payload)
|
||||
}
|
||||
|
||||
// EmitThinkingSummary 输出一次“流式思考摘要”事件。
|
||||
//
|
||||
// 协议约束:
|
||||
// 1. 该事件只走 extra.thinking_summary,不回写 delta.content / delta.reasoning_content;
|
||||
// 2. 仍复用现有 extra hook,让上层在不依赖 emitter 细节的前提下同步持久化;
|
||||
// 3. includeRole 不再需要,因为 thinking_summary 本身就是纯结构化事件。
|
||||
func (e *ChunkEmitter) EmitThinkingSummary(blockID, stage string, summary StreamThinkingSummaryExtra) error {
|
||||
if e == nil || e.emit == nil {
|
||||
return nil
|
||||
}
|
||||
if e.isThinkingSummaryGateClosed(blockID, stage) {
|
||||
return nil
|
||||
}
|
||||
return e.emitExtraOnly(NewThinkingSummaryExtra(blockID, stage, summary))
|
||||
}
|
||||
|
||||
func (e *ChunkEmitter) openThinkingSummaryGate(blockID, stage string) {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
e.thinkingGateMu.Lock()
|
||||
if e.thinkingClosedBlocks != nil {
|
||||
delete(e.thinkingClosedBlocks, thinkingSummaryGateKey(blockID, stage))
|
||||
}
|
||||
e.thinkingGateMu.Unlock()
|
||||
}
|
||||
|
||||
func (e *ChunkEmitter) closeThinkingSummaryGate(blockID, stage string) {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
e.thinkingGateMu.Lock()
|
||||
if e.thinkingClosedBlocks == nil {
|
||||
e.thinkingClosedBlocks = make(map[string]bool)
|
||||
}
|
||||
e.thinkingClosedBlocks[thinkingSummaryGateKey(blockID, stage)] = true
|
||||
e.thinkingGateMu.Unlock()
|
||||
}
|
||||
|
||||
func (e *ChunkEmitter) isThinkingSummaryGateClosed(blockID, stage string) bool {
|
||||
if e == nil {
|
||||
return true
|
||||
}
|
||||
e.thinkingGateMu.Lock()
|
||||
defer e.thinkingGateMu.Unlock()
|
||||
return e.thinkingClosedBlocks[thinkingSummaryGateKey(blockID, stage)]
|
||||
}
|
||||
|
||||
// thinkingSummaryGateKey derives the gate-map key for a summary stream: the
// trimmed block ID when present, else the trimmed stage, else a shared
// default bucket so gating still works with no identifiers at all.
func thinkingSummaryGateKey(blockID, stage string) string {
	for _, candidate := range []string{strings.TrimSpace(blockID), strings.TrimSpace(stage)} {
		if candidate != "" {
			return candidate
		}
	}
	return "__default__"
}
|
||||
|
||||
// EmitPseudoReasoningText 把整段 reasoning 文本按伪流式方式逐块推出。
|
||||
func (e *ChunkEmitter) EmitPseudoReasoningText(ctx context.Context, blockID, stage, text string, options PseudoStreamOptions) error {
|
||||
return e.emitPseudoText(
|
||||
@@ -304,6 +412,9 @@ func (e *ChunkEmitter) EmitConfirmRequest(ctx context.Context, blockID, stage, i
|
||||
text := buildConfirmAssistantText(title, summary)
|
||||
extra := NewConfirmRequestExtra(blockID, stage, interactionID, title, summary)
|
||||
e.emitExtraEventHook(extra)
|
||||
if strings.TrimSpace(text) != "" {
|
||||
e.closeThinkingSummaryGate(blockID, stage)
|
||||
}
|
||||
return e.emitPseudoText(
|
||||
ctx,
|
||||
text,
|
||||
@@ -341,6 +452,9 @@ func (e *ChunkEmitter) EmitInterruptMessage(ctx context.Context, blockID, stage,
|
||||
|
||||
text := buildInterruptAssistantText(interactionType, summary)
|
||||
extra := NewInterruptExtra(blockID, stage, interactionID, interactionType, summary)
|
||||
if strings.TrimSpace(text) != "" {
|
||||
e.closeThinkingSummaryGate(blockID, stage)
|
||||
}
|
||||
return e.emitPseudoText(
|
||||
ctx,
|
||||
text,
|
||||
@@ -435,6 +549,15 @@ func (e *ChunkEmitter) EmitStreamAssistantText(
|
||||
|
||||
var fullText strings.Builder
|
||||
firstChunk := true
|
||||
digestor, digestorErr := e.NewReasoningDigestor(ctx, blockID, stage)
|
||||
if digestorErr != nil {
|
||||
return "", digestorErr
|
||||
}
|
||||
defer func() {
|
||||
if digestor != nil {
|
||||
_ = digestor.Close(ctx)
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
chunk, err := reader.Recv()
|
||||
@@ -445,16 +568,19 @@ func (e *ChunkEmitter) EmitStreamAssistantText(
|
||||
return fullText.String(), err
|
||||
}
|
||||
|
||||
// 推送 reasoning content。
|
||||
// 1. reasoning content 只喂给摘要器,不再透传给前端。
|
||||
// 2. 未注入摘要能力时直接丢弃,避免 raw reasoning_content 泄漏到 SSE。
|
||||
if chunk != nil && strings.TrimSpace(chunk.ReasoningContent) != "" {
|
||||
if emitErr := e.EmitReasoningText(blockID, stage, chunk.ReasoningContent, firstChunk); emitErr != nil {
|
||||
return fullText.String(), emitErr
|
||||
if digestor != nil {
|
||||
digestor.Append(chunk.ReasoningContent)
|
||||
}
|
||||
firstChunk = false
|
||||
}
|
||||
|
||||
// 推送 assistant 正文。
|
||||
if chunk != nil && chunk.Content != "" {
|
||||
if digestor != nil {
|
||||
digestor.MarkContentStarted()
|
||||
}
|
||||
if emitErr := e.EmitAssistantText(blockID, stage, chunk.Content, firstChunk); emitErr != nil {
|
||||
return fullText.String(), emitErr
|
||||
}
|
||||
@@ -466,9 +592,9 @@ func (e *ChunkEmitter) EmitStreamAssistantText(
|
||||
return fullText.String(), nil
|
||||
}
|
||||
|
||||
// EmitStreamReasoningText 从 StreamReader 逐 chunk 读取并实时推送 reasoning 文字。
|
||||
// EmitStreamReasoningText 从 StreamReader 逐 chunk 读取 reasoning,并转成低频 thinking_summary。
|
||||
//
|
||||
// 与 EmitStreamAssistantText 结构相同,但只推送 ReasoningContent,不推送 Content。
|
||||
// 与 EmitStreamAssistantText 结构相同,但不再输出 raw ReasoningContent。
|
||||
// 用于只需展示思考过程而无需展示正文的场景。
|
||||
func (e *ChunkEmitter) EmitStreamReasoningText(
|
||||
ctx context.Context,
|
||||
@@ -480,7 +606,15 @@ func (e *ChunkEmitter) EmitStreamReasoningText(
|
||||
}
|
||||
|
||||
var fullText strings.Builder
|
||||
firstChunk := true
|
||||
digestor, digestorErr := e.NewReasoningDigestor(ctx, blockID, stage)
|
||||
if digestorErr != nil {
|
||||
return "", digestorErr
|
||||
}
|
||||
defer func() {
|
||||
if digestor != nil {
|
||||
_ = digestor.Close(ctx)
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
chunk, err := reader.Recv()
|
||||
@@ -492,11 +626,10 @@ func (e *ChunkEmitter) EmitStreamReasoningText(
|
||||
}
|
||||
|
||||
if chunk != nil && strings.TrimSpace(chunk.ReasoningContent) != "" {
|
||||
if emitErr := e.EmitReasoningText(blockID, stage, chunk.ReasoningContent, firstChunk); emitErr != nil {
|
||||
return fullText.String(), emitErr
|
||||
if digestor != nil {
|
||||
digestor.Append(chunk.ReasoningContent)
|
||||
}
|
||||
fullText.WriteString(chunk.ReasoningContent)
|
||||
firstChunk = false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -40,6 +40,7 @@ type StreamExtraKind string
|
||||
|
||||
const (
|
||||
StreamExtraKindReasoningText StreamExtraKind = "reasoning_text"
|
||||
StreamExtraKindThinkingSummary StreamExtraKind = "thinking_summary"
|
||||
StreamExtraKindAssistantText StreamExtraKind = "assistant_text"
|
||||
StreamExtraKindStatus StreamExtraKind = "status"
|
||||
StreamExtraKindToolCall StreamExtraKind = "tool_call"
|
||||
@@ -67,16 +68,31 @@ const (
|
||||
// 2. Status / Tool / Confirm / Interrupt / BusinessCard 只存展示层真正需要的摘要,不直接耦合后端完整状态对象;
|
||||
// 3. Meta 留给后续做灰度扩展,避免每加一种小字段都要立刻改 DTO 结构。
|
||||
type OpenAIChunkExtra struct {
|
||||
Kind StreamExtraKind `json:"kind,omitempty"`
|
||||
BlockID string `json:"block_id,omitempty"`
|
||||
Stage string `json:"stage,omitempty"`
|
||||
DisplayMode StreamDisplayMode `json:"display_mode,omitempty"`
|
||||
Status *StreamStatusExtra `json:"status,omitempty"`
|
||||
Tool *StreamToolExtra `json:"tool,omitempty"`
|
||||
Confirm *StreamConfirmExtra `json:"confirm,omitempty"`
|
||||
Interrupt *StreamInterruptExtra `json:"interrupt,omitempty"`
|
||||
BusinessCard *StreamBusinessCardExtra `json:"business_card,omitempty"`
|
||||
Meta map[string]any `json:"meta,omitempty"`
|
||||
Kind StreamExtraKind `json:"kind,omitempty"`
|
||||
BlockID string `json:"block_id,omitempty"`
|
||||
Stage string `json:"stage,omitempty"`
|
||||
DisplayMode StreamDisplayMode `json:"display_mode,omitempty"`
|
||||
ThinkingSummary *StreamThinkingSummaryExtra `json:"thinking_summary,omitempty"`
|
||||
Status *StreamStatusExtra `json:"status,omitempty"`
|
||||
Tool *StreamToolExtra `json:"tool,omitempty"`
|
||||
Confirm *StreamConfirmExtra `json:"confirm,omitempty"`
|
||||
Interrupt *StreamInterruptExtra `json:"interrupt,omitempty"`
|
||||
BusinessCard *StreamBusinessCardExtra `json:"business_card,omitempty"`
|
||||
Meta map[string]any `json:"meta,omitempty"`
|
||||
}
|
||||
|
||||
// StreamThinkingSummaryExtra 表示“流式思考摘要”事件。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. short_summary 仅用于 SSE 端快速展示短句,不要求与持久化内容完全一致;
|
||||
// 2. detail_summary 作为更完整的摘要正文,后续持久化层可直接复用;
|
||||
// 3. summary_seq / final / duration_seconds 由摘要调度层补充运行态信息,前端可据此去重和排序。
|
||||
type StreamThinkingSummaryExtra struct {
|
||||
SummarySeq int `json:"summary_seq,omitempty"`
|
||||
ShortSummary string `json:"short_summary,omitempty"`
|
||||
DetailSummary string `json:"detail_summary,omitempty"`
|
||||
Final bool `json:"final,omitempty"`
|
||||
DurationSeconds float64 `json:"duration_seconds,omitempty"`
|
||||
}
|
||||
|
||||
// StreamStatusExtra 表示普通阶段状态或提示性事件。
|
||||
@@ -195,6 +211,17 @@ func NewReasoningTextExtra(blockID, stage string) *OpenAIChunkExtra {
|
||||
}
|
||||
}
|
||||
|
||||
// NewThinkingSummaryExtra 创建“流式思考摘要”事件的 extra。
|
||||
func NewThinkingSummaryExtra(blockID, stage string, summary StreamThinkingSummaryExtra) *OpenAIChunkExtra {
|
||||
return &OpenAIChunkExtra{
|
||||
Kind: StreamExtraKindThinkingSummary,
|
||||
BlockID: blockID,
|
||||
Stage: stage,
|
||||
DisplayMode: StreamDisplayModeAppend,
|
||||
ThinkingSummary: &summary,
|
||||
}
|
||||
}
|
||||
|
||||
// NewAssistantTextExtra 创建“正文文字”事件的 extra。
|
||||
func NewAssistantTextExtra(blockID, stage string) *OpenAIChunkExtra {
|
||||
return &OpenAIChunkExtra{
|
||||
@@ -367,6 +394,7 @@ func hasStreamExtra(extra *OpenAIChunkExtra) bool {
|
||||
extra.BlockID != "" ||
|
||||
extra.Stage != "" ||
|
||||
extra.DisplayMode != "" ||
|
||||
extra.ThinkingSummary != nil ||
|
||||
extra.Status != nil ||
|
||||
extra.Tool != nil ||
|
||||
extra.Confirm != nil ||
|
||||
|
||||
599
backend/newAgent/stream/reasoning_digestor.go
Normal file
599
backend/newAgent/stream/reasoning_digestor.go
Normal file
@@ -0,0 +1,599 @@
|
||||
package newagentstream
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultReasoningDigestMinNewRunes = 120
|
||||
defaultReasoningDigestMinNewTokens = 80
|
||||
defaultReasoningDigestMinInterval = 3 * time.Second
|
||||
)
|
||||
|
||||
// ReasoningSummaryFunc 负责真正调用摘要模型。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 该函数只负责“把输入整理成一份摘要结果”,不负责调度、节流、正文闸门和结果丢弃;
|
||||
// 2. 返回值里的 short/detail 由模型或适配层填写;
|
||||
// 3. summary_seq / final / duration_seconds 由 ReasoningDigestor 统一补齐,避免上层重复维护运行态字段。
|
||||
type ReasoningSummaryFunc func(ctx context.Context, input ReasoningSummaryInput) (StreamThinkingSummaryExtra, error)
|
||||
|
||||
// ReasoningSummarySink 负责消费一条已经通过闸门校验的摘要结果。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 常见用法是把结果交给 ChunkEmitter.EmitThinkingSummary;
|
||||
// 2. 该回调不参与单飞、重试、水位线判断;
|
||||
// 3. 回调为 nil 时,Digestor 仍会维护 LatestSummary,方便调用方按需主动拉取。
|
||||
type ReasoningSummarySink func(summary StreamThinkingSummaryExtra)
|
||||
|
||||
// ReasoningSummaryInput 是注入给摘要模型调用方的统一输入。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. FullReasoning 提供完整 reasoning 缓冲区,适合做“全量重摘要”;
|
||||
// 2. DeltaReasoning + PreviousSummary 提供增量上下文,适合做“旧摘要续写”;
|
||||
// 3. CandidateSeq / Final / DurationSeconds 仅表达调度层意图,不要求模型原样回填。
|
||||
type ReasoningSummaryInput struct {
|
||||
FullReasoning string `json:"full_reasoning,omitempty"`
|
||||
DeltaReasoning string `json:"delta_reasoning,omitempty"`
|
||||
PreviousSummary *StreamThinkingSummaryExtra `json:"previous_summary,omitempty"`
|
||||
CandidateSeq int `json:"candidate_seq,omitempty"`
|
||||
Final bool `json:"final,omitempty"`
|
||||
DurationSeconds float64 `json:"duration_seconds,omitempty"`
|
||||
}
|
||||
|
||||
// ReasoningDigestorOptions 描述 reasoning 摘要器的调度参数。
|
||||
type ReasoningDigestorOptions struct {
|
||||
SummaryFunc ReasoningSummaryFunc
|
||||
SummarySink ReasoningSummarySink
|
||||
BaseContext context.Context
|
||||
MinNewRunes int
|
||||
MinNewTokens int
|
||||
MinInterval time.Duration
|
||||
SummaryTimeout time.Duration
|
||||
Now func() time.Time
|
||||
}
|
||||
|
||||
// ReasoningDigestor 负责把流式 reasoning 文本整理成“低频摘要事件”。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只负责缓冲、单飞、水位线、正文闸门、Flush/Close,不直接依赖 AgentService;
|
||||
// 2. 只通过 SummaryFunc / SummarySink 两个函数注入模型调用与结果消费,不在这里选模型;
|
||||
// 3. 一旦正文开始或显式关闸,后续摘要结果即使返回成功也必须丢弃,避免前端和持久化出现越界数据。
|
||||
type ReasoningDigestor struct {
|
||||
summaryFunc ReasoningSummaryFunc
|
||||
summarySink ReasoningSummarySink
|
||||
baseContext context.Context
|
||||
minNewRunes int
|
||||
minNewTokens int
|
||||
minInterval time.Duration
|
||||
summaryTimeout time.Duration
|
||||
now func() time.Time
|
||||
|
||||
mu sync.Mutex
|
||||
cond *sync.Cond
|
||||
buffer strings.Builder
|
||||
deltaBuffer strings.Builder
|
||||
startedAt time.Time
|
||||
lastRequestAt time.Time
|
||||
pendingRunes int
|
||||
pendingTokens int
|
||||
summarySeq int
|
||||
latestSummary *StreamThinkingSummaryExtra
|
||||
finalEmitted bool
|
||||
inFlight bool
|
||||
gateClosed bool
|
||||
contentStarted bool
|
||||
closed bool
|
||||
timer *time.Timer
|
||||
timerArmed bool
|
||||
currentCancel context.CancelFunc
|
||||
}
|
||||
|
||||
type reasoningDigestCall struct {
|
||||
ctx context.Context
|
||||
stop context.CancelFunc
|
||||
input ReasoningSummaryInput
|
||||
final bool
|
||||
}
|
||||
|
||||
// NewReasoningDigestor creates the core object that only schedules streaming
// thinking summaries.
//
// Steps:
//  1. validate SummaryFunc first; it is the only mandatory dependency because
//     the digestor never picks a model itself;
//  2. fill in default watermarks and the minimum interval so a caller passing
//     only the core dependencies can still start;
//  3. initialize only the concurrency primitives — no resident main loop is
//     started at construction time, avoiding extra goroutine lifecycle cost.
func NewReasoningDigestor(options ReasoningDigestorOptions) (*ReasoningDigestor, error) {
	if options.SummaryFunc == nil {
		return nil, errors.New("reasoning digestor: SummaryFunc 不能为空")
	}

	// Negative watermarks are clamped to zero; if both end up zero the
	// package defaults apply so the digestor never triggers on every chunk.
	if options.MinNewRunes < 0 {
		options.MinNewRunes = 0
	}
	if options.MinNewTokens < 0 {
		options.MinNewTokens = 0
	}
	if options.MinNewRunes == 0 && options.MinNewTokens == 0 {
		options.MinNewRunes = defaultReasoningDigestMinNewRunes
		options.MinNewTokens = defaultReasoningDigestMinNewTokens
	}
	if options.MinInterval <= 0 {
		options.MinInterval = defaultReasoningDigestMinInterval
	}
	if options.BaseContext == nil {
		options.BaseContext = context.Background()
	}
	// Now is injectable for deterministic tests; default to time.Now.
	if options.Now == nil {
		options.Now = time.Now
	}

	digestor := &ReasoningDigestor{
		summaryFunc:    options.SummaryFunc,
		summarySink:    options.SummarySink,
		baseContext:    options.BaseContext,
		minNewRunes:    options.MinNewRunes,
		minNewTokens:   options.MinNewTokens,
		minInterval:    options.MinInterval,
		summaryTimeout: options.SummaryTimeout,
		now:            options.Now,
	}
	// cond shares the digestor's mutex: it signals single-flight completion.
	digestor.cond = sync.NewCond(&digestor.mu)
	return digestor, nil
}
|
||||
|
||||
// Append adds one reasoning chunk and, depending on the watermark, may
// trigger a background summary.
//
// Steps:
//  1. write the raw reasoning into the full buffer first, so Flush/Close can
//     always see the complete context;
//  2. record the new text in deltaBuffer and the rune/token watermarks used
//     for the "minimum new content" check;
//  3. if the content gate is already closed, only keep the buffered snapshot
//     and schedule nothing;
//  4. if a summary request is already in flight, just update state — never
//     queue a second request; the single-flight return decides whether a
//     follow-up run is needed.
func (d *ReasoningDigestor) Append(reasoning string) {
	if d == nil || reasoning == "" {
		return
	}

	var call reasoningDigestCall
	var shouldStart bool

	d.mu.Lock()
	if d.closed {
		d.mu.Unlock()
		return
	}

	// First chunk starts the thinking clock used for duration_seconds.
	if d.startedAt.IsZero() {
		d.startedAt = d.now()
	}
	d.buffer.WriteString(reasoning)

	// Gate closed (or content started): buffer only, no new scheduling.
	if d.gateClosed || d.contentStarted {
		d.mu.Unlock()
		return
	}

	d.deltaBuffer.WriteString(reasoning)
	d.pendingRunes += utf8.RuneCountInString(reasoning)
	d.pendingTokens += estimateReasoningTokens(reasoning)
	// New reasoning arrived, so any previously emitted summary is no longer
	// final for this block.
	d.finalEmitted = false

	call, shouldStart = d.prepareSummaryLocked(d.baseContext, false, false)
	d.mu.Unlock()

	// Run the summary outside the lock so the model call never blocks Append.
	if shouldStart {
		go d.runSummary(call)
	}
}
|
||||
|
||||
// MarkContentStarted records that assistant text has begun streaming.
//
// Responsibility boundaries:
//  1. it closes the summary gate immediately;
//  2. it does not retract already-published summaries, but any summary call
//     still in flight will have its result discarded;
//  3. after this call, further Append only buffers reasoning and never
//     triggers a new summary.
func (d *ReasoningDigestor) MarkContentStarted() {
	d.closeGate(true)
}
|
||||
|
||||
// CloseGate closes the summary gate explicitly without also declaring that
// assistant text has started.
func (d *ReasoningDigestor) CloseGate() {
	d.closeGate(false)
}
|
||||
|
||||
// Flush tries to emit one last summary while assistant text has not started.
//
// Steps:
//  1. wait for the current single-flight request to finish, so Flush never
//     races a background auto-summary into a double run;
//  2. return immediately if content has started or the gate is closed;
//  3. skip when a final summary was already emitted and no new reasoning
//     arrived, avoiding duplicate final events;
//  4. otherwise force one summary run, even below the auto-trigger watermark.
func (d *ReasoningDigestor) Flush(ctx context.Context) error {
	if d == nil {
		return nil
	}

	call, shouldStart := d.prepareFlushCall(ctx)
	if !shouldStart {
		return nil
	}
	// Synchronous on purpose: Flush callers want the final summary emitted
	// before they proceed.
	return d.runSummary(call)
}
|
||||
|
||||
// Close 结束摘要器生命周期。
|
||||
//
|
||||
// 步骤说明:
|
||||
// 1. 若正文还未开始,先尝试 Flush 一次 final 摘要;
|
||||
// 2. 再关闭闸门、停止等待中的定时器,并取消正在进行的摘要调用;
|
||||
// 3. 最后等待单飞调用完全退出,避免遗留后台 goroutine 持续写结果。
|
||||
func (d *ReasoningDigestor) Close(ctx context.Context) error {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := d.Flush(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
d.mu.Lock()
|
||||
if d.closed {
|
||||
d.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
d.closed = true
|
||||
d.gateClosed = true
|
||||
d.stopTimerLocked()
|
||||
if d.currentCancel != nil {
|
||||
d.currentCancel()
|
||||
}
|
||||
for d.inFlight {
|
||||
d.cond.Wait()
|
||||
}
|
||||
d.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// LatestSummary returns the most recent summary that passed the gate check
// and was successfully published. The boolean reports whether any summary
// exists yet; the returned value is a defensive copy made under the lock.
func (d *ReasoningDigestor) LatestSummary() (StreamThinkingSummaryExtra, bool) {
	if d == nil {
		return StreamThinkingSummaryExtra{}, false
	}

	d.mu.Lock()
	defer d.mu.Unlock()

	if d.latestSummary == nil {
		return StreamThinkingSummaryExtra{}, false
	}
	// Clone while holding the lock so callers can never observe a summary
	// being replaced concurrently.
	return *cloneThinkingSummaryExtra(d.latestSummary), true
}
|
||||
|
||||
// closeGate shuts the summary gate, optionally also recording that assistant
// text has started. It clears the pending watermarks and delta buffer (no new
// summary may use them), stops any armed trigger timer, and cancels the
// in-flight summary call so its late result is discarded.
func (d *ReasoningDigestor) closeGate(markContentStarted bool) {
	if d == nil {
		return
	}

	d.mu.Lock()
	if markContentStarted {
		d.contentStarted = true
	}
	d.gateClosed = true
	d.pendingRunes = 0
	d.pendingTokens = 0
	d.deltaBuffer.Reset()
	d.stopTimerLocked()
	if d.currentCancel != nil {
		// Cancel, don't wait: the runner observes the closed gate and drops
		// its own result.
		d.currentCancel()
	}
	d.mu.Unlock()
}
|
||||
|
||||
// prepareFlushCall decides whether Flush should actually start a (forced,
// final) summary call, and builds it if so. It returns false when the gate is
// already closed, when nothing was ever buffered, or when a final summary was
// already emitted and no new reasoning arrived since.
func (d *ReasoningDigestor) prepareFlushCall(ctx context.Context) (reasoningDigestCall, bool) {
	d.mu.Lock()
	defer d.mu.Unlock()

	if d.closed || d.gateClosed || d.contentStarted {
		return reasoningDigestCall{}, false
	}

	// Drain single-flight: wait for a running summary call to finish first,
	// re-checking the gate after every wakeup since it may close meanwhile.
	d.stopTimerLocked()
	for d.inFlight {
		d.cond.Wait()
		if d.closed || d.gateClosed || d.contentStarted {
			return reasoningDigestCall{}, false
		}
	}

	if strings.TrimSpace(d.buffer.String()) == "" {
		return reasoningDigestCall{}, false
	}
	// Skip when a final summary is already out and nothing new accumulated,
	// to avoid duplicate final events.
	if d.finalEmitted && d.pendingRunes == 0 && d.pendingTokens == 0 {
		return reasoningDigestCall{}, false
	}
	return d.prepareSummaryLocked(ctx, true, true)
}
|
||||
|
||||
// prepareSummaryLocked builds the next summary call if one should start right
// now. force bypasses the watermark/interval throttling (used by Flush);
// final marks the call as the closing summary. Caller must hold d.mu.
func (d *ReasoningDigestor) prepareSummaryLocked(parent context.Context, force bool, final bool) (reasoningDigestCall, bool) {
	if d.closed || d.gateClosed || d.contentStarted || d.inFlight {
		return reasoningDigestCall{}, false
	}

	fullReasoning := d.buffer.String()
	if strings.TrimSpace(fullReasoning) == "" {
		return reasoningDigestCall{}, false
	}

	// 1. An automatic summary must satisfy both the new-delta watermark and
	//    the minimum time interval.
	// 2. If the new delta is too small, simply wait for further Appends
	//    instead of issuing a no-op request.
	// 3. If the interval has not elapsed yet, arm a single fallback timer
	//    rather than queueing multiple requests.
	if !force {
		if !d.reachedWatermarkLocked() {
			return reasoningDigestCall{}, false
		}
		wait := d.nextAllowedIntervalLocked()
		if wait > 0 {
			d.armTimerLocked(wait)
			return reasoningDigestCall{}, false
		}
	}

	callCtx, stop := d.newCallContext(parent)
	// Snapshot the inputs (cloned strings) so the async call is isolated from
	// further Appends.
	call := reasoningDigestCall{
		ctx:  callCtx,
		stop: stop,
		input: ReasoningSummaryInput{
			FullReasoning:   strings.Clone(fullReasoning),
			DeltaReasoning:  strings.Clone(d.deltaBuffer.String()),
			PreviousSummary: cloneThinkingSummaryExtra(d.latestSummary),
			CandidateSeq:    d.summarySeq + 1,
			Final:           final,
			DurationSeconds: d.durationSecondsLocked(),
		},
		final: final,
	}

	// Mark the call in flight and reset the pending-delta accounting; the
	// snapshot above already captured everything accumulated so far.
	d.stopTimerLocked()
	d.inFlight = true
	d.lastRequestAt = d.now()
	d.pendingRunes = 0
	d.pendingTokens = 0
	d.deltaBuffer.Reset()
	d.currentCancel = stop

	return call, true
}
|
||||
|
||||
// runSummary executes one prepared summary call, publishes the result through
// the sink, and chains at most one follow-up single-flight call when new
// reasoning arrived while this one was running.
func (d *ReasoningDigestor) runSummary(call reasoningDigestCall) error {
	if call.stop == nil {
		return nil
	}
	defer call.stop()

	summary, err := d.summaryFunc(call.ctx, call.input)
	if err != nil {
		// 1. A summary failure is not propagated into the main streaming
		//    path, so the reasoning display cannot be dragged down by the
		//    summarization capability.
		// 2. If new reasoning was appended during the failure, still attempt
		//    the next single-flight call; otherwise later Append/Flush calls
		//    act as the fallback.
		_, _, nextCall, shouldStart := d.finishSummary(call.final, nil)
		if shouldStart {
			go d.runSummary(nextCall)
		}
		return err
	}

	normalized := normalizeThinkingSummary(summary, call.input.Final, call.input.DurationSeconds)
	emittedSummary, sink, nextCall, shouldStart := d.finishSummary(call.final, &normalized)
	// The sink is invoked outside d.mu; finishSummary has already released
	// the lock and returned copies.
	if emittedSummary != nil && sink != nil {
		sink(*emittedSummary)
	}
	if shouldStart {
		go d.runSummary(nextCall)
	}
	return nil
}
|
||||
|
||||
// finishSummary clears the in-flight flag, publishes (or discards) the call's
// result under the gate rules, and decides whether a follow-up call should
// start. It returns the summary to emit (nil when discarded), the sink to
// emit it through, and the prepared next call, if any.
func (d *ReasoningDigestor) finishSummary(final bool, summary *StreamThinkingSummaryExtra) (*StreamThinkingSummaryExtra, ReasoningSummarySink, reasoningDigestCall, bool) {
	d.mu.Lock()
	defer d.mu.Unlock()

	d.inFlight = false
	d.currentCancel = nil
	// Wake Close/prepareFlushCall waiters blocked on the in-flight call.
	d.cond.Broadcast()

	var emittedSummary *StreamThinkingSummaryExtra
	var sink ReasoningSummarySink

	// 1. Check the content gate first; once the answer body has started, any
	//    late-arriving result must be dropped.
	// 2. Then fill in summary_seq/final/duration and cache LatestSummary for
	//    upper layers to read.
	// 3. If new reasoning accumulated while this request ran, start only the
	//    next single-flight summary — never queue several.
	if summary != nil && !d.closed && !d.gateClosed && !d.contentStarted {
		normalized := *summary
		d.summarySeq++
		normalized.SummarySeq = d.summarySeq
		normalized.Final = final
		if normalized.DurationSeconds <= 0 {
			normalized.DurationSeconds = d.durationSecondsLocked()
		}
		d.latestSummary = cloneThinkingSummaryExtra(&normalized)
		d.finalEmitted = final
		emittedSummary = cloneThinkingSummaryExtra(&normalized)
		sink = d.summarySink
	}

	if d.closed || d.gateClosed || d.contentStarted || final {
		return emittedSummary, sink, reasoningDigestCall{}, false
	}

	nextCall, shouldStart := d.prepareSummaryLocked(d.baseContext, false, false)
	return emittedSummary, sink, nextCall, shouldStart
}
|
||||
|
||||
// reachedWatermarkLocked reports whether the pending (not yet summarized)
// delta has hit either configured watermark. Caller must hold d.mu.
func (d *ReasoningDigestor) reachedWatermarkLocked() bool {
	return reachedReasoningWatermark(d.pendingRunes, d.pendingTokens, d.minNewRunes, d.minNewTokens)
}
|
||||
|
||||
func (d *ReasoningDigestor) nextAllowedIntervalLocked() time.Duration {
|
||||
if d.lastRequestAt.IsZero() {
|
||||
return 0
|
||||
}
|
||||
wait := d.minInterval - d.now().Sub(d.lastRequestAt)
|
||||
if wait < 0 {
|
||||
return 0
|
||||
}
|
||||
return wait
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) armTimerLocked(wait time.Duration) {
|
||||
if wait <= 0 || d.closed || d.gateClosed || d.contentStarted {
|
||||
return
|
||||
}
|
||||
if d.timer == nil {
|
||||
d.timer = time.AfterFunc(wait, d.onTimer)
|
||||
d.timerArmed = true
|
||||
return
|
||||
}
|
||||
if d.timerArmed {
|
||||
d.timer.Reset(wait)
|
||||
return
|
||||
}
|
||||
d.timer.Reset(wait)
|
||||
d.timerArmed = true
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) stopTimerLocked() {
|
||||
if d.timer == nil {
|
||||
return
|
||||
}
|
||||
if d.timer.Stop() {
|
||||
d.timerArmed = false
|
||||
return
|
||||
}
|
||||
d.timerArmed = false
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) onTimer() {
|
||||
if d == nil {
|
||||
return
|
||||
}
|
||||
|
||||
var call reasoningDigestCall
|
||||
var shouldStart bool
|
||||
|
||||
d.mu.Lock()
|
||||
d.timerArmed = false
|
||||
call, shouldStart = d.prepareSummaryLocked(d.baseContext, false, false)
|
||||
d.mu.Unlock()
|
||||
|
||||
if shouldStart {
|
||||
go d.runSummary(call)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) newCallContext(parent context.Context) (context.Context, context.CancelFunc) {
|
||||
if parent == nil {
|
||||
parent = d.baseContext
|
||||
}
|
||||
if parent == nil {
|
||||
parent = context.Background()
|
||||
}
|
||||
|
||||
baseCtx, baseCancel := context.WithCancel(parent)
|
||||
if d.summaryTimeout <= 0 {
|
||||
return baseCtx, baseCancel
|
||||
}
|
||||
|
||||
timeoutCtx, timeoutCancel := context.WithTimeout(baseCtx, d.summaryTimeout)
|
||||
return timeoutCtx, func() {
|
||||
timeoutCancel()
|
||||
baseCancel()
|
||||
}
|
||||
}
|
||||
|
||||
func (d *ReasoningDigestor) durationSecondsLocked() float64 {
|
||||
if d.startedAt.IsZero() {
|
||||
return 0
|
||||
}
|
||||
duration := d.now().Sub(d.startedAt)
|
||||
if duration <= 0 {
|
||||
return 0
|
||||
}
|
||||
return float64(duration.Milliseconds()) / 1000
|
||||
}
|
||||
|
||||
// reachedReasoningWatermark reports whether the pending reasoning delta is
// large enough to justify a new summary request. A non-positive threshold
// disables that dimension entirely.
func reachedReasoningWatermark(pendingRunes, pendingTokens, minRunes, minTokens int) bool {
	runesHit := minRunes > 0 && pendingRunes >= minRunes
	tokensHit := minTokens > 0 && pendingTokens >= minTokens
	return runesHit || tokensHit
}
|
||||
|
||||
// normalizeThinkingSummary trims the model output and stamps the final flag
// plus a fallback duration onto it before the summary is published.
func normalizeThinkingSummary(summary StreamThinkingSummaryExtra, final bool, durationSeconds float64) StreamThinkingSummaryExtra {
	summary.ShortSummary = strings.TrimSpace(summary.ShortSummary)
	summary.DetailSummary = strings.TrimSpace(summary.DetailSummary)

	// 1. The short summary is only a live-display fallback, so it may be
	//    backfilled from the long summary as a default.
	// 2. The reverse is forbidden: promoting the short summary into
	//    detail_summary would bypass the "short summaries are not persisted"
	//    product rule.
	// 3. If the model gave no detail_summary, the timeline layer skips
	//    persistence and keeps the summary for this SSE push only.
	if summary.ShortSummary == "" {
		summary.ShortSummary = summary.DetailSummary
	}
	summary.Final = final
	if summary.DurationSeconds <= 0 {
		summary.DurationSeconds = durationSeconds
	}
	return summary
}
|
||||
|
||||
// cloneThinkingSummaryExtra returns a copy of src, or nil for nil input.
// NOTE(review): the copy is shallow — if StreamThinkingSummaryExtra ever
// grows slice/map/pointer fields, those would be shared with src; confirm the
// struct stays value-only.
func cloneThinkingSummaryExtra(src *StreamThinkingSummaryExtra) *StreamThinkingSummaryExtra {
	if src == nil {
		return nil
	}
	clone := *src
	return &clone
}
|
||||
|
||||
// estimateReasoningTokens gives a rough token count for reasoning text: runs
// of ASCII letters/digits compress to roughly one token per four runes (at
// least one per run), every other non-space rune counts as one token, and
// whitespace only separates runs.
func estimateReasoningTokens(text string) int {
	trimmed := strings.TrimSpace(text)
	if trimmed == "" {
		return 0
	}

	tokens := 0
	runLen := 0 // length of the current ASCII letter/digit run
	flushRun := func() {
		if runLen > 0 {
			tokens += compactASCIITokens(runLen)
			runLen = 0
		}
	}
	for _, r := range trimmed {
		isASCIIWord := r <= unicode.MaxASCII && (unicode.IsLetter(r) || unicode.IsDigit(r))
		switch {
		case isASCIIWord:
			runLen++
		case unicode.IsSpace(r):
			flushRun()
		default:
			// Non-ASCII / punctuation: close the run, then count this rune
			// as one token of its own.
			flushRun()
			tokens++
		}
	}
	flushRun()
	return tokens
}

// compactASCIITokens converts a run of ASCII letters/digits into an estimated
// token count: ceil(runLen / 4), floored at one token for any non-empty run.
func compactASCIITokens(asciiRunes int) int {
	if asciiRunes <= 0 {
		return 0
	}
	estimated := (asciiRunes + 3) / 4
	if estimated < 1 {
		return 1
	}
	return estimated
}
|
||||
Reference in New Issue
Block a user