Version: 0.5.4.dev.260313

feat(agent): 重构随口记为模型控制码分流 + 单请求聚合规划,关闭非流式thinking并修复假成功,将随口记全流程从10s+缩短到5s左右,显著提升用户体验

路由层改为“模型控制码协议”分流(quick_note|chat),替换关键词/置信度猜测
路由命中 quick_note 时信任路由,graph 跳过二次意图判定(减少一次 LLM 调用)
新增单请求聚合规划:一次返回 title/deadline_at/priority_group/priority_reason/banter
快路径优先复用聚合结果;优先级缺失时本地兜底,避免再次触发优先级模型调用
最终回复优先使用聚合 banter,聚合路径缺失时使用固定文案,不再额外润色调用
非流式 Generate 全面显式关闭 thinking,并收紧 max_tokens/temperature(路由、JSON规划、banter)
保留并强化写库成功门槛:task_id > 0 才允许成功回包,修复“回复成功但未落库”风险
增加/更新测试:控制码解析、nonce 校验、标题提取、banter 复用与无效 task_id 防假成功
保持 OpenAI 兼容 SSE 格式与现有流式聊天链路不变
This commit is contained in:
Losita
2026-03-13 18:17:57 +08:00
parent e2d9347731
commit 0b7d1b999c
8 changed files with 629 additions and 75 deletions

View File

@@ -197,62 +197,38 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
}
}
// 3) 如果命中“任务安排关键词”,开启随口记阶段推送(伪装成 reasoning chunk
if shouldEmitQuickNoteProgress(userMessage) {
go func() {
defer close(outChan)
// 3) 统一异步分流:
// - 先走“模型控制码路由”决定 quick_note / chat
// - 路由命中 quick_note 时推阶段状态并执行 graph
// - 路由命中 chat 时直接普通流式聊天。
go func() {
defer close(outChan)
progress := newQuickNoteProgressEmitter(outChan, resolvedModelName, true)
progress.Emit("request.accepted", "检测到任务安排请求,开始执行随口记流程。")
quickHandled, quickState, quickErr := s.tryHandleQuickNoteWithGraph(
ctx,
selectedModel,
userMessage,
userID,
chatID,
traceID,
progress.Emit,
)
if quickErr != nil {
log.Printf("随口记 graph 执行失败,回退普通聊天 trace_id=%s chat_id=%s err=%v", traceID, chatID, quickErr)
}
if quickHandled {
progress.Emit("quick_note.reply.polishing", "正在结合你的话题润色回复。")
quickReply := buildQuickNoteFinalReply(ctx, selectedModel, userMessage, quickState)
if emitErr := emitSingleAssistantCompletion(outChan, resolvedModelName, quickReply); emitErr != nil {
pushErrNonBlocking(errChan, emitErr)
return
}
s.persistChatAfterReply(ctx, userID, chatID, userMessage, quickReply, errChan)
return
}
progress.Emit("quick_note.fallback", "当前输入不是随口记请求,切换到普通对话。")
routing := s.decideQuickNoteRouting(ctx, selectedModel, userMessage)
if !routing.EnterQuickNote {
s.runNormalChatFlow(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
}()
return outChan, errChan
}
return
}
// 4) 无阶段推送模式:保持原逻辑,先尝试随口记,不命中再走普通聊天。
quickHandled, quickState, quickErr := s.tryHandleQuickNoteWithGraph(
ctx,
selectedModel,
userMessage,
userID,
chatID,
traceID,
nil,
)
if quickErr != nil {
log.Printf("随口记 graph 执行失败,回退普通聊天 trace_id=%s chat_id=%s err=%v", traceID, chatID, quickErr)
}
if quickHandled {
go func() {
defer close(outChan)
progress := newQuickNoteProgressEmitter(outChan, resolvedModelName, true)
progress.Emit("request.accepted", routing.Detail)
quickHandled, quickState, quickErr := s.tryHandleQuickNoteWithGraph(
ctx,
selectedModel,
userMessage,
userID,
chatID,
traceID,
routing.TrustRoute,
progress.Emit,
)
if quickErr != nil {
log.Printf("随口记 graph 执行失败,回退普通聊天 trace_id=%s chat_id=%s err=%v", traceID, chatID, quickErr)
}
if quickHandled {
progress.Emit("quick_note.reply.polishing", "正在结合你的话题润色回复。")
quickReply := buildQuickNoteFinalReply(ctx, selectedModel, userMessage, quickState)
if emitErr := emitSingleAssistantCompletion(outChan, resolvedModelName, quickReply); emitErr != nil {
pushErrNonBlocking(errChan, emitErr)
@@ -260,13 +236,10 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
}
s.persistChatAfterReply(ctx, userID, chatID, userMessage, quickReply, errChan)
}()
return outChan, errChan
}
return
}
// 5) 普通流式聊天。
go func() {
defer close(outChan)
progress.Emit("quick_note.fallback", "当前输入不是随口记请求,切换到普通对话。")
s.runNormalChatFlow(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, userID, chatID, traceID, requestStart, outChan, errChan)
}()

View File

@@ -4,16 +4,65 @@ import (
"context"
"fmt"
"log"
"regexp"
"strings"
"time"
"github.com/LoveLosita/smartflow/backend/agent"
"github.com/LoveLosita/smartflow/backend/model"
"github.com/cloudwego/eino-ext/components/model/ark"
einoModel "github.com/cloudwego/eino/components/model"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
)
const (
// quickNoteRouteControlTimeout is the extra timeout applied to the
// "model control-code routing" step.
// Notes:
// 1) 0 means "no additional sub-timeout": the routing call simply follows the parent request context;
// 2) this avoids the routing step repeatedly failing with "context deadline exceeded" because of an overly short sub-timeout;
// 3) if hard protection is needed later, change it to a value > 0 and manage it through configuration.
quickNoteRouteControlTimeout = 0 * time.Second
)
var (
	// quickNoteRouteHeaderRegex extracts the control header returned by the routing model:
	//
	//	<SMARTFLOW_ROUTE nonce="xxx" action="quick_note|chat"></SMARTFLOW_ROUTE>
	//
	// Capture group 1 is the nonce, capture group 2 is the action.
	// Matching is case-insensitive, quotes are optional, and extra attributes are
	// tolerated, but nonce must appear before action inside the tag.
	//
	// The action value must end right after "quick_note" or "chat": it may only be
	// followed by an optional closing quote and then either whitespace (more
	// attributes), an optional "/", or the closing ">". Without this anchor a value
	// such as "quick_note|chat" (the template echoed back verbatim) or "chatty"
	// would be accepted as a valid action because the trailing [^>]* swallowed the rest.
	quickNoteRouteHeaderRegex = regexp.MustCompile(`(?is)<\s*smartflow_route\b[^>]*\bnonce\s*=\s*["']?([a-zA-Z0-9\-]+)["']?[^>]*\baction\s*=\s*["']?(quick_note|chat)["']?(?:\s[^>]*)?/?>`)
	// quickNoteRouteReasonRegex extracts the optional reason block:
	//
	//	<SMARTFLOW_REASON>...</SMARTFLOW_REASON>
	//
	// Capture group 1 is the (untrimmed) text between the tags.
	quickNoteRouteReasonRegex = regexp.MustCompile(`(?is)<\s*smartflow_reason\s*>(.*?)<\s*/\s*smartflow_reason\s*>`)
)
// quickNoteRouteAction is the action value carried by the routing control code.
type quickNoteRouteAction string
const (
// quickNoteRouteActionChat routes the request to the normal chat flow.
quickNoteRouteActionChat quickNoteRouteAction = "chat"
// quickNoteRouteActionQuickNote routes the request to the quick-note graph.
quickNoteRouteActionQuickNote quickNoteRouteAction = "quick_note"
)
// quickNoteRouteControlDecision is the structured result of "model control-code routing".
// It is not exposed to the frontend directly; the server only uses it to pick the next step:
// - action=quick_note -> enter the quick-note graph;
// - action=chat -> enter the normal chat flow.
type quickNoteRouteControlDecision struct {
// Action is the validated route action parsed from the control header.
Action quickNoteRouteAction
// Reason is the trimmed content of the optional reason block; empty when absent.
Reason string
// Raw is the trimmed model output the decision was parsed from.
Raw string
}
// quickNoteRoutingDecision is the final decision on whether to enter the quick-note graph.
// Fields:
// - EnterQuickNote: whether to enter the quick-note graph;
// - TrustRoute: whether to trust the upstream control code and skip the second intent check inside the graph;
// - Detail: stage-status text, used for frontend/debug observability.
type quickNoteRoutingDecision struct {
EnterQuickNote bool
TrustRoute bool
Detail string
}
// quickNoteProgressEmitter 负责把“链路阶段状态”伪装成 OpenAI 兼容的 reasoning_content chunk。
// 设计目标:
// 1) 不改现有 OpenAI 兼容协议外壳;
@@ -75,6 +124,8 @@ func (e *quickNoteProgressEmitter) Emit(stage, detail string) {
// - handled=true:本次请求已在随口记链路处理完成(成功/失败都会返回文案);
// - handled=false:不是随口记意图,调用方应回落普通聊天链路;
// - state:用于拼接最终“一次性正文回复”。
// 参数说明:
// - trustRoute=true:信任上游控制码,graph 跳过二次意图判定,直接进入时间校验/优先级/写库流程。
func (s *AgentService) tryHandleQuickNoteWithGraph(
ctx context.Context,
selectedModel *ark.ChatModel,
@@ -82,6 +133,7 @@ func (s *AgentService) tryHandleQuickNoteWithGraph(
userID int,
chatID string,
traceID string,
trustRoute bool,
emitStage func(stage, detail string),
) (handled bool, state *agent.QuickNoteState, err error) {
if s.taskRepo == nil || selectedModel == nil {
@@ -116,7 +168,8 @@ func (s *AgentService) tryHandleQuickNoteWithGraph(
}, nil
},
},
EmitStage: emitStage,
SkipIntentVerification: trustRoute,
EmitStage: emitStage,
})
if runErr != nil {
return false, nil, runErr
@@ -166,7 +219,8 @@ func buildQuickNoteFinalReply(ctx context.Context, selectedModel *ark.ChatModel,
return "我这次没成功记上,别急,再发我一次我马上补上。"
}
if state.Persisted {
// 仅当“确实拿到了有效 task_id”时才走成功文案,避免出现“回复成功但库里没数据”的错觉。
if state.Persisted && state.PersistedTaskID > 0 {
title := strings.TrimSpace(state.ExtractedTitle)
if title == "" {
title = "这条任务"
@@ -184,6 +238,16 @@ func buildQuickNoteFinalReply(ctx context.Context, selectedModel *ark.ChatModel,
factLine := fmt.Sprintf("好,给你安排上了:%s%s%s。", title, priorityText, deadlineText)
// 优先复用“聚合规划阶段”产出的跟进句,避免再触发一次润色模型调用。
if strings.TrimSpace(state.ExtractedBanter) != "" {
return factLine + " " + strings.TrimSpace(state.ExtractedBanter)
}
if state.PlannedBySingleCall {
// 快路径兜底:单请求聚合已走过一次模型调用,若未产出 banter 则直接使用固定文案,
// 避免再发起额外模型请求拉高总时延。
return factLine + " 已帮你稳稳记下,放心推进。"
}
banter, err := generateQuickNoteBanter(ctx, selectedModel, userMessage, title, priorityText, deadlineText)
if err != nil {
return factLine + " 这下可以先安心推进,不用等 ddl 来敲门了。"
@@ -239,7 +303,11 @@ func generateQuickNoteBanter(
schema.UserMessage(prompt),
}
resp, err := selectedModel.Generate(ctx, messages)
resp, err := selectedModel.Generate(ctx, messages,
ark.WithThinking(&arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}),
einoModel.WithTemperature(0.7),
einoModel.WithMaxTokens(72),
)
if err != nil {
return "", err
}
@@ -260,20 +328,163 @@ func generateQuickNoteBanter(
return text, nil
}
// shouldEmitQuickNoteProgress 用于判断是否应在“等待阶段”推送状态块
// 规则偏保守:只要出现明显“记任务/提醒”语义,就开启阶段推送
func shouldEmitQuickNoteProgress(userMessage string) bool {
text := strings.TrimSpace(userMessage)
if text == "" {
return false
}
keywords := []string{"记一下", "帮我记", "提醒", "任务", "待办", "日程", "安排", "截止", "ddl"}
for _, kw := range keywords {
if strings.Contains(text, kw) {
return true
// decideQuickNoteRouting decides whether the current input enters the quick-note graph.
// New strategy: routing is driven by a "model control code" and no longer relies on
// keywords or local guessing.
//
// Flow:
// 1) call the routing model to obtain a control code (quick_note / chat);
// 2) when the control code can be parsed, route according to the model's verdict;
// 3) when the routing call fails (timeout, parse failure, ...), enter the quick-note graph
// anyway without trusting the route, so that tasks are not missed.
//
// Return value:
// - EnterQuickNote=true: enter the quick-note graph;
// - TrustRoute=true: skip the second intent check inside the graph;
// - Detail: stage-push text explaining to the frontend why this branch was taken.
func (s *AgentService) decideQuickNoteRouting(ctx context.Context, selectedModel *ark.ChatModel, userMessage string) quickNoteRoutingDecision {
decision, err := s.routeByModelControlTag(ctx, selectedModel, userMessage)
if err != nil {
// Log the remaining parent deadline (when there is one) next to the configured
// route timeout so that timeout-related failures can be diagnosed.
if deadline, ok := ctx.Deadline(); ok {
log.Printf("quick note 路由控制码失败,进入 graph 兜底: err=%v parent_deadline_in_ms=%d route_timeout_ms=%d",
err,
time.Until(deadline).Milliseconds(),
quickNoteRouteControlTimeout.Milliseconds(),
)
} else {
log.Printf("quick note 路由控制码失败,进入 graph 兜底: err=%v parent_deadline=none route_timeout_ms=%d",
err,
quickNoteRouteControlTimeout.Milliseconds(),
)
}
// Routing unavailable: still enter the graph, but do not trust the route.
return quickNoteRoutingDecision{
EnterQuickNote: true,
TrustRoute: false,
Detail: "路由判定暂不可用,已进入任务识别兜底流程。",
}
}
// NOTE(review): the next statement looks like residue of the removed
// shouldEmitQuickNoteProgress body in this diff view — confirm against the real file.
return false
switch decision.Action {
case quickNoteRouteActionQuickNote:
// Prefer the model-provided reason as the stage text; fall back to a fixed sentence.
reason := strings.TrimSpace(decision.Reason)
if reason == "" {
reason = "模型识别到任务安排请求,准备执行随口记。"
}
return quickNoteRoutingDecision{
EnterQuickNote: true,
TrustRoute: true,
Detail: reason,
}
case quickNoteRouteActionChat:
return quickNoteRoutingDecision{
EnterQuickNote: false,
TrustRoute: false,
Detail: "",
}
default:
// Unknown action: enter the graph without trusting the route.
log.Printf("quick note 未知路由动作,进入 graph 兜底: action=%s raw=%s", decision.Action, decision.Raw)
return quickNoteRoutingDecision{
EnterQuickNote: true,
TrustRoute: false,
Detail: "路由结果异常,已进入任务识别兜底流程。",
}
}
}
// routeByModelControlTag asks the model for a routing "control code" and parses it.
// The output protocol is constrained by agent.QuickNoteRouteControlPrompt; the fields
// this function relies on are:
//   - nonce: a per-request random string that the reply must carry back, so stale or
//     echoed content is not accepted as this request's route;
//   - action: quick_note / chat.
//
// The call is non-streaming with thinking disabled, temperature 0 and at most 80 tokens.
// It returns an error when the model is nil, the call fails, the reply is empty, or the
// control code cannot be parsed.
func (s *AgentService) routeByModelControlTag(ctx context.Context, selectedModel *ark.ChatModel, userMessage string) (*quickNoteRouteControlDecision, error) {
	if selectedModel == nil {
		return nil, fmt.Errorf("model is nil")
	}

	// Fresh nonce for this request: a UUID with the dashes removed, lower-cased.
	nonce := strings.ToLower(strings.ReplaceAll(uuid.NewString(), "-", ""))

	routeCtx, cancel := deriveRouteControlContext(ctx, quickNoteRouteControlTimeout)
	defer cancel()

	messages := []*schema.Message{
		schema.SystemMessage(agent.QuickNoteRouteControlPrompt),
		schema.UserMessage(fmt.Sprintf(
			"nonce=%s\n当前时间=%s\n用户输入=%s",
			nonce,
			time.Now().In(time.Local).Format("2006-01-02 15:04"),
			strings.TrimSpace(userMessage),
		)),
	}

	resp, err := selectedModel.Generate(
		routeCtx,
		messages,
		ark.WithThinking(&arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}),
		einoModel.WithTemperature(0),
		einoModel.WithMaxTokens(80),
	)
	switch {
	case err != nil:
		return nil, err
	case resp == nil:
		return nil, fmt.Errorf("empty route response")
	}

	raw := strings.TrimSpace(resp.Content)
	if raw == "" {
		return nil, fmt.Errorf("empty route content")
	}

	decision, parseErr := parseQuickNoteRouteControlTag(raw, nonce)
	if parseErr != nil {
		return nil, parseErr
	}
	return decision, nil
}
// deriveRouteControlContext builds the child context used by the control-code routing call.
//
// Behavior:
//   - timeout <= 0: no extra limit is added; the child is a cancel-only context that
//     follows the parent;
//   - timeout > 0 and the parent has no deadline, or more than timeout remains on it:
//     the child gets its own timeout, so the routing step cannot wait indefinitely;
//   - timeout > 0 and the parent's remaining time is already within timeout: the parent
//     deadline is kept as-is (cancel-only child) rather than being shortened further.
//
// The returned CancelFunc must always be called by the caller.
func deriveRouteControlContext(parent context.Context, timeout time.Duration) (context.Context, context.CancelFunc) {
	if timeout > 0 {
		parentDeadline, hasDeadline := parent.Deadline()
		if !hasDeadline || time.Until(parentDeadline) > timeout {
			return context.WithTimeout(parent, timeout)
		}
	}
	return context.WithCancel(parent)
}
// parseQuickNoteRouteControlTag parses the control code produced by the routing model.
//
// Tolerance comes from quickNoteRouteHeaderRegex: letter case, optional quotes and extra
// attributes inside the tag are accepted.
// NOTE(review): the previous comment said attribute order may vary, but the header
// pattern as written expects nonce to appear before action — confirm which is intended.
//
// Strict parts:
//   - the nonce must equal expectedNonce (compared after trimming and lower-casing);
//   - the action must be quick_note or chat.
//
// The optional reason block is trimmed and stored in Reason; Raw holds the trimmed input.
// On any failure the returned decision is nil.
func parseQuickNoteRouteControlTag(raw, expectedNonce string) (*quickNoteRouteControlDecision, error) {
	body := strings.TrimSpace(raw)
	if body == "" {
		return nil, fmt.Errorf("route content is empty")
	}

	groups := quickNoteRouteHeaderRegex.FindStringSubmatch(body)
	if len(groups) < 3 {
		return nil, fmt.Errorf("route header not found: %s", body)
	}

	gotNonce := strings.ToLower(strings.TrimSpace(groups[1]))
	wantNonce := strings.ToLower(strings.TrimSpace(expectedNonce))
	if gotNonce != wantNonce {
		return nil, fmt.Errorf("route nonce mismatch")
	}

	actionText := strings.ToLower(strings.TrimSpace(groups[2]))
	action := quickNoteRouteAction(actionText)
	switch action {
	case quickNoteRouteActionQuickNote, quickNoteRouteActionChat:
		// Recognized action; continue.
	default:
		return nil, fmt.Errorf("invalid route action: %s", actionText)
	}

	decision := &quickNoteRouteControlDecision{
		Action: action,
		Raw:    body,
	}
	if reasonGroups := quickNoteRouteReasonRegex.FindStringSubmatch(body); len(reasonGroups) >= 2 {
		decision.Reason = strings.TrimSpace(reasonGroups[1])
	}
	return decision, nil
}
// persistChatAfterReply 在“随口记 graph”返回后复用当前项目的后置持久化策略

View File

@@ -0,0 +1,72 @@
package service
import (
"strings"
"testing"
"github.com/LoveLosita/smartflow/backend/agent"
)
// TestParseQuickNoteRouteControlTag_QuickNote verifies that a control code with
// action=quick_note and a matching nonce is parsed into a quick_note decision,
// and that the accompanying reason block is extracted as a non-empty Reason.
func TestParseQuickNoteRouteControlTag_QuickNote(t *testing.T) {
	const nonce = "abc123nonce"
	// The second line of the raw literal intentionally starts at column 0.
	raw := `<SMARTFLOW_ROUTE nonce="abc123nonce" action="quick_note"></SMARTFLOW_ROUTE>
<SMARTFLOW_REASON>用户明确在请求未来提醒</SMARTFLOW_REASON>`

	decision, err := parseQuickNoteRouteControlTag(raw, nonce)
	switch {
	case err != nil:
		t.Fatalf("解析失败: %v", err)
	case decision == nil:
		t.Fatalf("decision 不应为空")
	}

	if got := decision.Action; got != quickNoteRouteActionQuickNote {
		t.Fatalf("action 解析错误,期望=%s 实际=%s", quickNoteRouteActionQuickNote, got)
	}
	if strings.TrimSpace(decision.Reason) == "" {
		t.Fatalf("reason 不应为空")
	}
}
// TestParseQuickNoteRouteControlTag_NonceMismatch verifies that a control code whose
// nonce differs from the expected one is rejected with an error, so a control code
// that does not belong to this request is never treated as a valid route.
func TestParseQuickNoteRouteControlTag_NonceMismatch(t *testing.T) {
	const raw = `<SMARTFLOW_ROUTE nonce="wrongnonce" action="chat"></SMARTFLOW_ROUTE>`

	_, err := parseQuickNoteRouteControlTag(raw, "expectednonce")
	if err != nil {
		return
	}
	t.Fatalf("期望 nonce 不匹配时报错,但未报错")
}
// TestBuildQuickNoteFinalReply_NoFalseSuccessWithoutTaskID verifies that even when
// state.Persisted is (wrongly) true, an invalid task id (0) never produces a
// "scheduled successfully" reply.
func TestBuildQuickNoteFinalReply_NoFalseSuccessWithoutTaskID(t *testing.T) {
	state := &agent.QuickNoteState{
		ExtractedTitle:  "去下馆子",
		Persisted:       true,
		PersistedTaskID: 0,
	}

	reply := buildQuickNoteFinalReply(nil, nil, "我今天晚上6点要去下馆子记得喊我", state)

	// Neither success marker may appear in the reply.
	for _, successMarker := range []string{"给你安排上了", "已安排"} {
		if strings.Contains(reply, successMarker) {
			t.Fatalf("不应返回成功文案,实际回复=%s", reply)
		}
	}
}
// TestBuildQuickNoteFinalReply_UseExtractedBanter verifies that when the aggregated
// planning stage already produced a banter sentence, the final reply reuses it
// directly instead of calling the polishing model again.
func TestBuildQuickNoteFinalReply_UseExtractedBanter(t *testing.T) {
	const banter = "取件路上注意保暖,别被风吹懵了。"
	state := &agent.QuickNoteState{
		ExtractedTitle:    "明天去取快递",
		ExtractedPriority: 2,
		ExtractedBanter:   banter,
		Persisted:         true,
		PersistedTaskID:   12,
	}

	reply := buildQuickNoteFinalReply(nil, nil, "明天上午12点我要去取快递到时候记得q我", state)

	if strings.Contains(reply, "取件路上注意保暖") {
		return
	}
	t.Fatalf("期望复用 ExtractedBanter实际回复=%s", reply)
}