Version: 0.4.6.dev.260307

feat: 🎯 新增 Token 估算与裁剪工具

* 在 `backend/pkg/token_budget.go` 中新增 Token 估算与裁剪工具

  * 最大上下文 Token 数量设置为 224000,预留冗余 28000
  * 从最旧消息开始裁剪,直到历史 Token 数量低于预算
  * 根据裁剪后的历史消息数量动态计算 Redis 会话窗口大小

refactor: ♻️ 接入 Token 裁剪至 Service 主流程

* 在 `backend/service/agent.go` 中接入 Token 裁剪逻辑

  * 先从缓存获取历史消息(缓存未命中则查询数据库)
  * 按 Token 预算裁剪历史消息,裁剪后再喂模型
  * 根据裁剪结果动态调整 Redis 会话窗口

refactor: ♻️ 改造 Redis 历史队列为会话级动态窗口

* 在 `backend/dao/agent-cache.go` 中新增 `SetSessionWindowSize` 与 `EnforceHistoryWindow`
* `PushMessage` 和 `BackfillHistory` 方法使用会话动态窗口,而非固定 20 条历史消息
* 默认窗口大小提升至 128,但会被会话动态窗口值覆盖
This commit is contained in:
LoveLosita
2026-03-07 16:37:07 +08:00
parent 3f95d23376
commit 4906f814fd
6 changed files with 276 additions and 49 deletions

View File

@@ -9,6 +9,7 @@ import (
"github.com/LoveLosita/smartflow/backend/conv"
"github.com/LoveLosita/smartflow/backend/dao"
"github.com/LoveLosita/smartflow/backend/inits"
"github.com/LoveLosita/smartflow/backend/pkg"
"github.com/cloudwego/eino-ext/components/model/ark"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
@@ -82,7 +83,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
}
}
// 4) 构建历史上下文
// 4) 组装历史上下文(先读缓存,缓存未命中再读数据库)
chatHistory, err := s.agentCache.GetHistory(ctx, chatID)
if err != nil {
errChan <- err
@@ -90,8 +91,11 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
close(errChan)
return outChan, errChan
}
cacheMiss := false
if chatHistory == nil {
histories, err := s.repo.GetUserChatHistories(ctx, userID, 20, chatID)
cacheMiss = true
histories, err := s.repo.GetUserChatHistories(ctx, userID, pkg.HistoryFetchLimitByModel(resolvedModelName), chatID)
if err != nil {
errChan <- err
close(outChan)
@@ -99,7 +103,30 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
return outChan, errChan
}
chatHistory = conv.ToEinoMessages(histories)
if err = s.agentCache.BackfillHistory(ctx, chatID, chatHistory); err != nil {
}
// 5) 按 token 预算裁剪历史:从最旧消息开始持续弹出,直到满足预算
historyBudget := pkg.HistoryTokenBudgetByModel(resolvedModelName, agent.SystemPrompt, userMessage)
trimmedHistory, totalHistoryTokens, keptHistoryTokens, droppedCount := pkg.TrimHistoryByTokenBudget(chatHistory, historyBudget)
chatHistory = trimmedHistory
// 6) 根据最新裁剪结果动态调整 Redis 会话窗口
targetWindow := pkg.CalcSessionWindowSize(len(chatHistory))
if err := s.agentCache.SetSessionWindowSize(ctx, chatID, targetWindow); err != nil {
log.Printf("failed to set history window for %s: %v", chatID, err)
}
if err := s.agentCache.EnforceHistoryWindow(ctx, chatID); err != nil {
log.Printf("failed to enforce history window for %s: %v", chatID, err)
}
if droppedCount > 0 {
log.Printf("agent history trimmed: chat=%s total_tokens=%d kept_tokens=%d dropped=%d budget=%d target_window=%d",
chatID, totalHistoryTokens, keptHistoryTokens, droppedCount, historyBudget, targetWindow)
}
// 缓存未命中时,把“裁剪后的历史”回填进缓存
if cacheMiss {
if err := s.agentCache.BackfillHistory(ctx, chatID, chatHistory); err != nil {
errChan <- err
close(outChan)
close(errChan)
@@ -107,7 +134,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
}
}
// 5) 异步落用户消息
// 7) 异步落用户消息(先写缓存再写库)
go func() {
bg := context.Background()
_ = s.agentCache.PushMessage(bg, chatID, &schema.Message{
@@ -117,7 +144,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
_ = s.repo.SaveChatHistory(bg, userID, chatID, "user", userMessage)
}()
// 6) 流式输出模型回复
// 8) 启动流式聊天
go func() {
defer close(outChan)
@@ -127,7 +154,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
return
}
// 7) 异步落助手消息
// 9) 异步落助手回复
go func() {
bg := context.Background()
_ = s.agentCache.PushMessage(bg, chatID, &schema.Message{