Files
smartmate/backend/services/agent/shared/token_budget.go
Losita 3b6fca44a6 Version: 0.9.77.dev.260505
后端:
1.阶段 6 CP4/CP5 目录收口与共享边界纯化
- 将 backend 根目录收口为 services、client、gateway、cmd、shared 五个一级目录
- 收拢 bootstrap、inits、infra/kafka、infra/outbox、conv、respond、pkg、middleware,移除根目录旧实现与空目录
- 将 utils 下沉到 services/userauth/internal/auth,将 logic 下沉到 services/schedule/core/planning
- 将迁移期 runtime 桥接实现统一收拢到 services/runtime/{conv,dao,eventsvc,model},删除 shared/legacy 与未再被 import 的旧 service 实现
- 将 gateway/shared/respond 收口为 HTTP/Gin 错误写回适配,shared/respond 仅保留共享错误语义与状态映射
- 将 HTTP IdempotencyMiddleware 与 RateLimitMiddleware 收口到 gateway/middleware
- 将 GormCachePlugin 下沉到 shared/infra/gormcache,将共享 RateLimiter 下沉到 shared/infra/ratelimit,将 agent token budget 下沉到 services/agent/shared
- 删除 InitEino 兼容壳,收缩 cmd/internal/coreinit 仅保留旧组合壳残留域初始化语义
- 更新微服务迁移计划与桌面 checklist,补齐 CP4/CP5 当前切流点、目录终态与验证结果
- 完成 go test ./...、git diff --check 与最终真实 smoke;health、register/login、task/create+get、schedule/today、task-class/list、memory/items、agent chat/meta/timeline/context-stats 全部 200,SSE 合并结果为 CP5_OK 且 [DONE] 只有 1 个
2026-05-05 23:25:07 +08:00

210 lines
6.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package agentshared
import (
"math"
"strings"
"unicode"
"github.com/cloudwego/eino/schema"
)
// Context-window sizing for the worker model.
const (
	// WorkerMaxInputTokens is the maximum input context of the worker model
	// (value supplied by the user).
	WorkerMaxInputTokens = 224_000
	// ContextReserveTokens is the slack reserved for model output and
	// protocol overhead.
	ContextReserveTokens = 28_000
	// DefaultHistoryFetchLimit caps how many history messages are pulled from
	// the database on a cache miss.
	DefaultHistoryFetchLimit = 1_200
)

// Lower bound, upper bound and buffer of the Redis session window.
const (
	SessionWindowMin    = 32
	SessionWindowMax    = 4_096
	SessionWindowBuffer = 2
)

// Execute context compaction budget.
const (
	// ExecuteTokenBudget is the upper bound on total prompt tokens in the
	// execute stage.
	ExecuteTokenBudget = 80_000
	// ExecuteReserveTokens covers the fixed cost of msg0 + msg3 plus a safety
	// margin.
	ExecuteReserveTokens = 8_000

	// Stage-level names share the execute-stage values.
	StageTokenBudget   = ExecuteTokenBudget
	StageReserveTokens = ExecuteReserveTokens
)
// MaxContextTokensByModel returns the maximum context size, in tokens, for the
// given model name. The name is matched case-insensitively with surrounding
// whitespace ignored.
func MaxContextTokensByModel(modelName string) int {
	normalized := strings.TrimSpace(modelName)
	normalized = strings.ToLower(normalized)
	if normalized == "worker" || normalized == "strategist" {
		return WorkerMaxInputTokens
	}
	// Unrecognized models fall back to the worker limit.
	return WorkerMaxInputTokens
}
// HistoryFetchLimitByModel returns how many history messages to fetch when the
// cache misses. The model name is currently ignored; every model gets
// DefaultHistoryFetchLimit.
func HistoryFetchLimitByModel(_ string) int {
	return DefaultHistoryFetchLimit
}
// HistoryTokenBudgetByModel 计算“历史上下文”可使用的 token 预算。
func HistoryTokenBudgetByModel(modelName, systemPrompt, userInput string) int {
maxTokens := MaxContextTokensByModel(modelName)
baseTokens := EstimateTextTokens(systemPrompt) + EstimateTextTokens(userInput) + 64
budget := maxTokens - ContextReserveTokens - baseTokens
if budget < 0 {
return 0
}
return budget
}
// EstimateTextTokens returns a rough token estimate for text.
//
// Whitespace is skipped. The remaining runes are weighted as follows:
//   - CJK runes: about 1 token each
//   - ASCII runes: about 4 runes per token
//   - all other runes: about 2 runes per token
//
// Empty or whitespace-only text yields 0.
func EstimateTextTokens(text string) int {
	if len(strings.TrimSpace(text)) == 0 {
		return 0
	}

	var wide, narrow, misc int
	for _, ch := range text {
		if unicode.IsSpace(ch) {
			continue
		}
		if ch <= unicode.MaxASCII {
			narrow++
		} else if isCJK(ch) {
			wide++
		} else {
			misc++
		}
	}

	// Partial groups round up, so a lone ASCII rune still costs one token.
	narrowTokens := int(math.Ceil(float64(narrow) / 4.0))
	miscTokens := int(math.Ceil(float64(misc) / 2.0))
	if estimate := wide + narrowTokens + miscTokens; estimate > 0 {
		return estimate
	}
	// Defensive floor: non-blank text never reports zero tokens.
	return 1
}
// EstimateMessageTokens estimates the tokens of a single message: a fixed
// per-message protocol overhead plus the estimated tokens of its Content and
// ReasoningContent. A nil message costs 0.
func EstimateMessageTokens(msg *schema.Message) int {
	if msg == nil {
		return 0
	}

	const perMessageOverhead = 6

	cost := perMessageOverhead
	cost += EstimateTextTokens(msg.Content)
	cost += EstimateTextTokens(msg.ReasoningContent)
	return cost
}
// EstimateHistoryTokens sums the estimated tokens of every message in history.
func EstimateHistoryTokens(history []*schema.Message) int {
	var sum int
	for i := 0; i < len(history); i++ {
		sum += EstimateMessageTokens(history[i])
	}
	return sum
}
// TrimHistoryByTokenBudget drops messages from the oldest end of history until
// the estimated token total no longer exceeds historyBudget.
//
// It returns, in order: the trimmed history, the token total before trimming,
// the token total after trimming, and the number of messages dropped. A
// non-positive budget drops everything. When trimming happens, the returned
// slice is a suffix of the input slice.
func TrimHistoryByTokenBudget(history []*schema.Message, historyBudget int) ([]*schema.Message, int, int, int) {
	count := len(history)
	if count == 0 {
		return history, 0, 0, 0
	}

	before := EstimateHistoryTokens(history)
	switch {
	case historyBudget <= 0:
		return []*schema.Message{}, before, 0, count
	case before <= historyBudget:
		return history, before, before, 0
	}

	// Peel messages off the front, subtracting each one's cost as it goes.
	after, dropped := before, 0
	for dropped < count && after > historyBudget {
		after -= EstimateMessageTokens(history[dropped])
		dropped++
	}
	return history[dropped:], before, after, dropped
}
// CalcSessionWindowSize derives the Redis queue window size from the number of
// messages left after trimming: that count plus SessionWindowBuffer, clamped to
// the range [SessionWindowMin, SessionWindowMax].
func CalcSessionWindowSize(trimmedHistoryLen int) int {
	window := trimmedHistoryLen + SessionWindowBuffer
	if window < SessionWindowMin {
		window = SessionWindowMin
	}
	if window <= SessionWindowMax {
		return window
	}
	return SessionWindowMax
}
// isCJK reports whether r belongs to the Han, Hiragana, Katakana or Hangul
// script.
func isCJK(r rune) bool {
	return unicode.In(r, unicode.Han, unicode.Hiragana, unicode.Katakana, unicode.Hangul)
}
// StageTokenBreakdown records how tokens are distributed across the four
// stage messages.
type StageTokenBreakdown struct {
	// Msg0 is the estimated token count of msg0.
	Msg0 int `json:"msg0"`
	// Msg1 is the estimated token count of msg1.
	Msg1 int `json:"msg1"`
	// Msg2 is the estimated token count of msg2.
	Msg2 int `json:"msg2"`
	// Msg3 is the estimated token count of msg3.
	Msg3 int `json:"msg3"`
	// Total is the sum of Msg0..Msg3 when populated by
	// EstimateStageMessagesTokens.
	Total int `json:"total"`
	// Budget is the token budget the total is compared against;
	// EstimateStageMessagesTokens sets it to StageTokenBudget.
	Budget int `json:"budget"`
}
// ExecuteTokenBreakdown is kept as a backward-compatible alias of
// StageTokenBreakdown so existing call sites do not need to change.
type ExecuteTokenBreakdown = StageTokenBreakdown
// EstimateStageMessagesTokens estimates the token count of each of the four
// stage messages and returns the breakdown, with Total set to their sum and
// Budget set to StageTokenBudget.
func EstimateStageMessagesTokens(msg0, msg1, msg2, msg3 string) StageTokenBreakdown {
	var result StageTokenBreakdown
	result.Budget = StageTokenBudget

	result.Msg0 = EstimateTextTokens(msg0)
	result.Msg1 = EstimateTextTokens(msg1)
	result.Msg2 = EstimateTextTokens(msg2)
	result.Msg3 = EstimateTextTokens(msg3)

	result.Total = result.Msg0 + result.Msg1 + result.Msg2 + result.Msg3
	return result
}
// CheckStageTokenBudget reports whether the four stage messages exceed
// StageTokenBudget and, if so, which of msg1 / msg2 should be compacted.
//
//  1. The per-message token breakdown is always computed, so callers can use
//     it for logging and statistics.
//  2. If the total fits the budget, both compaction flags are false.
//  3. If over budget, msg1 (the conversation history) is flagged when it alone
//     exceeds half of the available budget, where available is
//     StageTokenBudget - StageReserveTokens.
//  4. msg2 (the execution records) is flagged when the prompt would still
//     exceed the budget given the projected size of msg1: available/4 if msg1
//     is flagged for compaction, its current size otherwise.
//
// An over-budget result therefore always flags at least one message.
func CheckStageTokenBudget(msg0, msg1, msg2, msg3 string) (breakdown StageTokenBreakdown, overBudget bool, needCompactMsg1 bool, needCompactMsg2 bool) {
	breakdown = EstimateStageMessagesTokens(msg0, msg1, msg2, msg3)
	overBudget = breakdown.Total > StageTokenBudget
	if !overBudget {
		return breakdown, false, false, false
	}

	// An oversized msg1 is the first candidate for compaction.
	available := StageTokenBudget - StageReserveTokens
	needCompactMsg1 = breakdown.Msg1 > available/2

	// Only assume msg1 shrinks to available/4 when it is actually flagged.
	// Applying that assumption unconditionally let an over-budget prompt with
	// a mid-sized msg1 come back with neither flag set.
	projectedMsg1 := breakdown.Msg1
	if needCompactMsg1 {
		projectedMsg1 = available / 4
	}

	// If the prompt is still over the limit with the projected msg1, the
	// execution records must be compacted as well.
	needCompactMsg2 = breakdown.Total-breakdown.Msg1+projectedMsg1 > StageTokenBudget
	return breakdown, overBudget, needCompactMsg1, needCompactMsg2
}
// EstimateExecuteMessagesTokens is kept under its legacy name; it delegates
// directly to EstimateStageMessagesTokens.
func EstimateExecuteMessagesTokens(msg0, msg1, msg2, msg3 string) StageTokenBreakdown {
	return EstimateStageMessagesTokens(msg0, msg1, msg2, msg3)
}
// CheckExecuteTokenBudget is kept under its legacy name; it delegates directly
// to CheckStageTokenBudget and returns its results unchanged.
func CheckExecuteTokenBudget(msg0, msg1, msg2, msg3 string) (breakdown StageTokenBreakdown, overBudget bool, needCompactMsg1 bool, needCompactMsg2 bool) {
	return CheckStageTokenBudget(msg0, msg1, msg2, msg3)
}