Version: 0.9.24.dev.260416

后端:
1. Memory 预取缓存改为会话级隔离 + 管理面自动失效 + 空检索清理
- 预取缓存 key 从 smartflow:memory_prefetch:{userID} 改为 smartflow:memory_prefetch:u:{userID}:c:{chatID},隔离不同会话的记忆上下文,避免会话间互相覆盖
- 新增 DeleteMemoryPrefetchCacheByUser 方法,使用 SCAN+UNLINK 按模式批量删除指定用户所有会话的预取缓存
- ItemRepo 四个变更方法(SoftDeleteByID / RestoreByIDAt / UpdateManagedFieldsByIDAt / UpdateStatusByIDAt)通过 Model 携带 UserID,使 GORM cache deleter 可精准定位用户
- GormCachePlugin 将 MemoryItem 从忽略列表移至主动处理,新增 invalidMemoryPrefetchCache 异步失效方法
- 后台检索返回空结果时主动清除该用户所有预取缓存,避免过期记忆残留
2. 修复 RAG 召回未过滤 deleted 状态记忆的严重 bug
- MemoryCorpus.BuildRetrieveFilter 新增 status="active" 硬过滤,Milvus 向量检索直接排除已删除/已归档记忆
- 此前删除记忆后即使 MySQL 标记为 deleted,Milvus 中向量仍可被语义召回并注入 prompt
前端:无
仓库:无
This commit is contained in:
Losita
2026-04-16 23:33:38 +08:00
parent d554b52784
commit dd6638f8db
17 changed files with 414 additions and 102 deletions

View File

@@ -39,6 +39,7 @@ func LoadConfigFromViper() memorymodel.Config {
DecisionCandidateMinScore: viper.GetFloat64("memory.decision.candidateMinScore"),
DecisionFallbackMode: viper.GetString("memory.decision.fallbackMode"),
WriteMode: viper.GetString("memory.write.mode"),
WriteMinConfidence: viper.GetFloat64("memory.write.minConfidence"),
}
if cfg.Threshold <= 0 {
@@ -83,6 +84,9 @@ func LoadConfigFromViper() memorymodel.Config {
if cfg.WriteMode == "" {
cfg.WriteMode = "legacy"
}
if cfg.WriteMinConfidence <= 0 {
cfg.WriteMinConfidence = 0.5
}
return cfg
}

View File

@@ -10,12 +10,12 @@ import (
"github.com/LoveLosita/smartflow/backend/model"
)
// HybridRetrieve 统一承接读取侧混合召回链路。
// HybridRetrieve 统一承接读取侧 RAG-first 召回链路。
//
// 步骤化说明:
// 1. 结构化路由先取 constraint / 高置信 preference给模型一份稳定“硬约束底座”
// 2. 再补语义候选,优先走 RAGRAG 报错或 0 命中时回退 MySQL保证链路韧性
// 3. 两路结果统一做三级去重、排序与类型预算裁剪,只对最终真正注入的条目刷新 last_access_at
// 1. 优先走 RAG 语义搜索,按 query 相关性召回候选记忆
// 2. RAG 报错或 0 命中时回退 MySQL保证链路韧性
// 3. 召回结果做三级去重、排序与类型预算裁剪(总量不超过调用方 limit
// 4. 旧 legacy 链路完全保留,方便通过配置快速回滚。
func (s *ReadService) HybridRetrieve(
ctx context.Context,
@@ -32,41 +32,33 @@ func (s *ReadService) HybridRetrieve(
return nil, telemetry, nil
}
pinnedItems, err := s.retrievePinnedCandidates(ctx, req, effectiveSetting, now)
// RAG-first只走语义召回不再全量拉 MySQL pinned。
items, semanticTelemetry, err := s.retrieveSemanticCandidates(ctx, req, effectiveSetting, limit, now)
if err != nil {
return nil, telemetry, err
}
telemetry.PinnedHitCount = len(pinnedItems)
semanticItems, semanticTelemetry, err := s.retrieveSemanticCandidates(ctx, req, effectiveSetting, limit, now)
if err != nil {
return nil, telemetry, err
}
telemetry.SemanticHitCount = len(semanticItems)
telemetry.SemanticHitCount = semanticTelemetry.HitCount
telemetry.Degraded = semanticTelemetry.Degraded
telemetry.RAGFallbackUsed = semanticTelemetry.RAGFallbackUsed
merged := make([]memorymodel.ItemDTO, 0, len(pinnedItems)+len(semanticItems))
merged = append(merged, pinnedItems...)
merged = append(merged, semanticItems...)
if len(merged) == 0 {
if len(items) == 0 {
return nil, telemetry, nil
}
beforeDedupCount := len(merged)
merged = dedupByID(merged)
merged = dedupByHash(merged)
merged = dedupByText(merged)
telemetry.DedupDropCount = beforeDedupCount - len(merged)
merged = RankItems(merged, now)
merged = applyTypeBudget(merged, s.cfg)
if len(merged) == 0 {
beforeDedupCount := len(items)
items = dedupByID(items)
items = dedupByHash(items)
items = dedupByText(items)
telemetry.DedupDropCount = beforeDedupCount - len(items)
items = RankItems(items, now)
items = applyTypeBudget(items, s.cfg, limit)
if len(items) == 0 {
return nil, telemetry, nil
}
telemetry.FinalCount = len(merged)
telemetry.FinalCount = len(items)
_ = s.itemRepo.TouchLastAccessAt(ctx, collectItemDTOIDs(merged), now)
return merged, telemetry, nil
_ = s.itemRepo.TouchLastAccessAt(ctx, collectItemDTOIDs(items), now)
return items, telemetry, nil
}
func (s *ReadService) retrievePinnedCandidates(
@@ -155,7 +147,7 @@ func (s *ReadService) retrieveSemanticCandidatesByMySQL(
req,
now,
[]string{model.MemoryItemStatusActive},
normalizeLimit(candidateLimit*3, candidateLimit*3, maxRetrieveLimit*3),
normalizeLimit(candidateLimit, candidateLimit, maxRetrieveLimit),
)
items, err := s.itemRepo.FindByQuery(ctx, query)
@@ -255,17 +247,22 @@ func preferCurrentItem(previous memorymodel.ItemDTO, current memorymodel.ItemDTO
return true
}
// applyTypeBudget 在排序结果上应用四类记忆预算。
// applyTypeBudget 在排序结果上应用四类记忆预算,并以 callerLimit 作为总量硬上限
//
// 说明:
// 1. 每种类型先保底自己的预算上限,避免 fact 抢掉 constraint 的位置;
// 2. 裁剪时保持当前排序顺序,不在这里重新打分;
// 3. 最终总量由四类预算之和共同决定,默认 18 条
func applyTypeBudget(items []memorymodel.ItemDTO, cfg memorymodel.Config) []memorymodel.ItemDTO {
// 3. 最终总量不超过 min(callerLimit, cfg.TotalReadBudget())
func applyTypeBudget(items []memorymodel.ItemDTO, cfg memorymodel.Config, callerLimit int) []memorymodel.ItemDTO {
if len(items) == 0 {
return nil
}
hardCap := cfg.TotalReadBudget()
if callerLimit > 0 && callerLimit < hardCap {
hardCap = callerLimit
}
budgetByType := map[string]int{
memorymodel.MemoryTypeConstraint: cfg.EffectiveReadConstraintLimit(),
memorymodel.MemoryTypePreference: cfg.EffectiveReadPreferenceLimit(),
@@ -273,9 +270,9 @@ func applyTypeBudget(items []memorymodel.ItemDTO, cfg memorymodel.Config) []memo
memorymodel.MemoryTypeTodoHint: cfg.EffectiveReadTodoHintLimit(),
}
usedByType := make(map[string]int, len(budgetByType))
result := make([]memorymodel.ItemDTO, 0, minInt(len(items), cfg.TotalReadBudget()))
result := make([]memorymodel.ItemDTO, 0, minInt(len(items), hardCap))
for _, item := range items {
if len(result) >= cfg.TotalReadBudget() {
if len(result) >= hardCap {
break
}
@@ -289,11 +286,10 @@ func applyTypeBudget(items []memorymodel.ItemDTO, cfg memorymodel.Config) []memo
return result
}
// hybridSemanticTopK 计算语义召回的候选集大小。
// 使用 callerLimit 的 2 倍作为 TopK保证去重后仍有足够结果填充预算。
func hybridSemanticTopK(cfg memorymodel.Config, limit int) int {
if cfg.TotalReadBudget() > limit {
return cfg.TotalReadBudget()
}
return limit
return limit * 2
}
func resolveBudgetMemoryType(memoryType string) string {