Version: 0.9.24.dev.260416
后端:
1. Memory 预取缓存改为会话级隔离 + 管理面自动失效 + 空检索清理
- 预取缓存 key 从 smartflow:memory_prefetch:{userID} 改为 smartflow:memory_prefetch:u:{userID}:c:{chatID},隔离不同会话的记忆上下文,避免会话间互相覆盖
- 新增 DeleteMemoryPrefetchCacheByUser 方法,使用 SCAN+UNLINK 按模式批量删除指定用户所有会话的预取缓存
- ItemRepo 四个变更方法(SoftDeleteByID / RestoreByIDAt / UpdateManagedFieldsByIDAt / UpdateStatusByIDAt)通过 Model 携带 UserID,使 GORM cache deleter 可精准定位用户
- GormCachePlugin 将 MemoryItem 从忽略列表移至主动处理,新增 invalidMemoryPrefetchCache 异步失效方法
- 后台检索返回空结果时主动清除该用户所有预取缓存,避免过期记忆残留
2. 修复 RAG 召回未过滤 deleted 状态记忆的严重 bug
- MemoryCorpus.BuildRetrieveFilter 新增 status="active" 硬过滤,Milvus 向量检索直接排除已删除/已归档记忆
- 此前删除记忆后即使 MySQL 标记为 deleted,Milvus 中向量仍可被语义召回并注入 prompt
前端:无
仓库:无
This commit is contained in:
@@ -65,6 +65,13 @@ type Config struct {
|
||||
DecisionCandidateMinScore float64 // Milvus 语义召回最低相似度
|
||||
DecisionFallbackMode string // "legacy_add"(退回旧路径直接新增)/ "drop"(丢弃)
|
||||
WriteMode string // "legacy"(旧路径)/ "decision"(决策流程),仅 DecisionEnabled=true 时生效
|
||||
|
||||
// 写入置信度阈值。
|
||||
// 说明:
|
||||
// 1. 抽取结果 confidence 低于此值直接丢弃,不做入库;
|
||||
// 2. 默认 0.5,与"守门员"prompt 的 confidence>=0.5 输出规则配合;
|
||||
// 3. fallback 路径 confidence 设为 0.45,低于默认阈值,LLM 不可用时不写入。
|
||||
WriteMinConfidence float64
|
||||
}
|
||||
|
||||
// NormalizeReadMode 统一读取模式字符串。
|
||||
|
||||
@@ -92,7 +92,8 @@ func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorym
|
||||
}
|
||||
|
||||
type memoryExtractResponse struct {
|
||||
Facts []memoryExtractFact `json:"facts"`
|
||||
MessageIntent string `json:"message_intent"`
|
||||
Facts []memoryExtractFact `json:"facts"`
|
||||
}
|
||||
|
||||
type memoryExtractFact struct {
|
||||
@@ -123,33 +124,43 @@ func buildMemoryExtractSystemPrompt(override string) string {
|
||||
return override
|
||||
}
|
||||
|
||||
return strings.TrimSpace(`你是一个“记忆抽取器”。
|
||||
你的任务是从单条用户消息中抽取值得长期记住的事实、偏好、约束、待办线索。
|
||||
return strings.TrimSpace(`你是一个”记忆守门员”。
|
||||
你的任务是判断用户消息是否包含值得长期记住的信息,如有则提取。
|
||||
请只输出 JSON 对象,不要输出解释、不要输出 markdown。
|
||||
|
||||
输出格式:
|
||||
{
|
||||
"facts": [
|
||||
“message_intent”: “chitchat|task_request|knowledge_qa|preference|personal_fact|standing_instruction”,
|
||||
“facts”: [
|
||||
{
|
||||
"memory_type": "preference|constraint|fact|todo_hint",
|
||||
"title": "短标题",
|
||||
"content": "完整事实内容",
|
||||
"confidence": 0.0,
|
||||
"importance": 0.0,
|
||||
"sensitivity_level": 0,
|
||||
"is_explicit": false
|
||||
“memory_type”: “preference|constraint|fact|todo_hint”,
|
||||
“title”: “短标题”,
|
||||
“content”: “完整事实内容”,
|
||||
“confidence”: 0.0,
|
||||
“importance”: 0.0,
|
||||
“sensitivity_level”: 0,
|
||||
“is_explicit”: false
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
意图分类规则:
|
||||
- chitchat:闲聊、寒暄、情绪表达(”你好””谢谢””我今天好累””嗯嗯”)
|
||||
- task_request:一次性任务请求(”帮我查天气””定个闹钟””帮我写个邮件”)
|
||||
- knowledge_qa:知识问答、信息查询(”什么是量子力学””北京明天多少度”)
|
||||
- preference:用户偏好、习惯、口味(”我喜欢吃辣””别用简称””我习惯用微信”)
|
||||
- personal_fact:个人事实(”我有两个孩子””我在上海工作””我老婆对花生过敏”)
|
||||
- standing_instruction:持久指令(”以后都用英文回复我””记住我的生日是3月5号”)
|
||||
|
||||
规则:
|
||||
1. 最多输出 5 条事实。
|
||||
2. 只保留稳定、未来可能复用的信息,闲聊、寒暄、一次性噪声不要记。
|
||||
3. 用户明确说“记住”或“以后提醒我”时,is_explicit 设为 true。
|
||||
4. confidence 表示这条事实是否真的值得记,取 0 到 1。
|
||||
5. importance 表示对后续提醒/陪伴的价值,取 0 到 1。
|
||||
1. 先判断 message_intent。chitchat / task_request / knowledge_qa 三类,facts 输出空数组。
|
||||
2. 只有 preference / personal_fact / standing_instruction 才提取 facts,最多 3 条。
|
||||
3. 一条消息可能同时包含任务和偏好(如”帮我查天气,记住我喜欢晴天”),此时 intent 取偏好类型,facts 只保留偏好部分。
|
||||
4. confidence 表示这条事实是否真的值得长期记,取 0 到 1。低于 0.5 的不要输出。
|
||||
5. importance 表示对后续陪伴的价值,取 0 到 1。
|
||||
6. sensitivity_level 取 0 到 2,数字越大越敏感。
|
||||
7. 不确定就少记,不要编造。`)
|
||||
7. 用户明确说”记住”或”以后提醒我”时,is_explicit 设为 true。
|
||||
8. 宁可漏记也不要滥记。大多数消息不应该产生任何 facts。`)
|
||||
}
|
||||
|
||||
func buildMemoryExtractUserPrompt(payload memorymodel.ExtractJobPayload) string {
|
||||
@@ -167,15 +178,27 @@ func buildMemoryExtractUserPrompt(payload memorymodel.ExtractJobPayload) string
|
||||
|
||||
raw, err := json.MarshalIndent(request, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Sprintf("请从这条消息中抽取可长期记住的信息:%s", payload.SourceText)
|
||||
return fmt.Sprintf("请分析这条用户消息,判断是否需要写入长期记忆:%s", payload.SourceText)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("请从下面这条用户消息中抽取可长期记住的信息,最多 %d 条。\n输入:\n%s",
|
||||
defaultMemoryExtractMaxFacts, string(raw))
|
||||
return fmt.Sprintf("请分析下面这条用户消息,判断 message_intent,如包含值得长期记住的信息则提取 facts。\n输入:\n%s",
|
||||
string(raw))
|
||||
}
|
||||
|
||||
func convertExtractResponse(resp *memoryExtractResponse) []memorymodel.FactCandidate {
|
||||
if resp == nil || len(resp.Facts) == 0 {
|
||||
if resp == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 意图过滤:跳过不需要记忆的消息类型。
|
||||
// 兼容自定义 prompt(不返回 message_intent 时跳过此检查,保持向后兼容)。
|
||||
if intent := strings.TrimSpace(resp.MessageIntent); intent != "" {
|
||||
if isSkipIntent(intent) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if len(resp.Facts) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -225,7 +248,7 @@ func fallbackNormalizedFacts(payload memorymodel.ExtractJobPayload) []memorymode
|
||||
MemoryType: memorymodel.MemoryTypeFact,
|
||||
Title: buildFallbackTitle(sourceText),
|
||||
Content: sourceText,
|
||||
Confidence: 0.55,
|
||||
Confidence: 0.45,
|
||||
Importance: defaultImportanceByType(memorymodel.MemoryTypeFact),
|
||||
SensitivityLevel: 0,
|
||||
IsExplicit: false,
|
||||
@@ -287,6 +310,17 @@ func defaultImportanceByType(memoryType string) float64 {
|
||||
}
|
||||
}
|
||||
|
||||
// isSkipIntent reports whether intent belongs to a category that never needs
// to be remembered.
//
// The comparison ignores surrounding whitespace and letter case. The three
// skip categories are "chitchat", "task_request" and "knowledge_qa"; for these
// no candidate facts should be produced. Any other value, including the empty
// string, returns false.
func isSkipIntent(intent string) bool {
	switch strings.ToLower(strings.TrimSpace(intent)) {
	case "chitchat", "task_request", "knowledge_qa":
		return true
	default:
		return false
	}
}
|
||||
|
||||
func truncateForLog(raw *infrallm.TextResult) string {
|
||||
if raw == nil {
|
||||
return ""
|
||||
|
||||
@@ -258,7 +258,7 @@ func (r *ItemRepo) UpdateStatusByIDAt(
|
||||
}
|
||||
|
||||
return r.db.WithContext(ctx).
|
||||
Model(&model.MemoryItem{}).
|
||||
Model(&model.MemoryItem{UserID: userID}).
|
||||
Where("id = ? AND user_id = ?", memoryID, userID).
|
||||
Updates(map[string]any{
|
||||
"status": status,
|
||||
@@ -401,7 +401,7 @@ func (r *ItemRepo) UpdateManagedFieldsByIDAt(
|
||||
}
|
||||
|
||||
return r.db.WithContext(ctx).
|
||||
Model(&model.MemoryItem{}).
|
||||
Model(&model.MemoryItem{UserID: userID}).
|
||||
Where("id = ? AND user_id = ?", memoryID, userID).
|
||||
Updates(map[string]any{
|
||||
"memory_type": fields.MemoryType,
|
||||
@@ -434,7 +434,7 @@ func (r *ItemRepo) SoftDeleteByID(ctx context.Context, userID int, memoryID int6
|
||||
}
|
||||
|
||||
return r.db.WithContext(ctx).
|
||||
Model(&model.MemoryItem{}).
|
||||
Model(&model.MemoryItem{UserID: userID}).
|
||||
Where("id = ? AND user_id = ?", memoryID, userID).
|
||||
Updates(map[string]any{
|
||||
"status": model.MemoryItemStatusDeleted,
|
||||
@@ -466,7 +466,7 @@ func (r *ItemRepo) RestoreByIDAt(ctx context.Context, userID int, memoryID int64
|
||||
}
|
||||
|
||||
return r.db.WithContext(ctx).
|
||||
Model(&model.MemoryItem{}).
|
||||
Model(&model.MemoryItem{UserID: userID}).
|
||||
Where("id = ? AND user_id = ?", memoryID, userID).
|
||||
Updates(map[string]any{
|
||||
"status": model.MemoryItemStatusActive,
|
||||
|
||||
@@ -39,6 +39,7 @@ func LoadConfigFromViper() memorymodel.Config {
|
||||
DecisionCandidateMinScore: viper.GetFloat64("memory.decision.candidateMinScore"),
|
||||
DecisionFallbackMode: viper.GetString("memory.decision.fallbackMode"),
|
||||
WriteMode: viper.GetString("memory.write.mode"),
|
||||
WriteMinConfidence: viper.GetFloat64("memory.write.minConfidence"),
|
||||
}
|
||||
|
||||
if cfg.Threshold <= 0 {
|
||||
@@ -83,6 +84,9 @@ func LoadConfigFromViper() memorymodel.Config {
|
||||
if cfg.WriteMode == "" {
|
||||
cfg.WriteMode = "legacy"
|
||||
}
|
||||
if cfg.WriteMinConfidence <= 0 {
|
||||
cfg.WriteMinConfidence = 0.5
|
||||
}
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
||||
@@ -10,12 +10,12 @@ import (
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
)
|
||||
|
||||
// HybridRetrieve 统一承接读取侧混合召回链路。
|
||||
// HybridRetrieve 统一承接读取侧 RAG-first 召回链路。
|
||||
//
|
||||
// 步骤化说明:
|
||||
// 1. 结构化路由先取 constraint / 高置信 preference,给模型一份稳定“硬约束底座”;
|
||||
// 2. 再补语义候选,优先走 RAG;RAG 报错或 0 命中时都回退 MySQL,保证链路韧性;
|
||||
// 3. 两路结果统一做三级去重、排序与类型预算裁剪,只对最终真正注入的条目刷新 last_access_at;
|
||||
// 1. 优先走 RAG 语义搜索,按 query 相关性召回候选记忆;
|
||||
// 2. RAG 报错或 0 命中时回退 MySQL,保证链路韧性;
|
||||
// 3. 召回结果做三级去重、排序与类型预算裁剪(总量不超过调用方 limit);
|
||||
// 4. 旧 legacy 链路完全保留,方便通过配置快速回滚。
|
||||
func (s *ReadService) HybridRetrieve(
|
||||
ctx context.Context,
|
||||
@@ -32,41 +32,33 @@ func (s *ReadService) HybridRetrieve(
|
||||
return nil, telemetry, nil
|
||||
}
|
||||
|
||||
pinnedItems, err := s.retrievePinnedCandidates(ctx, req, effectiveSetting, now)
|
||||
// RAG-first:只走语义召回,不再全量拉 MySQL pinned。
|
||||
items, semanticTelemetry, err := s.retrieveSemanticCandidates(ctx, req, effectiveSetting, limit, now)
|
||||
if err != nil {
|
||||
return nil, telemetry, err
|
||||
}
|
||||
telemetry.PinnedHitCount = len(pinnedItems)
|
||||
|
||||
semanticItems, semanticTelemetry, err := s.retrieveSemanticCandidates(ctx, req, effectiveSetting, limit, now)
|
||||
if err != nil {
|
||||
return nil, telemetry, err
|
||||
}
|
||||
telemetry.SemanticHitCount = len(semanticItems)
|
||||
telemetry.SemanticHitCount = semanticTelemetry.HitCount
|
||||
telemetry.Degraded = semanticTelemetry.Degraded
|
||||
telemetry.RAGFallbackUsed = semanticTelemetry.RAGFallbackUsed
|
||||
|
||||
merged := make([]memorymodel.ItemDTO, 0, len(pinnedItems)+len(semanticItems))
|
||||
merged = append(merged, pinnedItems...)
|
||||
merged = append(merged, semanticItems...)
|
||||
if len(merged) == 0 {
|
||||
if len(items) == 0 {
|
||||
return nil, telemetry, nil
|
||||
}
|
||||
|
||||
beforeDedupCount := len(merged)
|
||||
merged = dedupByID(merged)
|
||||
merged = dedupByHash(merged)
|
||||
merged = dedupByText(merged)
|
||||
telemetry.DedupDropCount = beforeDedupCount - len(merged)
|
||||
merged = RankItems(merged, now)
|
||||
merged = applyTypeBudget(merged, s.cfg)
|
||||
if len(merged) == 0 {
|
||||
beforeDedupCount := len(items)
|
||||
items = dedupByID(items)
|
||||
items = dedupByHash(items)
|
||||
items = dedupByText(items)
|
||||
telemetry.DedupDropCount = beforeDedupCount - len(items)
|
||||
items = RankItems(items, now)
|
||||
items = applyTypeBudget(items, s.cfg, limit)
|
||||
if len(items) == 0 {
|
||||
return nil, telemetry, nil
|
||||
}
|
||||
telemetry.FinalCount = len(merged)
|
||||
telemetry.FinalCount = len(items)
|
||||
|
||||
_ = s.itemRepo.TouchLastAccessAt(ctx, collectItemDTOIDs(merged), now)
|
||||
return merged, telemetry, nil
|
||||
_ = s.itemRepo.TouchLastAccessAt(ctx, collectItemDTOIDs(items), now)
|
||||
return items, telemetry, nil
|
||||
}
|
||||
|
||||
func (s *ReadService) retrievePinnedCandidates(
|
||||
@@ -155,7 +147,7 @@ func (s *ReadService) retrieveSemanticCandidatesByMySQL(
|
||||
req,
|
||||
now,
|
||||
[]string{model.MemoryItemStatusActive},
|
||||
normalizeLimit(candidateLimit*3, candidateLimit*3, maxRetrieveLimit*3),
|
||||
normalizeLimit(candidateLimit, candidateLimit, maxRetrieveLimit),
|
||||
)
|
||||
|
||||
items, err := s.itemRepo.FindByQuery(ctx, query)
|
||||
@@ -255,17 +247,22 @@ func preferCurrentItem(previous memorymodel.ItemDTO, current memorymodel.ItemDTO
|
||||
return true
|
||||
}
|
||||
|
||||
// applyTypeBudget 在排序结果上应用四类记忆预算。
|
||||
// applyTypeBudget 在排序结果上应用四类记忆预算,并以 callerLimit 作为总量硬上限。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 每种类型先保底自己的预算上限,避免 fact 抢掉 constraint 的位置;
|
||||
// 2. 裁剪时保持当前排序顺序,不在这里重新打分;
|
||||
// 3. 最终总量由四类预算之和共同决定,默认 18 条。
|
||||
func applyTypeBudget(items []memorymodel.ItemDTO, cfg memorymodel.Config) []memorymodel.ItemDTO {
|
||||
// 3. 最终总量不超过 min(callerLimit, cfg.TotalReadBudget())。
|
||||
func applyTypeBudget(items []memorymodel.ItemDTO, cfg memorymodel.Config, callerLimit int) []memorymodel.ItemDTO {
|
||||
if len(items) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
hardCap := cfg.TotalReadBudget()
|
||||
if callerLimit > 0 && callerLimit < hardCap {
|
||||
hardCap = callerLimit
|
||||
}
|
||||
|
||||
budgetByType := map[string]int{
|
||||
memorymodel.MemoryTypeConstraint: cfg.EffectiveReadConstraintLimit(),
|
||||
memorymodel.MemoryTypePreference: cfg.EffectiveReadPreferenceLimit(),
|
||||
@@ -273,9 +270,9 @@ func applyTypeBudget(items []memorymodel.ItemDTO, cfg memorymodel.Config) []memo
|
||||
memorymodel.MemoryTypeTodoHint: cfg.EffectiveReadTodoHintLimit(),
|
||||
}
|
||||
usedByType := make(map[string]int, len(budgetByType))
|
||||
result := make([]memorymodel.ItemDTO, 0, minInt(len(items), cfg.TotalReadBudget()))
|
||||
result := make([]memorymodel.ItemDTO, 0, minInt(len(items), hardCap))
|
||||
for _, item := range items {
|
||||
if len(result) >= cfg.TotalReadBudget() {
|
||||
if len(result) >= hardCap {
|
||||
break
|
||||
}
|
||||
|
||||
@@ -289,11 +286,10 @@ func applyTypeBudget(items []memorymodel.ItemDTO, cfg memorymodel.Config) []memo
|
||||
return result
|
||||
}
|
||||
|
||||
// hybridSemanticTopK 计算语义召回的候选集大小。
|
||||
// 使用 callerLimit 的 2 倍作为 TopK,保证去重后仍有足够结果填充预算。
|
||||
func hybridSemanticTopK(cfg memorymodel.Config, limit int) int {
|
||||
if cfg.TotalReadBudget() > limit {
|
||||
return cfg.TotalReadBudget()
|
||||
}
|
||||
return limit
|
||||
return limit * 2
|
||||
}
|
||||
|
||||
func resolveBudgetMemoryType(memoryType string) string {
|
||||
|
||||
@@ -60,3 +60,22 @@ func FilterItemsBySetting(items []model.MemoryItem, setting model.MemoryUserSett
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// FilterFactsByConfidence 按置信度阈值过滤候选事实。
|
||||
//
|
||||
// 说明:
|
||||
// 1. minConfidence <= 0 时不做过滤,保持向后兼容;
|
||||
// 2. 过滤在 FilterFactsBySetting 之后执行,是写入链路的第二道程序化门槛;
|
||||
// 3. 阈值由 memory.write.minConfidence 配置控制,默认 0.5。
|
||||
func FilterFactsByConfidence(facts []memorymodel.NormalizedFact, minConfidence float64) []memorymodel.NormalizedFact {
|
||||
if minConfidence <= 0 || len(facts) == 0 {
|
||||
return facts
|
||||
}
|
||||
result := make([]memorymodel.NormalizedFact, 0, len(facts))
|
||||
for _, fact := range facts {
|
||||
if fact.Confidence >= minConfidence {
|
||||
result = append(result, fact)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -163,6 +163,7 @@ func (r *Runner) RunOnce(ctx context.Context) (*RunOnceResult, error) {
|
||||
return result, nil
|
||||
}
|
||||
facts = memoryutils.FilterFactsBySetting(facts, effectiveSetting)
|
||||
facts = memoryutils.FilterFactsByConfidence(facts, r.cfg.WriteMinConfidence)
|
||||
|
||||
if len(facts) == 0 {
|
||||
if err = r.jobRepo.MarkSuccess(ctx, job.ID); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user