Files
smartmate/backend/memory/utils/normalize_facts.go
Losita 634a9fb926 Version: 0.9.21.dev.260416
后端:
1. Memory 写入链路新增"召回→比对→汇总"去重决策层
- 新增决策流程:Runner 根据 decision.enabled 配置走决策路径(语义召回候选 → Hash 精确命中 → LLM 逐对比对 → 汇总决策 → 执行 ADD/UPDATE/DELETE/NONE),默认关闭,旧路径完全保留
- 新增 LLMDecisionOrchestrator:单对关系判断编排器,输出 duplicate/update/conflict/unrelated 四种关系
- 新增 decision_flow / apply_actions:决策流程主循环与动作落地(新增、更新内容、软删除、跳过)
- 新增 aggregate_decision / decision_validate:汇总规则(按优先级判定动作)与 LLM 输出校验
- 新增 decision model:CandidateSnapshot / ComparisonResult / FinalDecision 等决策层核心类型
- ItemRepo 新增 FindActiveByHash / UpdateContentByID / SoftDeleteByID 三个决策层专用方法
- RAG Runtime / Pipeline / Service 新增 DeleteMemory 向量删除能力,MilvusStore 补充 duplicate collection 错误识别
- Runner 新增 syncVectorDeletes 处理决策层 DELETE 动作的向量清理
- config 新增 decision(enabled/candidateTopK/candidateMinScore/fallbackMode)和 write.mode 配置项,config_loader 增加默认值兜底
- 删除 HANDOFF-RAG复用后续实施计划.md 和旧 log.txt,新增 Log.txt 记录决策流程调试日志
- normalize_facts 导出 HashContent 供决策层复用,audit 新增 update 操作常量

前端:无 仓库:无
2026-04-16 12:11:58 +08:00

136 lines
3.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package utils
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"strings"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
)
// Length limits, counted in runes, applied by NormalizeFacts when truncating text fields.
const (
// maxTitleLength caps the title; it is also the cap used when the title is derived from content.
maxTitleLength = 64
// maxContentLength caps the whitespace-normalized content.
maxContentLength = 1000
)
// NormalizeFacts standardizes and filters candidate facts.
//
// For each candidate, in input order:
//  1. The memory type is normalized via memorymodel.NormalizeMemoryType. Candidates
//     whose type normalizes to "" are dropped, as are candidates whose content is
//     empty after whitespace normalization.
//  2. Content is capped at maxContentLength runes and the title at maxTitleLength
//     runes; an empty title falls back to the leading part of the content.
//  3. Confidence and importance are clamped to [0, 1]. A value that is 0 after
//     clamping is replaced by a default (0.6 for confidence, a per-type value for
//     importance). The sensitivity level is clamped to [0, 2].
//  4. Candidates are deduplicated within this call, keyed on the memory type plus
//     the hash of the lower-cased content; the first occurrence wins.
//
// It returns nil for empty input, and otherwise a non-nil (possibly empty) slice
// holding the surviving facts in their original order.
func NormalizeFacts(candidates []memorymodel.FactCandidate) []memorymodel.NormalizedFact {
	if len(candidates) == 0 {
		return nil
	}

	facts := make([]memorymodel.NormalizedFact, 0, len(candidates))
	emitted := make(map[string]struct{}, len(candidates))

	for i := range candidates {
		c := &candidates[i]

		kind := memorymodel.NormalizeMemoryType(c.MemoryType)
		if kind == "" {
			continue
		}

		body := normalizeWhitespace(c.Content)
		if body == "" {
			continue
		}
		body = truncateByRune(body, maxContentLength)

		// A missing title falls back to the content; either way it is capped below.
		heading := normalizeWhitespace(c.Title)
		if heading == "" {
			heading = body
		}
		heading = truncateByRune(heading, maxTitleLength)

		// Zero after clamping (including negative input) means "use the default".
		conf := clamp01(c.Confidence)
		if conf == 0 {
			conf = 0.6
		}
		weight := clamp01(c.Importance)
		if weight == 0 {
			weight = defaultImportanceByType(kind)
		}

		// Dedup is case-insensitive: the hash is taken over the lower-cased content.
		lowered := strings.ToLower(body)
		digest := HashContent(kind, lowered)
		key := fmt.Sprintf("%s:%s", kind, digest)
		if _, dup := emitted[key]; dup {
			continue
		}
		emitted[key] = struct{}{}

		fact := memorymodel.NormalizedFact{
			MemoryType:        kind,
			Title:             heading,
			Content:           body,
			NormalizedContent: lowered,
			ContentHash:       digest,
			Confidence:        conf,
			Importance:        weight,
			SensitivityLevel:  clampInt(c.SensitivityLevel, 0, 2),
			IsExplicit:        c.IsExplicit,
		}
		facts = append(facts, fact)
	}

	return facts
}
// normalizeWhitespace collapses every run of Unicode whitespace in raw into a
// single ASCII space and removes leading and trailing whitespace. A string that
// contains only whitespace (or nothing) yields "".
func normalizeWhitespace(raw string) string {
	// strings.Fields already discards leading and trailing whitespace, so no
	// separate trim step is required.
	words := strings.Fields(raw)
	return strings.Join(words, " ")
}
// truncateByRune returns raw limited to at most max runes (not bytes), so
// multi-byte characters are never split. A non-positive max yields "". When raw
// already fits, it is returned unchanged.
func truncateByRune(raw string, max int) string {
	if max <= 0 {
		return ""
	}
	if decoded := []rune(raw); len(decoded) > max {
		return string(decoded[:max])
	}
	return raw
}
// clamp01 restricts v to the closed interval [0, 1]: values below 0 become 0,
// values above 1 become 1, and anything else is returned as is.
func clamp01(v float64) float64 {
	switch {
	case v < 0:
		return 0
	case v > 1:
		return 1
	default:
		return v
	}
}
// clampInt restricts v to [minValue, maxValue]. The lower bound is checked
// first, so if the bounds are inverted a v below minValue still yields minValue.
func clampInt(v, minValue, maxValue int) int {
	switch {
	case v < minValue:
		return minValue
	case v > maxValue:
		return maxValue
	default:
		return v
	}
}
// defaultImportanceByType returns the fallback importance score for a memory
// type: 0.95 for constraints, 0.85 for preferences, 0.8 for todo hints, and 0.6
// for any other type.
func defaultImportanceByType(memoryType string) float64 {
	importance := 0.6 // fallback for types without a dedicated default
	switch memoryType {
	case memorymodel.MemoryTypeConstraint:
		importance = 0.95
	case memorymodel.MemoryTypePreference:
		importance = 0.85
	case memorymodel.MemoryTypeTodoHint:
		importance = 0.8
	}
	return importance
}
// HashContent computes the deduplication hash of a memory's content.
//
// Algorithm: hex-encoded sha256(memoryType + "::" + normalizedContent).
//
// It is exported so that the decision layer (apply_actions) can reuse the exact
// same algorithm; a diverging hash there would silently break deduplication.
func HashContent(memoryType, normalizedContent string) string {
	hasher := sha256.New()
	// Feeding the three parts in sequence hashes the same byte stream as the
	// concatenated string. hash.Hash.Write never returns an error.
	hasher.Write([]byte(memoryType))
	hasher.Write([]byte("::"))
	hasher.Write([]byte(normalizedContent))
	return hex.EncodeToString(hasher.Sum(nil))
}