后端:
1. Memory 写入链路新增"召回→比对→汇总"去重决策层
   - 新增决策流程:Runner 根据 decision.enabled 配置走决策路径(语义召回候选 → Hash 精确命中 → LLM 逐对比对 → 汇总决策 → 执行 ADD/UPDATE/DELETE/NONE),默认关闭,旧路径完全保留
   - 新增 LLMDecisionOrchestrator:单对关系判断编排器,输出 duplicate/update/conflict/unrelated 四种关系
   - 新增 decision_flow / apply_actions:决策流程主循环与动作落地(新增、更新内容、软删除、跳过)
   - 新增 aggregate_decision / decision_validate:汇总规则(按优先级判定动作)与 LLM 输出校验
   - 新增 decision model:CandidateSnapshot / ComparisonResult / FinalDecision 等决策层核心类型
   - ItemRepo 新增 FindActiveByHash / UpdateContentByID / SoftDeleteByID 三个决策层专用方法
   - RAG Runtime / Pipeline / Service 新增 DeleteMemory 向量删除能力,MilvusStore 补充 duplicate collection 错误识别
   - Runner 新增 syncVectorDeletes 处理决策层 DELETE 动作的向量清理
   - config 新增 decision(enabled/candidateTopK/candidateMinScore/fallbackMode)和 write.mode 配置项,config_loader 增加默认值兜底
   - 删除 HANDOFF-RAG复用后续实施计划.md 和旧 log.txt,新增 Log.txt 记录决策流程调试日志
   - normalize_facts 导出 HashContent 供决策层复用,audit 新增 update 操作常量
前端:无
仓库:无
136 lines
3.3 KiB
Go
136 lines
3.3 KiB
Go
package utils
|
||
|
||
import (
|
||
"crypto/sha256"
|
||
"encoding/hex"
|
||
"fmt"
|
||
"strings"
|
||
|
||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||
)
|
||
|
||
// Length limits, counted in runes, that NormalizeFacts applies to the text
// fields of a fact.
const (
	// maxTitleLength is the maximum number of runes kept in a fact title.
	maxTitleLength = 64
	// maxContentLength is the maximum number of runes kept in a fact body.
	maxContentLength = 1000
)
|
||
|
||
// NormalizeFacts 对候选事实做标准化与过滤。
|
||
//
|
||
// 步骤:
|
||
// 1. 标准化 memory_type 与文本字段,丢弃空值和非法类型;
|
||
// 2. 对超长内容截断,避免脏数据污染后续链路;
|
||
// 3. 基于“类型+标准化内容”做去重,避免同一轮重复写入。
|
||
func NormalizeFacts(candidates []memorymodel.FactCandidate) []memorymodel.NormalizedFact {
|
||
if len(candidates) == 0 {
|
||
return nil
|
||
}
|
||
|
||
result := make([]memorymodel.NormalizedFact, 0, len(candidates))
|
||
seen := make(map[string]struct{}, len(candidates))
|
||
for _, candidate := range candidates {
|
||
memoryType := memorymodel.NormalizeMemoryType(candidate.MemoryType)
|
||
if memoryType == "" {
|
||
continue
|
||
}
|
||
|
||
content := normalizeWhitespace(candidate.Content)
|
||
if content == "" {
|
||
continue
|
||
}
|
||
content = truncateByRune(content, maxContentLength)
|
||
|
||
title := normalizeWhitespace(candidate.Title)
|
||
if title == "" {
|
||
title = truncateByRune(content, maxTitleLength)
|
||
}
|
||
title = truncateByRune(title, maxTitleLength)
|
||
|
||
confidence := clamp01(candidate.Confidence)
|
||
if confidence == 0 {
|
||
confidence = 0.6
|
||
}
|
||
importance := clamp01(candidate.Importance)
|
||
if importance == 0 {
|
||
importance = defaultImportanceByType(memoryType)
|
||
}
|
||
sensitivityLevel := clampInt(candidate.SensitivityLevel, 0, 2)
|
||
|
||
normalizedContent := strings.ToLower(content)
|
||
contentHash := HashContent(memoryType, normalizedContent)
|
||
dedupKey := fmt.Sprintf("%s:%s", memoryType, contentHash)
|
||
if _, exists := seen[dedupKey]; exists {
|
||
continue
|
||
}
|
||
seen[dedupKey] = struct{}{}
|
||
|
||
result = append(result, memorymodel.NormalizedFact{
|
||
MemoryType: memoryType,
|
||
Title: title,
|
||
Content: content,
|
||
NormalizedContent: normalizedContent,
|
||
ContentHash: contentHash,
|
||
Confidence: confidence,
|
||
Importance: importance,
|
||
SensitivityLevel: sensitivityLevel,
|
||
IsExplicit: candidate.IsExplicit,
|
||
})
|
||
}
|
||
return result
|
||
}
|
||
|
||
// normalizeWhitespace collapses every run of whitespace in raw into a single
// ASCII space and drops leading and trailing whitespace. Whitespace is
// whatever strings.Fields splits on (unicode.IsSpace). An empty or
// all-whitespace input yields "".
func normalizeWhitespace(raw string) string {
	// strings.Fields already ignores leading and trailing whitespace, so a
	// separate trimming pass would be redundant.
	return strings.Join(strings.Fields(raw), " ")
}
|
||
|
||
// truncateByRune returns raw cut down to at most max runes. The limit counts
// runes rather than bytes, so a multi-byte character is never split in half.
// A non-positive max yields "".
func truncateByRune(raw string, max int) string {
	if max <= 0 {
		return ""
	}
	// Every rune occupies at least one byte, so a string of at most max bytes
	// cannot contain more than max runes. Returning early here avoids
	// allocating a rune slice for the common short-string case.
	if len(raw) <= max {
		return raw
	}
	runes := []rune(raw)
	if len(runes) <= max {
		return raw
	}
	return string(runes[:max])
}
|
||
|
||
// clamp01 restricts v to the closed interval [0, 1].
//
// Values below 0 become 0 and values above 1 become 1. NaN also becomes 0:
// every ordered comparison with NaN is false, so without the explicit check
// it would slip through both bounds unchanged and also evade the "== 0"
// default substitution that NormalizeFacts applies to the result.
func clamp01(v float64) float64 {
	switch {
	case v != v:
		// NaN is the only float64 value that is not equal to itself.
		return 0
	case v < 0:
		return 0
	case v > 1:
		return 1
	default:
		return v
	}
}
|
||
|
||
// clampInt limits v to the inclusive range [minValue, maxValue]: values below
// the range yield minValue, values above it yield maxValue, and anything else
// is returned unchanged. The lower bound is tested first.
func clampInt(v, minValue, maxValue int) int {
	switch {
	case v < minValue:
		return minValue
	case v > maxValue:
		return maxValue
	}
	return v
}
|
||
|
||
func defaultImportanceByType(memoryType string) float64 {
|
||
switch memoryType {
|
||
case memorymodel.MemoryTypePreference:
|
||
return 0.85
|
||
case memorymodel.MemoryTypeConstraint:
|
||
return 0.95
|
||
case memorymodel.MemoryTypeTodoHint:
|
||
return 0.8
|
||
default:
|
||
return 0.6
|
||
}
|
||
}
|
||
|
||
// HashContent computes the deduplication hash of a memory item.
//
// The result is the lower-case hexadecimal encoding of
// sha256(memoryType + "::" + normalizedContent).
//
// The function is exported so that the decision layer (apply_actions) can
// reuse the exact same algorithm; diverging hashes would silently defeat
// deduplication.
func HashContent(memoryType, normalizedContent string) string {
	hasher := sha256.New()
	// hash.Hash documents that Write never returns an error.
	hasher.Write([]byte(memoryType))
	hasher.Write([]byte("::"))
	hasher.Write([]byte(normalizedContent))
	return hex.EncodeToString(hasher.Sum(nil))
}
|