Version: 0.9.23.dev.260416

后端:
1. Memory 管理面 API 落地(“我的记忆”增删改查 + 恢复)
   - 补齐 List/Get/Create/Update/Delete/Restore 的 handler、请求模型与返回视图
   - 注册 `/api/v1/memory/items*` 路由并接入 MemoryHandler
   - 新增 memory item not found / invalid memory type / invalid memory content 三类管理面错误码
2. Memory Module / Service / Repo 扩展为“可管理 + 可治理”门面
   - 新增 NewModuleWithObserve / ObserveDeps,导出 GetItem / CreateItem / UpdateItem / DeleteItem / RestoreItem / RunDedupCleanup / MemoryObserver / MemoryMetrics
   - 新增手动新增、修改、恢复能力;删除链路切到 SoftDeleteByID;所有管理动作统一事务内写 audit,并桥接向量同步与管理面观测
   - 补齐 CreateItemFields / UpdateItemFields、单条 Create、管理侧字段更新、软删/恢复,以及 dedup 扫描/归档所需 repo 能力
   - 审计操作补齐 archive / restore
3. Memory 读侧与注入侧观测补齐
   - HybridRetrieve 返回 telemetry,统一记录 pinned hit / semantic hit / dedup drop / degraded / RAG fallback,并上报读取命中、去重丢弃、RAG 降级指标
   - AgentService 持有 memory observer / metrics;injectMemoryContext 对读取失败、空注入、成功注入补齐结构化日志与注入计数
4. Worker / 决策 / 向量同步链路治理增强
   - 召回结果显式携带 fallbackMode;hash 精确命中、rag→mysql 降级、最终动作统一写入决策观测
   - 接入 vectorSyncer / observer / metrics;为 job 重试、任务成功/失败、决策分布与 fallback 补齐打点;向量 upsert/delete 统一改走公共 Syncer,并收敛 parseMemoryID 解析逻辑
5. 启动层接入 Memory 观测依赖
   - 启动时创建 LoggerObserver + MetricsRegistry,并通过 NewModuleWithObserve 注入 memory 模块
前端:无
仓库:无
This commit is contained in:
Losita
2026-04-16 19:34:32 +08:00
parent a1b2ffedb8
commit fad3aed30a
23 changed files with 2527 additions and 121 deletions

View File

@@ -0,0 +1,257 @@
package cleanup
import (
"context"
"errors"
"strconv"
"strings"
"time"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
"github.com/LoveLosita/smartflow/backend/model"
"gorm.io/gorm"
)
// DedupRunner executes a single offline memory de-duplication governance pass.
//
// Responsibility boundaries:
// 1. It only handles duplicate groups whose items are "active" and have a non-empty content_hash;
// 2. It only performs archive + audit + the vector-deletion bridge; it does not schedule itself;
// 3. It supports dry-run, so the governance result can be observed before anything is written.
type DedupRunner struct {
// db opens the per-group transaction that wraps the archive update and its audit rows.
db *gorm.DB
// itemRepo lists dedup candidates and archives the selected items.
itemRepo *memoryrepo.ItemRepo
// auditRepo persists one audit log per archived item.
auditRepo *memoryrepo.AuditRepo
// vectorSyncer is asked to delete the archived IDs after a group has been archived.
vectorSyncer *memoryvectorsync.Syncer
// observer receives one structured event per run outcome.
observer memoryobserve.Observer
// metrics receives the cleanup run / archived counters.
metrics memoryobserve.MetricsRecorder
}
// NewDedupRunner wires a DedupRunner from its dependencies.
//
// A nil observer or metrics recorder is replaced by the package's no-op
// implementation, so the runner can always call them unconditionally.
// The remaining dependencies are stored as given.
func NewDedupRunner(
	db *gorm.DB,
	itemRepo *memoryrepo.ItemRepo,
	auditRepo *memoryrepo.AuditRepo,
	vectorSyncer *memoryvectorsync.Syncer,
	observer memoryobserve.Observer,
	metrics memoryobserve.MetricsRecorder,
) *DedupRunner {
	runner := &DedupRunner{
		db:           db,
		itemRepo:     itemRepo,
		auditRepo:    auditRepo,
		vectorSyncer: vectorSyncer,
		observer:     observer,
		metrics:      metrics,
	}
	// Fall back to no-op observability so callers may pass nil.
	if runner.observer == nil {
		runner.observer = memoryobserve.NewNopObserver()
	}
	if runner.metrics == nil {
		runner.metrics = memoryobserve.NewNopMetrics()
	}
	return runner
}
// Run executes one offline de-duplication governance pass.
//
// Flow:
//  1. load candidates through ListActiveItemsForDedup and group them with
//     groupDuplicateItems;
//  2. ask DecideDedupGroup which items of each group should be archived;
//  3. in dry-run mode only account for the would-be archives; otherwise archive
//     the items and write one audit row per item in a single transaction per
//     group, then ask the vector syncer to delete the archived IDs.
//
// DedupedGroupCount, ArchivedCount and ArchivedIDs only include groups that were
// actually committed (or, in dry-run mode, groups that would be archived). If a
// group's transaction fails, the run stops, the failing group is NOT counted, and
// the partial result accumulated so far is returned together with the error.
//
// It returns an error when the runner is not initialized, when listing the
// candidates fails, or when a group's transaction fails.
func (r *DedupRunner) Run(ctx context.Context, req model.MemoryDedupCleanupRequest) (model.MemoryDedupCleanupResult, error) {
	result := model.MemoryDedupCleanupResult{
		DryRun: req.DryRun,
	}
	if r == nil || r.db == nil || r.itemRepo == nil || r.auditRepo == nil {
		return result, errors.New("memory dedup runner is not initialized")
	}

	items, err := r.itemRepo.ListActiveItemsForDedup(ctx, req.UserID, req.Limit)
	if err != nil {
		r.recordDedupObserve(ctx, req, result, false, err)
		return result, err
	}

	groups := groupDuplicateItems(items)
	result.ScannedGroupCount = len(groups)

	for _, group := range groups {
		decision := DecideDedupGroup(group)
		if decision.Keep.ID > 0 {
			result.KeptCount++
		}
		if len(decision.Archive) == 0 {
			continue
		}
		archiveIDs := collectDedupIDs(decision.Archive)

		if !req.DryRun {
			if txErr := r.archiveDedupGroup(ctx, req, decision.Archive, archiveIDs); txErr != nil {
				// The failing group has been rolled back, so result intentionally
				// does not include it when it is observed and returned.
				r.recordDedupObserve(ctx, req, result, false, txErr)
				return result, txErr
			}
			// vectorSyncer is not part of the initialization guard above, so it is
			// treated as optional here.
			if r.vectorSyncer != nil {
				r.vectorSyncer.Delete(ctx, "", archiveIDs)
			}
			r.metrics.AddCounter(memoryobserve.MetricCleanupArchivedTotal, int64(len(archiveIDs)), map[string]string{
				"dry_run": "false",
			})
		}

		// Account for the group only after it was committed (or in dry-run mode).
		result.DedupedGroupCount++
		result.ArchivedCount += len(archiveIDs)
		result.ArchivedIDs = append(result.ArchivedIDs, archiveIDs...)
	}

	r.recordDedupObserve(ctx, req, result, true, nil)
	return result, nil
}

// archiveDedupGroup archives one duplicate group and writes an audit row for each
// archived item inside a single transaction; any error rolls the whole group back.
func (r *DedupRunner) archiveDedupGroup(
	ctx context.Context,
	req model.MemoryDedupCleanupRequest,
	archived []model.MemoryItem,
	archiveIDs []int64,
) error {
	now := time.Now()
	operator := normalizeCleanupOperator(req.OperatorType)
	reason := normalizeCleanupReason(req.Reason)

	return r.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
		itemRepo := r.itemRepo.WithTx(tx)
		auditRepo := r.auditRepo.WithTx(tx)

		if err := itemRepo.ArchiveByIDsAt(ctx, archiveIDs, now); err != nil {
			return err
		}
		for _, item := range archived {
			// "after" mirrors the state written by the archive update so the
			// audit row carries a before/after pair.
			after := item
			after.Status = model.MemoryItemStatusArchived
			after.UpdatedAt = &now
			after.VectorStatus = "pending"

			audit := memoryutils.BuildItemAuditLog(
				item.ID,
				item.UserID,
				memoryutils.AuditOperationArchive,
				operator,
				reason,
				&item,
				&after,
			)
			if err := auditRepo.Create(ctx, audit); err != nil {
				return err
			}
		}
		return nil
	})
}
// groupDuplicateItems splits items into groups of duplicates.
//
// Only consecutive items sharing the same dedupGroupKey are grouped, so the input
// must already be ordered such that equal keys are adjacent. Items whose key is
// empty are skipped without ending the current run. Runs with fewer than two
// items are dropped. Each returned group is an independent copy.
//
// It returns nil for empty input and a non-nil (possibly empty) slice otherwise.
func groupDuplicateItems(items []model.MemoryItem) [][]model.MemoryItem {
	if len(items) == 0 {
		return nil
	}

	groups := make([][]model.MemoryItem, 0)
	run := make([]model.MemoryItem, 0, 2)
	runKey := ""

	// flush emits the current run as a group when it actually holds duplicates.
	flush := func() {
		if len(run) < 2 {
			return
		}
		// Copy, because the run buffer is reused for the next key.
		snapshot := make([]model.MemoryItem, len(run))
		copy(snapshot, run)
		groups = append(groups, snapshot)
	}

	for _, item := range items {
		key := dedupGroupKey(item)
		if key == "" {
			continue
		}
		// key is non-empty here, so this also covers the initial empty runKey.
		if key != runKey {
			flush()
			runKey = key
			run = run[:0]
		}
		run = append(run, item)
	}
	flush()

	return groups
}
// dedupGroupKey builds the grouping key "<userID>::<memoryType>::<contentHash>".
//
// It returns "" when the item cannot take part in dedup: non-positive UserID,
// blank MemoryType, or a missing/blank content hash. The content hash is used
// trimmed; MemoryType is used exactly as stored.
func dedupGroupKey(item model.MemoryItem) string {
	// derefString already trims, so the hash needs no further normalization.
	hash := derefString(item.ContentHash)
	switch {
	case item.UserID <= 0:
		return ""
	case strings.TrimSpace(item.MemoryType) == "":
		return ""
	case hash == "":
		return ""
	}
	return strconv.Itoa(item.UserID) + "::" + item.MemoryType + "::" + hash
}
// collectDedupIDs returns the IDs of items in input order, leaving out any
// non-positive ID. The result is always non-nil.
func collectDedupIDs(items []model.MemoryItem) []int64 {
	collected := make([]int64, 0, len(items))
	for i := range items {
		if id := items[i].ID; id > 0 {
			collected = append(collected, id)
		}
	}
	return collected
}
// normalizeCleanupOperator resolves the operator type recorded in audit logs.
// A blank value defaults to "system"; anything else is trimmed and passed
// through memoryutils.NormalizeOperatorType.
func normalizeCleanupOperator(operatorType string) string {
	if trimmed := strings.TrimSpace(operatorType); trimmed != "" {
		return memoryutils.NormalizeOperatorType(trimmed)
	}
	return "system"
}
// normalizeCleanupReason returns the trimmed reason, or the default dedup
// archive reason when the input is blank.
func normalizeCleanupReason(reason string) string {
	if trimmed := strings.TrimSpace(reason); trimmed != "" {
		return trimmed
	}
	return "离线 dedup 治理归档重复记忆"
}
// derefString returns the whitespace-trimmed value behind value, or "" when
// value is nil.
func derefString(value *string) string {
	if value != nil {
		return strings.TrimSpace(*value)
	}
	return ""
}
// recordDedupObserve reports the outcome of a dedup run.
//
// It emits one observer event carrying the request parameters and the counters
// accumulated in result, then increments the cleanup run counter tagged with the
// outcome status. The run counts as successful only when success is true and err
// is nil; otherwise it is reported as "error" at warn level. A nil receiver is a
// no-op.
func (r *DedupRunner) recordDedupObserve(
	ctx context.Context,
	req model.MemoryDedupCleanupRequest,
	result model.MemoryDedupCleanupResult,
	success bool,
	err error,
) {
	if r == nil {
		return
	}

	succeeded := success && err == nil
	status, level := "success", memoryobserve.LevelInfo
	if !succeeded {
		status, level = "error", memoryobserve.LevelWarn
	}

	fields := map[string]any{
		"user_id":             req.UserID,
		"limit":               req.Limit,
		"dry_run":             req.DryRun,
		"scanned_group_count": result.ScannedGroupCount,
		"deduped_group_count": result.DedupedGroupCount,
		"archived_count":      result.ArchivedCount,
		"success":             succeeded,
		"error":               err,
		"error_code":          memoryobserve.ClassifyError(err),
	}
	r.observer.Observe(ctx, memoryobserve.Event{
		Level:     level,
		Component: memoryobserve.ComponentCleanup,
		Operation: memoryobserve.OperationDedup,
		Fields:    fields,
	})

	tags := map[string]string{
		"operation": "dedup",
		"status":    status,
	}
	r.metrics.AddCounter(memoryobserve.MetricCleanupRunTotal, 1, tags)
}