Version: 0.9.76.dev.260505
后端: 1.阶段 6 agent / memory 服务化收口 - 新增 cmd/agent 独立进程入口,承载 agent zrpc server、agent outbox relay / consumer 和运行时依赖初始化 - 补齐 services/agent/rpc 的 Chat stream 与 conversation meta/list/timeline、schedule-preview、context-stats、schedule-state unary RPC - 新增 gateway/client/agent 与 shared/contracts/agent,将 /api/v1/agent chat 和非 chat 门面切到 agent zrpc - 收缩 gateway 本地 AgentService 装配,双 RPC 开关开启时不再初始化本地 agent 编排、LLM、RAG 和 memory reader fallback - 将 backend/memory 物理迁入 services/memory,私有实现收入 internal,保留 module/model/observe 作为 memory 服务门面 - 调整 memory outbox、memory reader 和 agent 记忆渲染链路的 import 与服务边界,cmd/memory 独占 memory worker / consumer - 关闭 gateway 侧 agent outbox worker 所有权,agent relay / consumer 由 cmd/agent 独占,gateway 仅保留 HTTP/SSE 门面与迁移期开关回退 - 更新阶段 6 文档,记录 agent / memory 当前切流点、smoke 结果,以及 backend/client 与 gateway/shared 的目录收口口径
This commit is contained in:
73
backend/services/memory/internal/cleanup/dedup_policy.go
Normal file
73
backend/services/memory/internal/cleanup/dedup_policy.go
Normal file
@@ -0,0 +1,73 @@
|
||||
package cleanup
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
)
|
||||
|
||||
// dedupRecentTieWindow is the span within which two items' base times are
// considered "close enough" that quality (confidence/importance) decides the
// winner instead of raw recency.
const dedupRecentTieWindow = 24 * time.Hour

// DedupDecision describes the governance outcome for one duplicate group.
type DedupDecision struct {
	Keep    model.MemoryItem   // the single item that stays active
	Archive []model.MemoryItem // remaining duplicates to be archived
}
|
||||
|
||||
// DecideDedupGroup 决定一组重复 active 记忆中“保留谁、归档谁”。
|
||||
//
|
||||
// 步骤化说明:
|
||||
// 1. 先按“最近更新时间”判断谁更值得保留,符合治理计划里的“优先保留最近更新”;
|
||||
// 2. 若更新时间非常接近,再比较 confidence/importance,避免刚好相差几秒就误保留低质量版本;
|
||||
// 3. 最后用主键逆序兜底,保证同组治理结果稳定可复现。
|
||||
func DecideDedupGroup(items []model.MemoryItem) DedupDecision {
|
||||
if len(items) == 0 {
|
||||
return DedupDecision{}
|
||||
}
|
||||
|
||||
ordered := make([]model.MemoryItem, len(items))
|
||||
copy(ordered, items)
|
||||
sort.SliceStable(ordered, func(i, j int) bool {
|
||||
return preferDedupKeep(ordered[i], ordered[j])
|
||||
})
|
||||
|
||||
return DedupDecision{
|
||||
Keep: ordered[0],
|
||||
Archive: ordered[1:],
|
||||
}
|
||||
}
|
||||
|
||||
func preferDedupKeep(left model.MemoryItem, right model.MemoryItem) bool {
|
||||
leftTime := dedupBaseTime(left)
|
||||
rightTime := dedupBaseTime(right)
|
||||
|
||||
diff := leftTime.Sub(rightTime)
|
||||
if diff < 0 {
|
||||
diff = -diff
|
||||
}
|
||||
if diff > dedupRecentTieWindow {
|
||||
return leftTime.After(rightTime)
|
||||
}
|
||||
|
||||
if left.Confidence != right.Confidence {
|
||||
return left.Confidence > right.Confidence
|
||||
}
|
||||
if left.Importance != right.Importance {
|
||||
return left.Importance > right.Importance
|
||||
}
|
||||
if !leftTime.Equal(rightTime) {
|
||||
return leftTime.After(rightTime)
|
||||
}
|
||||
return left.ID > right.ID
|
||||
}
|
||||
|
||||
func dedupBaseTime(item model.MemoryItem) time.Time {
|
||||
if item.UpdatedAt != nil {
|
||||
return *item.UpdatedAt
|
||||
}
|
||||
if item.CreatedAt != nil {
|
||||
return *item.CreatedAt
|
||||
}
|
||||
return time.Time{}
|
||||
}
|
||||
257
backend/services/memory/internal/cleanup/dedup_runner.go
Normal file
257
backend/services/memory/internal/cleanup/dedup_runner.go
Normal file
@@ -0,0 +1,257 @@
|
||||
package cleanup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
memoryrepo "github.com/LoveLosita/smartflow/backend/services/memory/internal/repo"
|
||||
memoryutils "github.com/LoveLosita/smartflow/backend/services/memory/internal/utils"
|
||||
memoryvectorsync "github.com/LoveLosita/smartflow/backend/services/memory/internal/vectorsync"
|
||||
memoryobserve "github.com/LoveLosita/smartflow/backend/services/memory/observe"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
// DedupRunner performs one offline memory-dedup governance pass.
//
// Responsibility boundaries:
//  1. only duplicate groups of "active items with a non-empty content_hash"
//     are handled;
//  2. it only archives, writes audit logs, and bridges vector deletion —
//     it does not schedule itself periodically;
//  3. dry-run is supported so early rollouts can inspect governance results
//     before committing them to the database.
type DedupRunner struct {
	db           *gorm.DB
	itemRepo     *memoryrepo.ItemRepo
	auditRepo    *memoryrepo.AuditRepo
	// NOTE(review): may be nil (constructor does not default it), yet Run
	// calls Delete on it without a guard — confirm Syncer tolerates a nil
	// receiver.
	vectorSyncer *memoryvectorsync.Syncer
	observer     memoryobserve.Observer
	metrics      memoryobserve.MetricsRecorder
}
|
||||
|
||||
func NewDedupRunner(
|
||||
db *gorm.DB,
|
||||
itemRepo *memoryrepo.ItemRepo,
|
||||
auditRepo *memoryrepo.AuditRepo,
|
||||
vectorSyncer *memoryvectorsync.Syncer,
|
||||
observer memoryobserve.Observer,
|
||||
metrics memoryobserve.MetricsRecorder,
|
||||
) *DedupRunner {
|
||||
if observer == nil {
|
||||
observer = memoryobserve.NewNopObserver()
|
||||
}
|
||||
if metrics == nil {
|
||||
metrics = memoryobserve.NewNopMetrics()
|
||||
}
|
||||
return &DedupRunner{
|
||||
db: db,
|
||||
itemRepo: itemRepo,
|
||||
auditRepo: auditRepo,
|
||||
vectorSyncer: vectorSyncer,
|
||||
observer: observer,
|
||||
metrics: metrics,
|
||||
}
|
||||
}
|
||||
|
||||
// Run executes one offline dedup governance pass.
//
// Flow: list candidate active items, group consecutive rows by
// (user_id, memory_type, content_hash), then for each group keep the winner
// and archive the rest. Archiving plus one audit row per archived item
// happen in a single transaction per group; vector deletion is bridged only
// after that transaction commits. When req.DryRun is set, counters and IDs
// are reported but nothing is written.
func (r *DedupRunner) Run(ctx context.Context, req model.MemoryDedupCleanupRequest) (model.MemoryDedupCleanupResult, error) {
	result := model.MemoryDedupCleanupResult{
		DryRun: req.DryRun,
	}
	// Guard against a zero-value runner; vectorSyncer is intentionally not
	// checked here even though the constructor accepts nil —
	// NOTE(review): confirm Syncer.Delete tolerates a nil receiver.
	if r == nil || r.db == nil || r.itemRepo == nil || r.auditRepo == nil {
		return result, errors.New("memory dedup runner is not initialized")
	}

	items, err := r.itemRepo.ListActiveItemsForDedup(ctx, req.UserID, req.Limit)
	if err != nil {
		r.recordDedupObserve(ctx, req, result, false, err)
		return result, err
	}

	// groupDuplicateItems collects consecutive runs of equal keys, so this
	// relies on the repo returning rows ordered by the group key.
	groups := groupDuplicateItems(items)
	result.ScannedGroupCount = len(groups)
	if len(groups) == 0 {
		r.recordDedupObserve(ctx, req, result, true, nil)
		return result, nil
	}

	for _, group := range groups {
		decision := DecideDedupGroup(group)
		if decision.Keep.ID > 0 {
			result.KeptCount++
		}
		if len(decision.Archive) == 0 {
			continue
		}

		// Counters and IDs are accumulated before the dry-run check so a
		// dry run still reports exactly what WOULD be archived.
		result.DedupedGroupCount++
		archiveIDs := collectDedupIDs(decision.Archive)
		result.ArchivedCount += len(archiveIDs)
		result.ArchivedIDs = append(result.ArchivedIDs, archiveIDs...)
		if req.DryRun {
			continue
		}

		now := time.Now()
		// Archive the losers and record the audit trail atomically: a
		// failure on any item rolls back the whole group.
		txErr := r.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
			itemRepo := r.itemRepo.WithTx(tx)
			auditRepo := r.auditRepo.WithTx(tx)

			if archiveErr := itemRepo.ArchiveByIDsAt(ctx, archiveIDs, now); archiveErr != nil {
				return archiveErr
			}

			for _, item := range decision.Archive {
				// Build the "after" snapshot captured in the audit log.
				after := item
				after.Status = model.MemoryItemStatusArchived
				after.UpdatedAt = &now
				after.VectorStatus = "pending"

				audit := memoryutils.BuildItemAuditLog(
					item.ID,
					item.UserID,
					memoryutils.AuditOperationArchive,
					normalizeCleanupOperator(req.OperatorType),
					normalizeCleanupReason(req.Reason),
					&item,
					&after,
				)
				if createErr := auditRepo.Create(ctx, audit); createErr != nil {
					return createErr
				}
			}
			return nil
		})
		if txErr != nil {
			r.recordDedupObserve(ctx, req, result, false, txErr)
			return result, txErr
		}

		// Vector cleanup after the DB commit; the return value is ignored,
		// so this is best-effort. The empty first argument presumably
		// selects a default collection/namespace — TODO confirm against
		// Syncer.Delete's contract.
		r.vectorSyncer.Delete(ctx, "", archiveIDs)
		r.metrics.AddCounter(memoryobserve.MetricCleanupArchivedTotal, int64(len(archiveIDs)), map[string]string{
			"dry_run": "false",
		})
	}

	r.recordDedupObserve(ctx, req, result, true, nil)
	return result, nil
}
|
||||
|
||||
func groupDuplicateItems(items []model.MemoryItem) [][]model.MemoryItem {
|
||||
if len(items) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
result := make([][]model.MemoryItem, 0)
|
||||
currentGroup := make([]model.MemoryItem, 0, 2)
|
||||
currentKey := ""
|
||||
for _, item := range items {
|
||||
key := dedupGroupKey(item)
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
if currentKey == "" || currentKey != key {
|
||||
if len(currentGroup) > 1 {
|
||||
copied := make([]model.MemoryItem, len(currentGroup))
|
||||
copy(copied, currentGroup)
|
||||
result = append(result, copied)
|
||||
}
|
||||
currentKey = key
|
||||
currentGroup = currentGroup[:0]
|
||||
}
|
||||
currentGroup = append(currentGroup, item)
|
||||
}
|
||||
if len(currentGroup) > 1 {
|
||||
copied := make([]model.MemoryItem, len(currentGroup))
|
||||
copy(copied, currentGroup)
|
||||
result = append(result, copied)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func dedupGroupKey(item model.MemoryItem) string {
|
||||
contentHash := strings.TrimSpace(derefString(item.ContentHash))
|
||||
if item.UserID <= 0 || strings.TrimSpace(item.MemoryType) == "" || contentHash == "" {
|
||||
return ""
|
||||
}
|
||||
return strings.Join([]string{
|
||||
strconv.Itoa(item.UserID),
|
||||
item.MemoryType,
|
||||
contentHash,
|
||||
}, "::")
|
||||
}
|
||||
|
||||
func collectDedupIDs(items []model.MemoryItem) []int64 {
|
||||
ids := make([]int64, 0, len(items))
|
||||
for _, item := range items {
|
||||
if item.ID <= 0 {
|
||||
continue
|
||||
}
|
||||
ids = append(ids, item.ID)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
func normalizeCleanupOperator(operatorType string) string {
|
||||
operatorType = strings.TrimSpace(operatorType)
|
||||
if operatorType == "" {
|
||||
return "system"
|
||||
}
|
||||
return memoryutils.NormalizeOperatorType(operatorType)
|
||||
}
|
||||
|
||||
// normalizeCleanupReason trims the audit reason and falls back to the
// default offline-dedup reason when the input is blank.
func normalizeCleanupReason(reason string) string {
	if trimmed := strings.TrimSpace(reason); trimmed != "" {
		return trimmed
	}
	return "离线 dedup 治理归档重复记忆"
}
|
||||
|
||||
// derefString safely dereferences an optional string, returning the trimmed
// value, or "" when the pointer is nil.
func derefString(value *string) string {
	var out string
	if value != nil {
		out = strings.TrimSpace(*value)
	}
	return out
}
|
||||
|
||||
func (r *DedupRunner) recordDedupObserve(
|
||||
ctx context.Context,
|
||||
req model.MemoryDedupCleanupRequest,
|
||||
result model.MemoryDedupCleanupResult,
|
||||
success bool,
|
||||
err error,
|
||||
) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
status := "success"
|
||||
level := memoryobserve.LevelInfo
|
||||
if !success || err != nil {
|
||||
status = "error"
|
||||
level = memoryobserve.LevelWarn
|
||||
}
|
||||
|
||||
r.observer.Observe(ctx, memoryobserve.Event{
|
||||
Level: level,
|
||||
Component: memoryobserve.ComponentCleanup,
|
||||
Operation: memoryobserve.OperationDedup,
|
||||
Fields: map[string]any{
|
||||
"user_id": req.UserID,
|
||||
"limit": req.Limit,
|
||||
"dry_run": req.DryRun,
|
||||
"scanned_group_count": result.ScannedGroupCount,
|
||||
"deduped_group_count": result.DedupedGroupCount,
|
||||
"archived_count": result.ArchivedCount,
|
||||
"success": success && err == nil,
|
||||
"error": err,
|
||||
"error_code": memoryobserve.ClassifyError(err),
|
||||
},
|
||||
})
|
||||
r.metrics.AddCounter(memoryobserve.MetricCleanupRunTotal, 1, map[string]string{
|
||||
"operation": "dedup",
|
||||
"status": status,
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user