Version: 0.9.13.dev.260410

后端:
1. Memory Day1 链路打通(chat_history -> outbox -> memory_jobs)
   - 更新 service/events/chat_history_persist.go:聊天消息落库同事务追加 memory.extract.requested 事件(仅 user 消息,失败回滚后由 outbox 重试)
   - 新建 service/events/memory_extract_requested.go:消费 memory.extract.requested 并幂等入队 memory_jobs,补齐 payload 校验、文本截断与 idempotency key
   - 更新 cmd/start.go:注册 RegisterMemoryExtractRequestedHandler
2. Memory 模块骨架落地(先跑通状态机,再接入真实抽取)
   - 新建 memory/model、repo、service、orchestrator、worker、utils 目录与 Day1 mock 抽取执行链
   - 新建 model/memory.go:补齐 memory_items / memory_jobs / memory_audit_logs / memory_user_settings 与事件 payload 模型
   - 更新 inits/mysql.go:接入 4 张 memory 相关表 AutoMigrate
3. RAG 复用基础设施预埋(依赖可替换)
   - 新建 infra/rag:core pipeline + chunk/embed/retrieve/rerank/store/corpus/config 分层实现
   - 默认接入 MockEmbedder + InMemoryStore,预留 Milvus / Eino 适配实现
   - 新增 infra/rag/RAG复用接口实施计划.md
4. 本地依赖与交接文档同步
   - 更新 docker-compose.yml:新增 etcd / minio / milvus / attu 服务与数据卷
   - 删除 newAgent/HANDOFF_工具研究与运行态重置.md、newAgent/阶段3_上下文瘦身设计.md
   - 新增 newAgent/HANDOFF_WebSearch两阶段实施计划.md、memory/HANDOFF-RAG复用后续实施计划.md、memory/README.md
前端:无 仓库:无
This commit is contained in:
LoveLosita
2026-04-10 13:07:54 +08:00
parent ee34d5f111
commit fae162162a
47 changed files with 3244 additions and 1280 deletions

View File

@@ -0,0 +1,13 @@
package corpus
import (
"crypto/sha256"
"encoding/hex"
"strings"
)
// hashLikeText returns a short, deterministic fingerprint of text.
//
// The input is lower-cased and stripped of surrounding whitespace before
// hashing, so inputs that differ only in letter case or outer whitespace
// produce the same value. The result is the hex encoding of the first 8 bytes
// of the SHA-256 digest, i.e. 16 hexadecimal characters.
func hashLikeText(text string) string {
	canonical := strings.ToLower(text)
	canonical = strings.TrimSpace(canonical)

	digest := sha256.Sum256([]byte(canonical))
	prefix := digest[:8]
	return hex.EncodeToString(prefix)
}

View File

@@ -0,0 +1,149 @@
package corpus
import (
"context"
"errors"
"fmt"
"strings"
"time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
)
// memoryCorpusName is the corpus identifier returned by (*MemoryCorpus).Name.
const memoryCorpusName = "memory"
// MemoryIngestItem describes one memory record to be ingested into the
// memory corpus.
type MemoryIngestItem struct {
	// MemoryID is used to build the document ID; when it is not positive the
	// ID is derived from UserID and a hash of the content instead.
	MemoryID int64
	// UserID must be positive, otherwise ingestion is rejected.
	UserID int
	// ConversationID, AssistantID and RunID are stored, whitespace-trimmed,
	// as document metadata.
	ConversationID, AssistantID, RunID string
	// MemoryType is stored trimmed and lower-cased as document metadata.
	MemoryType string
	// Title and Content are trimmed; items whose Content is blank are skipped.
	Title, Content string
	// SensitivityLevel is copied unchanged into the document metadata.
	SensitivityLevel int
	// TTLAt, when set, is stored as an RFC 3339 string under "ttl_at".
	// CreatedAt, when nil, defaults to the time of ingestion.
	TTLAt, CreatedAt *time.Time
}
// MemoryRetrieveInput carries the filter criteria for a memory retrieval.
type MemoryRetrieveInput struct {
	// UserID is mandatory and must be positive.
	UserID int
	// ConversationID, AssistantID, RunID and MemoryType are optional; a blank
	// value is left out of the resulting filter.
	ConversationID, AssistantID, RunID, MemoryType string
}
// MemoryCorpus is the corpus adapter for memory data. It carries no state.
type MemoryCorpus struct{}
// NewMemoryCorpus returns a ready-to-use MemoryCorpus.
func NewMemoryCorpus() *MemoryCorpus {
	corpus := new(MemoryCorpus)
	return corpus
}
// Name reports the identifier of the memory corpus.
func (c *MemoryCorpus) Name() string { return memoryCorpusName }
// BuildIngestDocuments converts memory ingest input into source documents.
//
// input is normalized by toMemoryItems. An item with a non-positive UserID
// aborts the whole call with an error; an item whose trimmed content is empty
// is skipped. The context argument is not used.
func (c *MemoryCorpus) BuildIngestDocuments(_ context.Context, input any) ([]core.SourceDocument, error) {
	items, err := toMemoryItems(input)
	if err != nil {
		return nil, err
	}

	docs := make([]core.SourceDocument, 0, len(items))
	for i := range items {
		entry := &items[i]
		if entry.UserID <= 0 {
			return nil, errors.New("memory ingest item user_id is invalid")
		}

		body := strings.TrimSpace(entry.Content)
		if body == "" {
			continue
		}

		// Use the memory ID when it is positive; otherwise fall back to an
		// ID built from the user ID and a hash of the content.
		var id string
		if entry.MemoryID > 0 {
			id = fmt.Sprintf("memory:%d", entry.MemoryID)
		} else {
			id = fmt.Sprintf("memory:uid:%d:%s", entry.UserID, hashLikeText(body))
		}

		meta := map[string]any{
			"user_id":           entry.UserID,
			"conversation_id":   strings.TrimSpace(entry.ConversationID),
			"assistant_id":      strings.TrimSpace(entry.AssistantID),
			"run_id":            strings.TrimSpace(entry.RunID),
			"memory_type":       strings.TrimSpace(strings.ToLower(entry.MemoryType)),
			"sensitivity_level": entry.SensitivityLevel,
		}
		if ttl := entry.TTLAt; ttl != nil {
			meta["ttl_at"] = ttl.Format(time.RFC3339)
		}

		// Default the creation time to "now" when the item does not carry one.
		var created time.Time
		if entry.CreatedAt != nil {
			created = *entry.CreatedAt
		} else {
			created = time.Now()
		}

		docs = append(docs, core.SourceDocument{
			ID:        id,
			Text:      body,
			Title:     strings.TrimSpace(entry.Title),
			Metadata:  meta,
			CreatedAt: created,
		})
	}
	return docs, nil
}
// BuildRetrieveFilter turns a memory retrieval request into a metadata filter.
//
// req must be a MemoryRetrieveInput or a non-nil *MemoryRetrieveInput. A nil
// req, including a typed nil *MemoryRetrieveInput, yields the
// "memory retrieve input is nil" error; any other type is rejected as invalid.
// UserID must be positive and is always part of the filter. The remaining
// fields are trimmed (MemoryType is also lower-cased) and only included when
// non-empty. The context argument is not used.
func (c *MemoryCorpus) BuildRetrieveFilter(_ context.Context, req any) (map[string]any, error) {
	var input MemoryRetrieveInput
	switch value := req.(type) {
	case MemoryRetrieveInput:
		input = value
	case *MemoryRetrieveInput:
		// An interface holding a typed nil pointer is not equal to nil, so it
		// has to be detected here rather than by comparing req with nil.
		if value == nil {
			return nil, errors.New("memory retrieve input is nil")
		}
		input = *value
	case nil:
		return nil, errors.New("memory retrieve input is nil")
	default:
		return nil, errors.New("invalid memory retrieve input")
	}

	if input.UserID <= 0 {
		return nil, errors.New("memory retrieve user_id is invalid")
	}

	filter := map[string]any{
		"user_id": input.UserID,
	}
	if v := strings.TrimSpace(input.ConversationID); v != "" {
		filter["conversation_id"] = v
	}
	if v := strings.TrimSpace(input.AssistantID); v != "" {
		filter["assistant_id"] = v
	}
	if v := strings.TrimSpace(input.RunID); v != "" {
		filter["run_id"] = v
	}
	if v := strings.TrimSpace(strings.ToLower(input.MemoryType)); v != "" {
		filter["memory_type"] = v
	}
	return filter, nil
}
// toMemoryItems normalizes the supported ingest input shapes into a slice of
// MemoryIngestItem values.
//
// Accepted inputs are a MemoryIngestItem, a non-nil *MemoryIngestItem, a
// []MemoryIngestItem (returned as is), or a []*MemoryIngestItem (nil elements
// are dropped). Anything else is an error.
func toMemoryItems(input any) ([]MemoryIngestItem, error) {
	if single, ok := input.(MemoryIngestItem); ok {
		return []MemoryIngestItem{single}, nil
	}
	if ptr, ok := input.(*MemoryIngestItem); ok {
		if ptr == nil {
			return nil, errors.New("memory ingest item is nil")
		}
		return []MemoryIngestItem{*ptr}, nil
	}
	if values, ok := input.([]MemoryIngestItem); ok {
		return values, nil
	}

	ptrs, ok := input.([]*MemoryIngestItem)
	if !ok {
		return nil, errors.New("invalid memory ingest input")
	}
	flattened := make([]MemoryIngestItem, 0, len(ptrs))
	for i := range ptrs {
		if ptrs[i] != nil {
			flattened = append(flattened, *ptrs[i])
		}
	}
	return flattened, nil
}

View File

@@ -0,0 +1,163 @@
package corpus
import (
"context"
"errors"
"fmt"
"strings"
"time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
)
// webCorpusName is the corpus identifier returned by (*WebCorpus).Name.
const webCorpusName = "web"
// WebIngestItem describes one web page to be ingested into the web corpus.
type WebIngestItem struct {
	// URL is required; ingestion fails when it is blank.
	URL string
	// Title, Snippet and Content are trimmed and joined, in that order, to
	// form the document text.
	Title, Content, Snippet string
	// Domain, QueryID and SessionID are stored, whitespace-trimmed, as
	// document metadata.
	Domain, QueryID, SessionID string
	// PublishedAt and FetchedAt, when set, are stored as RFC 3339 strings in
	// the metadata. FetchedAt is also used as the document creation time,
	// which otherwise defaults to the time of ingestion.
	PublishedAt, FetchedAt *time.Time
	// SourceRank is copied unchanged into the document metadata.
	SourceRank int
}
// WebRetrieveInput carries the filter criteria for a web-corpus retrieval.
// At least one of QueryID and SessionID must be non-blank; Domain is optional.
type WebRetrieveInput struct {
	QueryID, SessionID, Domain string
}
// WebCorpus is the corpus adapter for web page data. It carries no state.
type WebCorpus struct{}
// NewWebCorpus returns a ready-to-use WebCorpus.
func NewWebCorpus() *WebCorpus {
	corpus := new(WebCorpus)
	return corpus
}
// Name reports the identifier of the web corpus.
func (c *WebCorpus) Name() string { return webCorpusName }
// BuildIngestDocuments converts web ingest input into source documents.
//
// input is normalized by toWebItems. An item with a blank URL aborts the
// whole call with an error; an item whose combined text is blank is skipped.
// The document ID is derived from a hash of the URL and the combined text.
// The context argument is not used.
func (c *WebCorpus) BuildIngestDocuments(_ context.Context, input any) ([]core.SourceDocument, error) {
	items, err := toWebItems(input)
	if err != nil {
		return nil, err
	}

	docs := make([]core.SourceDocument, 0, len(items))
	for i := range items {
		page := items[i]

		link := strings.TrimSpace(page.URL)
		if link == "" {
			return nil, errors.New("web ingest item url is empty")
		}

		body := buildWebText(page)
		if strings.TrimSpace(body) == "" {
			continue
		}

		meta := map[string]any{
			"url":         link,
			"domain":      strings.TrimSpace(page.Domain),
			"query_id":    strings.TrimSpace(page.QueryID),
			"session_id":  strings.TrimSpace(page.SessionID),
			"source_rank": page.SourceRank,
		}
		if published := page.PublishedAt; published != nil {
			meta["published_at"] = published.Format(time.RFC3339)
		}

		// The fetch time doubles as the creation time; without it, fall back
		// to the current time.
		var created time.Time
		if fetched := page.FetchedAt; fetched != nil {
			meta["fetched_at"] = fetched.Format(time.RFC3339)
			created = *fetched
		} else {
			created = time.Now()
		}

		docs = append(docs, core.SourceDocument{
			ID:        fmt.Sprintf("web:%s", hashLikeText(link+"|"+body)),
			Text:      body,
			Title:     strings.TrimSpace(page.Title),
			Metadata:  meta,
			CreatedAt: created,
		})
	}
	return docs, nil
}
// BuildRetrieveFilter turns a web retrieval request into a metadata filter.
//
// req must be a WebRetrieveInput or a non-nil *WebRetrieveInput. A nil req,
// including a typed nil *WebRetrieveInput, yields the
// "web retrieve input is nil" error; any other type is rejected as invalid.
// All fields are whitespace-trimmed and only included when non-empty. At
// least one of query_id and session_id is required. The context argument is
// not used.
func (c *WebCorpus) BuildRetrieveFilter(_ context.Context, req any) (map[string]any, error) {
	var input WebRetrieveInput
	switch value := req.(type) {
	case WebRetrieveInput:
		input = value
	case *WebRetrieveInput:
		// An interface holding a typed nil pointer is not equal to nil, so it
		// has to be detected here rather than by comparing req with nil.
		if value == nil {
			return nil, errors.New("web retrieve input is nil")
		}
		input = *value
	case nil:
		return nil, errors.New("web retrieve input is nil")
	default:
		return nil, errors.New("invalid web retrieve input")
	}

	// Require at least one of query_id/session_id so that results from
	// unrelated questions are not mixed together.
	queryID := strings.TrimSpace(input.QueryID)
	sessionID := strings.TrimSpace(input.SessionID)
	if queryID == "" && sessionID == "" {
		return nil, errors.New("web retrieve filter requires query_id or session_id")
	}

	filter := map[string]any{}
	if queryID != "" {
		filter["query_id"] = queryID
	}
	if sessionID != "" {
		filter["session_id"] = sessionID
	}
	if domain := strings.TrimSpace(input.Domain); domain != "" {
		filter["domain"] = domain
	}
	return filter, nil
}
// toWebItems normalizes the supported ingest input shapes into a slice of
// WebIngestItem values.
//
// Accepted inputs are a WebIngestItem, a non-nil *WebIngestItem, a
// []WebIngestItem (returned as is), or a []*WebIngestItem (nil elements are
// dropped). Anything else is an error.
func toWebItems(input any) ([]WebIngestItem, error) {
	if single, ok := input.(WebIngestItem); ok {
		return []WebIngestItem{single}, nil
	}
	if ptr, ok := input.(*WebIngestItem); ok {
		if ptr == nil {
			return nil, errors.New("web ingest item is nil")
		}
		return []WebIngestItem{*ptr}, nil
	}
	if values, ok := input.([]WebIngestItem); ok {
		return values, nil
	}

	ptrs, ok := input.([]*WebIngestItem)
	if !ok {
		return nil, errors.New("invalid web ingest input")
	}
	flattened := make([]WebIngestItem, 0, len(ptrs))
	for i := range ptrs {
		if ptrs[i] != nil {
			flattened = append(flattened, *ptrs[i])
		}
	}
	return flattened, nil
}
// buildWebText assembles the document text for a web item.
//
// Title, Snippet and Content are trimmed and, in that order, every non-empty
// one is joined with a blank line between them. The result is empty when all
// three are blank.
func buildWebText(item WebIngestItem) string {
	sections := [...]string{item.Title, item.Snippet, item.Content}

	kept := make([]string, 0, len(sections))
	for _, raw := range sections {
		if piece := strings.TrimSpace(raw); piece != "" {
			kept = append(kept, piece)
		}
	}
	return strings.Join(kept, "\n\n")
}