Version: 0.9.13.dev.260410
后端: 1. Memory Day1 链路打通(chat_history -> outbox -> memory_jobs) - 更新 service/events/chat_history_persist.go:聊天消息落库同事务追加 memory.extract.requested 事件(仅 user 消息,失败回滚后由 outbox 重试) - 新建 service/events/memory_extract_requested.go:消费 memory.extract.requested 并幂等入队 memory_jobs,补齐 payload 校验、文本截断与 idempotency key - 更新 cmd/start.go:注册 RegisterMemoryExtractRequestedHandler 2. Memory 模块骨架落地(先跑通状态机,再接入真实抽取) - 新建 memory/model、repo、service、orchestrator、worker、utils 目录与 Day1 mock 抽取执行链 - 新建 model/memory.go:补齐 memory_items / memory_jobs / memory_audit_logs / memory_user_settings 与事件 payload 模型 - 更新 inits/mysql.go:接入 4 张 memory 相关表 AutoMigrate 3. RAG 复用基础设施预埋(依赖可替换) - 新建 infra/rag:core pipeline + chunk/embed/retrieve/rerank/store/corpus/config 分层实现 - 默认接入 MockEmbedder + InMemoryStore,预留 Milvus / Eino 适配实现 - 新增 infra/rag/RAG复用接口实施计划.md 4. 本地依赖与交接文档同步 - 更新 docker-compose.yml:新增 etcd / minio / milvus / attu 服务与数据卷 - 删除 newAgent/HANDOFF_工具研究与运行态重置.md、newAgent/阶段3_上下文瘦身设计.md - 新增 newAgent/HANDOFF_WebSearch两阶段实施计划.md、memory/HANDOFF-RAG复用后续实施计划.md、memory/README.md 前端:无 仓库:无
This commit is contained in:
13
backend/infra/rag/corpus/common.go
Normal file
13
backend/infra/rag/corpus/common.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package corpus
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// hashLikeText returns a short, stable fingerprint of text.
//
// The input is lower-cased and stripped of surrounding whitespace before
// hashing, so inputs that differ only in case or outer whitespace map to the
// same value. The result is the hex encoding of the first 8 bytes of the
// SHA-256 digest (16 hex characters).
func hashLikeText(text string) string {
	const prefixBytes = 8

	canonical := strings.ToLower(text)
	canonical = strings.TrimSpace(canonical)

	digest := sha256.Sum256([]byte(canonical))
	return hex.EncodeToString(digest[:prefixBytes])
}
|
||||
149
backend/infra/rag/corpus/memory_corpus.go
Normal file
149
backend/infra/rag/corpus/memory_corpus.go
Normal file
@@ -0,0 +1,149 @@
|
||||
package corpus
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
)
|
||||
|
||||
// memoryCorpusName is the corpus identifier returned by MemoryCorpus.Name.
const memoryCorpusName = "memory"
|
||||
|
||||
// MemoryIngestItem is a single memory record to be ingested into the memory
// corpus.
type MemoryIngestItem struct {
	// MemoryID identifies the memory. When it is not positive, the document
	// ID is derived from UserID and a hash of the content instead.
	MemoryID int64
	// UserID owns the memory. It must be positive, otherwise ingestion fails.
	UserID int
	// ConversationID, AssistantID and RunID are trimmed and copied into the
	// document metadata.
	ConversationID string
	AssistantID    string
	RunID          string
	// MemoryType is trimmed and lower-cased before being stored in metadata.
	MemoryType string
	// Title is trimmed and used as the document title.
	Title string
	// Content is the document text. Items whose trimmed content is empty are
	// skipped during ingestion.
	Content string
	// SensitivityLevel is copied into the document metadata unchanged.
	SensitivityLevel int
	// TTLAt, when set, is stored in metadata as an RFC 3339 string.
	TTLAt *time.Time
	// CreatedAt, when set, becomes the document creation time; otherwise the
	// current time is used.
	CreatedAt *time.Time
}
|
||||
|
||||
// MemoryRetrieveInput carries the filter criteria for memory retrieval.
type MemoryRetrieveInput struct {
	// UserID is required and must be positive.
	UserID int
	// ConversationID, AssistantID and RunID are optional; each one is added
	// to the filter only when non-blank after trimming.
	ConversationID string
	AssistantID    string
	RunID          string
	// MemoryType is optional; it is trimmed and lower-cased before use.
	MemoryType string
}
|
||||
|
||||
// MemoryCorpus is the corpus adapter for memory records. It holds no state.
type MemoryCorpus struct {
}
|
||||
|
||||
func NewMemoryCorpus() *MemoryCorpus {
|
||||
return &MemoryCorpus{}
|
||||
}
|
||||
|
||||
func (c *MemoryCorpus) Name() string {
|
||||
return memoryCorpusName
|
||||
}
|
||||
|
||||
func (c *MemoryCorpus) BuildIngestDocuments(_ context.Context, input any) ([]core.SourceDocument, error) {
|
||||
items, err := toMemoryItems(input)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result := make([]core.SourceDocument, 0, len(items))
|
||||
for _, item := range items {
|
||||
if item.UserID <= 0 {
|
||||
return nil, errors.New("memory ingest item user_id is invalid")
|
||||
}
|
||||
text := strings.TrimSpace(item.Content)
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
docID := fmt.Sprintf("memory:%d", item.MemoryID)
|
||||
if item.MemoryID <= 0 {
|
||||
docID = fmt.Sprintf("memory:uid:%d:%s", item.UserID, hashLikeText(text))
|
||||
}
|
||||
metadata := map[string]any{
|
||||
"user_id": item.UserID,
|
||||
"conversation_id": strings.TrimSpace(item.ConversationID),
|
||||
"assistant_id": strings.TrimSpace(item.AssistantID),
|
||||
"run_id": strings.TrimSpace(item.RunID),
|
||||
"memory_type": strings.TrimSpace(strings.ToLower(item.MemoryType)),
|
||||
"sensitivity_level": item.SensitivityLevel,
|
||||
}
|
||||
if item.TTLAt != nil {
|
||||
metadata["ttl_at"] = item.TTLAt.Format(time.RFC3339)
|
||||
}
|
||||
createdAt := time.Now()
|
||||
if item.CreatedAt != nil {
|
||||
createdAt = *item.CreatedAt
|
||||
}
|
||||
result = append(result, core.SourceDocument{
|
||||
ID: docID,
|
||||
Text: text,
|
||||
Title: strings.TrimSpace(item.Title),
|
||||
Metadata: metadata,
|
||||
CreatedAt: createdAt,
|
||||
})
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *MemoryCorpus) BuildRetrieveFilter(_ context.Context, req any) (map[string]any, error) {
|
||||
input, ok := req.(MemoryRetrieveInput)
|
||||
if !ok {
|
||||
if ptr, isPtr := req.(*MemoryRetrieveInput); isPtr && ptr != nil {
|
||||
input = *ptr
|
||||
} else if req == nil {
|
||||
return nil, errors.New("memory retrieve input is nil")
|
||||
} else {
|
||||
return nil, errors.New("invalid memory retrieve input")
|
||||
}
|
||||
}
|
||||
if input.UserID <= 0 {
|
||||
return nil, errors.New("memory retrieve user_id is invalid")
|
||||
}
|
||||
filter := map[string]any{
|
||||
"user_id": input.UserID,
|
||||
}
|
||||
if v := strings.TrimSpace(input.ConversationID); v != "" {
|
||||
filter["conversation_id"] = v
|
||||
}
|
||||
if v := strings.TrimSpace(input.AssistantID); v != "" {
|
||||
filter["assistant_id"] = v
|
||||
}
|
||||
if v := strings.TrimSpace(input.RunID); v != "" {
|
||||
filter["run_id"] = v
|
||||
}
|
||||
if v := strings.TrimSpace(strings.ToLower(input.MemoryType)); v != "" {
|
||||
filter["memory_type"] = v
|
||||
}
|
||||
return filter, nil
|
||||
}
|
||||
|
||||
func toMemoryItems(input any) ([]MemoryIngestItem, error) {
|
||||
switch value := input.(type) {
|
||||
case MemoryIngestItem:
|
||||
return []MemoryIngestItem{value}, nil
|
||||
case *MemoryIngestItem:
|
||||
if value == nil {
|
||||
return nil, errors.New("memory ingest item is nil")
|
||||
}
|
||||
return []MemoryIngestItem{*value}, nil
|
||||
case []MemoryIngestItem:
|
||||
return value, nil
|
||||
case []*MemoryIngestItem:
|
||||
items := make([]MemoryIngestItem, 0, len(value))
|
||||
for _, ptr := range value {
|
||||
if ptr == nil {
|
||||
continue
|
||||
}
|
||||
items = append(items, *ptr)
|
||||
}
|
||||
return items, nil
|
||||
default:
|
||||
return nil, errors.New("invalid memory ingest input")
|
||||
}
|
||||
}
|
||||
163
backend/infra/rag/corpus/web_corpus.go
Normal file
163
backend/infra/rag/corpus/web_corpus.go
Normal file
@@ -0,0 +1,163 @@
|
||||
package corpus
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
)
|
||||
|
||||
// webCorpusName is the corpus identifier returned by WebCorpus.Name.
const webCorpusName = "web"
|
||||
|
||||
// WebIngestItem is a single web page record to be ingested into the web
// corpus.
type WebIngestItem struct {
	// URL is required; ingestion fails when it is blank after trimming.
	URL string
	// Title is trimmed and used both as the document title and as the first
	// part of the document text.
	Title string
	// Content and Snippet contribute to the document text. The text is
	// assembled as title, snippet, content, skipping blank parts.
	Content string
	Snippet string
	// Domain, QueryID and SessionID are trimmed and copied into the document
	// metadata.
	Domain    string
	QueryID   string
	SessionID string
	// PublishedAt, when set, is stored in metadata as an RFC 3339 string.
	PublishedAt *time.Time
	// FetchedAt, when set, is stored in metadata as an RFC 3339 string and
	// becomes the document creation time; otherwise the current time is used.
	FetchedAt *time.Time
	// SourceRank is copied into the document metadata unchanged.
	SourceRank int
}
|
||||
|
||||
// WebRetrieveInput carries the filter criteria for web corpus retrieval.
type WebRetrieveInput struct {
	// QueryID and SessionID scope the retrieval; at least one of them must be
	// non-blank.
	QueryID   string
	SessionID string
	// Domain is optional and added to the filter only when non-blank.
	Domain string
}
|
||||
|
||||
// WebCorpus is the corpus adapter for web page records. It holds no state.
type WebCorpus struct {
}
|
||||
|
||||
func NewWebCorpus() *WebCorpus {
|
||||
return &WebCorpus{}
|
||||
}
|
||||
|
||||
func (c *WebCorpus) Name() string {
|
||||
return webCorpusName
|
||||
}
|
||||
|
||||
func (c *WebCorpus) BuildIngestDocuments(_ context.Context, input any) ([]core.SourceDocument, error) {
|
||||
items, err := toWebItems(input)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result := make([]core.SourceDocument, 0, len(items))
|
||||
for _, item := range items {
|
||||
url := strings.TrimSpace(item.URL)
|
||||
if url == "" {
|
||||
return nil, errors.New("web ingest item url is empty")
|
||||
}
|
||||
|
||||
mainText := buildWebText(item)
|
||||
if strings.TrimSpace(mainText) == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
docID := fmt.Sprintf("web:%s", hashLikeText(url+"|"+mainText))
|
||||
metadata := map[string]any{
|
||||
"url": url,
|
||||
"domain": strings.TrimSpace(item.Domain),
|
||||
"query_id": strings.TrimSpace(item.QueryID),
|
||||
"session_id": strings.TrimSpace(item.SessionID),
|
||||
"source_rank": item.SourceRank,
|
||||
}
|
||||
if item.PublishedAt != nil {
|
||||
metadata["published_at"] = item.PublishedAt.Format(time.RFC3339)
|
||||
}
|
||||
if item.FetchedAt != nil {
|
||||
metadata["fetched_at"] = item.FetchedAt.Format(time.RFC3339)
|
||||
}
|
||||
|
||||
createdAt := time.Now()
|
||||
if item.FetchedAt != nil {
|
||||
createdAt = *item.FetchedAt
|
||||
}
|
||||
result = append(result, core.SourceDocument{
|
||||
ID: docID,
|
||||
Text: mainText,
|
||||
Title: strings.TrimSpace(item.Title),
|
||||
Metadata: metadata,
|
||||
CreatedAt: createdAt,
|
||||
})
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *WebCorpus) BuildRetrieveFilter(_ context.Context, req any) (map[string]any, error) {
|
||||
input, ok := req.(WebRetrieveInput)
|
||||
if !ok {
|
||||
if ptr, isPtr := req.(*WebRetrieveInput); isPtr && ptr != nil {
|
||||
input = *ptr
|
||||
} else if req == nil {
|
||||
return nil, errors.New("web retrieve input is nil")
|
||||
} else {
|
||||
return nil, errors.New("invalid web retrieve input")
|
||||
}
|
||||
}
|
||||
|
||||
// 1. query_id/session_id 至少要有一个,避免跨问题串数据。
|
||||
queryID := strings.TrimSpace(input.QueryID)
|
||||
sessionID := strings.TrimSpace(input.SessionID)
|
||||
if queryID == "" && sessionID == "" {
|
||||
return nil, errors.New("web retrieve filter requires query_id or session_id")
|
||||
}
|
||||
|
||||
filter := map[string]any{}
|
||||
if queryID != "" {
|
||||
filter["query_id"] = queryID
|
||||
}
|
||||
if sessionID != "" {
|
||||
filter["session_id"] = sessionID
|
||||
}
|
||||
if domain := strings.TrimSpace(input.Domain); domain != "" {
|
||||
filter["domain"] = domain
|
||||
}
|
||||
return filter, nil
|
||||
}
|
||||
|
||||
func toWebItems(input any) ([]WebIngestItem, error) {
|
||||
switch value := input.(type) {
|
||||
case WebIngestItem:
|
||||
return []WebIngestItem{value}, nil
|
||||
case *WebIngestItem:
|
||||
if value == nil {
|
||||
return nil, errors.New("web ingest item is nil")
|
||||
}
|
||||
return []WebIngestItem{*value}, nil
|
||||
case []WebIngestItem:
|
||||
return value, nil
|
||||
case []*WebIngestItem:
|
||||
items := make([]WebIngestItem, 0, len(value))
|
||||
for _, ptr := range value {
|
||||
if ptr == nil {
|
||||
continue
|
||||
}
|
||||
items = append(items, *ptr)
|
||||
}
|
||||
return items, nil
|
||||
default:
|
||||
return nil, errors.New("invalid web ingest input")
|
||||
}
|
||||
}
|
||||
|
||||
func buildWebText(item WebIngestItem) string {
|
||||
parts := make([]string, 0, 3)
|
||||
if title := strings.TrimSpace(item.Title); title != "" {
|
||||
parts = append(parts, title)
|
||||
}
|
||||
if snippet := strings.TrimSpace(item.Snippet); snippet != "" {
|
||||
parts = append(parts, snippet)
|
||||
}
|
||||
if content := strings.TrimSpace(item.Content); content != "" {
|
||||
parts = append(parts, content)
|
||||
}
|
||||
return strings.Join(parts, "\n\n")
|
||||
}
|
||||
Reference in New Issue
Block a user