Version: 0.9.65.dev.260503

后端:
1. 阶段 1.5/1.6
收口 llm-service / rag-service,统一模型出口与检索基础设施入口,清退 backend/infra/llm 与 backend/infra/rag 旧实现;
2. 同步更新相关调用链与微服务迁移计划文档
This commit is contained in:
Losita
2026-05-03 23:21:03 +08:00
parent a6c1e5d077
commit 9902ca3563
65 changed files with 550 additions and 376 deletions

View File

@@ -0,0 +1,13 @@
package corpus
import (
"crypto/sha256"
"encoding/hex"
"strings"
)
// hashLikeText returns a short, deterministic fingerprint for text.
//
// The input is lower-cased and then stripped of surrounding whitespace, so
// inputs that differ only in letter case or outer whitespace produce the same
// fingerprint. The result is the first 8 bytes of the SHA-256 digest of that
// canonical form, rendered as 16 lowercase hexadecimal characters.
func hashLikeText(text string) string {
	// Number of leading digest bytes kept in the fingerprint.
	const prefixBytes = 8

	canonical := strings.ToLower(text)
	canonical = strings.TrimSpace(canonical)

	digest := sha256.Sum256([]byte(canonical))
	return hex.EncodeToString(digest[:prefixBytes])
}

View File

@@ -0,0 +1,158 @@
package corpus
import (
"context"
"errors"
"fmt"
"strings"
"time"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
// memoryCorpusName is the corpus name returned by MemoryCorpus.Name.
const memoryCorpusName = "memory"
// MemoryIngestItem is a single memory record to be ingested into the memory
// corpus. MemoryCorpus.BuildIngestDocuments converts each item into one
// source document.
type MemoryIngestItem struct {
	// MemoryID, when positive, forms the document ID ("memory:<id>").
	// Otherwise the document ID is derived from UserID and a hash of Content.
	MemoryID int64

	// UserID owns the memory. Values <= 0 are rejected at ingest time.
	UserID int

	// ConversationID is stored (trimmed) in the document metadata.
	ConversationID string

	// AssistantID is stored (trimmed) in the document metadata.
	AssistantID string

	// RunID is stored (trimmed) in the document metadata.
	RunID string

	// MemoryType is stored lower-cased and trimmed in the document metadata.
	MemoryType string

	// Title is trimmed and used both as the document title and as metadata.
	Title string

	// Content is the document text. Items whose trimmed content is empty
	// are skipped at ingest time.
	Content string

	// Confidence is copied verbatim into the document metadata.
	Confidence float64

	// Importance is copied verbatim into the document metadata.
	Importance float64

	// SensitivityLevel is copied verbatim into the document metadata.
	SensitivityLevel int

	// IsExplicit is copied verbatim into the document metadata.
	IsExplicit bool

	// Status is stored (trimmed) in the document metadata.
	Status string

	// TTLAt, when set, is stored in the metadata as an RFC 3339 timestamp.
	TTLAt *time.Time

	// CreatedAt, when set, becomes the document creation time; otherwise
	// the time of ingestion is used.
	CreatedAt *time.Time
}
// MemoryRetrieveInput carries the criteria that
// MemoryCorpus.BuildRetrieveFilter turns into a retrieval filter.
type MemoryRetrieveInput struct {
	// UserID is mandatory; values <= 0 are rejected.
	UserID int

	// ConversationID narrows the filter when non-blank.
	ConversationID string

	// AssistantID narrows the filter when non-blank.
	AssistantID string

	// RunID narrows the filter when non-blank.
	RunID string

	// MemoryType narrows the filter when non-blank; it is lower-cased and
	// trimmed before use.
	MemoryType string
}
// MemoryCorpus adapts memory records to the RAG corpus operations defined in
// this file: building ingest documents and building retrieval filters.
// It carries no state.
type MemoryCorpus struct{}

// NewMemoryCorpus returns a ready-to-use MemoryCorpus.
func NewMemoryCorpus() *MemoryCorpus {
	return new(MemoryCorpus)
}

// Name returns the corpus name, "memory".
func (c *MemoryCorpus) Name() string {
	return memoryCorpusName
}
// BuildIngestDocuments converts memory ingest input into source documents.
//
// input may be a MemoryIngestItem, a *MemoryIngestItem, a []MemoryIngestItem
// or a []*MemoryIngestItem; anything else is rejected by toMemoryItems.
//
// For every item:
//   - a UserID <= 0 aborts the whole call with an error;
//   - an item whose trimmed Content is empty is skipped;
//   - the document ID is "memory:<MemoryID>" when MemoryID is positive,
//     otherwise "memory:uid:<UserID>:<hash of the trimmed content>";
//   - CreatedAt falls back to the current time when the item has none.
//
// The context argument is unused.
func (c *MemoryCorpus) BuildIngestDocuments(_ context.Context, input any) ([]core.SourceDocument, error) {
	items, err := toMemoryItems(input)
	if err != nil {
		return nil, err
	}

	docs := make([]core.SourceDocument, 0, len(items))
	for i := range items {
		item := &items[i]

		// The owner is validated before the content check, so an invalid
		// user fails the batch even if that item would have been skipped.
		if item.UserID <= 0 {
			return nil, errors.New("memory ingest item user_id is invalid")
		}
		body := strings.TrimSpace(item.Content)
		if body == "" {
			continue
		}

		var id string
		if item.MemoryID > 0 {
			id = fmt.Sprintf("memory:%d", item.MemoryID)
		} else {
			// No usable memory ID: derive one from the owner and content.
			id = fmt.Sprintf("memory:uid:%d:%s", item.UserID, hashLikeText(body))
		}

		title := strings.TrimSpace(item.Title)
		meta := map[string]any{
			"user_id":           item.UserID,
			"conversation_id":   strings.TrimSpace(item.ConversationID),
			"assistant_id":      strings.TrimSpace(item.AssistantID),
			"run_id":            strings.TrimSpace(item.RunID),
			"memory_type":       strings.TrimSpace(strings.ToLower(item.MemoryType)),
			"title":             title,
			"confidence":        item.Confidence,
			"importance":        item.Importance,
			"sensitivity_level": item.SensitivityLevel,
			"is_explicit":       item.IsExplicit,
			"status":            strings.TrimSpace(item.Status),
		}
		if ttl := item.TTLAt; ttl != nil {
			meta["ttl_at"] = ttl.Format(time.RFC3339)
		}

		var created time.Time
		if item.CreatedAt != nil {
			created = *item.CreatedAt
		} else {
			created = time.Now()
		}

		docs = append(docs, core.SourceDocument{
			ID:        id,
			Text:      body,
			Title:     title,
			Metadata:  meta,
			CreatedAt: created,
		})
	}
	return docs, nil
}
// BuildRetrieveFilter builds the metadata filter used to retrieve memories.
//
// req must be a MemoryRetrieveInput or a non-nil *MemoryRetrieveInput.
// The returned filter always contains "user_id"; "conversation_id",
// "assistant_id", "run_id" and "memory_type" are added only when the
// corresponding input field is non-blank after trimming (memory_type is also
// lower-cased).
//
// Errors:
//   - "memory retrieve input is nil" when req is nil or a nil
//     *MemoryRetrieveInput;
//   - "invalid memory retrieve input" when req has any other type;
//   - "memory retrieve user_id is invalid" when UserID <= 0.
//
// The context argument is unused.
func (c *MemoryCorpus) BuildRetrieveFilter(_ context.Context, req any) (map[string]any, error) {
	var input MemoryRetrieveInput
	switch v := req.(type) {
	case nil:
		return nil, errors.New("memory retrieve input is nil")
	case MemoryRetrieveInput:
		input = v
	case *MemoryRetrieveInput:
		// An interface holding a nil pointer is itself non-nil, so this
		// case must be checked explicitly to report it as a nil input.
		if v == nil {
			return nil, errors.New("memory retrieve input is nil")
		}
		input = *v
	default:
		return nil, errors.New("invalid memory retrieve input")
	}

	if input.UserID <= 0 {
		return nil, errors.New("memory retrieve user_id is invalid")
	}

	filter := map[string]any{
		"user_id": input.UserID,
	}
	if v := strings.TrimSpace(input.ConversationID); v != "" {
		filter["conversation_id"] = v
	}
	if v := strings.TrimSpace(input.AssistantID); v != "" {
		filter["assistant_id"] = v
	}
	if v := strings.TrimSpace(input.RunID); v != "" {
		filter["run_id"] = v
	}
	if v := strings.TrimSpace(strings.ToLower(input.MemoryType)); v != "" {
		filter["memory_type"] = v
	}
	return filter, nil
}
// toMemoryItems normalizes the accepted ingest input shapes into a slice of
// MemoryIngestItem values.
//
// Accepted shapes are a single value, a single pointer, a slice of values
// (returned as-is, without copying) and a slice of pointers (nil entries are
// dropped). A nil single pointer and any other input type produce an error.
func toMemoryItems(input any) ([]MemoryIngestItem, error) {
	if single, ok := input.(MemoryIngestItem); ok {
		return []MemoryIngestItem{single}, nil
	}

	if ptr, ok := input.(*MemoryIngestItem); ok {
		if ptr == nil {
			return nil, errors.New("memory ingest item is nil")
		}
		return []MemoryIngestItem{*ptr}, nil
	}

	if list, ok := input.([]MemoryIngestItem); ok {
		return list, nil
	}

	if ptrs, ok := input.([]*MemoryIngestItem); ok {
		flattened := make([]MemoryIngestItem, 0, len(ptrs))
		for _, p := range ptrs {
			if p != nil {
				flattened = append(flattened, *p)
			}
		}
		return flattened, nil
	}

	return nil, errors.New("invalid memory ingest input")
}

View File

@@ -0,0 +1,163 @@
package corpus
import (
"context"
"errors"
"fmt"
"strings"
"time"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
// webCorpusName is the corpus name returned by WebCorpus.Name.
const webCorpusName = "web"
// WebIngestItem is a single web page result to be ingested into the web
// corpus. WebCorpus.BuildIngestDocuments converts each item into one source
// document.
type WebIngestItem struct {
	// URL is mandatory; a blank URL is rejected at ingest time. It is
	// stored in the metadata and contributes to the document ID.
	URL string

	// Title is trimmed and used as the document title; it is also the
	// first part of the document text.
	Title string

	// Content is the last part of the document text.
	Content string

	// Snippet is the middle part of the document text.
	Snippet string

	// Domain is stored (trimmed) in the document metadata.
	Domain string

	// QueryID is stored (trimmed) in the document metadata.
	QueryID string

	// SessionID is stored (trimmed) in the document metadata.
	SessionID string

	// PublishedAt, when set, is stored in the metadata as an RFC 3339
	// timestamp.
	PublishedAt *time.Time

	// FetchedAt, when set, is stored in the metadata as an RFC 3339
	// timestamp and becomes the document creation time; otherwise the time
	// of ingestion is used as the creation time.
	FetchedAt *time.Time

	// SourceRank is copied verbatim into the document metadata.
	SourceRank int
}
// WebRetrieveInput carries the criteria that WebCorpus.BuildRetrieveFilter
// turns into a retrieval filter. At least one of QueryID and SessionID must
// be non-blank.
type WebRetrieveInput struct {
	// QueryID narrows the filter when non-blank.
	QueryID string

	// SessionID narrows the filter when non-blank.
	SessionID string

	// Domain narrows the filter when non-blank.
	Domain string
}
// WebCorpus adapts web page results to the RAG corpus operations defined in
// this file: building ingest documents and building retrieval filters.
// It carries no state.
type WebCorpus struct{}

// NewWebCorpus returns a ready-to-use WebCorpus.
func NewWebCorpus() *WebCorpus {
	return new(WebCorpus)
}

// Name returns the corpus name, "web".
func (c *WebCorpus) Name() string {
	return webCorpusName
}
// BuildIngestDocuments converts web ingest input into source documents.
//
// input may be a WebIngestItem, a *WebIngestItem, a []WebIngestItem or a
// []*WebIngestItem; anything else is rejected by toWebItems.
//
// For every item:
//   - a blank URL aborts the whole call with an error;
//   - the document text is assembled by buildWebText, and items whose text
//     is blank are skipped;
//   - the document ID is "web:<hash of "<url>|<text>">";
//   - CreatedAt is FetchedAt when set, otherwise the current time.
//
// The context argument is unused.
func (c *WebCorpus) BuildIngestDocuments(_ context.Context, input any) ([]core.SourceDocument, error) {
	items, err := toWebItems(input)
	if err != nil {
		return nil, err
	}

	docs := make([]core.SourceDocument, 0, len(items))
	for i := range items {
		item := items[i]

		pageURL := strings.TrimSpace(item.URL)
		if pageURL == "" {
			return nil, errors.New("web ingest item url is empty")
		}

		body := buildWebText(item)
		if strings.TrimSpace(body) == "" {
			continue
		}

		meta := map[string]any{
			"url":         pageURL,
			"domain":      strings.TrimSpace(item.Domain),
			"query_id":    strings.TrimSpace(item.QueryID),
			"session_id":  strings.TrimSpace(item.SessionID),
			"source_rank": item.SourceRank,
		}
		if published := item.PublishedAt; published != nil {
			meta["published_at"] = published.Format(time.RFC3339)
		}

		// The fetch time doubles as the creation time when it is known.
		created := time.Now()
		if fetched := item.FetchedAt; fetched != nil {
			meta["fetched_at"] = fetched.Format(time.RFC3339)
			created = *fetched
		}

		docs = append(docs, core.SourceDocument{
			ID:        fmt.Sprintf("web:%s", hashLikeText(pageURL+"|"+body)),
			Text:      body,
			Title:     strings.TrimSpace(item.Title),
			Metadata:  meta,
			CreatedAt: created,
		})
	}
	return docs, nil
}
// BuildRetrieveFilter builds the metadata filter used to retrieve web
// documents.
//
// req must be a WebRetrieveInput or a non-nil *WebRetrieveInput. The returned
// filter contains "query_id", "session_id" and "domain" for each input field
// that is non-blank after trimming.
//
// Errors:
//   - "web retrieve input is nil" when req is nil or a nil *WebRetrieveInput;
//   - "invalid web retrieve input" when req has any other type;
//   - "web retrieve filter requires query_id or session_id" when both
//     QueryID and SessionID are blank.
//
// The context argument is unused.
func (c *WebCorpus) BuildRetrieveFilter(_ context.Context, req any) (map[string]any, error) {
	var input WebRetrieveInput
	switch v := req.(type) {
	case nil:
		return nil, errors.New("web retrieve input is nil")
	case WebRetrieveInput:
		input = v
	case *WebRetrieveInput:
		// An interface holding a nil pointer is itself non-nil, so this
		// case must be checked explicitly to report it as a nil input.
		if v == nil {
			return nil, errors.New("web retrieve input is nil")
		}
		input = *v
	default:
		return nil, errors.New("invalid web retrieve input")
	}

	// At least one of query_id/session_id is required so that results
	// belonging to unrelated questions are not mixed together.
	queryID := strings.TrimSpace(input.QueryID)
	sessionID := strings.TrimSpace(input.SessionID)
	if queryID == "" && sessionID == "" {
		return nil, errors.New("web retrieve filter requires query_id or session_id")
	}

	filter := map[string]any{}
	if queryID != "" {
		filter["query_id"] = queryID
	}
	if sessionID != "" {
		filter["session_id"] = sessionID
	}
	if domain := strings.TrimSpace(input.Domain); domain != "" {
		filter["domain"] = domain
	}
	return filter, nil
}
// toWebItems normalizes the accepted ingest input shapes into a slice of
// WebIngestItem values.
//
// Accepted shapes are a single value, a single pointer, a slice of values
// (returned as-is, without copying) and a slice of pointers (nil entries are
// dropped). A nil single pointer and any other input type produce an error.
func toWebItems(input any) ([]WebIngestItem, error) {
	if single, ok := input.(WebIngestItem); ok {
		return []WebIngestItem{single}, nil
	}

	if ptr, ok := input.(*WebIngestItem); ok {
		if ptr == nil {
			return nil, errors.New("web ingest item is nil")
		}
		return []WebIngestItem{*ptr}, nil
	}

	if list, ok := input.([]WebIngestItem); ok {
		return list, nil
	}

	if ptrs, ok := input.([]*WebIngestItem); ok {
		flattened := make([]WebIngestItem, 0, len(ptrs))
		for _, p := range ptrs {
			if p != nil {
				flattened = append(flattened, *p)
			}
		}
		return flattened, nil
	}

	return nil, errors.New("invalid web ingest input")
}
// buildWebText assembles the document text for a web item.
//
// The title, snippet and content are trimmed, blank ones are dropped, and the
// remainder is joined in that order with a blank line ("\n\n") between parts.
// The result is empty when all three fields are blank.
func buildWebText(item WebIngestItem) string {
	candidates := [...]string{item.Title, item.Snippet, item.Content}

	kept := make([]string, 0, len(candidates))
	for _, raw := range candidates {
		if trimmed := strings.TrimSpace(raw); trimmed != "" {
			kept = append(kept, trimmed)
		}
	}
	return strings.Join(kept, "\n\n")
}