Version: 0.9.65.dev.260503

后端:
1. 阶段 1.5/1.6
收口 llm-service / rag-service,统一模型出口与检索基础设施入口,清退 backend/infra/llm 与 backend/infra/rag 旧实现;
2. 同步更新相关调用链与微服务迁移计划文档
This commit is contained in:
Losita
2026-05-03 23:21:03 +08:00
parent a6c1e5d077
commit 9902ca3563
65 changed files with 550 additions and 376 deletions

View File

@@ -5,8 +5,6 @@ import (
"errors"
"log"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorycleanup "github.com/LoveLosita/smartflow/backend/memory/cleanup"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
@@ -16,6 +14,8 @@ import (
memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
memoryworker "github.com/LoveLosita/smartflow/backend/memory/worker"
"github.com/LoveLosita/smartflow/backend/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
"gorm.io/gorm"
)
@@ -28,8 +28,8 @@ import (
type Module struct {
db *gorm.DB
cfg memorymodel.Config
llmClient *infrallm.Client
ragRuntime infrarag.Runtime
llmClient *llmservice.Client
ragRuntime ragservice.Runtime
observer memoryobserve.Observer
metrics memoryobserve.MetricsRecorder
@@ -64,15 +64,15 @@ func LoadConfigFromViper() memorymodel.Config {
// 2. llmClient 允许为 nil,此时写入链路会自动回退到本地 fallback 抽取;
// 3. ragRuntime 允许为 nil,此时读取/向量同步自动回退旧逻辑;
// 4. 若后续接入统一 DI 容器,也应优先注册这个 Module,而不是把内部 repo/service 继续向外泄漏。
func NewModule(db *gorm.DB, llmClient *infrallm.Client, ragRuntime infrarag.Runtime, cfg memorymodel.Config) *Module {
func NewModule(db *gorm.DB, llmClient *llmservice.Client, ragRuntime ragservice.Runtime, cfg memorymodel.Config) *Module {
return NewModuleWithObserve(db, llmClient, ragRuntime, cfg, ObserveDeps{})
}
// NewModuleWithObserve 创建带观测依赖的 memory 模块门面。
func NewModuleWithObserve(
db *gorm.DB,
llmClient *infrallm.Client,
ragRuntime infrarag.Runtime,
llmClient *llmservice.Client,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
deps ObserveDeps,
) *Module {
@@ -228,8 +228,8 @@ func (m *Module) StartWorker(ctx context.Context) {
func wireModule(
db *gorm.DB,
llmClient *infrallm.Client,
ragRuntime infrarag.Runtime,
llmClient *llmservice.Client,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
deps ObserveDeps,
) *Module {

View File

@@ -6,8 +6,8 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const defaultDecisionCompareMaxTokens = 600
@@ -19,13 +19,13 @@ const defaultDecisionCompareMaxTokens = 600
// 2. LLM 只输出 relation(关系类型),不输出 action,不输出 target ID;
// 3. LLM 调用失败时返回 error由上层决定是否视为 unrelated。
type LLMDecisionOrchestrator struct {
client *infrallm.Client
client *llmservice.Client
cfg memorymodel.Config
logger *log.Logger
}
// NewLLMDecisionOrchestrator 构造决策比对编排器。
func NewLLMDecisionOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
func NewLLMDecisionOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
return &LLMDecisionOrchestrator{
client: client,
cfg: cfg,
@@ -52,14 +52,14 @@ func (o *LLMDecisionOrchestrator) Compare(
systemPrompt := buildDecisionCompareSystemPrompt()
userPrompt := buildDecisionCompareUserPrompt(fact, candidate)
messages := infrallm.BuildSystemUserMessages(systemPrompt, nil, userPrompt)
messages := llmservice.BuildSystemUserMessages(systemPrompt, nil, userPrompt)
// 2. 调用 LLM 做结构化输出,温度用低值保证判断稳定。
resp, _, err := infrallm.GenerateJSON[decisionCompareResponse](
resp, _, err := llmservice.GenerateJSON[decisionCompareResponse](
ctx,
o.client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 0.1,
MaxTokens: defaultDecisionCompareMaxTokens,
Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking),
@@ -127,9 +127,9 @@ func buildDecisionCompareUserPrompt(fact memorymodel.NormalizedFact, candidate m
}
// resolveMemoryThinkingMode 根据配置布尔值返回对应的 ThinkingMode。
func resolveMemoryThinkingMode(enabled bool) infrallm.ThinkingMode {
func resolveMemoryThinkingMode(enabled bool) llmservice.ThinkingMode {
if enabled {
return infrallm.ThinkingModeEnabled
return llmservice.ThinkingModeEnabled
}
return infrallm.ThinkingModeDisabled
return llmservice.ThinkingModeDisabled
}

View File

@@ -7,9 +7,9 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -24,13 +24,13 @@ const (
// 2. 不负责落库,不负责任务状态机推进;
// 3. 当 LLM 不可用或输出异常时,回退到保守的本地抽取,保证链路不完全断。
type LLMWriteOrchestrator struct {
client *infrallm.Client
client *llmservice.Client
cfg memorymodel.Config
logger *log.Logger
}
// NewLLMWriteOrchestrator 构造 LLM 版记忆写入编排器。
func NewLLMWriteOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
func NewLLMWriteOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
return &LLMWriteOrchestrator{
client: client,
cfg: cfg,
@@ -54,17 +54,17 @@ func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorym
return fallbackNormalizedFacts(payload), nil
}
messages := infrallm.BuildSystemUserMessages(
messages := llmservice.BuildSystemUserMessages(
buildMemoryExtractSystemPrompt(o.cfg.ExtractPrompt),
nil,
buildMemoryExtractUserPrompt(payload),
)
resp, rawResult, err := infrallm.GenerateJSON[memoryExtractResponse](
resp, rawResult, err := llmservice.GenerateJSON[memoryExtractResponse](
ctx,
o.client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: clampTemperature(o.cfg.LLMTemperature),
MaxTokens: defaultMemoryExtractMaxTokens,
Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking),
@@ -319,7 +319,7 @@ func isSkipIntent(intent string) bool {
}
}
func truncateForLog(raw *infrallm.TextResult) string {
func truncateForLog(raw *llmservice.TextResult) string {
if raw == nil {
return ""
}

View File

@@ -3,8 +3,8 @@ package service
import (
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
// buildReadScopedItemQuery 构造读侧统一使用的 MySQL 查询条件。
@@ -53,8 +53,8 @@ func buildReadScopedRAGRequest(
req memorymodel.RetrieveRequest,
topK int,
threshold float64,
) infrarag.MemoryRetrieveRequest {
return infrarag.MemoryRetrieveRequest{
) ragservice.MemoryRetrieveRequest {
return ragservice.MemoryRetrieveRequest{
Query: req.Query,
TopK: topK,
Threshold: threshold,

View File

@@ -8,12 +8,12 @@ import (
"strings"
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
const (
@@ -30,7 +30,7 @@ const (
type ReadService struct {
itemRepo *memoryrepo.ItemRepo
settingsRepo *memoryrepo.SettingsRepo
ragRuntime infrarag.Runtime
ragRuntime ragservice.Runtime
cfg memorymodel.Config
observer memoryobserve.Observer
metrics memoryobserve.MetricsRecorder
@@ -57,7 +57,7 @@ type semanticRetrieveTelemetry struct {
func NewReadService(
itemRepo *memoryrepo.ItemRepo,
settingsRepo *memoryrepo.SettingsRepo,
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
observer memoryobserve.Observer,
metrics memoryobserve.MetricsRecorder,
@@ -347,7 +347,7 @@ func collectMemoryIDs(items []model.MemoryItem) []int64 {
return ids
}
func buildMemoryDTOFromRetrieveHit(hit infrarag.RetrieveHit) (memorymodel.ItemDTO, int64) {
func buildMemoryDTOFromRetrieveHit(hit ragservice.RetrieveHit) (memorymodel.ItemDTO, int64) {
memoryID := parseMemoryIDFromDocumentID(hit.DocumentID)
metadata := hit.Metadata
content := strings.TrimSpace(hit.Text)

View File

@@ -6,10 +6,10 @@ import (
"log"
"strings"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
// Syncer 负责 memory_items 与向量库之间的最小桥接。
@@ -19,7 +19,7 @@ import (
// 2. 不负责决定哪些记忆该写、该删、该恢复,这些决策仍由上游 service/worker/cleanup 控制;
// 3. 同步失败时只回写 vector_status 并打观测,不反向回滚业务事务,避免把在线链路拖成强依赖。
type Syncer struct {
ragRuntime infrarag.Runtime
ragRuntime ragservice.Runtime
itemRepo *memoryrepo.ItemRepo
observer memoryobserve.Observer
metrics memoryobserve.MetricsRecorder
@@ -27,7 +27,7 @@ type Syncer struct {
}
func NewSyncer(
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
itemRepo *memoryrepo.ItemRepo,
observer memoryobserve.Observer,
metrics memoryobserve.MetricsRecorder,
@@ -53,9 +53,9 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
return
}
requestItems := make([]infrarag.MemoryIngestItem, 0, len(items))
requestItems := make([]ragservice.MemoryIngestItem, 0, len(items))
for _, item := range items {
requestItems = append(requestItems, infrarag.MemoryIngestItem{
requestItems = append(requestItems, ragservice.MemoryIngestItem{
MemoryID: item.ID,
UserID: item.UserID,
ConversationID: strValue(item.ConversationID),
@@ -76,7 +76,7 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
result, err := s.ragRuntime.IngestMemory(memoryobserve.WithFields(ctx, map[string]any{
"trace_id": traceID,
}), infrarag.MemoryIngestRequest{
}), ragservice.MemoryIngestRequest{
TraceID: traceID,
Action: "add",
Items: requestItems,

View File

@@ -4,11 +4,11 @@ import (
"context"
"fmt"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
"gorm.io/gorm"
)
@@ -192,7 +192,7 @@ func (r *Runner) recallCandidates(
) candidateRecallResult {
// 1. 优先使用 Milvus 向量语义召回。
if r.ragRuntime != nil {
retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, infrarag.MemoryRetrieveRequest{
retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, ragservice.MemoryRetrieveRequest{
Query: fact.Content,
TopK: r.cfg.DecisionCandidateTopK,
Threshold: r.cfg.DecisionCandidateMinScore,
@@ -235,7 +235,7 @@ func (r *Runner) recallCandidates(
// 1. 从 DocumentID(格式 memory:{id})解析出 mysql_id;
// 2. 从 metadata 提取 title 和 memory_type
// 3. 跳过无法解析 DocumentID 的结果。
func (r *Runner) buildCandidatesFromRAG(hits []infrarag.RetrieveHit) []memorymodel.CandidateSnapshot {
func (r *Runner) buildCandidatesFromRAG(hits []ragservice.RetrieveHit) []memorymodel.CandidateSnapshot {
candidates := make([]memorymodel.CandidateSnapshot, 0, len(hits))
for _, hit := range hits {
memoryID := parseMemoryID(hit.DocumentID)

View File

@@ -9,7 +9,6 @@ import (
"strings"
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryorchestrator "github.com/LoveLosita/smartflow/backend/memory/orchestrator"
@@ -17,6 +16,7 @@ import (
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
"gorm.io/gorm"
)
@@ -41,7 +41,7 @@ type Runner struct {
auditRepo *memoryrepo.AuditRepo
settingsRepo *memoryrepo.SettingsRepo
extractor Extractor
ragRuntime infrarag.Runtime
ragRuntime ragservice.Runtime
logger *log.Logger
vectorSyncer *memoryvectorsync.Syncer
observer memoryobserve.Observer
@@ -63,7 +63,7 @@ func NewRunner(
auditRepo *memoryrepo.AuditRepo,
settingsRepo *memoryrepo.SettingsRepo,
extractor Extractor,
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
decisionOrchestrator *memoryorchestrator.LLMDecisionOrchestrator,
vectorSyncer *memoryvectorsync.Syncer,