Version: 0.9.65.dev.260503

后端:
1. 阶段 1.5/1.6
收口 llm-service / rag-service,统一模型出口与检索基础设施入口,清退 backend/infra/llm 与 backend/infra/rag 旧实现;
2. 同步更新相关调用链与微服务迁移计划文档
This commit is contained in:
Losita
2026-05-03 23:21:03 +08:00
parent a6c1e5d077
commit 9902ca3563
65 changed files with 550 additions and 376 deletions

View File

@@ -5,8 +5,6 @@ import (
"errors"
"log"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorycleanup "github.com/LoveLosita/smartflow/backend/memory/cleanup"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
@@ -16,6 +14,8 @@ import (
memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
memoryworker "github.com/LoveLosita/smartflow/backend/memory/worker"
"github.com/LoveLosita/smartflow/backend/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
"gorm.io/gorm"
)
@@ -28,8 +28,8 @@ import (
type Module struct {
db *gorm.DB
cfg memorymodel.Config
llmClient *infrallm.Client
ragRuntime infrarag.Runtime
llmClient *llmservice.Client
ragRuntime ragservice.Runtime
observer memoryobserve.Observer
metrics memoryobserve.MetricsRecorder
@@ -64,15 +64,15 @@ func LoadConfigFromViper() memorymodel.Config {
// 2. llmClient 允许为 nil,此时写入链路会自动回退到本地 fallback 抽取;
// 3. ragRuntime 允许为 nil,此时读取/向量同步自动回退旧逻辑;
// 4. 若后续接入统一 DI 容器,也应优先注册这个 Module,而不是把内部 repo/service 继续向外泄漏。
func NewModule(db *gorm.DB, llmClient *infrallm.Client, ragRuntime infrarag.Runtime, cfg memorymodel.Config) *Module {
func NewModule(db *gorm.DB, llmClient *llmservice.Client, ragRuntime ragservice.Runtime, cfg memorymodel.Config) *Module {
return NewModuleWithObserve(db, llmClient, ragRuntime, cfg, ObserveDeps{})
}
// NewModuleWithObserve 创建带观测依赖的 memory 模块门面。
func NewModuleWithObserve(
db *gorm.DB,
llmClient *infrallm.Client,
ragRuntime infrarag.Runtime,
llmClient *llmservice.Client,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
deps ObserveDeps,
) *Module {
@@ -228,8 +228,8 @@ func (m *Module) StartWorker(ctx context.Context) {
func wireModule(
db *gorm.DB,
llmClient *infrallm.Client,
ragRuntime infrarag.Runtime,
llmClient *llmservice.Client,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
deps ObserveDeps,
) *Module {

View File

@@ -6,8 +6,8 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const defaultDecisionCompareMaxTokens = 600
@@ -19,13 +19,13 @@ const defaultDecisionCompareMaxTokens = 600
// 2. LLM 只输出 relation(关系类型),不输出 action,不输出 target ID;
// 3. LLM 调用失败时返回 error由上层决定是否视为 unrelated。
type LLMDecisionOrchestrator struct {
client *infrallm.Client
client *llmservice.Client
cfg memorymodel.Config
logger *log.Logger
}
// NewLLMDecisionOrchestrator 构造决策比对编排器。
func NewLLMDecisionOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
func NewLLMDecisionOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
return &LLMDecisionOrchestrator{
client: client,
cfg: cfg,
@@ -52,14 +52,14 @@ func (o *LLMDecisionOrchestrator) Compare(
systemPrompt := buildDecisionCompareSystemPrompt()
userPrompt := buildDecisionCompareUserPrompt(fact, candidate)
messages := infrallm.BuildSystemUserMessages(systemPrompt, nil, userPrompt)
messages := llmservice.BuildSystemUserMessages(systemPrompt, nil, userPrompt)
// 2. 调用 LLM 做结构化输出,温度用低值保证判断稳定。
resp, _, err := infrallm.GenerateJSON[decisionCompareResponse](
resp, _, err := llmservice.GenerateJSON[decisionCompareResponse](
ctx,
o.client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 0.1,
MaxTokens: defaultDecisionCompareMaxTokens,
Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking),
@@ -127,9 +127,9 @@ func buildDecisionCompareUserPrompt(fact memorymodel.NormalizedFact, candidate m
}
// resolveMemoryThinkingMode 根据配置布尔值返回对应的 ThinkingMode。
func resolveMemoryThinkingMode(enabled bool) infrallm.ThinkingMode {
func resolveMemoryThinkingMode(enabled bool) llmservice.ThinkingMode {
if enabled {
return infrallm.ThinkingModeEnabled
return llmservice.ThinkingModeEnabled
}
return infrallm.ThinkingModeDisabled
return llmservice.ThinkingModeDisabled
}

View File

@@ -7,9 +7,9 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -24,13 +24,13 @@ const (
// 2. 不负责落库,不负责任务状态机推进;
// 3. 当 LLM 不可用或输出异常时,回退到保守的本地抽取,保证链路不完全断。
type LLMWriteOrchestrator struct {
client *infrallm.Client
client *llmservice.Client
cfg memorymodel.Config
logger *log.Logger
}
// NewLLMWriteOrchestrator 构造 LLM 版记忆写入编排器。
func NewLLMWriteOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
func NewLLMWriteOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
return &LLMWriteOrchestrator{
client: client,
cfg: cfg,
@@ -54,17 +54,17 @@ func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorym
return fallbackNormalizedFacts(payload), nil
}
messages := infrallm.BuildSystemUserMessages(
messages := llmservice.BuildSystemUserMessages(
buildMemoryExtractSystemPrompt(o.cfg.ExtractPrompt),
nil,
buildMemoryExtractUserPrompt(payload),
)
resp, rawResult, err := infrallm.GenerateJSON[memoryExtractResponse](
resp, rawResult, err := llmservice.GenerateJSON[memoryExtractResponse](
ctx,
o.client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: clampTemperature(o.cfg.LLMTemperature),
MaxTokens: defaultMemoryExtractMaxTokens,
Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking),
@@ -319,7 +319,7 @@ func isSkipIntent(intent string) bool {
}
}
func truncateForLog(raw *infrallm.TextResult) string {
func truncateForLog(raw *llmservice.TextResult) string {
if raw == nil {
return ""
}

View File

@@ -3,8 +3,8 @@ package service
import (
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
// buildReadScopedItemQuery 构造读侧统一使用的 MySQL 查询条件。
@@ -53,8 +53,8 @@ func buildReadScopedRAGRequest(
req memorymodel.RetrieveRequest,
topK int,
threshold float64,
) infrarag.MemoryRetrieveRequest {
return infrarag.MemoryRetrieveRequest{
) ragservice.MemoryRetrieveRequest {
return ragservice.MemoryRetrieveRequest{
Query: req.Query,
TopK: topK,
Threshold: threshold,

View File

@@ -8,12 +8,12 @@ import (
"strings"
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
const (
@@ -30,7 +30,7 @@ const (
type ReadService struct {
itemRepo *memoryrepo.ItemRepo
settingsRepo *memoryrepo.SettingsRepo
ragRuntime infrarag.Runtime
ragRuntime ragservice.Runtime
cfg memorymodel.Config
observer memoryobserve.Observer
metrics memoryobserve.MetricsRecorder
@@ -57,7 +57,7 @@ type semanticRetrieveTelemetry struct {
func NewReadService(
itemRepo *memoryrepo.ItemRepo,
settingsRepo *memoryrepo.SettingsRepo,
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
observer memoryobserve.Observer,
metrics memoryobserve.MetricsRecorder,
@@ -347,7 +347,7 @@ func collectMemoryIDs(items []model.MemoryItem) []int64 {
return ids
}
func buildMemoryDTOFromRetrieveHit(hit infrarag.RetrieveHit) (memorymodel.ItemDTO, int64) {
func buildMemoryDTOFromRetrieveHit(hit ragservice.RetrieveHit) (memorymodel.ItemDTO, int64) {
memoryID := parseMemoryIDFromDocumentID(hit.DocumentID)
metadata := hit.Metadata
content := strings.TrimSpace(hit.Text)

View File

@@ -6,10 +6,10 @@ import (
"log"
"strings"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
// Syncer 负责 memory_items 与向量库之间的最小桥接。
@@ -19,7 +19,7 @@ import (
// 2. 不负责决定哪些记忆该写、该删、该恢复,这些决策仍由上游 service/worker/cleanup 控制;
// 3. 同步失败时只回写 vector_status 并打观测,不反向回滚业务事务,避免把在线链路拖成强依赖。
type Syncer struct {
ragRuntime infrarag.Runtime
ragRuntime ragservice.Runtime
itemRepo *memoryrepo.ItemRepo
observer memoryobserve.Observer
metrics memoryobserve.MetricsRecorder
@@ -27,7 +27,7 @@ type Syncer struct {
}
func NewSyncer(
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
itemRepo *memoryrepo.ItemRepo,
observer memoryobserve.Observer,
metrics memoryobserve.MetricsRecorder,
@@ -53,9 +53,9 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
return
}
requestItems := make([]infrarag.MemoryIngestItem, 0, len(items))
requestItems := make([]ragservice.MemoryIngestItem, 0, len(items))
for _, item := range items {
requestItems = append(requestItems, infrarag.MemoryIngestItem{
requestItems = append(requestItems, ragservice.MemoryIngestItem{
MemoryID: item.ID,
UserID: item.UserID,
ConversationID: strValue(item.ConversationID),
@@ -76,7 +76,7 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
result, err := s.ragRuntime.IngestMemory(memoryobserve.WithFields(ctx, map[string]any{
"trace_id": traceID,
}), infrarag.MemoryIngestRequest{
}), ragservice.MemoryIngestRequest{
TraceID: traceID,
Action: "add",
Items: requestItems,

View File

@@ -4,11 +4,11 @@ import (
"context"
"fmt"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
"gorm.io/gorm"
)
@@ -192,7 +192,7 @@ func (r *Runner) recallCandidates(
) candidateRecallResult {
// 1. 优先使用 Milvus 向量语义召回。
if r.ragRuntime != nil {
retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, infrarag.MemoryRetrieveRequest{
retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, ragservice.MemoryRetrieveRequest{
Query: fact.Content,
TopK: r.cfg.DecisionCandidateTopK,
Threshold: r.cfg.DecisionCandidateMinScore,
@@ -235,7 +235,7 @@ func (r *Runner) recallCandidates(
// 1. 从 DocumentID(格式 memory:{id})解析出 mysql_id;
// 2. 从 metadata 提取 title 和 memory_type
// 3. 跳过无法解析 DocumentID 的结果。
func (r *Runner) buildCandidatesFromRAG(hits []infrarag.RetrieveHit) []memorymodel.CandidateSnapshot {
func (r *Runner) buildCandidatesFromRAG(hits []ragservice.RetrieveHit) []memorymodel.CandidateSnapshot {
candidates := make([]memorymodel.CandidateSnapshot, 0, len(hits))
for _, hit := range hits {
memoryID := parseMemoryID(hit.DocumentID)

View File

@@ -9,7 +9,6 @@ import (
"strings"
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryorchestrator "github.com/LoveLosita/smartflow/backend/memory/orchestrator"
@@ -17,6 +16,7 @@ import (
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
"gorm.io/gorm"
)
@@ -41,7 +41,7 @@ type Runner struct {
auditRepo *memoryrepo.AuditRepo
settingsRepo *memoryrepo.SettingsRepo
extractor Extractor
ragRuntime infrarag.Runtime
ragRuntime ragservice.Runtime
logger *log.Logger
vectorSyncer *memoryvectorsync.Syncer
observer memoryobserve.Observer
@@ -63,7 +63,7 @@ func NewRunner(
auditRepo *memoryrepo.AuditRepo,
settingsRepo *memoryrepo.SettingsRepo,
extractor Extractor,
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
decisionOrchestrator *memoryorchestrator.LLMDecisionOrchestrator,
vectorSyncer *memoryvectorsync.Syncer,