Version: 0.9.65.dev.260503

Backend:
1. Phase 1.5/1.6: close out llm-service / rag-service, unify the model egress and the retrieval-infrastructure entry point, and retire the legacy backend/infra/llm and backend/infra/rag implementations;
2. Update the affected call chains and the microservice migration plan docs accordingly.
Losita
2026-05-03 23:21:03 +08:00
parent a6c1e5d077
commit 9902ca3563
65 changed files with 550 additions and 376 deletions
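Every hunk below makes the same mechanical swap: the legacy infra import is removed, the new service import is added, and the local alias changes from infrallm to llmservice. A minimal sketch of the swap, with both paths taken verbatim from this diff (the surrounding import block is illustrative):

import (
	// before this commit (removed):
	// infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"

	// after this commit:
	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

All type and function references follow the alias, e.g. *infrallm.Client becomes *llmservice.Client.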

View File

@@ -11,11 +11,11 @@ import (
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -50,7 +50,7 @@ type ChatNodeInput struct {
UserInput string
ConfirmAction string
ResumeInteractionID string
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
CompactionStore newagentmodel.CompactionStore // persists context-compaction state
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
@@ -107,9 +107,9 @@ func RunChatNode(ctx context.Context, input ChatNodeInput) error {
})
logNodeLLMContext(chatStageName, "routing", flowState, messages)
- reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
+ reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
Temperature: 0.7,
- Thinking: infrallm.ThinkingModeDisabled,
+ Thinking: llmservice.ThinkingModeDisabled,
Metadata: map[string]any{
"stage": chatStageName,
"phase": "routing",
@@ -172,7 +172,7 @@ func isExecuteLoopClosedMarker(msg *schema.Message) bool {
// 3. Control-code parsing times out or the stream ends abnormally → fall back to plan.
func streamAndDispatch(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
parser *newagentrouter.StreamRouteParser,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
@@ -292,7 +292,7 @@ func resolveEffectiveThinking(mode string, route newagentmodel.ChatRoute, decisi
// 2. thinking=true: close the routing stream and issue a second streaming call with thinking enabled.
func handleDirectReplyStream(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -309,7 +309,7 @@ func handleDirectReplyStream(
// handleThinkingReplyStream handles replies that need thinking: close the routing stream → second streaming call with thinking enabled.
func handleThinkingReplyStream(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -327,10 +327,10 @@ func handleThinkingReplyStream(
StatusBlockID: chatStatusBlockID,
})
logNodeLLMContext(chatStageName, "direct_reply_thinking", flowState, deepMessages)
- deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
+ deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
Temperature: 0.5,
MaxTokens: 2000,
- Thinking: infrallm.ThinkingModeEnabled,
+ Thinking: llmservice.ThinkingModeEnabled,
Metadata: map[string]any{
"stage": chatStageName,
"phase": "direct_reply_thinking",
@@ -363,7 +363,7 @@ func handleThinkingReplyStream(
// handleDirectReplyContinueStream handles no-thinking small talk: continue on the same stream.
func handleDirectReplyContinueStream(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -419,7 +419,7 @@ func handleDirectReplyContinueStream(
// 2. Push a lightweight status notification;
// 3. Set the flow state and enter Execute or RoughBuild.
func handleRouteExecuteStream(
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
emitter *newagentstream.ChunkEmitter,
flowState *newagentmodel.CommonState,
decision *newagentmodel.ChatRoutingDecision,
@@ -674,7 +674,7 @@ func isExplicitNoRefineAfterRoughBuildRequest(userInput string) bool {
// 4. Write the full reply to history.
func handleDeepAnswerStream(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -685,9 +685,9 @@ func handleDeepAnswerStream(
_ = reader.Close()
// 2. Second streaming call.
- thinkingOpt := infrallm.ThinkingModeDisabled
+ thinkingOpt := llmservice.ThinkingModeDisabled
if effectiveThinking {
- thinkingOpt = infrallm.ThinkingModeEnabled
+ thinkingOpt = llmservice.ThinkingModeEnabled
}
deepMessages := newagentprompt.BuildDeepAnswerMessages(flowState, conversationContext, input.UserInput)
deepMessages = compactUnifiedMessagesIfNeeded(ctx, deepMessages, UnifiedCompactInput{
@@ -699,7 +699,7 @@ func handleDeepAnswerStream(
StatusBlockID: chatStatusBlockID,
})
logNodeLLMContext(chatStageName, "deep_answer", flowState, deepMessages)
- deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
+ deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
Temperature: 0.5,
MaxTokens: 2000,
Thinking: thinkingOpt,
@@ -741,7 +741,7 @@ func handleDeepAnswerStream(
// handleRoutePlanStream handles the plan route: push a status confirmation → set PhasePlanning.
func handleRoutePlanStream(
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
emitter *newagentstream.ChunkEmitter,
flowState *newagentmodel.CommonState,
effectiveThinking bool,

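From the call sites in this file, the migrated client keeps the same surface: Stream(ctx, messages, GenerateOptions) returns a StreamReader plus an error, and readers are closed before any second call. A hedged sketch of that call shape using only names visible in this diff; routeChat, the []*schema.Message type, and the elided chunk loop are illustrative assumptions, since the reader's consumption method is not shown here:

import (
	"context"

	"github.com/cloudwego/eino/schema"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// routeChat is an illustrative wrapper; the real node threads the reader into
// a route parser and chunk emitter, which are elided here.
func routeChat(ctx context.Context, client *llmservice.Client, messages []*schema.Message) error {
	reader, err := client.Stream(ctx, messages, llmservice.GenerateOptions{
		Temperature: 0.7,
		Thinking:    llmservice.ThinkingModeDisabled,
		Metadata: map[string]any{
			"stage": "chat",
			"phase": "routing",
		},
	})
	if err != nil {
		return err
	}
	defer reader.Close() // the node closes the routing stream before any second thinking call
	// ... consume chunks and dispatch by route here ...
	return nil
}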
View File

@@ -9,10 +9,10 @@ import (
"github.com/cloudwego/eino/schema"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -31,7 +31,7 @@ const (
type DeliverNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
ThinkingEnabled bool // whether thinking is enabled; injected from agent.thinking.deliver in config.yaml
CompactionStore newagentmodel.CompactionStore // persists context-compaction state
@@ -128,7 +128,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error {
// - streamed=true means the text has already been truly streamed to the frontend via EmitStreamAssistantText, so the caller need not pseudo-stream it again.
func generateDeliverSummary(
ctx context.Context,
- client *infrallm.Client,
+ client *llmservice.Client,
flowState *newagentmodel.CommonState,
conversationContext *newagentmodel.ConversationContext,
thinkingEnabled bool,
@@ -162,7 +162,7 @@ func generateDeliverSummary(
reader, err := client.Stream(
ctx,
messages,
- infrallm.GenerateOptions{
+ llmservice.GenerateOptions{
Temperature: 0.5,
MaxTokens: 800,
Thinking: resolveThinkingMode(thinkingEnabled),

View File

@@ -8,11 +8,11 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
)
@@ -38,7 +38,7 @@ func collectExecuteDecisionFromLLM(
reader, err := input.Client.Stream(
ctx,
messages,
- infrallm.GenerateOptions{
+ llmservice.GenerateOptions{
Temperature: 1.0,
MaxTokens: 131072,
Thinking: newagentshared.ResolveThinkingMode(input.ThinkingEnabled),
@@ -123,7 +123,7 @@ func collectExecuteDecisionFromLLM(
return nil, nil
}
- decision, parseErr := infrallm.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
+ decision, parseErr := llmservice.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
if parseErr != nil {
log.Printf(
"[DEBUG] execute LLM JSON 解析失败 chat=%s round=%d json=%s raw=%s",

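The decision payloads above are decoded with the generic llmservice.ParseJSONObject[T], whose body is not part of this diff. One plausible shape, assuming it wraps encoding/json and returns a pointer (the signature below is inferred from the call sites, not confirmed):

package llmservice

import (
	"encoding/json"
	"fmt"
)

// ParseJSONObject decodes a JSON object string into T. Sketch only: the real
// helper may additionally strip code fences or tolerate surrounding text.
func ParseJSONObject[T any](raw string) (*T, error) {
	var v T
	if err := json.Unmarshal([]byte(raw), &v); err != nil {
		return nil, fmt.Errorf("parse JSON object: %w", err)
	}
	return &v, nil
}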
View File

@@ -5,12 +5,12 @@ import (
"fmt"
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
"github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -29,7 +29,7 @@ type ExecuteNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
UserInput string
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
ResumeNode string
ToolRegistry *newagenttools.ToolRegistry

View File

@@ -10,11 +10,11 @@ import (
"github.com/google/uuid"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -34,7 +34,7 @@ type PlanNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
UserInput string
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
ResumeNode string
AlwaysExecute bool // when true, the plan is auto-confirmed after generation instead of entering the confirm node
@@ -87,7 +87,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
reader, err := input.Client.Stream(
ctx,
messages,
- infrallm.GenerateOptions{
+ llmservice.GenerateOptions{
Temperature: 0.2,
// Explicitly set the cap to avoid relying on the framework default (4096), which truncates long decisions.
// Note: the current model API caps max_tokens at 131072; exceeding it returns a 400.
@@ -149,7 +149,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
return fmt.Errorf("规划解析失败,原始输出=%s", result.RawBuffer)
}
- decision, parseErr := infrallm.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
+ decision, parseErr := llmservice.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
if parseErr != nil {
return fmt.Errorf("规划决策 JSON 解析失败: %w (raw=%s)", parseErr, result.RawBuffer)
}
@@ -390,9 +390,9 @@ func buildPinnedPlanText(steps []newagentmodel.PlanStep) string {
// resolveThinkingMode maps the config boolean to the corresponding ThinkingMode.
// Shared by the plan / execute / deliver nodes.
- func resolveThinkingMode(enabled bool) infrallm.ThinkingMode {
+ func resolveThinkingMode(enabled bool) llmservice.ThinkingMode {
if enabled {
- return infrallm.ThinkingModeEnabled
+ return llmservice.ThinkingModeEnabled
}
- return infrallm.ThinkingModeDisabled
+ return llmservice.ThinkingModeDisabled
}

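resolveThinkingMode centralizes the boolean-to-enum mapping so the three nodes stay consistent. A hedged usage sketch mirroring the Stream options seen in this diff; buildPlanOptions is a hypothetical helper, and thinkingEnabled is an assumed local fed from config.yaml (agent.thinking.*):

// buildPlanOptions shows the intended wiring only; it does not exist in the diff.
func buildPlanOptions(thinkingEnabled bool) llmservice.GenerateOptions {
	return llmservice.GenerateOptions{
		Temperature: 0.2,
		MaxTokens:   131072, // explicit cap; per the comment above, the model API returns 400 beyond this
		Thinking:    resolveThinkingMode(thinkingEnabled),
	}
}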
View File

@@ -8,13 +8,13 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
taskmodel "github.com/LoveLosita/smartflow/backend/model"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -30,7 +30,7 @@ type QuickTaskNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
UserInput string
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
QuickTaskDeps newagentmodel.QuickTaskDeps
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
@@ -77,7 +77,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
messages := newagentprompt.BuildQuickTaskMessagesSimple(input.UserInput)
// 2. Call the LLM with true streaming.
- reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
+ reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
Temperature: 0.3,
MaxTokens: 512,
})
@@ -130,7 +130,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
// Parse the JSON.
log.Printf("[DEBUG] quick_task: raw LLM decision JSON chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
var parseErr error
- decision, parseErr = infrallm.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
+ decision, parseErr = llmservice.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
if parseErr != nil {
log.Printf("[DEBUG] quick_task: JSON 解析失败 chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
if result.RawBuffer != "" {

View File

@@ -6,11 +6,11 @@ import (
"fmt"
"log"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
"github.com/LoveLosita/smartflow/backend/pkg"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -22,7 +22,7 @@ import (
// 3. StageName and StatusBlockID distinguish the log source and the SSE status push.
type UnifiedCompactInput struct {
// Client calls the LLM to compact msg1/msg2.
- Client *infrallm.Client
+ Client *llmservice.Client
// CompactionStore persists the compaction summary and token stats; persistence is skipped when nil.
CompactionStore newagentmodel.CompactionStore
// FlowState provides locating info such as userID / chatID / roundUsed.