Version: 0.9.65.dev.260503

Backend:
1. Phase 1.5/1.6: close out llm-service / rag-service, unify the model egress and the retrieval-infrastructure entry point, and retire the legacy backend/infra/llm and backend/infra/rag implementations;
2. Update the affected call chains and the microservice migration plan docs accordingly.
Losita
2026-05-03 23:21:03 +08:00
parent a6c1e5d077
commit 9902ca3563
65 changed files with 550 additions and 376 deletions
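Every hunk below makes the same mechanical swap: the legacy infra import is removed, the new service import is added, and the local alias changes from infrallm to llmservice. A minimal sketch of the swap, with both paths taken verbatim from this diff (the surrounding import block is illustrative):

import (
	// before this commit (removed):
	// infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"

	// after this commit:
	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

All type and function references follow the alias, e.g. *infrallm.Client becomes *llmservice.Client.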

View File

@@ -11,11 +11,11 @@ import (
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -50,7 +50,7 @@ type ChatNodeInput struct {
UserInput string
ConfirmAction string
ResumeInteractionID string
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
CompactionStore newagentmodel.CompactionStore // persists context-compaction state
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
@@ -107,9 +107,9 @@ func RunChatNode(ctx context.Context, input ChatNodeInput) error {
})
logNodeLLMContext(chatStageName, "routing", flowState, messages)
- reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
+ reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
Temperature: 0.7,
- Thinking: infrallm.ThinkingModeDisabled,
+ Thinking: llmservice.ThinkingModeDisabled,
Metadata: map[string]any{
"stage": chatStageName,
"phase": "routing",
@@ -172,7 +172,7 @@ func isExecuteLoopClosedMarker(msg *schema.Message) bool {
// 3. Control-code parsing times out or the stream ends abnormally → fall back to plan.
func streamAndDispatch(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
parser *newagentrouter.StreamRouteParser,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
@@ -292,7 +292,7 @@ func resolveEffectiveThinking(mode string, route newagentmodel.ChatRoute, decisi
// 2. thinking=true: close the routing stream and issue a second streaming call with thinking enabled.
func handleDirectReplyStream(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -309,7 +309,7 @@ func handleDirectReplyStream(
// handleThinkingReplyStream handles replies that need thinking: close the routing stream → second streaming call with thinking enabled.
func handleThinkingReplyStream(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -327,10 +327,10 @@ func handleThinkingReplyStream(
StatusBlockID: chatStatusBlockID,
})
logNodeLLMContext(chatStageName, "direct_reply_thinking", flowState, deepMessages)
- deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
+ deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
Temperature: 0.5,
MaxTokens: 2000,
- Thinking: infrallm.ThinkingModeEnabled,
+ Thinking: llmservice.ThinkingModeEnabled,
Metadata: map[string]any{
"stage": chatStageName,
"phase": "direct_reply_thinking",
@@ -363,7 +363,7 @@ func handleThinkingReplyStream(
// handleDirectReplyContinueStream handles no-thinking small talk: continue on the same stream.
func handleDirectReplyContinueStream(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -419,7 +419,7 @@ func handleDirectReplyContinueStream(
// 2. Push a lightweight status notification;
// 3. Set the flow state and enter Execute or RoughBuild.
func handleRouteExecuteStream(
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
emitter *newagentstream.ChunkEmitter,
flowState *newagentmodel.CommonState,
decision *newagentmodel.ChatRoutingDecision,
@@ -674,7 +674,7 @@ func isExplicitNoRefineAfterRoughBuildRequest(userInput string) bool {
// 4. Write the full reply to history.
func handleDeepAnswerStream(
ctx context.Context,
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -685,9 +685,9 @@ func handleDeepAnswerStream(
_ = reader.Close()
// 2. Second streaming call.
- thinkingOpt := infrallm.ThinkingModeDisabled
+ thinkingOpt := llmservice.ThinkingModeDisabled
if effectiveThinking {
- thinkingOpt = infrallm.ThinkingModeEnabled
+ thinkingOpt = llmservice.ThinkingModeEnabled
}
deepMessages := newagentprompt.BuildDeepAnswerMessages(flowState, conversationContext, input.UserInput)
deepMessages = compactUnifiedMessagesIfNeeded(ctx, deepMessages, UnifiedCompactInput{
@@ -699,7 +699,7 @@ func handleDeepAnswerStream(
StatusBlockID: chatStatusBlockID,
})
logNodeLLMContext(chatStageName, "deep_answer", flowState, deepMessages)
- deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
+ deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
Temperature: 0.5,
MaxTokens: 2000,
Thinking: thinkingOpt,
@@ -741,7 +741,7 @@ func handleDeepAnswerStream(
// handleRoutePlanStream handles the plan route: push a status confirmation → set PhasePlanning.
func handleRoutePlanStream(
- reader infrallm.StreamReader,
+ reader llmservice.StreamReader,
emitter *newagentstream.ChunkEmitter,
flowState *newagentmodel.CommonState,
effectiveThinking bool,

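From the call sites in this file, the migrated client keeps the same surface: Stream(ctx, messages, GenerateOptions) returns a StreamReader plus an error, and readers are closed before any second call. A hedged sketch of that call shape using only names visible in this diff; routeChat, the []*schema.Message type, and the elided chunk loop are illustrative assumptions, since the reader's consumption method is not shown here:

import (
	"context"

	"github.com/cloudwego/eino/schema"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// routeChat is an illustrative wrapper; the real node threads the reader into
// a route parser and chunk emitter, which are elided here.
func routeChat(ctx context.Context, client *llmservice.Client, messages []*schema.Message) error {
	reader, err := client.Stream(ctx, messages, llmservice.GenerateOptions{
		Temperature: 0.7,
		Thinking:    llmservice.ThinkingModeDisabled,
		Metadata: map[string]any{
			"stage": "chat",
			"phase": "routing",
		},
	})
	if err != nil {
		return err
	}
	defer reader.Close() // the node closes the routing stream before any second thinking call
	// ... consume chunks and dispatch by route here ...
	return nil
}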
View File

@@ -9,10 +9,10 @@ import (
"github.com/cloudwego/eino/schema"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -31,7 +31,7 @@ const (
type DeliverNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
ThinkingEnabled bool // whether thinking is enabled; injected from agent.thinking.deliver in config.yaml
CompactionStore newagentmodel.CompactionStore // persists context-compaction state
@@ -128,7 +128,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error {
// - streamed=true means the text has already been truly streamed to the frontend via EmitStreamAssistantText, so the caller need not pseudo-stream it again.
func generateDeliverSummary(
ctx context.Context,
- client *infrallm.Client,
+ client *llmservice.Client,
flowState *newagentmodel.CommonState,
conversationContext *newagentmodel.ConversationContext,
thinkingEnabled bool,
@@ -162,7 +162,7 @@ func generateDeliverSummary(
reader, err := client.Stream(
ctx,
messages,
- infrallm.GenerateOptions{
+ llmservice.GenerateOptions{
Temperature: 0.5,
MaxTokens: 800,
Thinking: resolveThinkingMode(thinkingEnabled),

View File

@@ -8,11 +8,11 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
)
@@ -38,7 +38,7 @@ func collectExecuteDecisionFromLLM(
reader, err := input.Client.Stream(
ctx,
messages,
- infrallm.GenerateOptions{
+ llmservice.GenerateOptions{
Temperature: 1.0,
MaxTokens: 131072,
Thinking: newagentshared.ResolveThinkingMode(input.ThinkingEnabled),
@@ -123,7 +123,7 @@ func collectExecuteDecisionFromLLM(
return nil, nil
}
- decision, parseErr := infrallm.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
+ decision, parseErr := llmservice.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
if parseErr != nil {
log.Printf(
"[DEBUG] execute LLM JSON 解析失败 chat=%s round=%d json=%s raw=%s",

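The decision payloads above are decoded with the generic llmservice.ParseJSONObject[T], whose body is not part of this diff. One plausible shape, assuming it wraps encoding/json and returns a pointer (the signature below is inferred from the call sites, not confirmed):

package llmservice

import (
	"encoding/json"
	"fmt"
)

// ParseJSONObject decodes a JSON object string into T. Sketch only: the real
// helper may additionally strip code fences or tolerate surrounding text.
func ParseJSONObject[T any](raw string) (*T, error) {
	var v T
	if err := json.Unmarshal([]byte(raw), &v); err != nil {
		return nil, fmt.Errorf("parse JSON object: %w", err)
	}
	return &v, nil
}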
View File

@@ -5,12 +5,12 @@ import (
"fmt"
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
"github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -29,7 +29,7 @@ type ExecuteNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
UserInput string
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
ResumeNode string
ToolRegistry *newagenttools.ToolRegistry

View File

@@ -10,11 +10,11 @@ import (
"github.com/google/uuid"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -34,7 +34,7 @@ type PlanNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
UserInput string
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
ResumeNode string
AlwaysExecute bool // when true, the plan is auto-confirmed after generation instead of entering the confirm node
@@ -87,7 +87,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
reader, err := input.Client.Stream(
ctx,
messages,
- infrallm.GenerateOptions{
+ llmservice.GenerateOptions{
Temperature: 0.2,
// Explicitly set the cap to avoid relying on the framework default (4096), which truncates long decisions.
// Note: the current model API caps max_tokens at 131072; exceeding it returns a 400.
@@ -149,7 +149,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
return fmt.Errorf("规划解析失败,原始输出=%s", result.RawBuffer)
}
- decision, parseErr := infrallm.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
+ decision, parseErr := llmservice.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
if parseErr != nil {
return fmt.Errorf("规划决策 JSON 解析失败: %w (raw=%s)", parseErr, result.RawBuffer)
}
@@ -390,9 +390,9 @@ func buildPinnedPlanText(steps []newagentmodel.PlanStep) string {
// resolveThinkingMode maps the config boolean to the corresponding ThinkingMode.
// Shared by the plan / execute / deliver nodes.
- func resolveThinkingMode(enabled bool) infrallm.ThinkingMode {
+ func resolveThinkingMode(enabled bool) llmservice.ThinkingMode {
if enabled {
- return infrallm.ThinkingModeEnabled
+ return llmservice.ThinkingModeEnabled
}
- return infrallm.ThinkingModeDisabled
+ return llmservice.ThinkingModeDisabled
}

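resolveThinkingMode centralizes the boolean-to-enum mapping so the three nodes stay consistent. A hedged usage sketch mirroring the Stream options seen in this diff; buildPlanOptions is a hypothetical helper, and thinkingEnabled is an assumed local fed from config.yaml (agent.thinking.*):

// buildPlanOptions shows the intended wiring only; it does not exist in the diff.
func buildPlanOptions(thinkingEnabled bool) llmservice.GenerateOptions {
	return llmservice.GenerateOptions{
		Temperature: 0.2,
		MaxTokens:   131072, // explicit cap; per the comment above, the model API returns 400 beyond this
		Thinking:    resolveThinkingMode(thinkingEnabled),
	}
}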
View File

@@ -8,13 +8,13 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
taskmodel "github.com/LoveLosita/smartflow/backend/model"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -30,7 +30,7 @@ type QuickTaskNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
UserInput string
- Client *infrallm.Client
+ Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
QuickTaskDeps newagentmodel.QuickTaskDeps
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
@@ -77,7 +77,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
messages := newagentprompt.BuildQuickTaskMessagesSimple(input.UserInput)
// 2. Call the LLM with true streaming.
- reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
+ reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
Temperature: 0.3,
MaxTokens: 512,
})
@@ -130,7 +130,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
// Parse the JSON.
log.Printf("[DEBUG] quick_task: raw LLM decision JSON chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
var parseErr error
- decision, parseErr = infrallm.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
+ decision, parseErr = llmservice.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
if parseErr != nil {
log.Printf("[DEBUG] quick_task: JSON 解析失败 chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
if result.RawBuffer != "" {

View File

@@ -6,11 +6,11 @@ import (
"fmt"
"log"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
"github.com/LoveLosita/smartflow/backend/pkg"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -22,7 +22,7 @@ import (
// 3. StageName and StatusBlockID distinguish the log source and the SSE status push.
type UnifiedCompactInput struct {
// Client calls the LLM to compact msg1/msg2.
- Client *infrallm.Client
+ Client *llmservice.Client
// CompactionStore persists the compaction summary and token stats; persistence is skipped when nil.
CompactionStore newagentmodel.CompactionStore
// FlowState provides locating info such as userID / chatID / roundUsed.