Version: 0.9.65.dev.260503
后端: 1. 阶段 1.5/1.6 收口 llm-service / rag-service,统一模型出口与检索基础设施入口,清退 backend/infra/llm 与 backend/infra/rag 旧实现; 2. 同步更新相关调用链与微服务迁移计划文档
This commit is contained in:
@@ -11,11 +11,11 @@ import (
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/google/uuid"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -50,7 +50,7 @@ type ChatNodeInput struct {
|
||||
UserInput string
|
||||
ConfirmAction string
|
||||
ResumeInteractionID string
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
CompactionStore newagentmodel.CompactionStore // 上下文压缩持久化
|
||||
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
|
||||
@@ -107,9 +107,9 @@ func RunChatNode(ctx context.Context, input ChatNodeInput) error {
|
||||
})
|
||||
logNodeLLMContext(chatStageName, "routing", flowState, messages)
|
||||
|
||||
reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
|
||||
reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
|
||||
Temperature: 0.7,
|
||||
Thinking: infrallm.ThinkingModeDisabled,
|
||||
Thinking: llmservice.ThinkingModeDisabled,
|
||||
Metadata: map[string]any{
|
||||
"stage": chatStageName,
|
||||
"phase": "routing",
|
||||
@@ -172,7 +172,7 @@ func isExecuteLoopClosedMarker(msg *schema.Message) bool {
|
||||
// 3. 控制码解析超时或流异常结束 → fallback 到 plan。
|
||||
func streamAndDispatch(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
parser *newagentrouter.StreamRouteParser,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
@@ -292,7 +292,7 @@ func resolveEffectiveThinking(mode string, route newagentmodel.ChatRoute, decisi
|
||||
// 2. thinking=true:关闭路由流,发起第二次 thinking 流式调用。
|
||||
func handleDirectReplyStream(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
@@ -309,7 +309,7 @@ func handleDirectReplyStream(
|
||||
// handleThinkingReplyStream 处理需要思考的回复:关闭路由流 → 第二次 thinking 流式调用。
|
||||
func handleThinkingReplyStream(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
@@ -327,10 +327,10 @@ func handleThinkingReplyStream(
|
||||
StatusBlockID: chatStatusBlockID,
|
||||
})
|
||||
logNodeLLMContext(chatStageName, "direct_reply_thinking", flowState, deepMessages)
|
||||
deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
|
||||
deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
|
||||
Temperature: 0.5,
|
||||
MaxTokens: 2000,
|
||||
Thinking: infrallm.ThinkingModeEnabled,
|
||||
Thinking: llmservice.ThinkingModeEnabled,
|
||||
Metadata: map[string]any{
|
||||
"stage": chatStageName,
|
||||
"phase": "direct_reply_thinking",
|
||||
@@ -363,7 +363,7 @@ func handleThinkingReplyStream(
|
||||
// handleDirectReplyContinueStream 处理无思考的闲聊:同一流续传。
|
||||
func handleDirectReplyContinueStream(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
@@ -419,7 +419,7 @@ func handleDirectReplyContinueStream(
|
||||
// 2. 推送轻量状态通知;
|
||||
// 3. 设置流程状态,进入 Execute 或 RoughBuild。
|
||||
func handleRouteExecuteStream(
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
flowState *newagentmodel.CommonState,
|
||||
decision *newagentmodel.ChatRoutingDecision,
|
||||
@@ -674,7 +674,7 @@ func isExplicitNoRefineAfterRoughBuildRequest(userInput string) bool {
|
||||
// 4. 完整回复写入 history。
|
||||
func handleDeepAnswerStream(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
@@ -685,9 +685,9 @@ func handleDeepAnswerStream(
|
||||
_ = reader.Close()
|
||||
|
||||
// 2. 第二次流式调用。
|
||||
thinkingOpt := infrallm.ThinkingModeDisabled
|
||||
thinkingOpt := llmservice.ThinkingModeDisabled
|
||||
if effectiveThinking {
|
||||
thinkingOpt = infrallm.ThinkingModeEnabled
|
||||
thinkingOpt = llmservice.ThinkingModeEnabled
|
||||
}
|
||||
deepMessages := newagentprompt.BuildDeepAnswerMessages(flowState, conversationContext, input.UserInput)
|
||||
deepMessages = compactUnifiedMessagesIfNeeded(ctx, deepMessages, UnifiedCompactInput{
|
||||
@@ -699,7 +699,7 @@ func handleDeepAnswerStream(
|
||||
StatusBlockID: chatStatusBlockID,
|
||||
})
|
||||
logNodeLLMContext(chatStageName, "deep_answer", flowState, deepMessages)
|
||||
deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
|
||||
deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
|
||||
Temperature: 0.5,
|
||||
MaxTokens: 2000,
|
||||
Thinking: thinkingOpt,
|
||||
@@ -741,7 +741,7 @@ func handleDeepAnswerStream(
|
||||
|
||||
// handleRoutePlanStream 处理规划路由:推送状态确认 → 设 PhasePlanning。
|
||||
func handleRoutePlanStream(
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
flowState *newagentmodel.CommonState,
|
||||
effectiveThinking bool,
|
||||
|
||||
@@ -9,10 +9,10 @@ import (
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -31,7 +31,7 @@ const (
|
||||
type DeliverNodeInput struct {
|
||||
RuntimeState *newagentmodel.AgentRuntimeState
|
||||
ConversationContext *newagentmodel.ConversationContext
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
ThinkingEnabled bool // 是否开启 thinking,由 config.yaml 的 agent.thinking.deliver 注入
|
||||
CompactionStore newagentmodel.CompactionStore // 上下文压缩持久化
|
||||
@@ -128,7 +128,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error {
|
||||
// - streamed:true 表示文本已通过 EmitStreamAssistantText 真流式推送到前端,调用方无需再伪流式。
|
||||
func generateDeliverSummary(
|
||||
ctx context.Context,
|
||||
client *infrallm.Client,
|
||||
client *llmservice.Client,
|
||||
flowState *newagentmodel.CommonState,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
thinkingEnabled bool,
|
||||
@@ -162,7 +162,7 @@ func generateDeliverSummary(
|
||||
reader, err := client.Stream(
|
||||
ctx,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 0.5,
|
||||
MaxTokens: 800,
|
||||
Thinking: resolveThinkingMode(thinkingEnabled),
|
||||
|
||||
@@ -8,11 +8,11 @@ import (
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
@@ -38,7 +38,7 @@ func collectExecuteDecisionFromLLM(
|
||||
reader, err := input.Client.Stream(
|
||||
ctx,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 1.0,
|
||||
MaxTokens: 131072,
|
||||
Thinking: newagentshared.ResolveThinkingMode(input.ThinkingEnabled),
|
||||
@@ -123,7 +123,7 @@ func collectExecuteDecisionFromLLM(
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
decision, parseErr := infrallm.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
|
||||
decision, parseErr := llmservice.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
|
||||
if parseErr != nil {
|
||||
log.Printf(
|
||||
"[DEBUG] execute LLM JSON 解析失败 chat=%s round=%d json=%s raw=%s",
|
||||
|
||||
@@ -5,12 +5,12 @@ import (
|
||||
"fmt"
|
||||
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
|
||||
"github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -29,7 +29,7 @@ type ExecuteNodeInput struct {
|
||||
RuntimeState *newagentmodel.AgentRuntimeState
|
||||
ConversationContext *newagentmodel.ConversationContext
|
||||
UserInput string
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
ResumeNode string
|
||||
ToolRegistry *newagenttools.ToolRegistry
|
||||
|
||||
@@ -10,11 +10,11 @@ import (
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -34,7 +34,7 @@ type PlanNodeInput struct {
|
||||
RuntimeState *newagentmodel.AgentRuntimeState
|
||||
ConversationContext *newagentmodel.ConversationContext
|
||||
UserInput string
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
ResumeNode string
|
||||
AlwaysExecute bool // true 时计划生成后自动确认,不进入 confirm 节点
|
||||
@@ -87,7 +87,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
reader, err := input.Client.Stream(
|
||||
ctx,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 0.2,
|
||||
// 显式设置上限,避免依赖框架默认值(默认 4096)导致长决策被截断。
|
||||
// 注意:当前模型接口 max_tokens 上限为 131072,超过会 400。
|
||||
@@ -149,7 +149,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
return fmt.Errorf("规划解析失败,原始输出=%s", result.RawBuffer)
|
||||
}
|
||||
|
||||
decision, parseErr := infrallm.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
|
||||
decision, parseErr := llmservice.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
|
||||
if parseErr != nil {
|
||||
return fmt.Errorf("规划决策 JSON 解析失败: %w (raw=%s)", parseErr, result.RawBuffer)
|
||||
}
|
||||
@@ -390,9 +390,9 @@ func buildPinnedPlanText(steps []newagentmodel.PlanStep) string {
|
||||
|
||||
// resolveThinkingMode maps the configuration boolean to the corresponding ThinkingMode:
// ThinkingModeEnabled when enabled is true, ThinkingModeDisabled otherwise.
|
||||
// Intended for shared use by the plan / execute / deliver nodes.
|
||||
func resolveThinkingMode(enabled bool) infrallm.ThinkingMode {
|
||||
func resolveThinkingMode(enabled bool) llmservice.ThinkingMode {
|
||||
if enabled {
|
||||
return infrallm.ThinkingModeEnabled
|
||||
return llmservice.ThinkingModeEnabled
|
||||
}
|
||||
return infrallm.ThinkingModeDisabled
|
||||
return llmservice.ThinkingModeDisabled
|
||||
}
|
||||
|
||||
@@ -8,13 +8,13 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
taskmodel "github.com/LoveLosita/smartflow/backend/model"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
|
||||
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -30,7 +30,7 @@ type QuickTaskNodeInput struct {
|
||||
RuntimeState *newagentmodel.AgentRuntimeState
|
||||
ConversationContext *newagentmodel.ConversationContext
|
||||
UserInput string
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
QuickTaskDeps newagentmodel.QuickTaskDeps
|
||||
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
|
||||
@@ -77,7 +77,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
|
||||
messages := newagentprompt.BuildQuickTaskMessagesSimple(input.UserInput)
|
||||
|
||||
// 2. 真流式调用 LLM。
|
||||
reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
|
||||
reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
|
||||
Temperature: 0.3,
|
||||
MaxTokens: 512,
|
||||
})
|
||||
@@ -130,7 +130,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
|
||||
// 解析 JSON。
|
||||
log.Printf("[DEBUG] quick_task: LLM 原始决策 JSON chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
|
||||
var parseErr error
|
||||
decision, parseErr = infrallm.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
|
||||
decision, parseErr = llmservice.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
|
||||
if parseErr != nil {
|
||||
log.Printf("[DEBUG] quick_task: JSON 解析失败 chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
|
||||
if result.RawBuffer != "" {
|
||||
|
||||
@@ -6,11 +6,11 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
"github.com/LoveLosita/smartflow/backend/pkg"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -22,7 +22,7 @@ import (
|
||||
// 3. StageName 和 StatusBlockID 用于区分日志来源和 SSE 状态推送。
|
||||
type UnifiedCompactInput struct {
|
||||
// Client 用于调用 LLM 压缩 msg1/msg2。
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
// CompactionStore 用于持久化压缩摘要和 token 统计,为 nil 时跳过持久化。
|
||||
CompactionStore newagentmodel.CompactionStore
|
||||
// FlowState 提供 userID / chatID / roundUsed 等定位信息。
|
||||
|
||||
Reference in New Issue
Block a user