Version: 0.9.65.dev.260503

Backend:
1. Phase 1.5/1.6:
Consolidate llm-service / rag-service into the single model egress and retrieval-infrastructure entry points, and retire the legacy backend/infra/llm and backend/infra/rag implementations;
2. Update the affected call chains and the microservice migration plan docs accordingly.
Losita
2026-05-03 23:21:03 +08:00
parent a6c1e5d077
commit 9902ca3563
65 changed files with 550 additions and 376 deletions
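
At a glance, the pattern this commit moves every caller to: services hold a *llmservice.Service and ask it for concrete clients, instead of reaching into inits.AIHub fields. A minimal sketch, using only the accessors visible in the hunks below (ProClient / LiteClient); everything else here is illustrative:

package example

import (
	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// pickClients mirrors the new call-site shape: nil-tolerant lookups with an
// explicit Lite→Pro fallback, never a direct AIHub field access.
func pickClients(svc *llmservice.Service) (chat, title *llmservice.Client) {
	if svc == nil {
		return nil, nil // callers must handle a missing llm-service (see the nil guards below)
	}
	chat = svc.ProClient() // standard-complexity chat
	if title = svc.LiteClient(); title == nil {
		title = svc.ProClient() // Lite is optional; fall back to Pro
	}
	return chat, title
}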

View File

@@ -3,8 +3,8 @@ package service
import (
"github.com/LoveLosita/smartflow/backend/dao"
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
"github.com/LoveLosita/smartflow/backend/inits"
"github.com/LoveLosita/smartflow/backend/service/agentsvc"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
// AgentService 是 service 层对 agentsvc.AgentService 的兼容别名。
@@ -20,7 +20,7 @@ type AgentService = agentsvc.AgentService
// 2) 主动调度 session DAO 也在这里显式透传,避免聊天入口再去回查全局单例;
// 3) 真实构造逻辑已下沉到 service/agentsvc 包。
func NewAgentService(
aiHub *inits.AIHub,
llmService *llmservice.Service,
repo *dao.AgentDAO,
taskRepo *dao.TaskDAO,
cacheDAO *dao.CacheDAO,
@@ -29,7 +29,7 @@ func NewAgentService(
activeSessionDAO *dao.ActiveScheduleSessionDAO,
eventPublisher outboxinfra.EventPublisher,
) *AgentService {
return agentsvc.NewAgentService(aiHub, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
return agentsvc.NewAgentService(llmService, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
}
// NewAgentServiceWithSchedule 在基础 AgentService 上注入排程依赖。
@@ -39,7 +39,7 @@ func NewAgentService(
// 2) 排程依赖为可选:未注入时排程路由自动回退到普通聊天;
// 3) 主动调度 session DAO 仍沿用统一构造注入,避免排程分支自己拼装仓储。
func NewAgentServiceWithSchedule(
aiHub *inits.AIHub,
llmService *llmservice.Service,
repo *dao.AgentDAO,
taskRepo *dao.TaskDAO,
cacheDAO *dao.CacheDAO,
@@ -50,7 +50,7 @@ func NewAgentServiceWithSchedule(
scheduleSvc *ScheduleService,
taskSvc *TaskService,
) *AgentService {
svc := agentsvc.NewAgentService(aiHub, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
svc := agentsvc.NewAgentService(llmService, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
// 注入排程依赖:将 service 层方法包装为函数闭包,避免循环依赖。
if scheduleSvc != nil {
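
Callers of these wrappers only swap the first argument. A hedged wiring sketch (the llmservice.NewService constructor and the local variable names are assumptions; only the parameter order comes from this diff):

// was: service.NewAgentService(aiHub, ...)
llmSvc := llmservice.NewService(cfg) // assumed constructor returning *llmservice.Service
agentSvc := service.NewAgentService(
	llmSvc, // replaces aiHub *inits.AIHub
	agentDAO, taskDAO, cacheDAO,
	agentRedis, activeScheduleDAO, activeSessionDAO,
	eventPublisher,
)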

View File

@@ -3,6 +3,7 @@ package agentsvc
import (
"context"
"encoding/json"
"errors"
"log"
"strconv"
"strings"
@@ -11,7 +12,6 @@ import (
"github.com/LoveLosita/smartflow/backend/conv"
"github.com/LoveLosita/smartflow/backend/dao"
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
"github.com/LoveLosita/smartflow/backend/inits"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
"github.com/LoveLosita/smartflow/backend/model"
@@ -20,13 +20,13 @@ import (
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
"github.com/LoveLosita/smartflow/backend/pkg"
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
"github.com/cloudwego/eino-ext/components/model/ark"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
)
type AgentService struct {
AIHub *inits.AIHub
llmService *llmservice.Service
repo *dao.AgentDAO
taskRepo *dao.TaskDAO
cacheDAO *dao.CacheDAO
@@ -75,7 +75,7 @@ type AgentService struct {
// 这里通过依赖注入把“模型、仓储、缓存、异步持久化通道”统一交给服务层管理,
// 便于后续在单测中替换实现,或在启动流程中按环境切换配置。
func NewAgentService(
aiHub *inits.AIHub,
llmService *llmservice.Service,
repo *dao.AgentDAO,
taskRepo *dao.TaskDAO,
cacheDAO *dao.CacheDAO,
@@ -90,7 +90,7 @@ func NewAgentService(
ensureTokenMeterCallbackRegistered()
return &AgentService{
AIHub: aiHub,
llmService: llmService,
repo: repo,
taskRepo: taskRepo,
cacheDAO: cacheDAO,
@@ -123,8 +123,11 @@ func thinkingModeToBool(mode string) bool {
// 当前约定:
// - 旧链路已全面切到 newAgent graph,这里仅作为 runNormalChatFlow 回退时的模型选择入口;
// - 统一返回 Pro 模型,旧 strategist 参数不再生效。
func (s *AgentService) pickChatModel(requestModel string) (*ark.ChatModel, string) {
return s.AIHub.Pro, "pro"
func (s *AgentService) pickChatModel(requestModel string) (*llmservice.Client, string) {
if s == nil || s.llmService == nil {
return nil, "pro"
}
return s.llmService.ProClient(), "pro"
}
// PersistChatHistory 是 Agent 聊天链路唯一的“消息持久化入口”。
@@ -304,7 +307,7 @@ func pushErrNonBlocking(errChan chan error, err error) {
// 2) 开启随口记进度推送后,最终判定“非随口记”时回落到普通聊天。
func (s *AgentService) runNormalChatFlow(
ctx context.Context,
selectedModel *ark.ChatModel,
selectedModel *llmservice.Client,
resolvedModelName string,
userMessage string,
userPersisted bool,
@@ -365,6 +368,12 @@ func (s *AgentService) runNormalChatFlow(
}
}
// 6.0. 没有可用模型时,直接中止普通聊天,避免写入半截用户消息后没有后续回复。
if selectedModel == nil {
pushErrNonBlocking(errChan, errors.New("llm client is not ready"))
return
}
// 6. 执行真正的流式聊天。
// fullText 用于后续写 Redis/持久化,outChan 用于把流片段实时推给前端。
fullText, _, reasoningDurationSeconds, streamUsage, streamErr := s.streamChatFallback(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan, assistantReasoningStartedAt, userID, chatID)
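
Together, the two guards above fix the degraded-mode contract of the fallback chat path: a missing llm-service yields a nil client and the flow aborts with one error instead of panicking or persisting a half-finished turn. Compressed illustration (not code from this commit):

selected, tier := s.pickChatModel(requestModel) // (nil, "pro") when s.llmService is nil
if selected == nil {
	pushErrNonBlocking(errChan, errors.New("llm client is not ready"))
	return // nothing streamed, no dangling assistant reply
}
_ = tier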

View File

@@ -11,10 +11,8 @@ import (
"github.com/LoveLosita/smartflow/backend/model"
"github.com/LoveLosita/smartflow/backend/respond"
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
"github.com/cloudwego/eino-ext/components/model/ark"
einoModel "github.com/cloudwego/eino/components/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
)
const (
@@ -253,11 +251,11 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
}
// 2. 标题生成属于结构化短输出,关闭 thinking 并限制 tokens,降低延迟与发散。
resp, err := modelInst.Generate(ctx, messages,
ark.WithThinking(&arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}),
einoModel.WithTemperature(0.2),
einoModel.WithMaxTokens(40),
)
resp, err := modelInst.GenerateText(ctx, messages, llmservice.GenerateOptions{
Temperature: 0.2,
MaxTokens: 40,
Thinking: llmservice.ThinkingModeDisabled,
})
if err != nil {
return "", 0, err
}
@@ -267,26 +265,26 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
// 2.1 标题链路的 token 从模型响应 usage 中提取;缺失则按 0 处理,不影响主流程。
titleTokens := 0
if resp.ResponseMeta != nil && resp.ResponseMeta.Usage != nil {
if resp.Usage != nil {
titleTokens = normalizeUsageTotal(
resp.ResponseMeta.Usage.TotalTokens,
resp.ResponseMeta.Usage.PromptTokens,
resp.ResponseMeta.Usage.CompletionTokens,
resp.Usage.TotalTokens,
resp.Usage.PromptTokens,
resp.Usage.CompletionTokens,
)
}
return normalizeConversationTitle(resp.Content), titleTokens, nil
return normalizeConversationTitle(resp.Text), titleTokens, nil
}
// pickTitleModel 选择用于标题生成的模型。
// 优先 Lite(成本低、速度快),Lite 不可用时回退 Pro。
func (s *AgentService) pickTitleModel() *ark.ChatModel {
if s.AIHub == nil {
func (s *AgentService) pickTitleModel() *llmservice.Client {
if s == nil || s.llmService == nil {
return nil
}
if s.AIHub.Lite != nil {
return s.AIHub.Lite
if client := s.llmService.LiteClient(); client != nil {
return client
}
return s.AIHub.Pro
return s.llmService.ProClient()
}
// buildConversationTitleUserPrompt 把消息历史拼成可读文本供模型总结。
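
The calls in this file pin down the shape of the unified text API. Reconstructed here for readers from the fields they use; the real definitions live in backend/services/llm and the field types are guesses:

// Options replace the ark.WithThinking / einoModel.With* option functions.
type GenerateOptions struct {
	Temperature float64
	MaxTokens   int
	Thinking    ThinkingMode // ThinkingModeEnabled / ThinkingModeDisabled
	Metadata    map[string]any
}

// Result shape implied by resp.Text and resp.Usage above; the usage field
// names mirror what normalizeUsageTotal consumes.
type TextResult struct {
	Text  string
	Usage *TokenUsage // TotalTokens, PromptTokens, CompletionTokens
}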

View File

@@ -8,7 +8,6 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentconv "github.com/LoveLosita/smartflow/backend/newAgent/conv"
newagentgraph "github.com/LoveLosita/smartflow/backend/newAgent/graph"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
@@ -57,6 +56,11 @@ func (s *AgentService) runNewAgentGraph(
errChan chan error,
) {
requestCtx, _ := withRequestTokenMeter(ctx)
if s == nil || s.llmService == nil {
// 0. newAgent 主链强依赖 llm-service,装配漏传时直接返回错误,避免 nil receiver panic。
pushErrNonBlocking(errChan, errors.New("agent llm service is not initialized"))
return
}
// 1. 规范会话 ID 和模型选择。
chatID = normalizeConversationID(chatID)
@@ -184,14 +188,15 @@ func (s *AgentService) runNewAgentGraph(
}
graphRequest.Normalize()
// 8. 适配 LLM clients(从 AIHub 的 ark.ChatModel 转换为 newAgent LLM Client)。
// 8. 适配 LLM clients(统一从 llm-service 取出 newAgent 图所需模型,不再直接碰 AIHub)。
// 8.1 Chat/Deliver 使用 Pro 模型:路由分流、闲聊、交付总结属于标准复杂度。
// 8.2 Plan/Execute 使用 Max 模型:规划和 ReAct 循环需要深度推理能力。
chatClient := infrallm.WrapArkClient(s.AIHub.Pro)
planClient := infrallm.WrapArkClient(s.AIHub.Max)
executeClient := infrallm.WrapArkClient(s.AIHub.Max)
deliverClient := infrallm.WrapArkClient(s.AIHub.Pro)
summaryClient := infrallm.WrapArkClient(s.AIHub.Lite)
llmClients := s.llmService.NewAgentModelClients()
chatClient := llmClients.Chat
planClient := llmClients.Plan
executeClient := llmClients.Execute
deliverClient := llmClients.Deliver
summaryClient := llmClients.Summary
// 9. 适配 SSE emitter。
sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan)
@@ -244,8 +249,8 @@ func (s *AgentService) runNewAgentGraph(
log.Printf("[ERROR] newAgent graph 执行失败 trace=%s chat=%s: %v", traceID, chatID, graphErr)
pushErrNonBlocking(errChan, fmt.Errorf("graph 执行失败: %w", graphErr))
// Graph 出错时回退普通聊天,保证可用性。回退使用 Pro 模型。
s.runNormalChatFlow(requestCtx, s.AIHub.Pro, resolvedModelName, userMessage, true, "", nil, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
// Graph 出错时回退普通聊天,保证可用性。回退使用 llm-service 的 Pro 模型。
s.runNormalChatFlow(requestCtx, chatClient, resolvedModelName, userMessage, true, "", nil, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
return
}
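
NewAgentModelClients() hands the graph one bundle in place of five AIHub fields. Its shape, reconstructed from the field accesses above (the struct name and the exact model tiers are assumptions beyond what this diff shows):

type AgentModelClients struct {
	Chat    *Client // Pro: routing, small talk, delivery summaries
	Plan    *Client // Max: planning needs deeper reasoning
	Execute *Client // Max: ReAct loop
	Deliver *Client // Pro
	Summary *Client // Lite: cheap, fast summaries
}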

View File

@@ -6,20 +6,18 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
"github.com/cloudwego/eino-ext/components/model/ark"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
)
// streamChatFallback 是 graph 执行失败时的降级流式聊天。
// 内联了旧 agentchat.StreamChat 的核心逻辑,不再依赖 agent/ 包。
func (s *AgentService) streamChatFallback(
ctx context.Context,
llm *ark.ChatModel,
llm *llmservice.Client,
modelName string,
userInput string,
ifThinking bool,
@@ -36,13 +34,6 @@ func (s *AgentService) streamChatFallback(
}
messages = append(messages, schema.UserMessage(userInput))
var thinking *ark.Thinking
if ifThinking {
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeEnabled}
} else {
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}
}
if strings.TrimSpace(modelName) == "" {
modelName = "smartflow-worker"
}
@@ -50,7 +41,11 @@ func (s *AgentService) streamChatFallback(
created := time.Now().Unix()
firstChunk := true
chunkEmitter := newagentstream.NewChunkEmitter(newagentstream.NewSSEPayloadEmitter(outChan), requestID, modelName, created)
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(infrallm.WrapArkClient(s.AIHub.Lite)))
reasoningSummaryClient := s.llmService.LiteClient()
if reasoningSummaryClient == nil {
reasoningSummaryClient = s.llmService.ProClient()
}
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(reasoningSummaryClient))
chunkEmitter.SetExtraEventHook(func(extra *newagentstream.OpenAIChunkExtra) {
s.persistNewAgentTimelineExtraEvent(context.Background(), userID, chatID, extra)
})
@@ -75,7 +70,14 @@ func (s *AgentService) streamChatFallback(
}
var reasoningEndAt *time.Time
reader, err := llm.Stream(ctx, messages, ark.WithThinking(thinking))
thinkingMode := llmservice.ThinkingModeDisabled
if ifThinking {
thinkingMode = llmservice.ThinkingModeEnabled
}
reader, err := llm.Stream(ctx, messages, llmservice.GenerateOptions{
Thinking: thinkingMode,
})
if err != nil {
return "", "", 0, nil, err
}
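
With GenerateOptions, the old ark.Thinking struct construction collapses into a bool→enum switch. As a tiny extracted helper (illustration only, not part of this commit):

func thinkingOption(ifThinking bool) llmservice.ThinkingMode {
	if ifThinking {
		return llmservice.ThinkingModeEnabled
	}
	return llmservice.ThinkingModeDisabled
}

// reader, err := llm.Stream(ctx, messages, llmservice.GenerateOptions{Thinking: thinkingOption(ifThinking)})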

View File

@@ -6,9 +6,9 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const reasoningSummaryMaxTokens = 700
@@ -24,7 +24,7 @@ type reasoningSummaryLLMResponse struct {
// 1. service 层负责选择模型与 promptstream 层只负责调度和闸门;
// 2. 这里不持久化摘要,持久化统一走 ChunkEmitter 的 extra hook;
// 3. 摘要失败时返回 error由 ReasoningDigestor 吞掉并等待下一次水位线/Flush 兜底。
func (s *AgentService) makeReasoningSummaryFunc(client *infrallm.Client) newagentstream.ReasoningSummaryFunc {
func (s *AgentService) makeReasoningSummaryFunc(client *llmservice.Client) newagentstream.ReasoningSummaryFunc {
if client == nil {
return nil
}
@@ -47,14 +47,14 @@ func (s *AgentService) makeReasoningSummaryFunc(client *infrallm.Client) newagen
DurationSeconds: input.DurationSeconds,
})
resp, rawResult, err := infrallm.GenerateJSON[reasoningSummaryLLMResponse](
resp, rawResult, err := llmservice.GenerateJSON[reasoningSummaryLLMResponse](
ctx,
client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 0.1,
MaxTokens: reasoningSummaryMaxTokens,
Thinking: infrallm.ThinkingModeDisabled,
Thinking: llmservice.ThinkingModeDisabled,
Metadata: map[string]any{
"stage": "reasoning_summary",
"candidate_seq": input.CandidateSeq,
@@ -99,7 +99,7 @@ func limitReasoningDetailSummary(text string, maxRunes int) string {
return string(runes[:maxRunes])
}
func truncateReasoningSummaryRaw(raw *infrallm.TextResult) string {
func truncateReasoningSummaryRaw(raw *llmservice.TextResult) string {
if raw == nil {
return ""
}
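
GenerateJSON is generic over the response type: a caller declares the struct it expects and receives the parsed value plus the raw text result. Minimal usage sketch (summaryOut is a placeholder standing in for reasoningSummaryLLMResponse):

type summaryOut struct {
	Summary string `json:"summary"`
}

out, raw, err := llmservice.GenerateJSON[summaryOut](ctx, client, messages, llmservice.GenerateOptions{
	Temperature: 0.1,
	MaxTokens:   reasoningSummaryMaxTokens,
	Thinking:    llmservice.ThinkingModeDisabled,
})
if err != nil {
	return err // the digestor swallows this and waits for the next watermark/flush
}
_ = out.Summary
_ = raw // *llmservice.TextResult, kept for truncation and diagnostics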

View File

@@ -6,16 +6,16 @@ import (
"github.com/LoveLosita/smartflow/backend/conv"
"github.com/LoveLosita/smartflow/backend/dao"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
"github.com/LoveLosita/smartflow/backend/model"
"github.com/LoveLosita/smartflow/backend/respond"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
type CourseService struct {
// 伸出手:准备接住 DAO
courseDAO *dao.CourseDAO
scheduleDAO *dao.ScheduleDAO
courseImageResponsesClient *infrallm.ArkResponsesClient
courseImageResponsesClient *llmservice.ArkResponsesClient
courseImageConfig CourseImageParseConfig
courseImageModel string
}
@@ -24,7 +24,7 @@ type CourseService struct {
func NewCourseService(
courseDAO *dao.CourseDAO,
scheduleDAO *dao.ScheduleDAO,
courseImageResponsesClient *infrallm.ArkResponsesClient,
courseImageResponsesClient *llmservice.ArkResponsesClient,
courseImageConfig CourseImageParseConfig,
courseImageModel string,
) *CourseService {

View File

@@ -8,16 +8,20 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
"github.com/LoveLosita/smartflow/backend/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
// ParseCourseTableImage 使用 Ark SDK Responses 解析课程表图片。
func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.CourseImageParseRequest) (*model.CourseImageParseResponse, error) {
if ss == nil || ss.courseImageResponsesClient == nil {
modelName := ""
if ss != nil {
modelName = ss.courseImageModel
}
log.Printf(
"[COURSE_PARSE][SERVICE] parser unavailable model_name=%q filename=%q mime=%q bytes=%d",
ss.courseImageModel,
modelName,
req.Filename,
req.MIMEType,
len(req.ImageBytes),
@@ -57,7 +61,7 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
base64Chars,
promptChars,
base64Chars+promptChars+len(strings.TrimSpace(courseImageParseSystemPrompt)),
infrallm.ThinkingModeDisabled,
llmservice.ThinkingModeDisabled,
courseImageParseTemperature,
ss.courseImageConfig.MaxTokens,
"json_object",
@@ -66,10 +70,10 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
// 1. 课程表图片识别输出体量大,显式透传 max_output_tokens避免被默认值截断。
// 2. text_format 固定为 json_object降低输出混入解释文本导致解析失败的概率。
// 3. thinking 显式关闭,优先保证课程导入链路稳定性。
draft, rawResult, err := infrallm.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, infrallm.ArkResponsesOptions{
draft, rawResult, err := llmservice.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, llmservice.ArkResponsesOptions{
Temperature: courseImageParseTemperature,
MaxOutputTokens: ss.courseImageConfig.MaxTokens,
Thinking: infrallm.ThinkingModeDisabled,
Thinking: llmservice.ThinkingModeDisabled,
TextFormat: "json_object",
})
if err != nil {
@@ -188,12 +192,12 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
return normalizedDraft, nil
}
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]infrallm.ArkResponsesMessage, int, int) {
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]llmservice.ArkResponsesMessage, int, int) {
userPrompt := fmt.Sprintf(courseImageParseUserPromptTemplate, req.Filename, req.MIMEType)
base64Data := base64.StdEncoding.EncodeToString(req.ImageBytes)
imageDataURL := fmt.Sprintf("data:%s;base64,%s", req.MIMEType, base64Data)
messages := []infrallm.ArkResponsesMessage{
messages := []llmservice.ArkResponsesMessage{
{
Role: "system",
Text: strings.TrimSpace(courseImageParseSystemPrompt),
@@ -208,7 +212,7 @@ func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest)
return messages, len(base64Data), len(strings.TrimSpace(userPrompt))
}
func isCourseImageOutputTruncated(rawResult *infrallm.ArkResponsesResult) bool {
func isCourseImageOutputTruncated(rawResult *llmservice.ArkResponsesResult) bool {
if rawResult == nil {
return false
}
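
The image-parsing path keeps its own Responses-style option set, now namespaced under llmservice. Field set as implied by the call above (a reconstruction; the real definition may carry more fields):

type ArkResponsesOptions struct {
	Temperature     float64
	MaxOutputTokens int
	Thinking        ThinkingMode
	TextFormat      string // "json_object" keeps the output machine-parseable
}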