Version: 0.9.65.dev.260503

Backend:
1. Phases 1.5/1.6: finalize llm-service / rag-service, unify the model egress and the retrieval-infrastructure entry point, and retire the legacy backend/infra/llm and backend/infra/rag implementations;
2. Update the affected call chains and the microservice migration plan docs accordingly.
Losita
2026-05-03 23:21:03 +08:00
parent a6c1e5d077
commit 9902ca3563
65 changed files with 550 additions and 376 deletions

View File

@@ -11,7 +11,7 @@ import (
     "github.com/LoveLosita/smartflow/backend/active_scheduler/ports"
     "github.com/LoveLosita/smartflow/backend/active_scheduler/trigger"
-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
 )

 const locateMaxTokens = 800
@@ -24,7 +24,7 @@ const locateMaxTokens = 800
 // 3. Does not create a new tool system and does not emit previews directly.
 type Service struct {
     reader ports.ScheduleReader
-    client *infrallm.Client
+    client *llmservice.Client
     clock  func() time.Time
     logger *log.Logger
 }
@@ -34,7 +34,7 @@ type Service struct {
 // Notes:
 // 1. reader / client may be nil, so the resolver can fall back to ask_user when the model or the read model is temporarily unavailable.
 // 2. The actual locating capability is only enabled on demand inside Resolve.
-func NewService(reader ports.ScheduleReader, client *infrallm.Client) *Service {
+func NewService(reader ports.ScheduleReader, client *llmservice.Client) *Service {
     return &Service{
         reader: reader,
         client: client,
@@ -101,15 +101,15 @@ func (s *Service) Resolve(ctx context.Context, req Request) (Result, error) {
         return s.buildAskUserResult(req, "定位 prompt 构造失败"), nil
     }
-    messages := infrallm.BuildSystemUserMessages(strings.TrimSpace(locateSystemPrompt), nil, userPrompt)
-    resp, rawResult, err := infrallm.GenerateJSON[llmResponse](
+    messages := llmservice.BuildSystemUserMessages(strings.TrimSpace(locateSystemPrompt), nil, userPrompt)
+    resp, rawResult, err := llmservice.GenerateJSON[llmResponse](
         ctx,
         s.client,
         messages,
-        infrallm.GenerateOptions{
+        llmservice.GenerateOptions{
             Temperature: 0.1,
             MaxTokens:   locateMaxTokens,
-            Thinking:    infrallm.ThinkingModeDisabled,
+            Thinking:    llmservice.ThinkingModeDisabled,
             Metadata: map[string]any{
                 "stage":           "active_scheduler_feedback_locate",
                 "candidate_count": len(candidates),
@@ -340,7 +340,7 @@ func cloneAndTrimStrings(values []string) []string {
     return result
 }

-func truncateRaw(raw *infrallm.TextResult) string {
+func truncateRaw(raw *llmservice.TextResult) string {
     if raw == nil {
         return ""
     }
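Editorial aside: the hunks above only rename the import, but they pin down the call shape of the JSON helper that moved into services/llm. As a minimal sketch (not the repo's implementation, assuming the usual context/encoding/json and eino schema imports), a generic GenerateJSON[T] with this signature could look like this; the TextResult.Text field and the GenerateText return type are assumptions inferred from the call sites:

// Sketch only: a generic generate-then-decode helper matching the call shape
// resp, raw, err := llmservice.GenerateJSON[T](ctx, client, messages, opts).
func GenerateJSON[T any](
    ctx context.Context,
    client *Client,
    messages []*schema.Message,
    opts GenerateOptions,
) (*T, *TextResult, error) {
    raw, err := client.GenerateText(ctx, messages, opts) // return type assumed
    if err != nil {
        return nil, raw, err
    }
    var out T
    // Real model output may wrap the JSON in prose; ExtractJSONObject-style
    // bracket counting (referenced later in this commit) would run here first.
    if err := json.Unmarshal([]byte(raw.Text), &out); err != nil { // raw.Text assumed
        return nil, raw, err
    }
    return &out, raw, nil
}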

View File

@@ -10,7 +10,7 @@ import (
     "time"

     "github.com/LoveLosita/smartflow/backend/active_scheduler/candidate"
-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
 )

 const selectionMaxTokens = 1200
@@ -22,7 +22,7 @@ const selectionMaxTokens = 1200
 // 2. When the LLM fails, outputs invalid data, or selects a nonexistent candidate, fall back to the backend fallback candidate.
 // 3. Does not write previews, send notifications, or modify confirmed schedules.
 type Service struct {
-    client *infrallm.Client
+    client *llmservice.Client
     clock  func() time.Time
     logger *log.Logger
 }
@@ -33,7 +33,7 @@ type Service struct {
 // 1. client may be nil; the selector then only runs the deterministic fallback, which helps local testing and degraded operation.
 // 2. The actual model wiring happens in cmd/start.go: aiHub.Pro -> llm.Client -> selection.Service.
 // 3. The selector itself holds no model configuration; it only expresses this domain's prompt and result validation.
-func NewService(client *infrallm.Client) *Service {
+func NewService(client *llmservice.Client) *Service {
     return &Service{
         client: client,
         clock:  time.Now,
@@ -70,19 +70,19 @@ func (s *Service) Select(ctx context.Context, req SelectRequest) (Result, error)
         return buildFallbackResult(req, "选择器 prompt 构造失败: "+err.Error()), nil
     }
-    messages := infrallm.BuildSystemUserMessages(
+    messages := llmservice.BuildSystemUserMessages(
         strings.TrimSpace(selectionSystemPrompt),
         nil,
         userPrompt,
     )
-    resp, rawResult, err := infrallm.GenerateJSON[llmSelectionResponse](
+    resp, rawResult, err := llmservice.GenerateJSON[llmSelectionResponse](
         ctx,
         s.client,
         messages,
-        infrallm.GenerateOptions{
+        llmservice.GenerateOptions{
             Temperature: 0.1,
             MaxTokens:   selectionMaxTokens,
-            Thinking:    infrallm.ThinkingModeDisabled,
+            Thinking:    llmservice.ThinkingModeDisabled,
             Metadata: map[string]any{
                 "stage":           "active_scheduler_select",
                 "candidate_count": len(req.Candidates),
@@ -275,7 +275,7 @@ func firstNonEmpty(values ...string) string {
     return ""
 }

-func truncateRaw(raw *infrallm.TextResult) string {
+func truncateRaw(raw *llmservice.TextResult) string {
     if raw == nil {
         return ""
     }
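The constructor notes above fix the wiring chain (aiHub.Pro -> llm.Client -> selection.Service). A hypothetical caller-side snippet consistent with the cmd/start.go hunks later in this commit; the request fields are elided and `activesel` is the import alias used there:

// Hypothetical wiring; Select degrades to the deterministic fallback when the
// client is nil, so this is safe even before a model is configured.
client := llmService.ProClient()
selector := activesel.NewService(client)

result, err := selector.Select(ctx, activesel.SelectRequest{ /* candidates elided */ })
if err != nil {
    log.Printf("active schedule selection failed: %v", err)
}
_ = result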

View File

@@ -23,10 +23,7 @@ import (
     "github.com/LoveLosita/smartflow/backend/api"
     "github.com/LoveLosita/smartflow/backend/dao"
     kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka"
-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
-    infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
-    ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
     "github.com/LoveLosita/smartflow/backend/inits"
     "github.com/LoveLosita/smartflow/backend/memory"
     memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
@@ -44,6 +41,9 @@ import (
     "github.com/LoveLosita/smartflow/backend/service"
     agentsvcsvc "github.com/LoveLosita/smartflow/backend/service/agentsvc"
     eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
+    ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
+    ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
     "github.com/go-redis/redis/v8"
     "github.com/spf13/viper"
     "gorm.io/gorm"
@@ -167,17 +167,25 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
         return nil, fmt.Errorf("failed to initialize Eino: %w", err)
     }

-    ragRuntime, err := buildRAGRuntime(ctx)
+    llmService := llmservice.New(llmservice.Options{
+        AIHub:             aiHub,
+        APIKey:            os.Getenv("ARK_API_KEY"),
+        BaseURL:           viper.GetString("agent.baseURL"),
+        CourseVisionModel: viper.GetString("courseImport.visionModel"),
+    })
+
+    ragService, err := buildRAGService(ctx)
     if err != nil {
         return nil, err
     }
+    ragRuntime := ragService.Runtime()

     memoryCfg := memory.LoadConfigFromViper()
     memoryObserver := memoryobserve.NewLoggerObserver(log.Default())
     memoryMetrics := memoryobserve.NewMetricsRegistry()
     memoryModule := memory.NewModuleWithObserve(
         db,
-        infrallm.WrapArkClient(aiHub.Pro),
+        llmService.ProClient(),
         ragRuntime,
         memoryCfg,
         memory.ObserveDeps{
@@ -208,11 +216,11 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
     userService := service.NewUserService(userRepo, cacheRepo)
     taskSv := service.NewTaskService(taskRepo, cacheRepo, eventBus)
     taskSv.SetActiveScheduleDAO(manager.ActiveSchedule)
-    courseService := buildCourseService(courseRepo, scheduleRepo)
+    courseService := buildCourseService(llmService, courseRepo, scheduleRepo)
     taskClassService := service.NewTaskClassService(taskClassRepo, cacheRepo, scheduleRepo, manager)
     scheduleService := service.NewScheduleService(scheduleRepo, userRepo, taskClassRepo, manager, cacheRepo)
     agentService := service.NewAgentServiceWithSchedule(
-        aiHub,
+        llmService,
         agentRepo,
         taskRepo,
         cacheRepo,
@@ -251,7 +259,7 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
     }
     // 1. The active-schedule selector reuses the Pro model on its own; on LLM failure the selection layer explicitly falls back to deterministic candidates.
     // 2. dry-run and selection are chained through the graph runner, so trigger_pipeline never needs a second candidate pipeline.
-    activeScheduleLLMClient := infrallm.WrapArkClient(aiHub.Pro)
+    activeScheduleLLMClient := llmService.ProClient()
     activeScheduleSelector := activesel.NewService(activeScheduleLLMClient)
     activeScheduleFeedbackLocator := activefeedbacklocate.NewService(activeReaders, activeScheduleLLMClient)
     activeScheduleGraphRunner, err := activegraph.NewRunner(activeScheduleDryRun.AsGraphDryRunFunc(), activeScheduleSelector)
@@ -323,26 +331,26 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
     return runtime, nil
 }

-func buildRAGRuntime(ctx context.Context) (infrarag.Runtime, error) {
+func buildRAGService(ctx context.Context) (*ragservice.Service, error) {
     ragCfg := ragconfig.LoadFromViper()
     if !ragCfg.Enabled {
-        log.Println("RAG runtime is disabled")
-        return nil, nil
+        log.Println("RAG service is disabled")
+        return ragservice.New(ragservice.Options{}), nil
     }
     // 1. The project has no global observability platform yet, so a lightweight Observer is injected here for now.
     // 2. RAG internals depend only on the Observer interface; if the project later unifies logging/metrics, only this spot needs replacing.
     // 3. This keeps RAG from building its own disconnected logging infrastructure.
     ragLogger := log.Default()
-    ragRuntime, err := infrarag.NewRuntimeFromConfig(ctx, ragCfg, infrarag.FactoryDeps{
+    ragService, err := ragservice.NewFromConfig(ctx, ragCfg, ragservice.FactoryDeps{
         Logger:   ragLogger,
-        Observer: infrarag.NewLoggerObserver(ragLogger),
+        Observer: ragservice.NewLoggerObserver(ragLogger),
     })
     if err != nil {
-        return nil, fmt.Errorf("failed to initialize RAG runtime: %w", err)
+        return nil, fmt.Errorf("failed to initialize RAG service: %w", err)
     }
-    log.Printf("RAG runtime initialized: store=%s embed=%s reranker=%s", ragCfg.Store, ragCfg.EmbedProvider, ragCfg.RerankerProvider)
-    return ragRuntime, nil
+    log.Printf("RAG service initialized: store=%s embed=%s reranker=%s", ragCfg.Store, ragCfg.EmbedProvider, ragCfg.RerankerProvider)
+    return ragService, nil
 }

 func buildEventBus(outboxRepo *outboxinfra.Repository) (eventsvc.OutboxBus, error) {
@@ -369,12 +377,8 @@ func buildEventBus(outboxRepo *outboxinfra.Repository) (eventsvc.OutboxBus, erro
     return eventBus, nil
 }

-func buildCourseService(courseRepo *dao.CourseDAO, scheduleRepo *dao.ScheduleDAO) *service.CourseService {
-    courseImageResponsesClient := infrallm.NewArkResponsesClient(
-        os.Getenv("ARK_API_KEY"),
-        viper.GetString("agent.baseURL"),
-        viper.GetString("courseImport.visionModel"),
-    )
+func buildCourseService(llmService *llmservice.Service, courseRepo *dao.CourseDAO, scheduleRepo *dao.ScheduleDAO) *service.CourseService {
+    courseImageResponsesClient := llmService.CourseImageResponsesClient()
     return service.NewCourseService(
         courseRepo,
         scheduleRepo,
@@ -650,7 +654,7 @@ func containsString(values []string, target string) bool {
 func configureAgentService(
     agentService *service.AgentService,
-    ragRuntime infrarag.Runtime,
+    ragRuntime ragservice.Runtime,
     agentRepo *dao.AgentDAO,
     cacheRepo *dao.CacheDAO,
     taskRepo *dao.TaskDAO,
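Taken together, these hunks imply a small facade in backend/services/llm that now owns the env/viper reads previously inlined in buildCourseService. A sketch of that assumed shape; only the names visible in the diff are real, and the field types and bodies are editor guesses:

// Assumed facade shape; not the repo's code.
type Options struct {
    AIHub             *aihub.Hub // hypothetical type: the diff shows only the field name
    APIKey            string
    BaseURL           string
    CourseVisionModel string
}

type Service struct{ opts Options }

func New(opts Options) *Service { return &Service{opts: opts} }

// ProClient stands in for the removed infrallm.WrapArkClient(aiHub.Pro) call sites.
func (s *Service) ProClient() *Client {
    return WrapArkClient(s.opts.AIHub.Pro) // assumes the old wrapper survives internally
}

// CourseImageResponsesClient replaces the inline NewArkResponsesClient construction.
func (s *Service) CourseImageResponsesClient() *ResponsesClient { // return type assumed
    return NewArkResponsesClient(s.opts.APIKey, s.opts.BaseURL, s.opts.CourseVisionModel)
}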

View File

@@ -5,8 +5,6 @@ import (
     "errors"
     "log"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
-    infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
     memorycleanup "github.com/LoveLosita/smartflow/backend/memory/cleanup"
     memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
     memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
@@ -16,6 +14,8 @@ import (
     memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
     memoryworker "github.com/LoveLosita/smartflow/backend/memory/worker"
     "github.com/LoveLosita/smartflow/backend/model"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
+    ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
     "gorm.io/gorm"
 )
@@ -28,8 +28,8 @@ import (
 type Module struct {
     db  *gorm.DB
     cfg memorymodel.Config

-    llmClient  *infrallm.Client
-    ragRuntime infrarag.Runtime
+    llmClient  *llmservice.Client
+    ragRuntime ragservice.Runtime

     observer memoryobserve.Observer
     metrics  memoryobserve.MetricsRecorder
@@ -64,15 +64,15 @@ func LoadConfigFromViper() memorymodel.Config {
 // 2. llmClient may be nil; the write path then automatically falls back to local fallback extraction.
 // 3. ragRuntime may be nil; reads and vector sync then automatically fall back to the legacy logic.
 // 4. If a unified DI container is adopted later, register this Module first instead of letting internal repos/services keep leaking outward.
-func NewModule(db *gorm.DB, llmClient *infrallm.Client, ragRuntime infrarag.Runtime, cfg memorymodel.Config) *Module {
+func NewModule(db *gorm.DB, llmClient *llmservice.Client, ragRuntime ragservice.Runtime, cfg memorymodel.Config) *Module {
     return NewModuleWithObserve(db, llmClient, ragRuntime, cfg, ObserveDeps{})
 }

 // NewModuleWithObserve creates the memory-module facade with observability dependencies.
 func NewModuleWithObserve(
     db *gorm.DB,
-    llmClient *infrallm.Client,
-    ragRuntime infrarag.Runtime,
+    llmClient *llmservice.Client,
+    ragRuntime ragservice.Runtime,
     cfg memorymodel.Config,
     deps ObserveDeps,
 ) *Module {
@@ -228,8 +228,8 @@ func (m *Module) StartWorker(ctx context.Context) {
 func wireModule(
     db *gorm.DB,
-    llmClient *infrallm.Client,
-    ragRuntime infrarag.Runtime,
+    llmClient *llmservice.Client,
+    ragRuntime ragservice.Runtime,
     cfg memorymodel.Config,
     deps ObserveDeps,
 ) *Module {
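Per the constructor notes, both clients are optional, which makes a degraded local wiring trivial. A hypothetical test/local setup using only the names shown in this diff (db and ctx come from the caller):

// Both nils are legal per the notes above: the write path falls back to local
// extraction, and reads/vector sync fall back to the legacy logic.
module := memory.NewModule(
    db,
    nil, // llmClient
    nil, // ragRuntime
    memory.LoadConfigFromViper(),
)
module.StartWorker(ctx)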

View File

@@ -6,8 +6,8 @@ import (
     "log"
     "strings"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
 )

 const defaultDecisionCompareMaxTokens = 600
@@ -19,13 +19,13 @@ const defaultDecisionCompareMaxTokens = 600
 // 2. The LLM only outputs relation (the relation type); it outputs neither an action nor a target ID.
 // 3. When the LLM call fails, an error is returned; the caller decides whether to treat it as unrelated.
 type LLMDecisionOrchestrator struct {
-    client *infrallm.Client
+    client *llmservice.Client
     cfg    memorymodel.Config
     logger *log.Logger
 }

 // NewLLMDecisionOrchestrator constructs the decision-compare orchestrator.
-func NewLLMDecisionOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
+func NewLLMDecisionOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
     return &LLMDecisionOrchestrator{
         client: client,
         cfg:    cfg,
@@ -52,14 +52,14 @@ func (o *LLMDecisionOrchestrator) Compare(
     systemPrompt := buildDecisionCompareSystemPrompt()
     userPrompt := buildDecisionCompareUserPrompt(fact, candidate)
-    messages := infrallm.BuildSystemUserMessages(systemPrompt, nil, userPrompt)
+    messages := llmservice.BuildSystemUserMessages(systemPrompt, nil, userPrompt)

     // 2. Call the LLM for structured output; keep the temperature low so the judgment stays stable.
-    resp, _, err := infrallm.GenerateJSON[decisionCompareResponse](
+    resp, _, err := llmservice.GenerateJSON[decisionCompareResponse](
         ctx,
         o.client,
         messages,
-        infrallm.GenerateOptions{
+        llmservice.GenerateOptions{
             Temperature: 0.1,
             MaxTokens:   defaultDecisionCompareMaxTokens,
             Thinking:    resolveMemoryThinkingMode(o.cfg.LLMThinking),
@@ -127,9 +127,9 @@ func buildDecisionCompareUserPrompt(fact memorymodel.NormalizedFact, candidate m
 }

 // resolveMemoryThinkingMode maps the config boolean to the corresponding ThinkingMode.
-func resolveMemoryThinkingMode(enabled bool) infrallm.ThinkingMode {
+func resolveMemoryThinkingMode(enabled bool) llmservice.ThinkingMode {
     if enabled {
-        return infrallm.ThinkingModeEnabled
+        return llmservice.ThinkingModeEnabled
     }
-    return infrallm.ThinkingModeDisabled
+    return llmservice.ThinkingModeDisabled
 }

View File

@@ -7,9 +7,9 @@ import (
     "log"
     "strings"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
     memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
 )

 const (
@@ -24,13 +24,13 @@ const (
 // 2. Not responsible for persistence or for advancing the task state machine.
 // 3. When the LLM is unavailable or its output is malformed, fall back to conservative local extraction so the pipeline never breaks completely.
 type LLMWriteOrchestrator struct {
-    client *infrallm.Client
+    client *llmservice.Client
     cfg    memorymodel.Config
     logger *log.Logger
 }

 // NewLLMWriteOrchestrator constructs the LLM-based memory-write orchestrator.
-func NewLLMWriteOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
+func NewLLMWriteOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
     return &LLMWriteOrchestrator{
         client: client,
         cfg:    cfg,
@@ -54,17 +54,17 @@ func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorym
         return fallbackNormalizedFacts(payload), nil
     }
-    messages := infrallm.BuildSystemUserMessages(
+    messages := llmservice.BuildSystemUserMessages(
         buildMemoryExtractSystemPrompt(o.cfg.ExtractPrompt),
         nil,
         buildMemoryExtractUserPrompt(payload),
     )

-    resp, rawResult, err := infrallm.GenerateJSON[memoryExtractResponse](
+    resp, rawResult, err := llmservice.GenerateJSON[memoryExtractResponse](
         ctx,
         o.client,
         messages,
-        infrallm.GenerateOptions{
+        llmservice.GenerateOptions{
             Temperature: clampTemperature(o.cfg.LLMTemperature),
             MaxTokens:   defaultMemoryExtractMaxTokens,
             Thinking:    resolveMemoryThinkingMode(o.cfg.LLMThinking),
@@ -319,7 +319,7 @@ func isSkipIntent(intent string) bool {
     }
 }

-func truncateForLog(raw *infrallm.TextResult) string {
+func truncateForLog(raw *llmservice.TextResult) string {
     if raw == nil {
         return ""
     }
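The context lines above show the degradation contract but not the error branch. A sketch of how an LLM failure likely folds back into fallbackNormalizedFacts, assuming the logging style used elsewhere in this commit (opts stands in for the GenerateOptions shown in the hunk; the exact branch in the repo may differ):

// Sketch of the degraded path inside ExtractFacts; not the repo's exact code.
resp, rawResult, err := llmservice.GenerateJSON[memoryExtractResponse](ctx, o.client, messages, opts)
if err != nil {
    o.logger.Printf("memory extract degraded to local fallback: %v raw=%s",
        err, truncateForLog(rawResult))
    return fallbackNormalizedFacts(payload), nil // rule 3: never break the pipeline completely
}
_ = resp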

View File

@@ -3,8 +3,8 @@ package service
 import (
     "time"

-    infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
     memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
+    ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
 )

 // buildReadScopedItemQuery builds the MySQL query conditions shared by the read side.
@@ -53,8 +53,8 @@ func buildReadScopedRAGRequest(
     req memorymodel.RetrieveRequest,
     topK int,
     threshold float64,
-) infrarag.MemoryRetrieveRequest {
-    return infrarag.MemoryRetrieveRequest{
+) ragservice.MemoryRetrieveRequest {
+    return ragservice.MemoryRetrieveRequest{
         Query:     req.Query,
         TopK:      topK,
         Threshold: threshold,

View File

@@ -8,12 +8,12 @@ import (
     "strings"
     "time"

-    infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
     memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
     memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
     memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
     memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
     "github.com/LoveLosita/smartflow/backend/model"
+    ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
 )

 const (
@@ -30,7 +30,7 @@ const (
 type ReadService struct {
     itemRepo     *memoryrepo.ItemRepo
     settingsRepo *memoryrepo.SettingsRepo
-    ragRuntime   infrarag.Runtime
+    ragRuntime   ragservice.Runtime
     cfg          memorymodel.Config
     observer     memoryobserve.Observer
     metrics      memoryobserve.MetricsRecorder
@@ -57,7 +57,7 @@ type semanticRetrieveTelemetry struct {
 func NewReadService(
     itemRepo *memoryrepo.ItemRepo,
     settingsRepo *memoryrepo.SettingsRepo,
-    ragRuntime infrarag.Runtime,
+    ragRuntime ragservice.Runtime,
     cfg memorymodel.Config,
     observer memoryobserve.Observer,
     metrics memoryobserve.MetricsRecorder,
@@ -347,7 +347,7 @@ func collectMemoryIDs(items []model.MemoryItem) []int64 {
     return ids
 }

-func buildMemoryDTOFromRetrieveHit(hit infrarag.RetrieveHit) (memorymodel.ItemDTO, int64) {
+func buildMemoryDTOFromRetrieveHit(hit ragservice.RetrieveHit) (memorymodel.ItemDTO, int64) {
     memoryID := parseMemoryIDFromDocumentID(hit.DocumentID)
     metadata := hit.Metadata
     content := strings.TrimSpace(hit.Text)

View File

@@ -6,10 +6,10 @@ import (
     "log"
     "strings"

-    infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
     memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
     memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
     "github.com/LoveLosita/smartflow/backend/model"
+    ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
 )

 // Syncer is the minimal bridge between memory_items and the vector store.
@@ -19,7 +19,7 @@ import (
 // 2. It does not decide which memories to write, delete, or restore; those decisions stay with the upstream service/worker/cleanup.
 // 3. On sync failure it only writes back vector_status and records observations; it never rolls back the business transaction, so the online path does not become a hard dependency.
 type Syncer struct {
-    ragRuntime infrarag.Runtime
+    ragRuntime ragservice.Runtime
     itemRepo   *memoryrepo.ItemRepo
     observer   memoryobserve.Observer
     metrics    memoryobserve.MetricsRecorder
@@ -27,7 +27,7 @@ type Syncer struct {
 }

 func NewSyncer(
-    ragRuntime infrarag.Runtime,
+    ragRuntime ragservice.Runtime,
     itemRepo *memoryrepo.ItemRepo,
     observer memoryobserve.Observer,
     metrics memoryobserve.MetricsRecorder,
@@ -53,9 +53,9 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
         return
     }
-    requestItems := make([]infrarag.MemoryIngestItem, 0, len(items))
+    requestItems := make([]ragservice.MemoryIngestItem, 0, len(items))
     for _, item := range items {
-        requestItems = append(requestItems, infrarag.MemoryIngestItem{
+        requestItems = append(requestItems, ragservice.MemoryIngestItem{
             MemoryID:       item.ID,
             UserID:         item.UserID,
             ConversationID: strValue(item.ConversationID),
@@ -76,7 +76,7 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
     result, err := s.ragRuntime.IngestMemory(memoryobserve.WithFields(ctx, map[string]any{
         "trace_id": traceID,
-    }), infrarag.MemoryIngestRequest{
+    }), ragservice.MemoryIngestRequest{
         TraceID: traceID,
         Action:  "add",
         Items:   requestItems,
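Note 3 above ("only write back vector_status, never roll back") implies error handling like the following sketch. UpdateVectorStatus, idsOf, and the observer hook are hypothetical names, not taken from the diff:

// Sketch of the failure policy: record the failure, don't propagate it.
result, err := s.ragRuntime.IngestMemory(ctx, ragservice.MemoryIngestRequest{
    TraceID: traceID,
    Action:  "add",
    Items:   requestItems,
})
if err != nil {
    // The business transaction already committed; only mark the rows.
    _ = s.itemRepo.UpdateVectorStatus(ctx, idsOf(items), "failed") // hypothetical helper
    s.observer.OnError(ctx, "memory_vector_upsert", err)           // hypothetical observer hook
    return
}
_ = result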

View File

@@ -4,11 +4,11 @@ import (
     "context"
     "fmt"

-    infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
     memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
     memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
     memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
     "github.com/LoveLosita/smartflow/backend/model"
+    ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
     "gorm.io/gorm"
 )
@@ -192,7 +192,7 @@ func (r *Runner) recallCandidates(
 ) candidateRecallResult {
     // 1. Prefer Milvus vector-based semantic recall.
     if r.ragRuntime != nil {
-        retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, infrarag.MemoryRetrieveRequest{
+        retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, ragservice.MemoryRetrieveRequest{
             Query:     fact.Content,
             TopK:      r.cfg.DecisionCandidateTopK,
             Threshold: r.cfg.DecisionCandidateMinScore,
@@ -235,7 +235,7 @@ func (r *Runner) recallCandidates(
 // 1. Parse mysql_id out of DocumentID (format: memory:{id}).
 // 2. Extract title and memory_type from metadata.
 // 3. Skip hits whose DocumentID cannot be parsed.
-func (r *Runner) buildCandidatesFromRAG(hits []infrarag.RetrieveHit) []memorymodel.CandidateSnapshot {
+func (r *Runner) buildCandidatesFromRAG(hits []ragservice.RetrieveHit) []memorymodel.CandidateSnapshot {
     candidates := make([]memorymodel.CandidateSnapshot, 0, len(hits))
     for _, hit := range hits {
         memoryID := parseMemoryID(hit.DocumentID)
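The comment above fixes the DocumentID format as memory:{id}. A self-contained sketch of a parser matching that contract (the repo's parseMemoryID may differ in details; assumes the strings and strconv imports), returning 0 on mismatch so callers can skip the hit per rule 3:

func parseMemoryID(documentID string) int64 {
    const prefix = "memory:"
    rest, ok := strings.CutPrefix(documentID, prefix)
    if !ok {
        return 0
    }
    id, err := strconv.ParseInt(rest, 10, 64)
    if err != nil || id <= 0 {
        return 0 // unparseable or invalid: caller skips this hit
    }
    return id
}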

View File

@@ -9,7 +9,6 @@ import (
     "strings"
     "time"

-    infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
     memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
     memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
     memoryorchestrator "github.com/LoveLosita/smartflow/backend/memory/orchestrator"
@@ -17,6 +16,7 @@ import (
     memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
     memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
     "github.com/LoveLosita/smartflow/backend/model"
+    ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
     "gorm.io/gorm"
 )
@@ -41,7 +41,7 @@ type Runner struct {
     auditRepo    *memoryrepo.AuditRepo
     settingsRepo *memoryrepo.SettingsRepo
     extractor    Extractor
-    ragRuntime   infrarag.Runtime
+    ragRuntime   ragservice.Runtime
     logger       *log.Logger
     vectorSyncer *memoryvectorsync.Syncer
     observer     memoryobserve.Observer
@@ -63,7 +63,7 @@ func NewRunner(
     auditRepo *memoryrepo.AuditRepo,
     settingsRepo *memoryrepo.SettingsRepo,
     extractor Extractor,
-    ragRuntime infrarag.Runtime,
+    ragRuntime ragservice.Runtime,
     cfg memorymodel.Config,
     decisionOrchestrator *memoryorchestrator.LLMDecisionOrchestrator,
     vectorSyncer *memoryvectorsync.Syncer,

View File

@@ -5,10 +5,10 @@ import (
     "strings"
     "time"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
     newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
     schedule "github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
     "github.com/cloudwego/eino/schema"
 )
@@ -71,10 +71,10 @@ type PersistVisibleMessageFunc func(ctx context.Context, state *CommonState, msg
 // 2. Chat/Plan/Execute/Deliver may each get their own client, or share a single client at first.
 // 3. ChunkEmitter uniformly carries stage hints, body text, tool events, confirmation requests, and other SSE output.
 type AgentGraphDeps struct {
-    ChatClient    *infrallm.Client
-    PlanClient    *infrallm.Client
-    ExecuteClient *infrallm.Client
-    DeliverClient *infrallm.Client
+    ChatClient    *llmservice.Client
+    PlanClient    *llmservice.Client
+    ExecuteClient *llmservice.Client
+    DeliverClient *llmservice.Client
     ChunkEmitter  *newagentstream.ChunkEmitter
     StateStore    AgentStateStore
     ToolRegistry  *newagenttools.ToolRegistry
@@ -141,7 +141,7 @@ func (d *AgentGraphDeps) EnsureChunkEmitter() *newagentstream.ChunkEmitter {
 }

 // ResolveChatClient returns the model client usable in the chat stage.
-func (d *AgentGraphDeps) ResolveChatClient() *infrallm.Client {
+func (d *AgentGraphDeps) ResolveChatClient() *llmservice.Client {
     if d == nil {
         return nil
     }
@@ -154,7 +154,7 @@ func (d *AgentGraphDeps) ResolveChatClient() *infrallm.Client {
 // 1. Prefer an explicitly injected PlanClient.
 // 2. If none was injected, fall back to ChatClient.
 // 3. This lets a single client drive everything during the skeleton phase; strategist / worker can be split out later as needed.
-func (d *AgentGraphDeps) ResolvePlanClient() *infrallm.Client {
+func (d *AgentGraphDeps) ResolvePlanClient() *llmservice.Client {
     if d == nil {
         return nil
     }
@@ -165,7 +165,7 @@ func (d *AgentGraphDeps) ResolvePlanClient() *infrallm.Client {
 }

 // ResolveExecuteClient returns the model client usable in the execute stage.
-func (d *AgentGraphDeps) ResolveExecuteClient() *infrallm.Client {
+func (d *AgentGraphDeps) ResolveExecuteClient() *llmservice.Client {
     if d == nil {
         return nil
     }
@@ -179,7 +179,7 @@ func (d *AgentGraphDeps) ResolveExecuteClient() *infrallm.Client {
 }

 // ResolveDeliverClient returns the model client usable in the deliver stage.
-func (d *AgentGraphDeps) ResolveDeliverClient() *infrallm.Client {
+func (d *AgentGraphDeps) ResolveDeliverClient() *llmservice.Client {
     if d == nil {
         return nil
     }
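The notes before ResolvePlanClient spell out the fallback order, even though the diff shows only the signature. The body is probably as simple as this sketch (body assumed, not shown in the diff):

// Sketch of the fallback chain: explicit PlanClient first, else ChatClient.
func (d *AgentGraphDeps) ResolvePlanClient() *llmservice.Client {
    if d == nil {
        return nil
    }
    if d.PlanClient != nil {
        return d.PlanClient
    }
    return d.ChatClient // shared-client mode during the skeleton phase
}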

View File

@@ -11,11 +11,11 @@ import (
     "github.com/cloudwego/eino/schema"
     "github.com/google/uuid"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
     newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
     newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
     newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
 )

 const (
@@ -50,7 +50,7 @@ type ChatNodeInput struct {
     UserInput           string
     ConfirmAction       string
     ResumeInteractionID string
-    Client              *infrallm.Client
+    Client              *llmservice.Client
     ChunkEmitter        *newagentstream.ChunkEmitter
     CompactionStore     newagentmodel.CompactionStore // context-compaction persistence
     PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
@@ -107,9 +107,9 @@ func RunChatNode(ctx context.Context, input ChatNodeInput) error {
     })
     logNodeLLMContext(chatStageName, "routing", flowState, messages)

-    reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
+    reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
         Temperature: 0.7,
-        Thinking:    infrallm.ThinkingModeDisabled,
+        Thinking:    llmservice.ThinkingModeDisabled,
         Metadata: map[string]any{
             "stage": chatStageName,
             "phase": "routing",
@@ -172,7 +172,7 @@ func isExecuteLoopClosedMarker(msg *schema.Message) bool {
 // 3. On control-code parse timeout or abnormal stream end, fall back to plan.
 func streamAndDispatch(
     ctx context.Context,
-    reader infrallm.StreamReader,
+    reader llmservice.StreamReader,
     parser *newagentrouter.StreamRouteParser,
     input ChatNodeInput,
     emitter *newagentstream.ChunkEmitter,
@@ -292,7 +292,7 @@ func resolveEffectiveThinking(mode string, route newagentmodel.ChatRoute, decisi
 // 2. thinking=true: close the routing stream and make a second streaming call with thinking enabled.
 func handleDirectReplyStream(
     ctx context.Context,
-    reader infrallm.StreamReader,
+    reader llmservice.StreamReader,
     input ChatNodeInput,
     emitter *newagentstream.ChunkEmitter,
     conversationContext *newagentmodel.ConversationContext,
@@ -309,7 +309,7 @@ func handleDirectReplyStream(
 // handleThinkingReplyStream handles replies that need thinking: close the routing stream, then make a second streaming call with thinking enabled.
 func handleThinkingReplyStream(
     ctx context.Context,
-    reader infrallm.StreamReader,
+    reader llmservice.StreamReader,
     input ChatNodeInput,
     emitter *newagentstream.ChunkEmitter,
     conversationContext *newagentmodel.ConversationContext,
@@ -327,10 +327,10 @@ func handleThinkingReplyStream(
         StatusBlockID: chatStatusBlockID,
     })
     logNodeLLMContext(chatStageName, "direct_reply_thinking", flowState, deepMessages)

-    deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
+    deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
         Temperature: 0.5,
         MaxTokens:   2000,
-        Thinking:    infrallm.ThinkingModeEnabled,
+        Thinking:    llmservice.ThinkingModeEnabled,
         Metadata: map[string]any{
             "stage": chatStageName,
             "phase": "direct_reply_thinking",
@@ -363,7 +363,7 @@ func handleThinkingReplyStream(
 // handleDirectReplyContinueStream handles small talk without thinking: continue on the same stream.
 func handleDirectReplyContinueStream(
     ctx context.Context,
-    reader infrallm.StreamReader,
+    reader llmservice.StreamReader,
     input ChatNodeInput,
     emitter *newagentstream.ChunkEmitter,
     conversationContext *newagentmodel.ConversationContext,
@@ -419,7 +419,7 @@ func handleDirectReplyContinueStream(
 // 2. Push a lightweight status notification.
 // 3. Set the flow state and enter Execute or RoughBuild.
 func handleRouteExecuteStream(
-    reader infrallm.StreamReader,
+    reader llmservice.StreamReader,
     emitter *newagentstream.ChunkEmitter,
     flowState *newagentmodel.CommonState,
     decision *newagentmodel.ChatRoutingDecision,
@@ -674,7 +674,7 @@ func isExplicitNoRefineAfterRoughBuildRequest(userInput string) bool {
 // 4. Write the full reply into history.
 func handleDeepAnswerStream(
     ctx context.Context,
-    reader infrallm.StreamReader,
+    reader llmservice.StreamReader,
     input ChatNodeInput,
     emitter *newagentstream.ChunkEmitter,
     conversationContext *newagentmodel.ConversationContext,
@@ -685,9 +685,9 @@ func handleDeepAnswerStream(
     _ = reader.Close()

     // 2. Second streaming call.
-    thinkingOpt := infrallm.ThinkingModeDisabled
+    thinkingOpt := llmservice.ThinkingModeDisabled
     if effectiveThinking {
-        thinkingOpt = infrallm.ThinkingModeEnabled
+        thinkingOpt = llmservice.ThinkingModeEnabled
     }
     deepMessages := newagentprompt.BuildDeepAnswerMessages(flowState, conversationContext, input.UserInput)
     deepMessages = compactUnifiedMessagesIfNeeded(ctx, deepMessages, UnifiedCompactInput{
@@ -699,7 +699,7 @@ func handleDeepAnswerStream(
         StatusBlockID: chatStatusBlockID,
     })
     logNodeLLMContext(chatStageName, "deep_answer", flowState, deepMessages)

-    deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
+    deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
         Temperature: 0.5,
         MaxTokens:   2000,
         Thinking:    thinkingOpt,
@@ -741,7 +741,7 @@ func handleDeepAnswerStream(
 // handleRoutePlanStream handles the plan route: push a status confirmation, then set PhasePlanning.
 func handleRoutePlanStream(
-    reader infrallm.StreamReader,
+    reader llmservice.StreamReader,
     emitter *newagentstream.ChunkEmitter,
     flowState *newagentmodel.CommonState,
     effectiveThinking bool,
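All of these handlers receive the same llmservice.StreamReader. Only Stream(...) and Close() appear in the diff, so here is a sketch of a drain loop over an assumed Recv-style interface; drainToEmitter, Recv, chunk.Content, and the io.EOF convention are editor guesses, while EmitStreamAssistantText is named in the deliver-node comments:

// Sketch only; the chunk shape and end-of-stream convention are assumptions.
func drainToEmitter(
    ctx context.Context,
    client *llmservice.Client,
    messages []*schema.Message,
    emitter *newagentstream.ChunkEmitter,
) error {
    reader, err := client.Stream(ctx, messages, llmservice.GenerateOptions{Temperature: 0.7})
    if err != nil {
        return err
    }
    defer func() { _ = reader.Close() }()
    for {
        chunk, err := reader.Recv() // hypothetical method
        if errors.Is(err, io.EOF) {
            return nil // stream ended normally
        }
        if err != nil {
            return err // abnormal end; chat routing falls back to plan per the notes above
        }
        emitter.EmitStreamAssistantText(chunk.Content)
    }
}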

View File

@@ -9,10 +9,10 @@ import (
     "github.com/cloudwego/eino/schema"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
     newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
     newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
 )

 const (
@@ -31,7 +31,7 @@ const (
 type DeliverNodeInput struct {
     RuntimeState        *newagentmodel.AgentRuntimeState
     ConversationContext *newagentmodel.ConversationContext
-    Client              *infrallm.Client
+    Client              *llmservice.Client
     ChunkEmitter        *newagentstream.ChunkEmitter
     ThinkingEnabled     bool // whether thinking is enabled; injected from agent.thinking.deliver in config.yaml
     CompactionStore     newagentmodel.CompactionStore // context-compaction persistence
@@ -128,7 +128,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error {
 // - streamed: true means the text was already pushed to the frontend as a real stream via EmitStreamAssistantText; the caller must not pseudo-stream it again.
 func generateDeliverSummary(
     ctx context.Context,
-    client *infrallm.Client,
+    client *llmservice.Client,
     flowState *newagentmodel.CommonState,
     conversationContext *newagentmodel.ConversationContext,
     thinkingEnabled bool,
@@ -162,7 +162,7 @@ func generateDeliverSummary(
     reader, err := client.Stream(
         ctx,
         messages,
-        infrallm.GenerateOptions{
+        llmservice.GenerateOptions{
             Temperature: 0.5,
             MaxTokens:   800,
             Thinking:    resolveThinkingMode(thinkingEnabled),

View File

@@ -8,11 +8,11 @@ import (
     "log"
     "strings"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
     newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
     newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
     newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
     "github.com/cloudwego/eino/schema"
     "github.com/google/uuid"
 )
@@ -38,7 +38,7 @@ func collectExecuteDecisionFromLLM(
     reader, err := input.Client.Stream(
         ctx,
         messages,
-        infrallm.GenerateOptions{
+        llmservice.GenerateOptions{
             Temperature: 1.0,
             MaxTokens:   131072,
             Thinking:    newagentshared.ResolveThinkingMode(input.ThinkingEnabled),
@@ -123,7 +123,7 @@ func collectExecuteDecisionFromLLM(
         return nil, nil
     }

-    decision, parseErr := infrallm.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
+    decision, parseErr := llmservice.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
     if parseErr != nil {
         log.Printf(
             "[DEBUG] execute LLM JSON 解析失败 chat=%s round=%d json=%s raw=%s",

View File

@@ -5,12 +5,12 @@ import (
     "fmt"

     newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
     newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
     newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
     newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
     "github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
 )

 const (
@@ -29,7 +29,7 @@ type ExecuteNodeInput struct {
     RuntimeState        *newagentmodel.AgentRuntimeState
     ConversationContext *newagentmodel.ConversationContext
     UserInput           string
-    Client              *infrallm.Client
+    Client              *llmservice.Client
     ChunkEmitter        *newagentstream.ChunkEmitter
     ResumeNode          string
     ToolRegistry        *newagenttools.ToolRegistry

View File

@@ -10,11 +10,11 @@ import (
     "github.com/google/uuid"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
     newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
     newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
     newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
     "github.com/cloudwego/eino/schema"
 )
@@ -34,7 +34,7 @@ type PlanNodeInput struct {
     RuntimeState        *newagentmodel.AgentRuntimeState
     ConversationContext *newagentmodel.ConversationContext
     UserInput           string
-    Client              *infrallm.Client
+    Client              *llmservice.Client
     ChunkEmitter        *newagentstream.ChunkEmitter
     ResumeNode          string
     AlwaysExecute       bool // when true, auto-confirm after plan generation instead of entering the confirm node
@@ -87,7 +87,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
     reader, err := input.Client.Stream(
         ctx,
         messages,
-        infrallm.GenerateOptions{
+        llmservice.GenerateOptions{
             Temperature: 0.2,
             // Set the cap explicitly; relying on the framework default (4096) would truncate long decisions.
             // Note: the current model API caps max_tokens at 131072; anything above returns a 400.
@@ -149,7 +149,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
         return fmt.Errorf("规划解析失败,原始输出=%s", result.RawBuffer)
     }

-    decision, parseErr := infrallm.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
+    decision, parseErr := llmservice.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
     if parseErr != nil {
         return fmt.Errorf("规划决策 JSON 解析失败: %w (raw=%s)", parseErr, result.RawBuffer)
     }
@@ -390,9 +390,9 @@ func buildPinnedPlanText(steps []newagentmodel.PlanStep) string {
 // resolveThinkingMode maps the config boolean to the corresponding ThinkingMode.
 // Shared by the plan / execute / deliver nodes.
-func resolveThinkingMode(enabled bool) infrallm.ThinkingMode {
+func resolveThinkingMode(enabled bool) llmservice.ThinkingMode {
     if enabled {
-        return infrallm.ThinkingModeEnabled
+        return llmservice.ThinkingModeEnabled
     }
-    return infrallm.ThinkingModeDisabled
+    return llmservice.ThinkingModeDisabled
 }

View File

@@ -8,13 +8,13 @@ import (
     "strings"
     "time"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     taskmodel "github.com/LoveLosita/smartflow/backend/model"
     newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
     newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
     newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
     newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
     newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
     "github.com/cloudwego/eino/schema"
 )
@@ -30,7 +30,7 @@ type QuickTaskNodeInput struct {
     RuntimeState          *newagentmodel.AgentRuntimeState
     ConversationContext   *newagentmodel.ConversationContext
     UserInput             string
-    Client                *infrallm.Client
+    Client                *llmservice.Client
     ChunkEmitter          *newagentstream.ChunkEmitter
     QuickTaskDeps         newagentmodel.QuickTaskDeps
     PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
@@ -77,7 +77,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
     messages := newagentprompt.BuildQuickTaskMessagesSimple(input.UserInput)

     // 2. Call the LLM with real streaming.
-    reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
+    reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
         Temperature: 0.3,
         MaxTokens:   512,
     })
@@ -130,7 +130,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
     // Parse the JSON.
     log.Printf("[DEBUG] quick_task: LLM 原始决策 JSON chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
     var parseErr error
-    decision, parseErr = infrallm.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
+    decision, parseErr = llmservice.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
     if parseErr != nil {
         log.Printf("[DEBUG] quick_task: JSON 解析失败 chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
         if result.RawBuffer != "" {

View File

@@ -6,11 +6,11 @@ import (
     "fmt"
     "log"

-    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
     newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
     newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
     newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
     "github.com/LoveLosita/smartflow/backend/pkg"
+    llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
     "github.com/cloudwego/eino/schema"
 )
@@ -22,7 +22,7 @@ import (
 // 3. StageName and StatusBlockID distinguish the log source and the SSE status push.
 type UnifiedCompactInput struct {
     // Client is used to call the LLM to compact msg1/msg2.
-    Client *infrallm.Client
+    Client *llmservice.Client
     // CompactionStore persists the compaction summary and token statistics; persistence is skipped when nil.
     CompactionStore newagentmodel.CompactionStore
     // FlowState provides positioning info such as userID / chatID / roundUsed.

View File

@@ -4,7 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm" llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema" "github.com/cloudwego/eino/schema"
) )
@@ -24,7 +24,7 @@ const compactMsg1SystemPrompt = `你是一个对话压缩助手。你的任务
// existingSummary 不为空时表示已有旧摘要,需要合并压缩。 // existingSummary 不为空时表示已有旧摘要,需要合并压缩。
func CompactMsg1( func CompactMsg1(
ctx context.Context, ctx context.Context,
client *infrallm.Client, client *llmservice.Client,
historyText string, historyText string,
existingSummary string, existingSummary string,
) (string, error) { ) (string, error) {
@@ -49,7 +49,7 @@ func CompactMsg1(
schema.UserMessage(userContent), schema.UserMessage(userContent),
} }
result, err := client.GenerateText(ctx, messages, infrallm.GenerateOptions{ result, err := client.GenerateText(ctx, messages, llmservice.GenerateOptions{
MaxTokens: 4000, MaxTokens: 4000,
}) })
if err != nil { if err != nil {
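
A call-site sketch for the helper above, assuming the caller sits in the same package that defines CompactMsg1; passing the previous summary back in triggers the merge-compression path described in the doc comment:

package compact // sketch: assumes the package that defines CompactMsg1 above

import (
	"context"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// compactHistoryOnce shows the intended call shape: feed the previous summary
// back in and persist only the merged summary that comes out.
func compactHistoryOnce(ctx context.Context, client *llmservice.Client, historyText, oldSummary string) (string, error) {
	return CompactMsg1(ctx, client, historyText, oldSummary)
}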

View File

@@ -4,7 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm" llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema" "github.com/cloudwego/eino/schema"
) )
@@ -23,7 +23,7 @@ const compactMsg2SystemPrompt = `你是一个执行记录压缩助手。你的
// recentText 是保留的近期记录原文,不参与压缩。 // recentText 是保留的近期记录原文,不参与压缩。
func CompactMsg2( func CompactMsg2(
ctx context.Context, ctx context.Context,
client *infrallm.Client, client *llmservice.Client,
earlyLoopText string, earlyLoopText string,
) (string, error) { ) (string, error) {
userContent := fmt.Sprintf(`早期的 ReAct 执行记录: userContent := fmt.Sprintf(`早期的 ReAct 执行记录:
@@ -36,7 +36,7 @@ func CompactMsg2(
schema.UserMessage(userContent), schema.UserMessage(userContent),
} }
result, err := client.GenerateText(ctx, messages, infrallm.GenerateOptions{ result, err := client.GenerateText(ctx, messages, llmservice.GenerateOptions{
MaxTokens: 4000, MaxTokens: 4000,
}) })
if err != nil { if err != nil {

View File

@@ -26,7 +26,7 @@ var (
// StreamDecisionResult 描述解析器的最终输出状态。 // StreamDecisionResult 描述解析器的最终输出状态。
type StreamDecisionResult struct { type StreamDecisionResult struct {
// DecisionJSON 是标签内提取的完整 JSON 字符串。 // DecisionJSON 是标签内提取的完整 JSON 字符串。
// 调用方应使用 infrallm.ParseJSONObject[T] 将其解析为具体决策类型。 // 调用方应使用 llmservice.ParseJSONObject[T] 将其解析为具体决策类型。
DecisionJSON string DecisionJSON string
// BeforeText 是 <SMARTFLOW_DECISION> 标签之前的自然语言前言。 // BeforeText 是 <SMARTFLOW_DECISION> 标签之前的自然语言前言。
@@ -179,7 +179,7 @@ func (p *StreamDecisionParser) Result() *StreamDecisionResult {
} }
// extractJSONFromTag 从标签内文本中提取第一个完整 JSON 对象。 // extractJSONFromTag 从标签内文本中提取第一个完整 JSON 对象。
// 复用括号计数逻辑,与 infrallm.ExtractJSONObject 一致。 // 复用括号计数逻辑,与 llmservice.ExtractJSONObject 一致。
func extractJSONFromTag(text string) string { func extractJSONFromTag(text string) string {
clean := strings.TrimSpace(text) clean := strings.TrimSpace(text)
if clean == "" { if clean == "" {

View File

@@ -1,10 +1,10 @@
package newagentshared package newagentshared
import infrallm "github.com/LoveLosita/smartflow/backend/infra/llm" import llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
func ResolveThinkingMode(enabled bool) infrallm.ThinkingMode { func ResolveThinkingMode(enabled bool) llmservice.ThinkingMode {
if enabled { if enabled {
return infrallm.ThinkingModeEnabled return llmservice.ThinkingModeEnabled
} }
return infrallm.ThinkingModeDisabled return llmservice.ThinkingModeDisabled
} }

View File

@@ -6,11 +6,11 @@ import (
"fmt" "fmt"
"log" "log"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model" newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt" newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream" newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
"github.com/LoveLosita/smartflow/backend/pkg" "github.com/LoveLosita/smartflow/backend/pkg"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema" "github.com/cloudwego/eino/schema"
) )
@@ -22,7 +22,7 @@ import (
// 3. StageName 和 StatusBlockID 用于区分日志来源与 SSE 状态推送目标。 // 3. StageName 和 StatusBlockID 用于区分日志来源与 SSE 状态推送目标。
type UnifiedCompactInput struct { type UnifiedCompactInput struct {
// Client 用于调用 LLM 压缩 msg1/msg2。 // Client 用于调用 LLM 压缩 msg1/msg2。
Client *infrallm.Client Client *llmservice.Client
// CompactionStore 用于持久化压缩摘要和 token 统计,为 nil 时跳过持久化。 // CompactionStore 用于持久化压缩摘要和 token 统计,为 nil 时跳过持久化。
CompactionStore newagentmodel.CompactionStore CompactionStore newagentmodel.CompactionStore
// FlowState 提供 userID / conversationID / roundUsed 等定位信息。 // FlowState 提供 userID / conversationID / roundUsed 等定位信息。

View File

@@ -8,7 +8,7 @@ import (
"sync" "sync"
"time" "time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm" llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
) )
// PayloadEmitter 是真正向外层 SSE 管道写 chunk 的最小接口。 // PayloadEmitter 是真正向外层 SSE 管道写 chunk 的最小接口。
@@ -540,7 +540,7 @@ func (e *ChunkEmitter) EmitDone() error {
// 3. 不负责打开/关闭 StreamReader,调用方负责生命周期管理。 // 3. 不负责打开/关闭 StreamReader,调用方负责生命周期管理。
func (e *ChunkEmitter) EmitStreamAssistantText( func (e *ChunkEmitter) EmitStreamAssistantText(
ctx context.Context, ctx context.Context,
reader infrallm.StreamReader, reader llmservice.StreamReader,
blockID, stage string, blockID, stage string,
) (string, error) { ) (string, error) {
if e == nil || reader == nil { if e == nil || reader == nil {
@@ -598,7 +598,7 @@ func (e *ChunkEmitter) EmitStreamAssistantText(
// 用于只需展示思考过程而无需展示正文的场景。 // 用于只需展示思考过程而无需展示正文的场景。
func (e *ChunkEmitter) EmitStreamReasoningText( func (e *ChunkEmitter) EmitStreamReasoningText(
ctx context.Context, ctx context.Context,
reader infrallm.StreamReader, reader llmservice.StreamReader,
blockID, stage string, blockID, stage string,
) (string, error) { ) (string, error) {
if e == nil || reader == nil { if e == nil || reader == nil {
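
The emitter drains an llmservice.StreamReader chunk by chunk. A minimal consumption sketch matching the lifecycle note above (Recv until io.EOF; closing the reader stays with the caller):

package streamsketch

import (
	"errors"
	"io"
	"strings"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// drainStream reads until io.EOF and accumulates assistant text; it does
// not Close the reader, mirroring the contract documented above.
func drainStream(reader llmservice.StreamReader) (string, error) {
	var full strings.Builder
	for {
		msg, err := reader.Recv()
		if errors.Is(err, io.EOF) {
			return full.String(), nil
		}
		if err != nil {
			return full.String(), err
		}
		if msg != nil {
			full.WriteString(msg.Content)
		}
	}
}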

View File

@@ -5,9 +5,9 @@ import (
"sort" "sort"
"strings" "strings"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
"github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule" "github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
"github.com/LoveLosita/smartflow/backend/newAgent/tools/web" "github.com/LoveLosita/smartflow/backend/newAgent/tools/web"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
) )
// ToolHandler 约定所有工具的统一执行签名。 // ToolHandler 约定所有工具的统一执行签名。
@@ -32,7 +32,7 @@ type ToolSchemaEntry struct {
// 2. 某些依赖即便暂未使用也允许保留,避免业务层重新到处 new // 2. 某些依赖即便暂未使用也允许保留,避免业务层重新到处 new
// 3. 具体依赖缺失时由对应工具自行返回结构化失败结果。 // 3. 具体依赖缺失时由对应工具自行返回结构化失败结果。
type DefaultRegistryDeps struct { type DefaultRegistryDeps struct {
RAGRuntime infrarag.Runtime RAGRuntime ragservice.Runtime
// WebSearchProvider 为 nil 时web_search / web_fetch 仍会注册, // WebSearchProvider 为 nil 时web_search / web_fetch 仍会注册,
// 但 handler 会返回“暂未启用”的只读 observation,不阻断主流程。 // 但 handler 会返回“暂未启用”的只读 observation,不阻断主流程。
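
A sketch of the degradation pattern described above, with hypothetical searchProvider and observation types standing in for the real dependency and result shapes, which this diff does not show:

package toolsketch

import "context"

// searchProvider and observation are hypothetical stand-ins for the real
// web-search dependency and tool result types.
type searchProvider interface {
	Search(ctx context.Context, query string) (string, error)
}

type observation struct {
	OK     bool   `json:"ok"`
	Detail string `json:"detail"`
}

// makeWebSearchHandler mirrors the pattern above: register the tool
// unconditionally, and let a missing dependency degrade into a structured,
// read-only failure instead of blocking the agent loop.
func makeWebSearchHandler(provider searchProvider) func(context.Context, string) observation {
	return func(ctx context.Context, query string) observation {
		if provider == nil {
			return observation{OK: false, Detail: "web_search 暂未启用"}
		}
		result, err := provider.Search(ctx, query)
		if err != nil {
			return observation{OK: false, Detail: err.Error()}
		}
		return observation{OK: true, Detail: result}
	}
}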

View File

@@ -3,8 +3,8 @@ package service
import ( import (
"github.com/LoveLosita/smartflow/backend/dao" "github.com/LoveLosita/smartflow/backend/dao"
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox" outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
"github.com/LoveLosita/smartflow/backend/inits"
"github.com/LoveLosita/smartflow/backend/service/agentsvc" "github.com/LoveLosita/smartflow/backend/service/agentsvc"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
) )
// AgentService 是 service 层对 agentsvc.AgentService 的兼容别名。 // AgentService 是 service 层对 agentsvc.AgentService 的兼容别名。
@@ -20,7 +20,7 @@ type AgentService = agentsvc.AgentService
// 2) 主动调度 session DAO 也在这里显式透传,避免聊天入口再去回查全局单例; // 2) 主动调度 session DAO 也在这里显式透传,避免聊天入口再去回查全局单例;
// 3) 真实构造逻辑已下沉到 service/agentsvc 包。 // 3) 真实构造逻辑已下沉到 service/agentsvc 包。
func NewAgentService( func NewAgentService(
aiHub *inits.AIHub, llmService *llmservice.Service,
repo *dao.AgentDAO, repo *dao.AgentDAO,
taskRepo *dao.TaskDAO, taskRepo *dao.TaskDAO,
cacheDAO *dao.CacheDAO, cacheDAO *dao.CacheDAO,
@@ -29,7 +29,7 @@ func NewAgentService(
activeSessionDAO *dao.ActiveScheduleSessionDAO, activeSessionDAO *dao.ActiveScheduleSessionDAO,
eventPublisher outboxinfra.EventPublisher, eventPublisher outboxinfra.EventPublisher,
) *AgentService { ) *AgentService {
return agentsvc.NewAgentService(aiHub, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher) return agentsvc.NewAgentService(llmService, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
} }
// NewAgentServiceWithSchedule 在基础 AgentService 上注入排程依赖。 // NewAgentServiceWithSchedule 在基础 AgentService 上注入排程依赖。
@@ -39,7 +39,7 @@ func NewAgentService(
// 2) 排程依赖为可选:未注入时排程路由自动回退到普通聊天; // 2) 排程依赖为可选:未注入时排程路由自动回退到普通聊天;
// 3) 主动调度 session DAO 仍沿用统一构造注入,避免排程分支自己拼装仓储。 // 3) 主动调度 session DAO 仍沿用统一构造注入,避免排程分支自己拼装仓储。
func NewAgentServiceWithSchedule( func NewAgentServiceWithSchedule(
aiHub *inits.AIHub, llmService *llmservice.Service,
repo *dao.AgentDAO, repo *dao.AgentDAO,
taskRepo *dao.TaskDAO, taskRepo *dao.TaskDAO,
cacheDAO *dao.CacheDAO, cacheDAO *dao.CacheDAO,
@@ -50,7 +50,7 @@ func NewAgentServiceWithSchedule(
scheduleSvc *ScheduleService, scheduleSvc *ScheduleService,
taskSvc *TaskService, taskSvc *TaskService,
) *AgentService { ) *AgentService {
svc := agentsvc.NewAgentService(aiHub, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher) svc := agentsvc.NewAgentService(llmService, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
// 注入排程依赖:将 service 层方法包装为函数闭包,避免循环依赖。 // 注入排程依赖:将 service 层方法包装为函数闭包,避免循环依赖。
if scheduleSvc != nil { if scheduleSvc != nil {

View File

@@ -3,6 +3,7 @@ package agentsvc
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"errors"
"log" "log"
"strconv" "strconv"
"strings" "strings"
@@ -11,7 +12,6 @@ import (
"github.com/LoveLosita/smartflow/backend/conv" "github.com/LoveLosita/smartflow/backend/conv"
"github.com/LoveLosita/smartflow/backend/dao" "github.com/LoveLosita/smartflow/backend/dao"
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox" outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
"github.com/LoveLosita/smartflow/backend/inits"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model" memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe" memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
"github.com/LoveLosita/smartflow/backend/model" "github.com/LoveLosita/smartflow/backend/model"
@@ -20,13 +20,13 @@ import (
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools" newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
"github.com/LoveLosita/smartflow/backend/pkg" "github.com/LoveLosita/smartflow/backend/pkg"
eventsvc "github.com/LoveLosita/smartflow/backend/service/events" eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
"github.com/cloudwego/eino-ext/components/model/ark" llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema" "github.com/cloudwego/eino/schema"
"github.com/google/uuid" "github.com/google/uuid"
) )
type AgentService struct { type AgentService struct {
AIHub *inits.AIHub llmService *llmservice.Service
repo *dao.AgentDAO repo *dao.AgentDAO
taskRepo *dao.TaskDAO taskRepo *dao.TaskDAO
cacheDAO *dao.CacheDAO cacheDAO *dao.CacheDAO
@@ -75,7 +75,7 @@ type AgentService struct {
// 这里通过依赖注入把“模型、仓储、缓存、异步持久化通道”统一交给服务层管理, // 这里通过依赖注入把“模型、仓储、缓存、异步持久化通道”统一交给服务层管理,
// 便于后续在单测中替换实现,或在启动流程中按环境切换配置。 // 便于后续在单测中替换实现,或在启动流程中按环境切换配置。
func NewAgentService( func NewAgentService(
aiHub *inits.AIHub, llmService *llmservice.Service,
repo *dao.AgentDAO, repo *dao.AgentDAO,
taskRepo *dao.TaskDAO, taskRepo *dao.TaskDAO,
cacheDAO *dao.CacheDAO, cacheDAO *dao.CacheDAO,
@@ -90,7 +90,7 @@ func NewAgentService(
ensureTokenMeterCallbackRegistered() ensureTokenMeterCallbackRegistered()
return &AgentService{ return &AgentService{
AIHub: aiHub, llmService: llmService,
repo: repo, repo: repo,
taskRepo: taskRepo, taskRepo: taskRepo,
cacheDAO: cacheDAO, cacheDAO: cacheDAO,
@@ -123,8 +123,11 @@ func thinkingModeToBool(mode string) bool {
// 当前约定: // 当前约定:
// - 旧链路已全面切到 newAgent graph,这里仅作为 runNormalChatFlow 回退时的模型选择入口; // - 旧链路已全面切到 newAgent graph,这里仅作为 runNormalChatFlow 回退时的模型选择入口;
// - 统一返回 Pro 模型,旧 strategist 参数不再生效。 // - 统一返回 Pro 模型,旧 strategist 参数不再生效。
func (s *AgentService) pickChatModel(requestModel string) (*ark.ChatModel, string) { func (s *AgentService) pickChatModel(requestModel string) (*llmservice.Client, string) {
return s.AIHub.Pro, "pro" if s == nil || s.llmService == nil {
return nil, "pro"
}
return s.llmService.ProClient(), "pro"
} }
// PersistChatHistory 是 Agent 聊天链路唯一的“消息持久化入口”。 // PersistChatHistory 是 Agent 聊天链路唯一的“消息持久化入口”。
@@ -304,7 +307,7 @@ func pushErrNonBlocking(errChan chan error, err error) {
// 2) 开启随口记进度推送后,最终判定“非随口记”时回落到普通聊天。 // 2) 开启随口记进度推送后,最终判定“非随口记”时回落到普通聊天。
func (s *AgentService) runNormalChatFlow( func (s *AgentService) runNormalChatFlow(
ctx context.Context, ctx context.Context,
selectedModel *ark.ChatModel, selectedModel *llmservice.Client,
resolvedModelName string, resolvedModelName string,
userMessage string, userMessage string,
userPersisted bool, userPersisted bool,
@@ -365,6 +368,12 @@ func (s *AgentService) runNormalChatFlow(
} }
} }
// 6.0. 没有可用模型时,直接中止普通聊天,避免写入半截用户消息后没有后续回复。
if selectedModel == nil {
pushErrNonBlocking(errChan, errors.New("llm client is not ready"))
return
}
// 6. 执行真正的流式聊天。 // 6. 执行真正的流式聊天。
// fullText 用于后续写 Redis/持久化,outChan 用于把流片段实时推给前端。 // fullText 用于后续写 Redis/持久化,outChan 用于把流片段实时推给前端。
fullText, _, reasoningDurationSeconds, streamUsage, streamErr := s.streamChatFallback(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan, assistantReasoningStartedAt, userID, chatID) fullText, _, reasoningDurationSeconds, streamUsage, streamErr := s.streamChatFallback(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan, assistantReasoningStartedAt, userID, chatID)

View File

@@ -11,10 +11,8 @@ import (
"github.com/LoveLosita/smartflow/backend/model" "github.com/LoveLosita/smartflow/backend/model"
"github.com/LoveLosita/smartflow/backend/respond" "github.com/LoveLosita/smartflow/backend/respond"
eventsvc "github.com/LoveLosita/smartflow/backend/service/events" eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
"github.com/cloudwego/eino-ext/components/model/ark" llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
einoModel "github.com/cloudwego/eino/components/model"
"github.com/cloudwego/eino/schema" "github.com/cloudwego/eino/schema"
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
) )
const ( const (
@@ -253,11 +251,11 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
} }
// 2. 标题生成属于结构化短输出,关闭 thinking 并限制 tokens,降低延迟与发散。 // 2. 标题生成属于结构化短输出,关闭 thinking 并限制 tokens,降低延迟与发散。
resp, err := modelInst.Generate(ctx, messages, resp, err := modelInst.GenerateText(ctx, messages, llmservice.GenerateOptions{
ark.WithThinking(&arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}), Temperature: 0.2,
einoModel.WithTemperature(0.2), MaxTokens: 40,
einoModel.WithMaxTokens(40), Thinking: llmservice.ThinkingModeDisabled,
) })
if err != nil { if err != nil {
return "", 0, err return "", 0, err
} }
@@ -267,26 +265,26 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
// 2.1 标题链路的 token 从模型响应 usage 中提取;缺失则按 0 处理,不影响主流程。 // 2.1 标题链路的 token 从模型响应 usage 中提取;缺失则按 0 处理,不影响主流程。
titleTokens := 0 titleTokens := 0
if resp.ResponseMeta != nil && resp.ResponseMeta.Usage != nil { if resp.Usage != nil {
titleTokens = normalizeUsageTotal( titleTokens = normalizeUsageTotal(
resp.ResponseMeta.Usage.TotalTokens, resp.Usage.TotalTokens,
resp.ResponseMeta.Usage.PromptTokens, resp.Usage.PromptTokens,
resp.ResponseMeta.Usage.CompletionTokens, resp.Usage.CompletionTokens,
) )
} }
return normalizeConversationTitle(resp.Content), titleTokens, nil return normalizeConversationTitle(resp.Text), titleTokens, nil
} }
// pickTitleModel 选择用于标题生成的模型。 // pickTitleModel 选择用于标题生成的模型。
// 优先 Lite(成本低、速度快),Lite 不可用时回退 Pro。 // 优先 Lite(成本低、速度快),Lite 不可用时回退 Pro。
func (s *AgentService) pickTitleModel() *ark.ChatModel { func (s *AgentService) pickTitleModel() *llmservice.Client {
if s.AIHub == nil { if s == nil || s.llmService == nil {
return nil return nil
} }
if s.AIHub.Lite != nil { if client := s.llmService.LiteClient(); client != nil {
return s.AIHub.Lite return client
} }
return s.AIHub.Pro return s.llmService.ProClient()
} }
// buildConversationTitleUserPrompt 把消息历史拼成可读文本供模型总结。 // buildConversationTitleUserPrompt 把消息历史拼成可读文本供模型总结。

View File

@@ -8,7 +8,6 @@ import (
"strings" "strings"
"time" "time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentconv "github.com/LoveLosita/smartflow/backend/newAgent/conv" newagentconv "github.com/LoveLosita/smartflow/backend/newAgent/conv"
newagentgraph "github.com/LoveLosita/smartflow/backend/newAgent/graph" newagentgraph "github.com/LoveLosita/smartflow/backend/newAgent/graph"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model" newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
@@ -57,6 +56,11 @@ func (s *AgentService) runNewAgentGraph(
errChan chan error, errChan chan error,
) { ) {
requestCtx, _ := withRequestTokenMeter(ctx) requestCtx, _ := withRequestTokenMeter(ctx)
if s == nil || s.llmService == nil {
// 0. newAgent 主链强依赖 llm-service,装配漏传时直接返回错误,避免 nil receiver panic。
pushErrNonBlocking(errChan, errors.New("agent llm service is not initialized"))
return
}
// 1. 规范会话 ID 和模型选择。 // 1. 规范会话 ID 和模型选择。
chatID = normalizeConversationID(chatID) chatID = normalizeConversationID(chatID)
@@ -184,14 +188,15 @@ func (s *AgentService) runNewAgentGraph(
} }
graphRequest.Normalize() graphRequest.Normalize()
// 8. 适配 LLM clients(从 AIHub 的 ark.ChatModel 转换为 newAgent LLM Client)。 // 8. 适配 LLM clients(统一从 llm-service 取出 newAgent 图所需模型,不再直接碰 AIHub)。
// 8.1 Chat/Deliver 使用 Pro 模型:路由分流、闲聊、交付总结属于标准复杂度。 // 8.1 Chat/Deliver 使用 Pro 模型:路由分流、闲聊、交付总结属于标准复杂度。
// 8.2 Plan/Execute 使用 Max 模型:规划和 ReAct 循环需要深度推理能力。 // 8.2 Plan/Execute 使用 Max 模型:规划和 ReAct 循环需要深度推理能力。
chatClient := infrallm.WrapArkClient(s.AIHub.Pro) llmClients := s.llmService.NewAgentModelClients()
planClient := infrallm.WrapArkClient(s.AIHub.Max) chatClient := llmClients.Chat
executeClient := infrallm.WrapArkClient(s.AIHub.Max) planClient := llmClients.Plan
deliverClient := infrallm.WrapArkClient(s.AIHub.Pro) executeClient := llmClients.Execute
summaryClient := infrallm.WrapArkClient(s.AIHub.Lite) deliverClient := llmClients.Deliver
summaryClient := llmClients.Summary
// 9. 适配 SSE emitter。 // 9. 适配 SSE emitter。
sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan) sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan)
@@ -244,8 +249,8 @@ func (s *AgentService) runNewAgentGraph(
log.Printf("[ERROR] newAgent graph 执行失败 trace=%s chat=%s: %v", traceID, chatID, graphErr) log.Printf("[ERROR] newAgent graph 执行失败 trace=%s chat=%s: %v", traceID, chatID, graphErr)
pushErrNonBlocking(errChan, fmt.Errorf("graph 执行失败: %w", graphErr)) pushErrNonBlocking(errChan, fmt.Errorf("graph 执行失败: %w", graphErr))
// Graph 出错时回退普通聊天,保证可用性。回退使用 Pro 模型。 // Graph 出错时回退普通聊天,保证可用性。回退使用 llm-service 的 Pro 模型。
s.runNormalChatFlow(requestCtx, s.AIHub.Pro, resolvedModelName, userMessage, true, "", nil, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan) s.runNormalChatFlow(requestCtx, chatClient, resolvedModelName, userMessage, true, "", nil, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
return return
} }

View File

@@ -6,20 +6,18 @@ import (
"strings" "strings"
"time" "time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt" newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream" newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
"github.com/cloudwego/eino-ext/components/model/ark" llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema" "github.com/cloudwego/eino/schema"
"github.com/google/uuid" "github.com/google/uuid"
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
) )
// streamChatFallback 是 graph 执行失败时的降级流式聊天。 // streamChatFallback 是 graph 执行失败时的降级流式聊天。
// 内联了旧 agentchat.StreamChat 的核心逻辑,不再依赖 agent/ 包。 // 内联了旧 agentchat.StreamChat 的核心逻辑,不再依赖 agent/ 包。
func (s *AgentService) streamChatFallback( func (s *AgentService) streamChatFallback(
ctx context.Context, ctx context.Context,
llm *ark.ChatModel, llm *llmservice.Client,
modelName string, modelName string,
userInput string, userInput string,
ifThinking bool, ifThinking bool,
@@ -36,13 +34,6 @@ func (s *AgentService) streamChatFallback(
} }
messages = append(messages, schema.UserMessage(userInput)) messages = append(messages, schema.UserMessage(userInput))
var thinking *ark.Thinking
if ifThinking {
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeEnabled}
} else {
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}
}
if strings.TrimSpace(modelName) == "" { if strings.TrimSpace(modelName) == "" {
modelName = "smartflow-worker" modelName = "smartflow-worker"
} }
@@ -50,7 +41,11 @@ func (s *AgentService) streamChatFallback(
created := time.Now().Unix() created := time.Now().Unix()
firstChunk := true firstChunk := true
chunkEmitter := newagentstream.NewChunkEmitter(newagentstream.NewSSEPayloadEmitter(outChan), requestID, modelName, created) chunkEmitter := newagentstream.NewChunkEmitter(newagentstream.NewSSEPayloadEmitter(outChan), requestID, modelName, created)
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(infrallm.WrapArkClient(s.AIHub.Lite))) reasoningSummaryClient := s.llmService.LiteClient()
if reasoningSummaryClient == nil {
reasoningSummaryClient = s.llmService.ProClient()
}
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(reasoningSummaryClient))
chunkEmitter.SetExtraEventHook(func(extra *newagentstream.OpenAIChunkExtra) { chunkEmitter.SetExtraEventHook(func(extra *newagentstream.OpenAIChunkExtra) {
s.persistNewAgentTimelineExtraEvent(context.Background(), userID, chatID, extra) s.persistNewAgentTimelineExtraEvent(context.Background(), userID, chatID, extra)
}) })
@@ -75,7 +70,14 @@ func (s *AgentService) streamChatFallback(
} }
var reasoningEndAt *time.Time var reasoningEndAt *time.Time
reader, err := llm.Stream(ctx, messages, ark.WithThinking(thinking)) thinkingMode := llmservice.ThinkingModeDisabled
if ifThinking {
thinkingMode = llmservice.ThinkingModeEnabled
}
reader, err := llm.Stream(ctx, messages, llmservice.GenerateOptions{
Thinking: thinkingMode,
})
if err != nil { if err != nil {
return "", "", 0, nil, err return "", "", 0, nil, err
} }

View File

@@ -6,9 +6,9 @@ import (
"log" "log"
"strings" "strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt" newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream" newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
) )
const reasoningSummaryMaxTokens = 700 const reasoningSummaryMaxTokens = 700
@@ -24,7 +24,7 @@ type reasoningSummaryLLMResponse struct {
// 1. service 层负责选择模型与 prompt,stream 层只负责调度和闸门; // 1. service 层负责选择模型与 prompt,stream 层只负责调度和闸门;
// 2. 这里不持久化摘要,持久化统一走 ChunkEmitter 的 extra hook // 2. 这里不持久化摘要,持久化统一走 ChunkEmitter 的 extra hook
// 3. 摘要失败时返回 error由 ReasoningDigestor 吞掉并等待下一次水位线/Flush 兜底。 // 3. 摘要失败时返回 error由 ReasoningDigestor 吞掉并等待下一次水位线/Flush 兜底。
func (s *AgentService) makeReasoningSummaryFunc(client *infrallm.Client) newagentstream.ReasoningSummaryFunc { func (s *AgentService) makeReasoningSummaryFunc(client *llmservice.Client) newagentstream.ReasoningSummaryFunc {
if client == nil { if client == nil {
return nil return nil
} }
@@ -47,14 +47,14 @@ func (s *AgentService) makeReasoningSummaryFunc(client *infrallm.Client) newagen
DurationSeconds: input.DurationSeconds, DurationSeconds: input.DurationSeconds,
}) })
resp, rawResult, err := infrallm.GenerateJSON[reasoningSummaryLLMResponse]( resp, rawResult, err := llmservice.GenerateJSON[reasoningSummaryLLMResponse](
ctx, ctx,
client, client,
messages, messages,
infrallm.GenerateOptions{ llmservice.GenerateOptions{
Temperature: 0.1, Temperature: 0.1,
MaxTokens: reasoningSummaryMaxTokens, MaxTokens: reasoningSummaryMaxTokens,
Thinking: infrallm.ThinkingModeDisabled, Thinking: llmservice.ThinkingModeDisabled,
Metadata: map[string]any{ Metadata: map[string]any{
"stage": "reasoning_summary", "stage": "reasoning_summary",
"candidate_seq": input.CandidateSeq, "candidate_seq": input.CandidateSeq,
@@ -99,7 +99,7 @@ func limitReasoningDetailSummary(text string, maxRunes int) string {
return string(runes[:maxRunes]) return string(runes[:maxRunes])
} }
func truncateReasoningSummaryRaw(raw *infrallm.TextResult) string { func truncateReasoningSummaryRaw(raw *llmservice.TextResult) string {
if raw == nil { if raw == nil {
return "" return ""
} }

View File

@@ -6,16 +6,16 @@ import (
"github.com/LoveLosita/smartflow/backend/conv" "github.com/LoveLosita/smartflow/backend/conv"
"github.com/LoveLosita/smartflow/backend/dao" "github.com/LoveLosita/smartflow/backend/dao"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
"github.com/LoveLosita/smartflow/backend/model" "github.com/LoveLosita/smartflow/backend/model"
"github.com/LoveLosita/smartflow/backend/respond" "github.com/LoveLosita/smartflow/backend/respond"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
) )
type CourseService struct { type CourseService struct {
// 伸出手:准备接住 DAO // 伸出手:准备接住 DAO
courseDAO *dao.CourseDAO courseDAO *dao.CourseDAO
scheduleDAO *dao.ScheduleDAO scheduleDAO *dao.ScheduleDAO
courseImageResponsesClient *infrallm.ArkResponsesClient courseImageResponsesClient *llmservice.ArkResponsesClient
courseImageConfig CourseImageParseConfig courseImageConfig CourseImageParseConfig
courseImageModel string courseImageModel string
} }
@@ -24,7 +24,7 @@ type CourseService struct {
func NewCourseService( func NewCourseService(
courseDAO *dao.CourseDAO, courseDAO *dao.CourseDAO,
scheduleDAO *dao.ScheduleDAO, scheduleDAO *dao.ScheduleDAO,
courseImageResponsesClient *infrallm.ArkResponsesClient, courseImageResponsesClient *llmservice.ArkResponsesClient,
courseImageConfig CourseImageParseConfig, courseImageConfig CourseImageParseConfig,
courseImageModel string, courseImageModel string,
) *CourseService { ) *CourseService {

View File

@@ -8,16 +8,20 @@ import (
"strings" "strings"
"time" "time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
"github.com/LoveLosita/smartflow/backend/model" "github.com/LoveLosita/smartflow/backend/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
) )
// ParseCourseTableImage 使用 Ark SDK Responses 解析课程表图片。 // ParseCourseTableImage 使用 Ark SDK Responses 解析课程表图片。
func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.CourseImageParseRequest) (*model.CourseImageParseResponse, error) { func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.CourseImageParseRequest) (*model.CourseImageParseResponse, error) {
if ss == nil || ss.courseImageResponsesClient == nil { if ss == nil || ss.courseImageResponsesClient == nil {
modelName := ""
if ss != nil {
modelName = ss.courseImageModel
}
log.Printf( log.Printf(
"[COURSE_PARSE][SERVICE] parser unavailable model_name=%q filename=%q mime=%q bytes=%d", "[COURSE_PARSE][SERVICE] parser unavailable model_name=%q filename=%q mime=%q bytes=%d",
ss.courseImageModel, modelName,
req.Filename, req.Filename,
req.MIMEType, req.MIMEType,
len(req.ImageBytes), len(req.ImageBytes),
@@ -57,7 +61,7 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
base64Chars, base64Chars,
promptChars, promptChars,
base64Chars+promptChars+len(strings.TrimSpace(courseImageParseSystemPrompt)), base64Chars+promptChars+len(strings.TrimSpace(courseImageParseSystemPrompt)),
infrallm.ThinkingModeDisabled, llmservice.ThinkingModeDisabled,
courseImageParseTemperature, courseImageParseTemperature,
ss.courseImageConfig.MaxTokens, ss.courseImageConfig.MaxTokens,
"json_object", "json_object",
@@ -66,10 +70,10 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
// 1. 课程表图片识别输出体量大,显式透传 max_output_tokens,避免被默认值截断。 // 1. 课程表图片识别输出体量大,显式透传 max_output_tokens,避免被默认值截断。
// 2. text_format 固定为 json_object,降低输出混入解释文本导致解析失败的概率。 // 2. text_format 固定为 json_object,降低输出混入解释文本导致解析失败的概率。
// 3. thinking 显式关闭,优先保证课程导入链路稳定性。 // 3. thinking 显式关闭,优先保证课程导入链路稳定性。
draft, rawResult, err := infrallm.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, infrallm.ArkResponsesOptions{ draft, rawResult, err := llmservice.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, llmservice.ArkResponsesOptions{
Temperature: courseImageParseTemperature, Temperature: courseImageParseTemperature,
MaxOutputTokens: ss.courseImageConfig.MaxTokens, MaxOutputTokens: ss.courseImageConfig.MaxTokens,
Thinking: infrallm.ThinkingModeDisabled, Thinking: llmservice.ThinkingModeDisabled,
TextFormat: "json_object", TextFormat: "json_object",
}) })
if err != nil { if err != nil {
@@ -188,12 +192,12 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
return normalizedDraft, nil return normalizedDraft, nil
} }
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]infrallm.ArkResponsesMessage, int, int) { func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]llmservice.ArkResponsesMessage, int, int) {
userPrompt := fmt.Sprintf(courseImageParseUserPromptTemplate, req.Filename, req.MIMEType) userPrompt := fmt.Sprintf(courseImageParseUserPromptTemplate, req.Filename, req.MIMEType)
base64Data := base64.StdEncoding.EncodeToString(req.ImageBytes) base64Data := base64.StdEncoding.EncodeToString(req.ImageBytes)
imageDataURL := fmt.Sprintf("data:%s;base64,%s", req.MIMEType, base64Data) imageDataURL := fmt.Sprintf("data:%s;base64,%s", req.MIMEType, base64Data)
messages := []infrallm.ArkResponsesMessage{ messages := []llmservice.ArkResponsesMessage{
{ {
Role: "system", Role: "system",
Text: strings.TrimSpace(courseImageParseSystemPrompt), Text: strings.TrimSpace(courseImageParseSystemPrompt),
@@ -208,7 +212,7 @@ func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest)
return messages, len(base64Data), len(strings.TrimSpace(userPrompt)) return messages, len(base64Data), len(strings.TrimSpace(userPrompt))
} }
func isCourseImageOutputTruncated(rawResult *infrallm.ArkResponsesResult) bool { func isCourseImageOutputTruncated(rawResult *llmservice.ArkResponsesResult) bool {
if rawResult == nil { if rawResult == nil {
return false return false
} }
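
A reduced sketch of the message construction above. The image travels inline as a base64 data URL, so no upload step precedes the call; note ImageURL is an assumed field name, since the diff elides that part of the ArkResponsesMessage struct:

package coursesketch

import (
	"encoding/base64"
	"fmt"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// buildImageMessages inlines the image as a data URL. ImageURL is an assumed
// field name; the real struct's image fields are not shown in this diff.
func buildImageMessages(systemPrompt, userPrompt, mimeType string, imageBytes []byte) []llmservice.ArkResponsesMessage {
	dataURL := fmt.Sprintf("data:%s;base64,%s", mimeType, base64.StdEncoding.EncodeToString(imageBytes))
	return []llmservice.ArkResponsesMessage{
		{Role: "system", Text: systemPrompt},
		{Role: "user", Text: userPrompt, ImageURL: dataURL},
	}
}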

View File

@@ -1,7 +1,3 @@
// 过渡期统一 Ark 调用封装。
//
// 这里保留 CallArkText / CallArkJSON,方便暂时还直接持有 *ark.ChatModel 的调用点
// 逐步迁移到统一 Client。后续 memory 也可以直接复用这套中立层。
package llm package llm
import ( import (
@@ -15,12 +11,7 @@ import (
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model" arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
) )
// ArkCallOptions 是基于 ark.ChatModel 的通用调用选项 // ArkCallOptions 是直接调用 ark.ChatModel 时使用的通用入参
//
// 设计目的:
// 1. 先把 Ark 调用样板抽成公共层;
// 2. 再由 WrapArkClient 提供统一 Client
// 3. 让上层尽量只关注业务 prompt 和结构化结果。
type ArkCallOptions struct { type ArkCallOptions struct {
Temperature float64 Temperature float64
MaxTokens int MaxTokens int
@@ -28,12 +19,6 @@ type ArkCallOptions struct {
} }
// CallArkText 调用 ark 模型并返回纯文本。 // CallArkText 调用 ark 模型并返回纯文本。
//
// 职责边界:
// 1. 负责拼 system + user 两段消息;
// 2. 负责统一配置 thinking / temperature / maxTokens
// 3. 负责拦截空响应;
// 4. 不负责 JSON 解析,不负责业务字段校验。
func CallArkText(ctx context.Context, chatModel *ark.ChatModel, systemPrompt, userPrompt string, options ArkCallOptions) (string, error) { func CallArkText(ctx context.Context, chatModel *ark.ChatModel, systemPrompt, userPrompt string, options ArkCallOptions) (string, error) {
if chatModel == nil { if chatModel == nil {
return "", errors.New("ark model is nil") return "", errors.New("ark model is nil")
@@ -76,6 +61,7 @@ func buildArkOptions(options ArkCallOptions) []einoModel.Option {
if options.Thinking == ThinkingModeEnabled { if options.Thinking == ThinkingModeEnabled {
thinkingType = arkModel.ThinkingTypeEnabled thinkingType = arkModel.ThinkingTypeEnabled
} }
opts := []einoModel.Option{ opts := []einoModel.Option{
ark.WithThinking(&arkModel.Thinking{Type: thinkingType}), ark.WithThinking(&arkModel.Thinking{Type: thinkingType}),
einoModel.WithTemperature(float32(options.Temperature)), einoModel.WithTemperature(float32(options.Temperature)),

View File

@@ -12,17 +12,14 @@ import (
) )
// WrapArkClient 将 ark.ChatModel 适配为统一 Client。 // WrapArkClient 将 ark.ChatModel 适配为统一 Client。
// // 1. generateText 走 Generate供 GenerateJSON/GenerateText 使用。
// 职责边界: // 2. streamText 走 Stream供需要流式输出的场景使用。
// 1. generateText调用 ark.ChatModel.Generate非流式供 GenerateJSON 使用; // 3. 两条路径共用同一套参数转换逻辑。
// 2. streamText调用 ark.ChatModel.Stream流式供需要流式输出的场景使用
// 3. 两者共用同一套 options 转换。
func WrapArkClient(arkChatModel *ark.ChatModel) *Client { func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
if arkChatModel == nil { if arkChatModel == nil {
return nil return nil
} }
// 非流式文本生成,供 GenerateJSON / GenerateText 调用路径使用。
generateFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) { generateFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) {
arkOpts := buildArkStreamOptions(options) arkOpts := buildArkStreamOptions(options)
msg, err := arkChatModel.Generate(ctx, messages, arkOpts...) msg, err := arkChatModel.Generate(ctx, messages, arkOpts...)
@@ -47,7 +44,6 @@ func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
}, nil }, nil
} }
// 流式文本生成。
streamFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) { streamFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) {
arkOpts := buildArkStreamOptions(options) arkOpts := buildArkStreamOptions(options)
reader, err := arkChatModel.Stream(ctx, messages, arkOpts...) reader, err := arkChatModel.Stream(ctx, messages, arkOpts...)
@@ -60,11 +56,10 @@ func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
return NewClient(generateFunc, streamFunc) return NewClient(generateFunc, streamFunc)
} }
// buildArkStreamOptions 将统一 GenerateOptions 转换为 ark 的流式调用选项 // buildArkStreamOptions 将统一 GenerateOptions 转换为 ark 的流式调用参数
func buildArkStreamOptions(options GenerateOptions) []einoModel.Option { func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
thinkingEnabled := options.Thinking == ThinkingModeEnabled thinkingEnabled := options.Thinking == ThinkingModeEnabled
// Thinking
thinkingType := arkModel.ThinkingTypeDisabled thinkingType := arkModel.ThinkingTypeDisabled
if thinkingEnabled { if thinkingEnabled {
thinkingType = arkModel.ThinkingTypeEnabled thinkingType = arkModel.ThinkingTypeEnabled
@@ -73,16 +68,12 @@ func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
ark.WithThinking(&arkModel.Thinking{Type: thinkingType}), ark.WithThinking(&arkModel.Thinking{Type: thinkingType}),
} }
// Temperature:thinking 模型强制要求 temperature=1,否则 API 静默忽略 thinking。
if thinkingEnabled { if thinkingEnabled {
opts = append(opts, einoModel.WithTemperature(1.0)) opts = append(opts, einoModel.WithTemperature(1.0))
} else if options.Temperature > 0 { } else if options.Temperature > 0 {
opts = append(opts, einoModel.WithTemperature(float32(options.Temperature))) opts = append(opts, einoModel.WithTemperature(float32(options.Temperature)))
} }
// MaxTokens:thinking 模式下 thinking token 占用 max_tokens 预算,
// 调用方设定的值仅代表“期望输出长度”,实际预算需留出思考空间。
// 最低保障 16000,避免思考链被截断导致输出为空或非 JSON。
maxTokens := options.MaxTokens maxTokens := options.MaxTokens
if thinkingEnabled { if thinkingEnabled {
const minThinkingBudget = 16000 const minThinkingBudget = 16000
@@ -97,14 +88,12 @@ func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
return opts return opts
} }
// arkStreamReaderAdapter 适配 ark.ChatModel.Stream 返回的 reader。 // arkStreamReaderAdapter ark 的流式 reader 转成统一的 StreamReader 接口
// ark.Stream 返回 schema.StreamReader[*schema.Message],其 Close() 方法无返回值
// 而我们的 StreamReader 接口要求 Close() error
type arkStreamReaderAdapter struct { type arkStreamReaderAdapter struct {
reader *schema.StreamReader[*schema.Message] reader *schema.StreamReader[*schema.Message]
} }
// Recv 转发到 ark reader 的 Recv 方法 // Recv 转发到底层 reader。
func (r *arkStreamReaderAdapter) Recv() (*schema.Message, error) { func (r *arkStreamReaderAdapter) Recv() (*schema.Message, error) {
if r == nil || r.reader == nil { if r == nil || r.reader == nil {
return nil, io.EOF return nil, io.EOF
@@ -112,8 +101,7 @@ func (r *arkStreamReaderAdapter) Recv() (*schema.Message, error) {
return r.reader.Recv() return r.reader.Recv()
} }
// Close 转发到 ark reader 的 Close 方法 // Close 适配 ark reader 的 Close 行为
// ark 的 Close() 无返回值,我们适配为返回 nil
func (r *arkStreamReaderAdapter) Close() error { func (r *arkStreamReaderAdapter) Close() error {
if r == nil || r.reader == nil { if r == nil || r.reader == nil {
return nil return nil
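
A self-contained sketch of the two rules this file encodes for thinking calls; the real conversion emits eino model options rather than plain numbers:

package optsketch

// effectiveParams restates the rules above: thinking forces temperature to 1
// (Ark silently ignores thinking otherwise), and max_tokens gets a floor so
// the chain of thought does not crowd out the final answer.
func effectiveParams(thinkingEnabled bool, temperature float64, maxTokens int) (float64, int) {
	const minThinkingBudget = 16000
	if thinkingEnabled {
		temperature = 1.0
		if maxTokens < minThinkingBudget {
			maxTokens = minThinkingBudget
		}
	}
	return temperature, maxTokens
}

The floor is deliberately generous: the caller's MaxTokens expresses desired output length, while the thinking tokens are billed against the same budget.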

View File

@@ -11,11 +11,6 @@ import (
) )
// ArkResponsesMessage 描述一次 Responses 输入消息。 // ArkResponsesMessage 描述一次 Responses 输入消息。
//
// 职责边界:
// 1. 负责表达角色与多模态内容(文本/图片);
// 2. 不负责业务 prompt 生成;
// 3. 不负责输出 JSON 的字段校验。
type ArkResponsesMessage struct { type ArkResponsesMessage struct {
Role string Role string
Text string Text string
@@ -23,7 +18,7 @@ type ArkResponsesMessage struct {
ImageDetail string ImageDetail string
} }
// ArkResponsesOptions 描述 Responses 生成选项 // ArkResponsesOptions 描述 Responses 调用参数
type ArkResponsesOptions struct { type ArkResponsesOptions struct {
Model string Model string
Temperature float64 Temperature float64
@@ -32,14 +27,14 @@ type ArkResponsesOptions struct {
TextFormat string TextFormat string
} }
// ArkResponsesUsage 统一透传 token 使用量 // ArkResponsesUsage 统一转写 token usage
type ArkResponsesUsage struct { type ArkResponsesUsage struct {
InputTokens int64 InputTokens int64
OutputTokens int64 OutputTokens int64
TotalTokens int64 TotalTokens int64
} }
// ArkResponsesResult 是 Ark Responses 的统一输出结构。 // ArkResponsesResult 是 Responses 调用的统一输出结构。
type ArkResponsesResult struct { type ArkResponsesResult struct {
Text string Text string
Status string Status string
@@ -56,11 +51,9 @@ type ArkResponsesClient struct {
} }
// NewArkResponsesClient 创建 Ark SDK Responses 客户端。 // NewArkResponsesClient 创建 Ark SDK Responses 客户端。
// // 1. model 为空时直接返回 nil,表示这条能力没有启用。
// 说明: // 2. baseURL 为空时使用 SDK 默认地址。
// 1. model 为空时返回 nil,表示当前能力未启用; // 3. 这里只负责本地构造,不做连通性探测。
// 2. baseURL 为空时使用 SDK 默认地址;
// 3. 仅负责客户端创建,不做连通性探测。
func NewArkResponsesClient(apiKey string, baseURL string, model string) *ArkResponsesClient { func NewArkResponsesClient(apiKey string, baseURL string, model string) *ArkResponsesClient {
model = strings.TrimSpace(model) model = strings.TrimSpace(model)
if model == "" { if model == "" {
@@ -104,7 +97,7 @@ func (c *ArkResponsesClient) GenerateText(ctx context.Context, messages []ArkRes
return result, nil return result, nil
} }
// GenerateArkResponsesJSON 先调用 Responses再解析 JSON 结构体。 // GenerateArkResponsesJSON 先调用 Responses再解析 JSON 结构体。
func GenerateArkResponsesJSON[T any](ctx context.Context, client *ArkResponsesClient, messages []ArkResponsesMessage, options ArkResponsesOptions) (*T, *ArkResponsesResult, error) { func GenerateArkResponsesJSON[T any](ctx context.Context, client *ArkResponsesClient, messages []ArkResponsesMessage, options ArkResponsesOptions) (*T, *ArkResponsesResult, error) {
if client == nil { if client == nil {
return nil, nil, errors.New("ark responses client is not ready") return nil, nil, errors.New("ark responses client is not ready")

View File

@@ -9,12 +9,7 @@ import (
"github.com/cloudwego/eino/schema" "github.com/cloudwego/eino/schema"
) )
// ThinkingMode 描述单次模型调用对 thinking 的期望。 // ThinkingMode 描述单次模型调用对 thinking 的期望。
//
// 职责边界:
// 1. 这里只表达“调用方希望怎样配置推理模式”;
// 2. 不直接绑定某个具体模型厂商的参数枚举;
// 3. 真正如何把它翻译成 ark / OpenAI / 其他 provider 的 option,由后续适配层负责。
type ThinkingMode string type ThinkingMode string
const ( const (
@@ -23,12 +18,7 @@ const (
ThinkingModeDisabled ThinkingMode = "disabled" ThinkingModeDisabled ThinkingMode = "disabled"
) )
// GenerateOptions 统一模型调用选项 // GenerateOptions 统一收敛文本调用时最常见的公共参数
//
// 设计目的:
// 1. 先把“每个 skill / worker 都会反复传的参数”收敛成一份结构;
// 2. 让上层以后只表达“我要什么”,不再自己重复组织 option
// 3. 暂时不追求覆盖所有 provider 参数,先把最常用的几个公共位抽出来。
type GenerateOptions struct { type GenerateOptions struct {
Temperature float64 Temperature float64
MaxTokens int MaxTokens int
@@ -36,40 +26,32 @@ type GenerateOptions struct {
Metadata map[string]any Metadata map[string]any
} }
// TextResult 是统一文本生成结果 // TextResult 保存一次文本生成的最终结果和 usage
// // 1. Text 存放模型返回的纯文本。
// 职责边界: // 2. Usage 方便上层做统一统计。
// 1. Text 保存模型最终返回的纯文本; // 3. 这里不负责 JSON 解析,也不负责业务字段映射。
// 2. Usage 保存本次调用的 token 使用量,供后续统一统计;
// 3. 不负责 JSON 解析,不负责业务字段映射。
type TextResult struct { type TextResult struct {
Text string Text string
Usage *schema.TokenUsage Usage *schema.TokenUsage
// FinishReason 透传 provider 的停止原因,便于上层判断是否因 length 等原因被截断。
FinishReason string FinishReason string
} }
// StreamReader 抽象了“可逐块 Recv 的流式返回器”。 // StreamReader 抽象可以逐块读取消息的流式返回器。
//
// 之所以不直接依赖某个具体 SDK 的 reader 类型,是因为现在还处在骨架收敛阶段,
// 后续接 ark、OpenAI 兼容层还是别的 provider都可以往这个最小接口上适配。
type StreamReader interface { type StreamReader interface {
Recv() (*schema.Message, error) Recv() (*schema.Message, error)
Close() error Close() error
} }
// TextGenerateFunc 是文本生成的统一适配函数签名。 // TextGenerateFunc 定义统一文本生成函数签名。
type TextGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) type TextGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error)
// StreamGenerateFunc 是流式生成的统一适配函数签名。 // StreamGenerateFunc 定义统一流式生成函数签名。
type StreamGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) type StreamGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error)
// Client 是统一模型客户端门面。 // Client 是统一模型客户端门面。
// // 1. 只做最小输入校验和空响应防御。
// 职责边界: // 2. 不负责 prompt 拼装,也不负责业务 fallback。
// 1. 负责把调用方的“模型调用意图”收敛到统一入口; // 3. 具体 provider 的细节由上层适配器收敛进来。
// 2. 负责统一参数校验、空响应防御、GenerateJSON 复用;
// 3. 不负责写 prompt不负责业务 fallback也不直接持有具体厂商 SDK 细节。
type Client struct { type Client struct {
generateText TextGenerateFunc generateText TextGenerateFunc
streamText StreamGenerateFunc streamText StreamGenerateFunc
@@ -84,11 +66,6 @@ func NewClient(generateText TextGenerateFunc, streamText StreamGenerateFunc) *Cl
} }
// GenerateText 执行一次统一文本生成。 // GenerateText 执行一次统一文本生成。
//
// 职责边界:
// 1. 负责做最小必要的入参校验;
// 2. 负责统一拦截“模型空响应”这类公共问题;
// 3. 不负责业务 prompt 拼接,也不负责把文本再映射成业务结构。
func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) { func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) {
if c == nil || c.generateText == nil { if c == nil || c.generateText == nil {
return nil, errors.New("llm client is not ready") return nil, errors.New("llm client is not ready")
@@ -111,11 +88,6 @@ func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, o
} }
// GenerateJSON 先走统一文本生成,再走统一 JSON 解析。 // GenerateJSON 先走统一文本生成,再走统一 JSON 解析。
//
// 设计说明:
// 1. 把“Generate -> 提取 JSON -> 反序列化”这段公共链路收敛起来;
// 2. 上层只关心业务结构,不需要重复实现解析样板;
// 3. 返回 parsed + rawResult方便打点与回退时保留原文。
func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema.Message, options GenerateOptions) (*T, *TextResult, error) { func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema.Message, options GenerateOptions) (*T, *TextResult, error) {
result, err := client.GenerateText(ctx, messages, options) result, err := client.GenerateText(ctx, messages, options)
if err != nil { if err != nil {
@@ -130,11 +102,6 @@ func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema
} }
// Stream 打开统一流式调用入口。 // Stream 打开统一流式调用入口。
//
// 职责边界:
// 1. 只负责把“流式生成能力”暴露给上层;
// 2. 不负责 chunk 到 OpenAI 协议的转换,那部分应放在 stream/
// 3. 不负责累计全文,也不负责 token 统计落库。
func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) { func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) {
if c == nil || c.streamText == nil { if c == nil || c.streamText == nil {
return nil, errors.New("llm stream client is not ready") return nil, errors.New("llm stream client is not ready")
@@ -145,12 +112,7 @@ func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options
return c.streamText(ctx, messages, options) return c.streamText(ctx, messages, options)
} }
// BuildSystemUserMessages 构造最常见的(system + history + user)消息列表。 // BuildSystemUserMessages 构造最常见的 system + history + user 消息列表。
//
// 设计说明:
// 1. 先把最稳定的消息编排方式沉淀下来,减少各业务域样板代码;
// 2. 只做消息切片装配,不做 prompt 生成;
// 3. 供 agent / memory 等多个能力域复用。
func BuildSystemUserMessages(systemPrompt string, history []*schema.Message, userPrompt string) []*schema.Message { func BuildSystemUserMessages(systemPrompt string, history []*schema.Message, userPrompt string) []*schema.Message {
messages := make([]*schema.Message, 0, len(history)+2) messages := make([]*schema.Message, 0, len(history)+2)
if strings.TrimSpace(systemPrompt) != "" { if strings.TrimSpace(systemPrompt) != "" {
@@ -165,7 +127,7 @@ func BuildSystemUserMessages(systemPrompt string, history []*schema.Message, use
return messages return messages
} }
// CloneUsage 深拷贝 token usage避免后续多处累加时共享同一指针。 // CloneUsage 深拷贝 token usage避免后续累加时共享同一指针。
func CloneUsage(usage *schema.TokenUsage) *schema.TokenUsage { func CloneUsage(usage *schema.TokenUsage) *schema.TokenUsage {
if usage == nil { if usage == nil {
return nil return nil
@@ -174,12 +136,7 @@ func CloneUsage(usage *schema.TokenUsage) *schema.TokenUsage {
return &copied return &copied
} }
// MergeUsage 合并两段 usage。 // MergeUsage 合并两段 usage,取各字段更大的值作为累计结果
//
// 合并策略:
// 1. 对“同一次调用不同流分片”的场景,取更大值作为最终值;
// 2. 对“多次独立调用累计”的场景,应由上层显式做加法,而不是用这个函数;
// 3. 该函数只适用于“同一次调用的分块 usage 收敛”。
func MergeUsage(base *schema.TokenUsage, incoming *schema.TokenUsage) *schema.TokenUsage { func MergeUsage(base *schema.TokenUsage, incoming *schema.TokenUsage) *schema.TokenUsage {
if incoming == nil { if incoming == nil {
return CloneUsage(base) return CloneUsage(base)
@@ -207,7 +164,7 @@ func MergeUsage(base *schema.TokenUsage, incoming *schema.TokenUsage) *schema.To
return &merged return &merged
} }
// FormatEmptyResponseError 统一生成“模型返回空结果”的错误文案。 // FormatEmptyResponseError 统一模型空结果的错误文案。
func FormatEmptyResponseError(scene string) error { func FormatEmptyResponseError(scene string) error {
scene = strings.TrimSpace(scene) scene = strings.TrimSpace(scene)
if scene == "" { if scene == "" {

View File

@@ -7,12 +7,10 @@ import (
"strings" "strings"
) )
// ParseJSONObject 解析模型返回中的 JSON 对象。 // ParseJSONObject 解析模型返回内容中的 JSON 对象。
// // 1. 先剥离常见的 markdown 代码块包装。
// 职责边界: // 2. 再从混合文本里提取最外层 JSON 对象。
// 1. 负责处理“模型输出前后夹杂解释文字 / markdown 代码块”的常见情况; // 3. 这里只负责结构解析,不负责字段合法性校验。
// 2. 负责提取最外层 JSON object 并反序列化为目标结构;
// 3. 不负责业务字段合法性校验,应由上层调用方自行校验。
func ParseJSONObject[T any](raw string) (*T, error) { func ParseJSONObject[T any](raw string) (*T, error) {
clean := strings.TrimSpace(raw) clean := strings.TrimSpace(raw)
if clean == "" { if clean == "" {
@@ -31,12 +29,7 @@ func ParseJSONObject[T any](raw string) (*T, error) {
return &out, nil return &out, nil
} }
// ExtractJSONObject 从混合文本提取第一个完整 JSON 对象。 // ExtractJSONObject 从混合文本提取第一个完整 JSON 对象。
//
// 设计说明:
// 1. LLM 很容易输出“这里是结果:{...}”这种半结构化文本;
// 2. 这里用括号计数而不是正则,避免嵌套对象一多就误截断;
// 3. 目前只提取 object,不提取 array,因为当前契约基本都是对象。
func ExtractJSONObject(text string) string { func ExtractJSONObject(text string) string {
clean := trimMarkdownCodeFence(strings.TrimSpace(text)) clean := trimMarkdownCodeFence(strings.TrimSpace(text))
if clean == "" { if clean == "" {
@@ -94,9 +87,6 @@ func trimMarkdownCodeFence(text string) string {
return trimmed return trimmed
} }
// 1. 去掉首行 ```json / ```
// 2. 若末行是 ```,一并去掉;
// 3. 中间正文保持原样,避免破坏 JSON 的换行结构。
body := lines[1:] body := lines[1:]
if len(body) > 0 && strings.TrimSpace(body[len(body)-1]) == "```" { if len(body) > 0 && strings.TrimSpace(body[len(body)-1]) == "```" {
body = body[:len(body)-1] body = body[:len(body)-1]
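
Two inputs exercising the documented steps, a pure fenced block (fence trimming) and prose-wrapped output (brace counting); both reduce to the same object:

package extractsketch

import (
	"fmt"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

func extractExamples() {
	fenced := "```json\n{\"ok\":true}\n```"   // whole output is one fenced block
	mixed := "这里是结果:{\"ok\":true},请查收。" // object embedded in prose
	fmt.Println(llmservice.ExtractJSONObject(fenced)) // {"ok":true}
	fmt.Println(llmservice.ExtractJSONObject(mixed))  // {"ok":true}
}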

View File

@@ -0,0 +1,109 @@
package llm
import (
"strings"
"github.com/LoveLosita/smartflow/backend/inits"
)
// Service 只负责统一暴露已经构造好的模型客户端,不负责 prompt 和业务编排。
type Service struct {
liteClient *Client
proClient *Client
maxClient *Client
courseImageResponsesClient *ArkResponsesClient
}
// Options 描述 llm-service 初始化时需要接管的启动期依赖。
// 1. AIHub 仍然是当前进程内 Ark ChatModel 的来源,但服务层只保存统一 Client。
// 2. CourseImageResponsesClient 允许外部预先注入,便于测试或特殊启动路径复用。
// 3. 某个字段为空时不报错,直接保留 nil,交给上层继续走兼容降级。
type Options struct {
AIHub *inits.AIHub
APIKey string
BaseURL string
CourseVisionModel string
CourseImageResponsesClient *ArkResponsesClient
}
// AgentModelClients 一次性暴露 newAgent 图常用的模型分配结果。
type AgentModelClients struct {
Chat *Client
Plan *Client
Execute *Client
Deliver *Client
Summary *Client
}
// New 构造 llm-service。
// 1. 不返回 error,是为了让上层继续按 nil 客户端做逐步降级。
// 2. 只要 AIHub 已初始化,就把其中的 ChatModel 收敛成统一 Client。
// 3. 课程图片解析客户端在这里统一构建,避免业务层直接依赖 Responses SDK。
func New(opts Options) *Service {
svc := &Service{}
if opts.AIHub != nil {
svc.liteClient = WrapArkClient(opts.AIHub.Lite)
svc.proClient = WrapArkClient(opts.AIHub.Pro)
svc.maxClient = WrapArkClient(opts.AIHub.Max)
}
if opts.CourseImageResponsesClient != nil {
svc.courseImageResponsesClient = opts.CourseImageResponsesClient
} else {
apiKey := strings.TrimSpace(opts.APIKey)
baseURL := strings.TrimSpace(opts.BaseURL)
model := strings.TrimSpace(opts.CourseVisionModel)
if apiKey != "" && model != "" {
svc.courseImageResponsesClient = NewArkResponsesClient(apiKey, baseURL, model)
}
}
return svc
}
// LiteClient 返回低成本短输出模型客户端。
func (s *Service) LiteClient() *Client {
if s == nil {
return nil
}
return s.liteClient
}
// ProClient 返回默认复杂对话模型客户端。
func (s *Service) ProClient() *Client {
if s == nil {
return nil
}
return s.proClient
}
// MaxClient 返回深度推理模型客户端。
func (s *Service) MaxClient() *Client {
if s == nil {
return nil
}
return s.maxClient
}
// CourseImageResponsesClient 返回课程图片解析所用的 Responses 客户端。
func (s *Service) CourseImageResponsesClient() *ArkResponsesClient {
if s == nil {
return nil
}
return s.courseImageResponsesClient
}
// NewAgentModelClients 一次性返回 newAgent 图里常用的模型分配。
func (s *Service) NewAgentModelClients() AgentModelClients {
if s == nil {
return AgentModelClients{}
}
return AgentModelClients{
Chat: s.ProClient(),
Plan: s.MaxClient(),
Execute: s.MaxClient(),
Deliver: s.ProClient(),
Summary: s.LiteClient(),
}
}
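
A minimal boot-path sketch for the new file above, assuming an already-initialized *inits.AIHub; empty vision settings simply leave the course-image client nil, per the degradation rules documented in New:

package bootsketch

import (
	"github.com/LoveLosita/smartflow/backend/inits"
	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// wireLLMService constructs the service and pulls the newAgent model set in
// one place, so business code never touches AIHub directly.
func wireLLMService(hub *inits.AIHub, apiKey, baseURL, visionModel string) *llmservice.Service {
	svc := llmservice.New(llmservice.Options{
		AIHub:             hub,
		APIKey:            apiKey,
		BaseURL:           baseURL,
		CourseVisionModel: visionModel,
	})
	// One call hands the newAgent graph its whole model assignment:
	// Chat/Deliver -> Pro, Plan/Execute -> Max, Summary -> Lite.
	clients := svc.NewAgentModelClients()
	_ = clients.Plan // e.g. the deep-reasoning client for the planner node
	return svc
}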

View File

@@ -5,7 +5,7 @@ import (
"time" "time"
) )
// Runtime 是 RAG Infra 对业务侧暴露的唯一稳定方法面。 // Runtime 是 RAG service 对业务侧暴露的唯一稳定方法面。
// //
// 职责边界: // 职责边界:
// 1. 负责承接 memory/web 两类语料的统一入库与检索入口; // 1. 负责承接 memory/web 两类语料的统一入库与检索入口;

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"strings" "strings"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
) )
// TextChunker 是默认文本切块器。 // TextChunker 是默认文本切块器。

View File

@@ -21,7 +21,7 @@ const (
// ObserveEvent 描述一次统一观测事件。 // ObserveEvent 描述一次统一观测事件。
// //
// 职责边界: // 职责边界:
// 1. 只承载 RAG Infra 的结构化运行信息; // 1. 只承载 RAG service 的结构化运行信息;
// 2. 不绑定具体日志系统、指标系统或 tracing 实现; // 2. 不绑定具体日志系统、指标系统或 tracing 实现;
// 3. 字段内容应尽量稳定,便于后续统一接入全局观测平台。 // 3. 字段内容应尽量稳定,便于后续统一接入全局观测平台。
type ObserveEvent struct { type ObserveEvent struct {
@@ -31,7 +31,7 @@ type ObserveEvent struct {
Fields map[string]any Fields map[string]any
} }
// Observer 是 RAG Infra 的最小观测接口。 // Observer 是 RAG service 的最小观测接口。
// //
// 职责边界: // 职责边界:
// 1. 负责消费结构化事件; // 1. 负责消费结构化事件;

View File

@@ -7,7 +7,7 @@ import (
"strings" "strings"
"time" "time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
) )
const memoryCorpusName = "memory" const memoryCorpusName = "memory"

View File

@@ -7,7 +7,7 @@ import (
"strings" "strings"
"time" "time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
) )
const webCorpusName = "web" const webCorpusName = "web"

View File

@@ -7,12 +7,12 @@ import (
"os" "os"
"strings" "strings"
ragchunk "github.com/LoveLosita/smartflow/backend/infra/rag/chunk" ragchunk "github.com/LoveLosita/smartflow/backend/services/rag/chunk"
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config" ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
ragembed "github.com/LoveLosita/smartflow/backend/infra/rag/embed" ragembed "github.com/LoveLosita/smartflow/backend/services/rag/embed"
ragrerank "github.com/LoveLosita/smartflow/backend/infra/rag/rerank" ragrerank "github.com/LoveLosita/smartflow/backend/services/rag/rerank"
ragstore "github.com/LoveLosita/smartflow/backend/infra/rag/store" ragstore "github.com/LoveLosita/smartflow/backend/services/rag/store"
) )
// FactoryDeps 描述 Runtime 工厂所需的可选依赖。 // FactoryDeps 描述 Runtime 工厂所需的可选依赖。

View File

@@ -3,7 +3,7 @@ package rag
import ( import (
"log" "log"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
) )
// ObserveLevel 对外暴露统一观测等级别名,避免启动层直接依赖 core 细节。 // ObserveLevel 对外暴露统一观测等级别名,避免启动层直接依赖 core 细节。

View File

@@ -1,11 +1,11 @@
package rag package rag
import ( import (
"github.com/LoveLosita/smartflow/backend/infra/rag/chunk" "github.com/LoveLosita/smartflow/backend/services/rag/chunk"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
"github.com/LoveLosita/smartflow/backend/infra/rag/embed" "github.com/LoveLosita/smartflow/backend/services/rag/embed"
"github.com/LoveLosita/smartflow/backend/infra/rag/rerank" "github.com/LoveLosita/smartflow/backend/services/rag/rerank"
"github.com/LoveLosita/smartflow/backend/infra/rag/store" "github.com/LoveLosita/smartflow/backend/services/rag/store"
) )
// NewDefaultPipeline 构造默认可运行的 RAG Pipeline。 // NewDefaultPipeline 构造默认可运行的 RAG Pipeline。

View File

@@ -4,7 +4,7 @@ import (
"context" "context"
"errors" "errors"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
) )
// EinoReranker 是 Eino 重排器占位实现。 // EinoReranker 是 Eino 重排器占位实现。

View File

@@ -4,7 +4,7 @@ import (
"context" "context"
"sort" "sort"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
) )
// NoopReranker 是默认重排器(仅按原 score 排序)。 // NoopReranker 是默认重排器(仅按原 score 排序)。

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"strings" "strings"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
) )
// VectorRetriever 是通用检索器(embed + vector search)。 // VectorRetriever 是通用检索器(embed + vector search)。

View File

@@ -7,9 +7,9 @@ import (
"strings" "strings"
"time" "time"
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config" ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
"github.com/LoveLosita/smartflow/backend/infra/rag/corpus" "github.com/LoveLosita/smartflow/backend/services/rag/corpus"
) )
type runtime struct { type runtime struct {
@@ -343,7 +343,7 @@ func (r *runtime) recoverPublicPanic(
return return
} }
// 1. runtime 是 RAG Infra 对业务侧暴露的最终方法面,任何下层 panic 都不应再穿透到业务协程。 // 1. runtime 是 RAG service 对业务侧暴露的最终方法面,任何下层 panic 都不应再穿透到业务协程。
// 2. 这里统一把 panic 转成 error,并补一条结构化观测,方便继续排查是哪一层依赖失控。 // 2. 这里统一把 panic 转成 error,并补一条结构化观测,方便继续排查是哪一层依赖失控。
// 3. 保留 stack 是为了在“进程不崩”的前提下仍能定位根因,避免只剩一句 recovered 无法复盘。 // 3. 保留 stack 是为了在“进程不崩”的前提下仍能定位根因,避免只剩一句 recovered 无法复盘。
panicErr := fmt.Errorf("rag runtime panic recovered: corpus=%s operation=%s panic=%v", corpusName, operation, recovered) panicErr := fmt.Errorf("rag runtime panic recovered: corpus=%s operation=%s panic=%v", corpusName, operation, recovered)
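
The three comments above describe the standard recover-at-the-boundary idiom. A minimal sketch, assuming an illustrative wrapper name and signature:

```go
package ragruntime

import (
	"fmt"
	"runtime/debug"
)

// callSafely runs fn and, if anything below it panics, converts the panic
// into an error that keeps the stack trace instead of crashing the process.
// The function name and signature here are illustrative only.
func callSafely(corpus, operation string, fn func() error) (err error) {
	defer func() {
		if recovered := recover(); recovered != nil {
			err = fmt.Errorf("rag runtime panic recovered: corpus=%s operation=%s panic=%v\nstack:\n%s",
				corpus, operation, recovered, debug.Stack())
		}
	}()
	return fn()
}
```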

View File

@@ -0,0 +1,111 @@
package rag
import (
"context"
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
)
// Options describes the underlying runtime that rag-service needs to hold.
type Options struct {
Runtime Runtime
}
// Service is the unified entry point that rag-service exposes.
//
// Responsibility boundaries:
// 1. Holds the runtime and consolidates both capability lines, memory and web, at the service layer.
// 2. Completes config-based runtime assembly inside the service entry.
// 3. Does not carry chunk / embed / store implementation details directly; those sink into subpackages of the service tree.
type Service struct {
runtime Runtime
}
// New builds the service with the runtime supplied by the caller.
func New(opts Options) *Service {
return &Service{runtime: opts.Runtime}
}
// NewFromConfig builds a self-sufficient RAG service from the config and factory capabilities inside the service tree.
func NewFromConfig(ctx context.Context, cfg ragconfig.Config, deps FactoryDeps) (*Service, error) {
if !cfg.Enabled {
return New(Options{}), nil
}
runtime, err := NewRuntimeFromConfig(ctx, cfg, deps)
if err != nil {
return nil, err
}
return NewWithRuntime(runtime), nil
}
// Runtime returns the runtime currently held by the service.
func (s *Service) Runtime() Runtime {
if s == nil {
return nil
}
return s.runtime
}
// IngestMemory writes memory corpus data.
func (s *Service) IngestMemory(ctx context.Context, req MemoryIngestRequest) (*IngestResult, error) {
if s == nil || s.runtime == nil {
return nil, nil
}
return s.runtime.IngestMemory(ctx, req)
}
// RetrieveMemory retrieves from the memory corpus.
func (s *Service) RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest) (*RetrieveResult, error) {
if s == nil || s.runtime == nil {
return nil, nil
}
return s.runtime.RetrieveMemory(ctx, req)
}
// DeleteMemory deletes the specified memory documents.
func (s *Service) DeleteMemory(ctx context.Context, documentIDs []string) error {
if s == nil || s.runtime == nil {
return nil
}
if ctx == nil {
ctx = context.Background()
}
return s.runtime.DeleteMemory(ctx, documentIDs)
}
// IngestWeb writes web corpus data.
func (s *Service) IngestWeb(ctx context.Context, req WebIngestRequest) (*IngestResult, error) {
if s == nil || s.runtime == nil {
return nil, nil
}
return s.runtime.IngestWeb(ctx, req)
}
// RetrieveWeb retrieves from the web corpus.
func (s *Service) RetrieveWeb(ctx context.Context, req WebRetrieveRequest) (*RetrieveResult, error) {
if s == nil || s.runtime == nil {
return nil, nil
}
return s.runtime.RetrieveWeb(ctx, req)
}
// EnsureRuntime returns a runtime reference that can keep being passed downward.
func (s *Service) EnsureRuntime() Runtime {
if s == nil {
return nil
}
return s.runtime
}
// SetRuntime allows the runtime to be injected late, during the assembly phase.
func (s *Service) SetRuntime(runtime Runtime) {
if s == nil {
return
}
s.runtime = runtime
}
// NewWithRuntime builds the service with an explicit runtime.
func NewWithRuntime(runtime Runtime) *Service {
return New(Options{Runtime: runtime})
}
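
One behavior of this file is worth spelling out for callers: every method no-ops when no runtime is held. A short sketch of the caller-side consequence; the request is left at its zero value because its fields are not shown in this diff.

```go
package main

import (
	"context"
	"log"

	rag "github.com/LoveLosita/smartflow/backend/services/rag"
)

func main() {
	// A Service built without a runtime (RAG disabled) degrades to no-ops.
	svc := rag.New(rag.Options{})

	var req rag.MemoryRetrieveRequest // fill in query / top-k per the actual type definition
	res, err := svc.RetrieveMemory(context.Background(), req)
	if err != nil {
		log.Fatal(err)
	}
	if res == nil {
		// With no runtime the methods return (nil, nil), so callers must treat a
		// nil result as "RAG disabled" rather than as a failure.
		log.Println("rag disabled, skipping retrieval")
	}
}
```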

View File

@@ -10,7 +10,7 @@ import (
"sync" "sync"
"time" "time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
) )
// InMemoryVectorStore is the vector store implementation for local development. // InMemoryVectorStore is the vector store implementation for local development.

View File

@@ -14,7 +14,7 @@ import (
"sync" "sync"
"time" "time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core" "github.com/LoveLosita/smartflow/backend/services/rag/core"
) )
// MilvusConfig describes the Milvus REST store configuration. // MilvusConfig describes the Milvus REST store configuration.

View File

@@ -1,6 +1,6 @@
package store package store
import "github.com/LoveLosita/smartflow/backend/infra/rag/core" import "github.com/LoveLosita/smartflow/backend/services/rag/core"
// EnsureCompile statically verifies that implementations satisfy the interfaces. // EnsureCompile statically verifies that implementations satisfy the interfaces.
func EnsureCompile() { func EnsureCompile() {
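
`EnsureCompile` names the usual Go compile-time interface check. A self-contained sketch of that idiom, with `VectorStore` standing in for the interface actually defined in `core`:

```go
package store

// VectorStore stands in for the core interface; a minimal method for illustration.
type VectorStore interface {
	Close() error
}

type InMemoryVectorStore struct{}

func (*InMemoryVectorStore) Close() error { return nil }

// The blank-identifier assignment breaks the build as soon as the type stops
// satisfying the interface, at zero runtime cost.
var _ VectorStore = (*InMemoryVectorStore)(nil)
```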

View File

@@ -36,6 +36,8 @@
4. The consumer side is already isolated by per-service consumer groups; a single worker no longer eats every event. 4. The consumer side is already isolated by per-service consumer groups; a single worker no longer eats every event.
5. Assembly is still multi-worker inside the monolith process; workers will move out together with their services later, and stage 1 does not split processes directly. 5. Assembly is still multi-worker inside the monolith process; workers will move out together with their services later, and stage 1 does not split processes directly.
Stages 1.5 / 1.6 have also landed first: `backend/services/llm` and `backend/services/rag` are now the canonical entries, the old `.go` implementations under `backend/infra/llm` and `backend/infra/rag` have been deleted, and only the migration notes remain. Assembly is still multi-worker inside the monolith process; llm / rag have completed their service-level consolidation first and have not yet entered the gozero process split.
So the road ahead is not another round of outbox infrastructure; it is to move gozero services, DAO / model / worker code, and startup entries out one service boundary at a time, on top of this stage-1 baseline. So the road ahead is not another round of outbox infrastructure; it is to move gozero services, DAO / model / worker code, and startup entries out one service boundary at a time, on top of this stage-1 baseline.
--- ---
@@ -78,6 +80,8 @@ gozero services own the domain capabilities:
> Note: `agent` and `memory` can each stand as their own service and should no longer be written up as "shared capabilities"; `agent` is closer to an outward-facing conversation orchestration service, while `memory` is closer to its supporting/worker service. > Note: `agent` and `memory` can each stand as their own service and should no longer be written up as "shared capabilities"; `agent` is closer to an outward-facing conversation orchestration service, while `memory` is closer to its supporting/worker service.
> >
> Note: `llm-service` is extracted first as the repo-wide unified model exit, then `rag-service` as the retrieval infrastructure service; `rag-service` may only depend on `llm-service` and never depends back on concrete business services. > Note: `llm-service` is extracted first as the repo-wide unified model exit, then `rag-service` as the retrieval infrastructure service; `rag-service` may only depend on `llm-service` and never depends back on concrete business services.
>
> Current status: the `llm-service` / `rag-service` boundaries have first been realized as in-service modules under `backend/services/*`; calls are still assembled in the same process by `backend/cmd/start.go`, not as independent gozero processes (assembly sketch below).
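
A rough sketch of what that single-process assembly can look like. Apart from `rag.NewFromConfig` / `rag.FactoryDeps`, which appear in this commit, all helper, type, and field names below are assumptions:

```go
package startup

import (
	"context"

	rag "github.com/LoveLosita/smartflow/backend/services/rag"
	ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
)

// appConfig and buildLLMBackedDeps are hypothetical; they stand for whatever
// start.go actually uses to hand llm-service capabilities to rag-service.
type appConfig struct {
	RAG ragconfig.Config
}

func buildLLMBackedDeps() rag.FactoryDeps {
	// rag-service receives its embedding / rerank capability from llm-service
	// here, never the other way around.
	return rag.FactoryDeps{}
}

func assemble(ctx context.Context, cfg appConfig) (*rag.Service, error) {
	return rag.NewFromConfig(ctx, cfg.RAG, buildLLMBackedDeps())
}
```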
### 3.3 Event layer ### 3.3 Event layer
@@ -118,8 +122,8 @@ gozero services own the domain capabilities:
| --- | --- | --- | --- | | --- | --- | --- | --- |
| 0 | Semantic freeze and baseline confirmation (done) | Stage 0 is preserved as a historical baseline; revisit only when contracts change | `go test ./...` plus `api / worker / all` startup smoke | | 0 | Semantic freeze and baseline confirmation (done) | Stage 0 is preserved as a historical baseline; revisit only when contracts change | `go test ./...` plus `api / worker / all` startup smoke |
| 1 | Outbox v2 infrastructure (done, current baseline) | The stage-1 savepoint is in place: per-service outbox tables, topics, groups, and multi-worker assembly are wired up | Health checks, per-service outbox write/deliver/consume smoke, and Kafka group lag verification are done | | 1 | Outbox v2 infrastructure (done, current baseline) | The stage-1 savepoint is in place: per-service outbox tables, topics, groups, and multi-worker assembly are wired up | Health checks, per-service outbox write/deliver/consume smoke, and Kafka group lag verification are done |
| 1.5 | Extract llm-service first | Commit once unified model calls, provider routing, streaming output, and auditing are in place | course / active-scheduler / memory model-call smoke | | 1.5 | Extract llm-service first (done) | Done; `backend/services/llm` is the current canonical entry | `go test ./...` + course / active-scheduler / memory model-call smoke |
| 1.6 | Then extract rag-service | Commit once vectorization, recall, reranking, and retrieval run end to end | memory retrieve / rerank smoke | | 1.6 | Then extract rag-service (done) | Done; `backend/services/rag` is the current canonical entry | `go test ./...` + memory retrieve / rerank smoke |
| 2 | Split user/auth first | Commit once user routing, JWT issuance, and token quota governance stand alone | register/login/refresh/logout smoke + token quota regression | | 2 | Split user/auth first | Commit once user routing, JWT issuance, and token quota governance stand alone | register/login/refresh/logout smoke + token quota regression |
| 3 | Then split notification | Commit once the notification service can consume and retry independently | notification E2E smoke + worker-only smoke | | 3 | Then split notification | Commit once the notification service can consume and retry independently | notification E2E smoke + worker-only smoke |
| 4 | Then split active-scheduler | Commit once preview generation and the confirm chain run through the gozero service | dry-run / preview / confirm smoke | | 4 | Then split active-scheduler | Commit once preview generation and the confirm chain run through the gozero service | dry-run / preview / confirm smoke |
@@ -229,7 +233,7 @@ flowchart LR
--- ---
### 4.4 Stage 1.5: extract llm-service first ### 4.4 Stage 1.5: extract llm-service first (done)
Goals: Goals:
@@ -237,6 +241,12 @@ flowchart LR
2.`course``active-scheduler``memory``agent` 对模型调用的依赖先收口到统一服务。 2.`course``active-scheduler``memory``agent` 对模型调用的依赖先收口到统一服务。
3. 先把模型 provider 路由、流式输出、限流、审计这些共性收束起来,避免每个服务各写一份。 3. 先把模型 provider 路由、流式输出、限流、审计这些共性收束起来,避免每个服务各写一份。
Current status:
1. The code now lives under `backend/services/llm`.
2. The old `.go` implementation under `backend/infra/llm` has been deleted; only the migration notes remain (import sketch below).
3. Assembly still happens in one process via `backend/cmd/start.go`; no independent gozero service process has been introduced yet.
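
A hedged sketch of the call-site migration mentioned in item 2; only the two import paths are taken from this plan, and the blank import merely keeps the fragment compilable:

```go
package course // an example business-service call site

// Before stage 1.5 call sites imported the old infra path (now deleted):
//   import "github.com/LoveLosita/smartflow/backend/infra/llm"

// After stage 1.5 the canonical entry is the service tree:
import _ "github.com/LoveLosita/smartflow/backend/services/llm"
```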
What this step does: What this step does:
1. Change the model-call entry points currently scattered across business services into unified calls to `llm-service`. 1. Change the model-call entry points currently scattered across business services into unified calls to `llm-service`.
@@ -260,7 +270,7 @@ flowchart LR
--- ---
### 4.5 Stage 1.6: then extract rag-service ### 4.5 Stage 1.6: then extract rag-service (done)
Goals: Goals:
@@ -268,6 +278,12 @@ flowchart LR
2. Move vectorization, recall, reranking, and vector store reads/writes into a standalone service first. 2. Move vectorization, recall, reranking, and vector store reads/writes into a standalone service first.
3. Make it explicit that `rag-service` may only depend on `llm-service` for embedding / rerank, never back on business services. 3. Make it explicit that `rag-service` may only depend on `llm-service` for embedding / rerank, never back on business services.
Current status:
1. The code now lives under `backend/services/rag`.
2. The old `.go` implementation under `backend/infra/rag` has been deleted; only the migration notes remain.
3. Assembly still happens in one process via `backend/cmd/start.go`; no independent gozero service process has been introduced yet (dependency-direction sketch below).
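
A sketch of the dependency rule as it shows up in imports; the blank import again only keeps the fragment compilable:

```go
package rag // any package under backend/services/rag

// Allowed: rag-service depending on llm-service for embedding / rerank.
import _ "github.com/LoveLosita/smartflow/backend/services/llm"

// Ruled out: importing memory / agent or any other business service from here,
// which would invert the dependency direction the plan fixes.
```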
What this step does: What this step does:
1. Change the retrieval logic currently scattered across `memory` and `agent` into unified calls to `rag-service`. 1. Change the retrieval logic currently scattered across `memory` and `agent` into unified calls to `rag-service`.
@@ -474,6 +490,8 @@ flowchart LR
The recommended order going forward: The recommended order going forward:
Note: stages 1.5 / 1.6 are done; actual work can now start from stage 2.
1. Treat the stage-1 per-service outbox as the current baseline; do not go back to a shared outbox scheme. 1. Treat the stage-1 per-service outbox as the current baseline; do not go back to a shared outbox scheme.
2. Cut over llm-service first, extracting the unified model exit out of the business services. 2. Cut over llm-service first, extracting the unified model exit out of the business services.
3. Then cut over rag-service, extracting the retrieval infrastructure out of memory / agent. 3. Then cut over rag-service, extracting the retrieval infrastructure out of memory / agent.