Version: 0.9.65.dev.260503
后端: 1. 阶段 1.5/1.6 收口 llm-service / rag-service,统一模型出口与检索基础设施入口,清退 backend/infra/llm 与 backend/infra/rag 旧实现; 2. 同步更新相关调用链与微服务迁移计划文档
This commit is contained in:
@@ -11,7 +11,7 @@ import (
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/active_scheduler/ports"
|
||||
"github.com/LoveLosita/smartflow/backend/active_scheduler/trigger"
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const locateMaxTokens = 800
|
||||
@@ -24,7 +24,7 @@ const locateMaxTokens = 800
|
||||
// 3. 不创建新工具系统,也不直接产出 preview。
|
||||
type Service struct {
|
||||
reader ports.ScheduleReader
|
||||
client *infrallm.Client
|
||||
client *llmservice.Client
|
||||
clock func() time.Time
|
||||
logger *log.Logger
|
||||
}
|
||||
@@ -34,7 +34,7 @@ type Service struct {
|
||||
// 说明:
|
||||
// 1. reader / client 允许为空,方便在模型不可用或读模型暂时不可用时直接回退 ask_user。
|
||||
// 2. 真正的定位能力只在 Resolve 内部按需启用。
|
||||
func NewService(reader ports.ScheduleReader, client *infrallm.Client) *Service {
|
||||
func NewService(reader ports.ScheduleReader, client *llmservice.Client) *Service {
|
||||
return &Service{
|
||||
reader: reader,
|
||||
client: client,
|
||||
@@ -101,15 +101,15 @@ func (s *Service) Resolve(ctx context.Context, req Request) (Result, error) {
|
||||
return s.buildAskUserResult(req, "定位 prompt 构造失败"), nil
|
||||
}
|
||||
|
||||
messages := infrallm.BuildSystemUserMessages(strings.TrimSpace(locateSystemPrompt), nil, userPrompt)
|
||||
resp, rawResult, err := infrallm.GenerateJSON[llmResponse](
|
||||
messages := llmservice.BuildSystemUserMessages(strings.TrimSpace(locateSystemPrompt), nil, userPrompt)
|
||||
resp, rawResult, err := llmservice.GenerateJSON[llmResponse](
|
||||
ctx,
|
||||
s.client,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 0.1,
|
||||
MaxTokens: locateMaxTokens,
|
||||
Thinking: infrallm.ThinkingModeDisabled,
|
||||
Thinking: llmservice.ThinkingModeDisabled,
|
||||
Metadata: map[string]any{
|
||||
"stage": "active_scheduler_feedback_locate",
|
||||
"candidate_count": len(candidates),
|
||||
@@ -340,7 +340,7 @@ func cloneAndTrimStrings(values []string) []string {
|
||||
return result
|
||||
}
|
||||
|
||||
func truncateRaw(raw *infrallm.TextResult) string {
|
||||
func truncateRaw(raw *llmservice.TextResult) string {
|
||||
if raw == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/active_scheduler/candidate"
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const selectionMaxTokens = 1200
|
||||
@@ -22,7 +22,7 @@ const selectionMaxTokens = 1200
|
||||
// 2. LLM 失败、输出非法或选择不存在候选时,回退到后端 fallback candidate;
|
||||
// 3. 不写 preview、不发通知、不修改正式日程。
|
||||
type Service struct {
|
||||
client *infrallm.Client
|
||||
client *llmservice.Client
|
||||
clock func() time.Time
|
||||
logger *log.Logger
|
||||
}
|
||||
@@ -33,7 +33,7 @@ type Service struct {
|
||||
// 1. client 允许为空;为空时选择器只走确定性 fallback,便于本地测试和降级;
|
||||
// 2. 真正的模型接入在 cmd/start.go 中完成:aiHub.Pro -> llm.Client -> selection.Service;
|
||||
// 3. 选择器本身不持有模型配置,只表达本业务域的 prompt 和结果校验。
|
||||
func NewService(client *infrallm.Client) *Service {
|
||||
func NewService(client *llmservice.Client) *Service {
|
||||
return &Service{
|
||||
client: client,
|
||||
clock: time.Now,
|
||||
@@ -70,19 +70,19 @@ func (s *Service) Select(ctx context.Context, req SelectRequest) (Result, error)
|
||||
return buildFallbackResult(req, "选择器 prompt 构造失败: "+err.Error()), nil
|
||||
}
|
||||
|
||||
messages := infrallm.BuildSystemUserMessages(
|
||||
messages := llmservice.BuildSystemUserMessages(
|
||||
strings.TrimSpace(selectionSystemPrompt),
|
||||
nil,
|
||||
userPrompt,
|
||||
)
|
||||
resp, rawResult, err := infrallm.GenerateJSON[llmSelectionResponse](
|
||||
resp, rawResult, err := llmservice.GenerateJSON[llmSelectionResponse](
|
||||
ctx,
|
||||
s.client,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 0.1,
|
||||
MaxTokens: selectionMaxTokens,
|
||||
Thinking: infrallm.ThinkingModeDisabled,
|
||||
Thinking: llmservice.ThinkingModeDisabled,
|
||||
Metadata: map[string]any{
|
||||
"stage": "active_scheduler_select",
|
||||
"candidate_count": len(req.Candidates),
|
||||
@@ -275,7 +275,7 @@ func firstNonEmpty(values ...string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func truncateRaw(raw *infrallm.TextResult) string {
|
||||
func truncateRaw(raw *llmservice.TextResult) string {
|
||||
if raw == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -23,10 +23,7 @@ import (
|
||||
"github.com/LoveLosita/smartflow/backend/api"
|
||||
"github.com/LoveLosita/smartflow/backend/dao"
|
||||
kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka"
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
|
||||
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
|
||||
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
|
||||
"github.com/LoveLosita/smartflow/backend/inits"
|
||||
"github.com/LoveLosita/smartflow/backend/memory"
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
@@ -44,6 +41,9 @@ import (
|
||||
"github.com/LoveLosita/smartflow/backend/service"
|
||||
agentsvcsvc "github.com/LoveLosita/smartflow/backend/service/agentsvc"
|
||||
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
|
||||
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
|
||||
"github.com/go-redis/redis/v8"
|
||||
"github.com/spf13/viper"
|
||||
"gorm.io/gorm"
|
||||
@@ -167,17 +167,25 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
|
||||
return nil, fmt.Errorf("failed to initialize Eino: %w", err)
|
||||
}
|
||||
|
||||
ragRuntime, err := buildRAGRuntime(ctx)
|
||||
llmService := llmservice.New(llmservice.Options{
|
||||
AIHub: aiHub,
|
||||
APIKey: os.Getenv("ARK_API_KEY"),
|
||||
BaseURL: viper.GetString("agent.baseURL"),
|
||||
CourseVisionModel: viper.GetString("courseImport.visionModel"),
|
||||
})
|
||||
|
||||
ragService, err := buildRAGService(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ragRuntime := ragService.Runtime()
|
||||
|
||||
memoryCfg := memory.LoadConfigFromViper()
|
||||
memoryObserver := memoryobserve.NewLoggerObserver(log.Default())
|
||||
memoryMetrics := memoryobserve.NewMetricsRegistry()
|
||||
memoryModule := memory.NewModuleWithObserve(
|
||||
db,
|
||||
infrallm.WrapArkClient(aiHub.Pro),
|
||||
llmService.ProClient(),
|
||||
ragRuntime,
|
||||
memoryCfg,
|
||||
memory.ObserveDeps{
|
||||
@@ -208,11 +216,11 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
|
||||
userService := service.NewUserService(userRepo, cacheRepo)
|
||||
taskSv := service.NewTaskService(taskRepo, cacheRepo, eventBus)
|
||||
taskSv.SetActiveScheduleDAO(manager.ActiveSchedule)
|
||||
courseService := buildCourseService(courseRepo, scheduleRepo)
|
||||
courseService := buildCourseService(llmService, courseRepo, scheduleRepo)
|
||||
taskClassService := service.NewTaskClassService(taskClassRepo, cacheRepo, scheduleRepo, manager)
|
||||
scheduleService := service.NewScheduleService(scheduleRepo, userRepo, taskClassRepo, manager, cacheRepo)
|
||||
agentService := service.NewAgentServiceWithSchedule(
|
||||
aiHub,
|
||||
llmService,
|
||||
agentRepo,
|
||||
taskRepo,
|
||||
cacheRepo,
|
||||
@@ -251,7 +259,7 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
|
||||
}
|
||||
// 1. 主动调度选择器单独复用 Pro 模型,LLM 失败时由 selection 层显式回退到确定性候选;
|
||||
// 2. dry-run 与 selection 通过 graph runner 串起来,避免 trigger_pipeline 再拼第二套候选逻辑。
|
||||
activeScheduleLLMClient := infrallm.WrapArkClient(aiHub.Pro)
|
||||
activeScheduleLLMClient := llmService.ProClient()
|
||||
activeScheduleSelector := activesel.NewService(activeScheduleLLMClient)
|
||||
activeScheduleFeedbackLocator := activefeedbacklocate.NewService(activeReaders, activeScheduleLLMClient)
|
||||
activeScheduleGraphRunner, err := activegraph.NewRunner(activeScheduleDryRun.AsGraphDryRunFunc(), activeScheduleSelector)
|
||||
@@ -323,26 +331,26 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
|
||||
return runtime, nil
|
||||
}
|
||||
|
||||
func buildRAGRuntime(ctx context.Context) (infrarag.Runtime, error) {
|
||||
func buildRAGService(ctx context.Context) (*ragservice.Service, error) {
|
||||
ragCfg := ragconfig.LoadFromViper()
|
||||
if !ragCfg.Enabled {
|
||||
log.Println("RAG runtime is disabled")
|
||||
return nil, nil
|
||||
log.Println("RAG service is disabled")
|
||||
return ragservice.New(ragservice.Options{}), nil
|
||||
}
|
||||
|
||||
// 1. 当前项目尚未完成全局观测平台建设,这里先注入一层轻量 Observer;
|
||||
// 2. RAG 内部只依赖 Observer 接口,后续若全项目统一日志/指标系统,只需替换这里;
|
||||
// 3. 这样可以避免 RAG 单独自建一套割裂的日志基础设施。
|
||||
ragLogger := log.Default()
|
||||
ragRuntime, err := infrarag.NewRuntimeFromConfig(ctx, ragCfg, infrarag.FactoryDeps{
|
||||
ragService, err := ragservice.NewFromConfig(ctx, ragCfg, ragservice.FactoryDeps{
|
||||
Logger: ragLogger,
|
||||
Observer: infrarag.NewLoggerObserver(ragLogger),
|
||||
Observer: ragservice.NewLoggerObserver(ragLogger),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize RAG runtime: %w", err)
|
||||
return nil, fmt.Errorf("failed to initialize RAG service: %w", err)
|
||||
}
|
||||
log.Printf("RAG runtime initialized: store=%s embed=%s reranker=%s", ragCfg.Store, ragCfg.EmbedProvider, ragCfg.RerankerProvider)
|
||||
return ragRuntime, nil
|
||||
log.Printf("RAG service initialized: store=%s embed=%s reranker=%s", ragCfg.Store, ragCfg.EmbedProvider, ragCfg.RerankerProvider)
|
||||
return ragService, nil
|
||||
}
|
||||
|
||||
func buildEventBus(outboxRepo *outboxinfra.Repository) (eventsvc.OutboxBus, error) {
|
||||
@@ -369,12 +377,8 @@ func buildEventBus(outboxRepo *outboxinfra.Repository) (eventsvc.OutboxBus, erro
|
||||
return eventBus, nil
|
||||
}
|
||||
|
||||
func buildCourseService(courseRepo *dao.CourseDAO, scheduleRepo *dao.ScheduleDAO) *service.CourseService {
|
||||
courseImageResponsesClient := infrallm.NewArkResponsesClient(
|
||||
os.Getenv("ARK_API_KEY"),
|
||||
viper.GetString("agent.baseURL"),
|
||||
viper.GetString("courseImport.visionModel"),
|
||||
)
|
||||
func buildCourseService(llmService *llmservice.Service, courseRepo *dao.CourseDAO, scheduleRepo *dao.ScheduleDAO) *service.CourseService {
|
||||
courseImageResponsesClient := llmService.CourseImageResponsesClient()
|
||||
return service.NewCourseService(
|
||||
courseRepo,
|
||||
scheduleRepo,
|
||||
@@ -650,7 +654,7 @@ func containsString(values []string, target string) bool {
|
||||
|
||||
func configureAgentService(
|
||||
agentService *service.AgentService,
|
||||
ragRuntime infrarag.Runtime,
|
||||
ragRuntime ragservice.Runtime,
|
||||
agentRepo *dao.AgentDAO,
|
||||
cacheRepo *dao.CacheDAO,
|
||||
taskRepo *dao.TaskDAO,
|
||||
|
||||
@@ -5,8 +5,6 @@ import (
|
||||
"errors"
|
||||
"log"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
|
||||
memorycleanup "github.com/LoveLosita/smartflow/backend/memory/cleanup"
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
|
||||
@@ -16,6 +14,8 @@ import (
|
||||
memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
|
||||
memoryworker "github.com/LoveLosita/smartflow/backend/memory/worker"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@@ -28,8 +28,8 @@ import (
|
||||
type Module struct {
|
||||
db *gorm.DB
|
||||
cfg memorymodel.Config
|
||||
llmClient *infrallm.Client
|
||||
ragRuntime infrarag.Runtime
|
||||
llmClient *llmservice.Client
|
||||
ragRuntime ragservice.Runtime
|
||||
observer memoryobserve.Observer
|
||||
metrics memoryobserve.MetricsRecorder
|
||||
|
||||
@@ -64,15 +64,15 @@ func LoadConfigFromViper() memorymodel.Config {
|
||||
// 2. llmClient 允许为 nil,此时写入链路会自动回退到本地 fallback 抽取;
|
||||
// 3. ragRuntime 允许为 nil,此时读取/向量同步自动回退旧逻辑;
|
||||
// 4. 若后续接入统一 DI 容器,也应优先注册这个 Module,而不是把内部 repo/service 继续向外泄漏。
|
||||
func NewModule(db *gorm.DB, llmClient *infrallm.Client, ragRuntime infrarag.Runtime, cfg memorymodel.Config) *Module {
|
||||
func NewModule(db *gorm.DB, llmClient *llmservice.Client, ragRuntime ragservice.Runtime, cfg memorymodel.Config) *Module {
|
||||
return NewModuleWithObserve(db, llmClient, ragRuntime, cfg, ObserveDeps{})
|
||||
}
|
||||
|
||||
// NewModuleWithObserve 创建带观测依赖的 memory 模块门面。
|
||||
func NewModuleWithObserve(
|
||||
db *gorm.DB,
|
||||
llmClient *infrallm.Client,
|
||||
ragRuntime infrarag.Runtime,
|
||||
llmClient *llmservice.Client,
|
||||
ragRuntime ragservice.Runtime,
|
||||
cfg memorymodel.Config,
|
||||
deps ObserveDeps,
|
||||
) *Module {
|
||||
@@ -228,8 +228,8 @@ func (m *Module) StartWorker(ctx context.Context) {
|
||||
|
||||
func wireModule(
|
||||
db *gorm.DB,
|
||||
llmClient *infrallm.Client,
|
||||
ragRuntime infrarag.Runtime,
|
||||
llmClient *llmservice.Client,
|
||||
ragRuntime ragservice.Runtime,
|
||||
cfg memorymodel.Config,
|
||||
deps ObserveDeps,
|
||||
) *Module {
|
||||
|
||||
@@ -6,8 +6,8 @@ import (
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const defaultDecisionCompareMaxTokens = 600
|
||||
@@ -19,13 +19,13 @@ const defaultDecisionCompareMaxTokens = 600
|
||||
// 2. LLM 只输出 relation(关系类型),不输出 action,不输出 target ID;
|
||||
// 3. LLM 调用失败时返回 error,由上层决定是否视为 unrelated。
|
||||
type LLMDecisionOrchestrator struct {
|
||||
client *infrallm.Client
|
||||
client *llmservice.Client
|
||||
cfg memorymodel.Config
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// NewLLMDecisionOrchestrator 构造决策比对编排器。
|
||||
func NewLLMDecisionOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
|
||||
func NewLLMDecisionOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
|
||||
return &LLMDecisionOrchestrator{
|
||||
client: client,
|
||||
cfg: cfg,
|
||||
@@ -52,14 +52,14 @@ func (o *LLMDecisionOrchestrator) Compare(
|
||||
systemPrompt := buildDecisionCompareSystemPrompt()
|
||||
userPrompt := buildDecisionCompareUserPrompt(fact, candidate)
|
||||
|
||||
messages := infrallm.BuildSystemUserMessages(systemPrompt, nil, userPrompt)
|
||||
messages := llmservice.BuildSystemUserMessages(systemPrompt, nil, userPrompt)
|
||||
|
||||
// 2. 调用 LLM 做结构化输出,温度用低值保证判断稳定。
|
||||
resp, _, err := infrallm.GenerateJSON[decisionCompareResponse](
|
||||
resp, _, err := llmservice.GenerateJSON[decisionCompareResponse](
|
||||
ctx,
|
||||
o.client,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 0.1,
|
||||
MaxTokens: defaultDecisionCompareMaxTokens,
|
||||
Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking),
|
||||
@@ -127,9 +127,9 @@ func buildDecisionCompareUserPrompt(fact memorymodel.NormalizedFact, candidate m
|
||||
}
|
||||
|
||||
// resolveMemoryThinkingMode 根据配置布尔值返回对应的 ThinkingMode。
|
||||
func resolveMemoryThinkingMode(enabled bool) infrallm.ThinkingMode {
|
||||
func resolveMemoryThinkingMode(enabled bool) llmservice.ThinkingMode {
|
||||
if enabled {
|
||||
return infrallm.ThinkingModeEnabled
|
||||
return llmservice.ThinkingModeEnabled
|
||||
}
|
||||
return infrallm.ThinkingModeDisabled
|
||||
return llmservice.ThinkingModeDisabled
|
||||
}
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -24,13 +24,13 @@ const (
|
||||
// 2. 不负责落库,不负责任务状态机推进;
|
||||
// 3. 当 LLM 不可用或输出异常时,回退到保守的本地抽取,保证链路不完全断。
|
||||
type LLMWriteOrchestrator struct {
|
||||
client *infrallm.Client
|
||||
client *llmservice.Client
|
||||
cfg memorymodel.Config
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// NewLLMWriteOrchestrator 构造 LLM 版记忆写入编排器。
|
||||
func NewLLMWriteOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
|
||||
func NewLLMWriteOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
|
||||
return &LLMWriteOrchestrator{
|
||||
client: client,
|
||||
cfg: cfg,
|
||||
@@ -54,17 +54,17 @@ func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorym
|
||||
return fallbackNormalizedFacts(payload), nil
|
||||
}
|
||||
|
||||
messages := infrallm.BuildSystemUserMessages(
|
||||
messages := llmservice.BuildSystemUserMessages(
|
||||
buildMemoryExtractSystemPrompt(o.cfg.ExtractPrompt),
|
||||
nil,
|
||||
buildMemoryExtractUserPrompt(payload),
|
||||
)
|
||||
|
||||
resp, rawResult, err := infrallm.GenerateJSON[memoryExtractResponse](
|
||||
resp, rawResult, err := llmservice.GenerateJSON[memoryExtractResponse](
|
||||
ctx,
|
||||
o.client,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: clampTemperature(o.cfg.LLMTemperature),
|
||||
MaxTokens: defaultMemoryExtractMaxTokens,
|
||||
Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking),
|
||||
@@ -319,7 +319,7 @@ func isSkipIntent(intent string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func truncateForLog(raw *infrallm.TextResult) string {
|
||||
func truncateForLog(raw *llmservice.TextResult) string {
|
||||
if raw == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -3,8 +3,8 @@ package service
|
||||
import (
|
||||
"time"
|
||||
|
||||
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
|
||||
)
|
||||
|
||||
// buildReadScopedItemQuery 构造读侧统一使用的 MySQL 查询条件。
|
||||
@@ -53,8 +53,8 @@ func buildReadScopedRAGRequest(
|
||||
req memorymodel.RetrieveRequest,
|
||||
topK int,
|
||||
threshold float64,
|
||||
) infrarag.MemoryRetrieveRequest {
|
||||
return infrarag.MemoryRetrieveRequest{
|
||||
) ragservice.MemoryRetrieveRequest {
|
||||
return ragservice.MemoryRetrieveRequest{
|
||||
Query: req.Query,
|
||||
TopK: topK,
|
||||
Threshold: threshold,
|
||||
|
||||
@@ -8,12 +8,12 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
|
||||
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
|
||||
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -30,7 +30,7 @@ const (
|
||||
type ReadService struct {
|
||||
itemRepo *memoryrepo.ItemRepo
|
||||
settingsRepo *memoryrepo.SettingsRepo
|
||||
ragRuntime infrarag.Runtime
|
||||
ragRuntime ragservice.Runtime
|
||||
cfg memorymodel.Config
|
||||
observer memoryobserve.Observer
|
||||
metrics memoryobserve.MetricsRecorder
|
||||
@@ -57,7 +57,7 @@ type semanticRetrieveTelemetry struct {
|
||||
func NewReadService(
|
||||
itemRepo *memoryrepo.ItemRepo,
|
||||
settingsRepo *memoryrepo.SettingsRepo,
|
||||
ragRuntime infrarag.Runtime,
|
||||
ragRuntime ragservice.Runtime,
|
||||
cfg memorymodel.Config,
|
||||
observer memoryobserve.Observer,
|
||||
metrics memoryobserve.MetricsRecorder,
|
||||
@@ -347,7 +347,7 @@ func collectMemoryIDs(items []model.MemoryItem) []int64 {
|
||||
return ids
|
||||
}
|
||||
|
||||
func buildMemoryDTOFromRetrieveHit(hit infrarag.RetrieveHit) (memorymodel.ItemDTO, int64) {
|
||||
func buildMemoryDTOFromRetrieveHit(hit ragservice.RetrieveHit) (memorymodel.ItemDTO, int64) {
|
||||
memoryID := parseMemoryIDFromDocumentID(hit.DocumentID)
|
||||
metadata := hit.Metadata
|
||||
content := strings.TrimSpace(hit.Text)
|
||||
|
||||
@@ -6,10 +6,10 @@ import (
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
|
||||
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
|
||||
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
|
||||
)
|
||||
|
||||
// Syncer 负责 memory_items 与向量库之间的最小桥接。
|
||||
@@ -19,7 +19,7 @@ import (
|
||||
// 2. 不负责决定哪些记忆该写、该删、该恢复,这些决策仍由上游 service/worker/cleanup 控制;
|
||||
// 3. 同步失败时只回写 vector_status 并打观测,不反向回滚业务事务,避免把在线链路拖成强依赖。
|
||||
type Syncer struct {
|
||||
ragRuntime infrarag.Runtime
|
||||
ragRuntime ragservice.Runtime
|
||||
itemRepo *memoryrepo.ItemRepo
|
||||
observer memoryobserve.Observer
|
||||
metrics memoryobserve.MetricsRecorder
|
||||
@@ -27,7 +27,7 @@ type Syncer struct {
|
||||
}
|
||||
|
||||
func NewSyncer(
|
||||
ragRuntime infrarag.Runtime,
|
||||
ragRuntime ragservice.Runtime,
|
||||
itemRepo *memoryrepo.ItemRepo,
|
||||
observer memoryobserve.Observer,
|
||||
metrics memoryobserve.MetricsRecorder,
|
||||
@@ -53,9 +53,9 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
|
||||
return
|
||||
}
|
||||
|
||||
requestItems := make([]infrarag.MemoryIngestItem, 0, len(items))
|
||||
requestItems := make([]ragservice.MemoryIngestItem, 0, len(items))
|
||||
for _, item := range items {
|
||||
requestItems = append(requestItems, infrarag.MemoryIngestItem{
|
||||
requestItems = append(requestItems, ragservice.MemoryIngestItem{
|
||||
MemoryID: item.ID,
|
||||
UserID: item.UserID,
|
||||
ConversationID: strValue(item.ConversationID),
|
||||
@@ -76,7 +76,7 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
|
||||
|
||||
result, err := s.ragRuntime.IngestMemory(memoryobserve.WithFields(ctx, map[string]any{
|
||||
"trace_id": traceID,
|
||||
}), infrarag.MemoryIngestRequest{
|
||||
}), ragservice.MemoryIngestRequest{
|
||||
TraceID: traceID,
|
||||
Action: "add",
|
||||
Items: requestItems,
|
||||
|
||||
@@ -4,11 +4,11 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
|
||||
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@@ -192,7 +192,7 @@ func (r *Runner) recallCandidates(
|
||||
) candidateRecallResult {
|
||||
// 1. 优先使用 Milvus 向量语义召回。
|
||||
if r.ragRuntime != nil {
|
||||
retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, infrarag.MemoryRetrieveRequest{
|
||||
retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, ragservice.MemoryRetrieveRequest{
|
||||
Query: fact.Content,
|
||||
TopK: r.cfg.DecisionCandidateTopK,
|
||||
Threshold: r.cfg.DecisionCandidateMinScore,
|
||||
@@ -235,7 +235,7 @@ func (r *Runner) recallCandidates(
|
||||
// 1. 从 DocumentID(格式 memory:{id})解析出 mysql_id;
|
||||
// 2. 从 metadata 提取 title 和 memory_type;
|
||||
// 3. 跳过无法解析 DocumentID 的结果。
|
||||
func (r *Runner) buildCandidatesFromRAG(hits []infrarag.RetrieveHit) []memorymodel.CandidateSnapshot {
|
||||
func (r *Runner) buildCandidatesFromRAG(hits []ragservice.RetrieveHit) []memorymodel.CandidateSnapshot {
|
||||
candidates := make([]memorymodel.CandidateSnapshot, 0, len(hits))
|
||||
for _, hit := range hits {
|
||||
memoryID := parseMemoryID(hit.DocumentID)
|
||||
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
|
||||
memoryorchestrator "github.com/LoveLosita/smartflow/backend/memory/orchestrator"
|
||||
@@ -17,6 +16,7 @@ import (
|
||||
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
|
||||
memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@@ -41,7 +41,7 @@ type Runner struct {
|
||||
auditRepo *memoryrepo.AuditRepo
|
||||
settingsRepo *memoryrepo.SettingsRepo
|
||||
extractor Extractor
|
||||
ragRuntime infrarag.Runtime
|
||||
ragRuntime ragservice.Runtime
|
||||
logger *log.Logger
|
||||
vectorSyncer *memoryvectorsync.Syncer
|
||||
observer memoryobserve.Observer
|
||||
@@ -63,7 +63,7 @@ func NewRunner(
|
||||
auditRepo *memoryrepo.AuditRepo,
|
||||
settingsRepo *memoryrepo.SettingsRepo,
|
||||
extractor Extractor,
|
||||
ragRuntime infrarag.Runtime,
|
||||
ragRuntime ragservice.Runtime,
|
||||
cfg memorymodel.Config,
|
||||
decisionOrchestrator *memoryorchestrator.LLMDecisionOrchestrator,
|
||||
vectorSyncer *memoryvectorsync.Syncer,
|
||||
|
||||
@@ -5,10 +5,10 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
|
||||
schedule "github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -71,10 +71,10 @@ type PersistVisibleMessageFunc func(ctx context.Context, state *CommonState, msg
|
||||
// 2. Chat/Plan/Execute/Deliver 允许分别挂不同 client,但也允许先复用同一个 client;
|
||||
// 3. ChunkEmitter 统一承接阶段提示、正文、工具事件、确认请求等 SSE 输出。
|
||||
type AgentGraphDeps struct {
|
||||
ChatClient *infrallm.Client
|
||||
PlanClient *infrallm.Client
|
||||
ExecuteClient *infrallm.Client
|
||||
DeliverClient *infrallm.Client
|
||||
ChatClient *llmservice.Client
|
||||
PlanClient *llmservice.Client
|
||||
ExecuteClient *llmservice.Client
|
||||
DeliverClient *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
StateStore AgentStateStore
|
||||
ToolRegistry *newagenttools.ToolRegistry
|
||||
@@ -141,7 +141,7 @@ func (d *AgentGraphDeps) EnsureChunkEmitter() *newagentstream.ChunkEmitter {
|
||||
}
|
||||
|
||||
// ResolveChatClient 返回 chat 阶段可用的模型客户端。
|
||||
func (d *AgentGraphDeps) ResolveChatClient() *infrallm.Client {
|
||||
func (d *AgentGraphDeps) ResolveChatClient() *llmservice.Client {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -154,7 +154,7 @@ func (d *AgentGraphDeps) ResolveChatClient() *infrallm.Client {
|
||||
// 1. 优先使用显式注入的 PlanClient;
|
||||
// 2. 若未单独注入,则回退到 ChatClient;
|
||||
// 3. 这样在骨架期可先用一套 client 跑通,再按需拆分 strategist / worker。
|
||||
func (d *AgentGraphDeps) ResolvePlanClient() *infrallm.Client {
|
||||
func (d *AgentGraphDeps) ResolvePlanClient() *llmservice.Client {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -165,7 +165,7 @@ func (d *AgentGraphDeps) ResolvePlanClient() *infrallm.Client {
|
||||
}
|
||||
|
||||
// ResolveExecuteClient 返回 execute 阶段可用的模型客户端。
|
||||
func (d *AgentGraphDeps) ResolveExecuteClient() *infrallm.Client {
|
||||
func (d *AgentGraphDeps) ResolveExecuteClient() *llmservice.Client {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -179,7 +179,7 @@ func (d *AgentGraphDeps) ResolveExecuteClient() *infrallm.Client {
|
||||
}
|
||||
|
||||
// ResolveDeliverClient 返回 deliver 阶段可用的模型客户端。
|
||||
func (d *AgentGraphDeps) ResolveDeliverClient() *infrallm.Client {
|
||||
func (d *AgentGraphDeps) ResolveDeliverClient() *llmservice.Client {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -11,11 +11,11 @@ import (
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/google/uuid"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -50,7 +50,7 @@ type ChatNodeInput struct {
|
||||
UserInput string
|
||||
ConfirmAction string
|
||||
ResumeInteractionID string
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
CompactionStore newagentmodel.CompactionStore // 上下文压缩持久化
|
||||
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
|
||||
@@ -107,9 +107,9 @@ func RunChatNode(ctx context.Context, input ChatNodeInput) error {
|
||||
})
|
||||
logNodeLLMContext(chatStageName, "routing", flowState, messages)
|
||||
|
||||
reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
|
||||
reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
|
||||
Temperature: 0.7,
|
||||
Thinking: infrallm.ThinkingModeDisabled,
|
||||
Thinking: llmservice.ThinkingModeDisabled,
|
||||
Metadata: map[string]any{
|
||||
"stage": chatStageName,
|
||||
"phase": "routing",
|
||||
@@ -172,7 +172,7 @@ func isExecuteLoopClosedMarker(msg *schema.Message) bool {
|
||||
// 3. 控制码解析超时或流异常结束 → fallback 到 plan。
|
||||
func streamAndDispatch(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
parser *newagentrouter.StreamRouteParser,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
@@ -292,7 +292,7 @@ func resolveEffectiveThinking(mode string, route newagentmodel.ChatRoute, decisi
|
||||
// 2. thinking=true:关闭路由流,发起第二次 thinking 流式调用。
|
||||
func handleDirectReplyStream(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
@@ -309,7 +309,7 @@ func handleDirectReplyStream(
|
||||
// handleThinkingReplyStream 处理需要思考的回复:关闭路由流 → 第二次 thinking 流式调用。
|
||||
func handleThinkingReplyStream(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
@@ -327,10 +327,10 @@ func handleThinkingReplyStream(
|
||||
StatusBlockID: chatStatusBlockID,
|
||||
})
|
||||
logNodeLLMContext(chatStageName, "direct_reply_thinking", flowState, deepMessages)
|
||||
deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
|
||||
deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
|
||||
Temperature: 0.5,
|
||||
MaxTokens: 2000,
|
||||
Thinking: infrallm.ThinkingModeEnabled,
|
||||
Thinking: llmservice.ThinkingModeEnabled,
|
||||
Metadata: map[string]any{
|
||||
"stage": chatStageName,
|
||||
"phase": "direct_reply_thinking",
|
||||
@@ -363,7 +363,7 @@ func handleThinkingReplyStream(
|
||||
// handleDirectReplyContinueStream 处理无思考的闲聊:同一流续传。
|
||||
func handleDirectReplyContinueStream(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
@@ -419,7 +419,7 @@ func handleDirectReplyContinueStream(
|
||||
// 2. 推送轻量状态通知;
|
||||
// 3. 设置流程状态,进入 Execute 或 RoughBuild。
|
||||
func handleRouteExecuteStream(
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
flowState *newagentmodel.CommonState,
|
||||
decision *newagentmodel.ChatRoutingDecision,
|
||||
@@ -674,7 +674,7 @@ func isExplicitNoRefineAfterRoughBuildRequest(userInput string) bool {
|
||||
// 4. 完整回复写入 history。
|
||||
func handleDeepAnswerStream(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
input ChatNodeInput,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
@@ -685,9 +685,9 @@ func handleDeepAnswerStream(
|
||||
_ = reader.Close()
|
||||
|
||||
// 2. 第二次流式调用。
|
||||
thinkingOpt := infrallm.ThinkingModeDisabled
|
||||
thinkingOpt := llmservice.ThinkingModeDisabled
|
||||
if effectiveThinking {
|
||||
thinkingOpt = infrallm.ThinkingModeEnabled
|
||||
thinkingOpt = llmservice.ThinkingModeEnabled
|
||||
}
|
||||
deepMessages := newagentprompt.BuildDeepAnswerMessages(flowState, conversationContext, input.UserInput)
|
||||
deepMessages = compactUnifiedMessagesIfNeeded(ctx, deepMessages, UnifiedCompactInput{
|
||||
@@ -699,7 +699,7 @@ func handleDeepAnswerStream(
|
||||
StatusBlockID: chatStatusBlockID,
|
||||
})
|
||||
logNodeLLMContext(chatStageName, "deep_answer", flowState, deepMessages)
|
||||
deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
|
||||
deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
|
||||
Temperature: 0.5,
|
||||
MaxTokens: 2000,
|
||||
Thinking: thinkingOpt,
|
||||
@@ -741,7 +741,7 @@ func handleDeepAnswerStream(
|
||||
|
||||
// handleRoutePlanStream 处理规划路由:推送状态确认 → 设 PhasePlanning。
|
||||
func handleRoutePlanStream(
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
emitter *newagentstream.ChunkEmitter,
|
||||
flowState *newagentmodel.CommonState,
|
||||
effectiveThinking bool,
|
||||
|
||||
@@ -9,10 +9,10 @@ import (
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -31,7 +31,7 @@ const (
|
||||
type DeliverNodeInput struct {
|
||||
RuntimeState *newagentmodel.AgentRuntimeState
|
||||
ConversationContext *newagentmodel.ConversationContext
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
ThinkingEnabled bool // 是否开启 thinking,由 config.yaml 的 agent.thinking.deliver 注入
|
||||
CompactionStore newagentmodel.CompactionStore // 上下文压缩持久化
|
||||
@@ -128,7 +128,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error {
|
||||
// - streamed:true 表示文本已通过 EmitStreamAssistantText 真流式推送到前端,调用方无需再伪流式。
|
||||
func generateDeliverSummary(
|
||||
ctx context.Context,
|
||||
client *infrallm.Client,
|
||||
client *llmservice.Client,
|
||||
flowState *newagentmodel.CommonState,
|
||||
conversationContext *newagentmodel.ConversationContext,
|
||||
thinkingEnabled bool,
|
||||
@@ -162,7 +162,7 @@ func generateDeliverSummary(
|
||||
reader, err := client.Stream(
|
||||
ctx,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 0.5,
|
||||
MaxTokens: 800,
|
||||
Thinking: resolveThinkingMode(thinkingEnabled),
|
||||
|
||||
@@ -8,11 +8,11 @@ import (
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
@@ -38,7 +38,7 @@ func collectExecuteDecisionFromLLM(
|
||||
reader, err := input.Client.Stream(
|
||||
ctx,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 1.0,
|
||||
MaxTokens: 131072,
|
||||
Thinking: newagentshared.ResolveThinkingMode(input.ThinkingEnabled),
|
||||
@@ -123,7 +123,7 @@ func collectExecuteDecisionFromLLM(
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
decision, parseErr := infrallm.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
|
||||
decision, parseErr := llmservice.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
|
||||
if parseErr != nil {
|
||||
log.Printf(
|
||||
"[DEBUG] execute LLM JSON 解析失败 chat=%s round=%d json=%s raw=%s",
|
||||
|
||||
@@ -5,12 +5,12 @@ import (
|
||||
"fmt"
|
||||
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
|
||||
"github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -29,7 +29,7 @@ type ExecuteNodeInput struct {
|
||||
RuntimeState *newagentmodel.AgentRuntimeState
|
||||
ConversationContext *newagentmodel.ConversationContext
|
||||
UserInput string
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
ResumeNode string
|
||||
ToolRegistry *newagenttools.ToolRegistry
|
||||
|
||||
@@ -10,11 +10,11 @@ import (
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -34,7 +34,7 @@ type PlanNodeInput struct {
|
||||
RuntimeState *newagentmodel.AgentRuntimeState
|
||||
ConversationContext *newagentmodel.ConversationContext
|
||||
UserInput string
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
ResumeNode string
|
||||
AlwaysExecute bool // true 时计划生成后自动确认,不进入 confirm 节点
|
||||
@@ -87,7 +87,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
reader, err := input.Client.Stream(
|
||||
ctx,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 0.2,
|
||||
// 显式设置上限,避免依赖框架默认值(默认 4096)导致长决策被截断。
|
||||
// 注意:当前模型接口 max_tokens 上限为 131072,超过会 400。
|
||||
@@ -149,7 +149,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
|
||||
return fmt.Errorf("规划解析失败,原始输出=%s", result.RawBuffer)
|
||||
}
|
||||
|
||||
decision, parseErr := infrallm.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
|
||||
decision, parseErr := llmservice.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
|
||||
if parseErr != nil {
|
||||
return fmt.Errorf("规划决策 JSON 解析失败: %w (raw=%s)", parseErr, result.RawBuffer)
|
||||
}
|
||||
@@ -390,9 +390,9 @@ func buildPinnedPlanText(steps []newagentmodel.PlanStep) string {
|
||||
|
||||
// resolveThinkingMode 根据配置布尔值返回对应的 ThinkingMode。
|
||||
// 供 plan / execute / deliver 节点统一使用。
|
||||
func resolveThinkingMode(enabled bool) infrallm.ThinkingMode {
|
||||
func resolveThinkingMode(enabled bool) llmservice.ThinkingMode {
|
||||
if enabled {
|
||||
return infrallm.ThinkingModeEnabled
|
||||
return llmservice.ThinkingModeEnabled
|
||||
}
|
||||
return infrallm.ThinkingModeDisabled
|
||||
return llmservice.ThinkingModeDisabled
|
||||
}
|
||||
|
||||
@@ -8,13 +8,13 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
taskmodel "github.com/LoveLosita/smartflow/backend/model"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
|
||||
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -30,7 +30,7 @@ type QuickTaskNodeInput struct {
|
||||
RuntimeState *newagentmodel.AgentRuntimeState
|
||||
ConversationContext *newagentmodel.ConversationContext
|
||||
UserInput string
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
ChunkEmitter *newagentstream.ChunkEmitter
|
||||
QuickTaskDeps newagentmodel.QuickTaskDeps
|
||||
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
|
||||
@@ -77,7 +77,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
|
||||
messages := newagentprompt.BuildQuickTaskMessagesSimple(input.UserInput)
|
||||
|
||||
// 2. 真流式调用 LLM。
|
||||
reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
|
||||
reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
|
||||
Temperature: 0.3,
|
||||
MaxTokens: 512,
|
||||
})
|
||||
@@ -130,7 +130,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
|
||||
// 解析 JSON。
|
||||
log.Printf("[DEBUG] quick_task: LLM 原始决策 JSON chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
|
||||
var parseErr error
|
||||
decision, parseErr = infrallm.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
|
||||
decision, parseErr = llmservice.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
|
||||
if parseErr != nil {
|
||||
log.Printf("[DEBUG] quick_task: JSON 解析失败 chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
|
||||
if result.RawBuffer != "" {
|
||||
|
||||
@@ -6,11 +6,11 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
"github.com/LoveLosita/smartflow/backend/pkg"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -22,7 +22,7 @@ import (
|
||||
// 3. StageName 和 StatusBlockID 用于区分日志来源和 SSE 状态推送。
|
||||
type UnifiedCompactInput struct {
|
||||
// Client 用于调用 LLM 压缩 msg1/msg2。
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
// CompactionStore 用于持久化压缩摘要和 token 统计,为 nil 时跳过持久化。
|
||||
CompactionStore newagentmodel.CompactionStore
|
||||
// FlowState 提供 userID / chatID / roundUsed 等定位信息。
|
||||
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -24,7 +24,7 @@ const compactMsg1SystemPrompt = `你是一个对话压缩助手。你的任务
|
||||
// existingSummary 不为空时表示已有旧摘要,需要合并压缩。
|
||||
func CompactMsg1(
|
||||
ctx context.Context,
|
||||
client *infrallm.Client,
|
||||
client *llmservice.Client,
|
||||
historyText string,
|
||||
existingSummary string,
|
||||
) (string, error) {
|
||||
@@ -49,7 +49,7 @@ func CompactMsg1(
|
||||
schema.UserMessage(userContent),
|
||||
}
|
||||
|
||||
result, err := client.GenerateText(ctx, messages, infrallm.GenerateOptions{
|
||||
result, err := client.GenerateText(ctx, messages, llmservice.GenerateOptions{
|
||||
MaxTokens: 4000,
|
||||
})
|
||||
if err != nil {
|
||||
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -23,7 +23,7 @@ const compactMsg2SystemPrompt = `你是一个执行记录压缩助手。你的
|
||||
// recentText 是保留的近期记录原文,不参与压缩。
|
||||
func CompactMsg2(
|
||||
ctx context.Context,
|
||||
client *infrallm.Client,
|
||||
client *llmservice.Client,
|
||||
earlyLoopText string,
|
||||
) (string, error) {
|
||||
userContent := fmt.Sprintf(`早期的 ReAct 执行记录:
|
||||
@@ -36,7 +36,7 @@ func CompactMsg2(
|
||||
schema.UserMessage(userContent),
|
||||
}
|
||||
|
||||
result, err := client.GenerateText(ctx, messages, infrallm.GenerateOptions{
|
||||
result, err := client.GenerateText(ctx, messages, llmservice.GenerateOptions{
|
||||
MaxTokens: 4000,
|
||||
})
|
||||
if err != nil {
|
||||
|
||||
@@ -26,7 +26,7 @@ var (
|
||||
// StreamDecisionResult 描述解析器的最终输出状态。
|
||||
type StreamDecisionResult struct {
|
||||
// DecisionJSON 是标签内提取的完整 JSON 字符串。
|
||||
// 调用方应使用 infrallm.ParseJSONObject[T] 将其解析为具体决策类型。
|
||||
// 调用方应使用 llmservice.ParseJSONObject[T] 将其解析为具体决策类型。
|
||||
DecisionJSON string
|
||||
|
||||
// BeforeText 是 <SMARTFLOW_DECISION> 标签之前的自然语言前言。
|
||||
@@ -179,7 +179,7 @@ func (p *StreamDecisionParser) Result() *StreamDecisionResult {
|
||||
}
|
||||
|
||||
// extractJSONFromTag 从标签内文本中提取第一个完整 JSON 对象。
|
||||
// 复用括号计数逻辑,与 infrallm.ExtractJSONObject 一致。
|
||||
// 复用括号计数逻辑,与 llmservice.ExtractJSONObject 一致。
|
||||
func extractJSONFromTag(text string) string {
|
||||
clean := strings.TrimSpace(text)
|
||||
if clean == "" {
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
package newagentshared
|
||||
|
||||
import infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
import llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
|
||||
func ResolveThinkingMode(enabled bool) infrallm.ThinkingMode {
|
||||
func ResolveThinkingMode(enabled bool) llmservice.ThinkingMode {
|
||||
if enabled {
|
||||
return infrallm.ThinkingModeEnabled
|
||||
return llmservice.ThinkingModeEnabled
|
||||
}
|
||||
return infrallm.ThinkingModeDisabled
|
||||
return llmservice.ThinkingModeDisabled
|
||||
}
|
||||
|
||||
@@ -6,11 +6,11 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
"github.com/LoveLosita/smartflow/backend/pkg"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
@@ -22,7 +22,7 @@ import (
|
||||
// 3. StageName 和 StatusBlockID 用于区分日志来源与 SSE 状态推送目标。
|
||||
type UnifiedCompactInput struct {
|
||||
// Client 用于调用 LLM 压缩 msg1/msg2。
|
||||
Client *infrallm.Client
|
||||
Client *llmservice.Client
|
||||
// CompactionStore 用于持久化压缩摘要和 token 统计,为 nil 时跳过持久化。
|
||||
CompactionStore newagentmodel.CompactionStore
|
||||
// FlowState 提供 userID / conversationID / roundUsed 等定位信息。
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
// PayloadEmitter 是真正向外层 SSE 管道写 chunk 的最小接口。
|
||||
@@ -540,7 +540,7 @@ func (e *ChunkEmitter) EmitDone() error {
|
||||
// 3. 不负责打开/关闭 StreamReader,调用方负责生命周期管理。
|
||||
func (e *ChunkEmitter) EmitStreamAssistantText(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
blockID, stage string,
|
||||
) (string, error) {
|
||||
if e == nil || reader == nil {
|
||||
@@ -598,7 +598,7 @@ func (e *ChunkEmitter) EmitStreamAssistantText(
|
||||
// 用于只需展示思考过程而无需展示正文的场景。
|
||||
func (e *ChunkEmitter) EmitStreamReasoningText(
|
||||
ctx context.Context,
|
||||
reader infrallm.StreamReader,
|
||||
reader llmservice.StreamReader,
|
||||
blockID, stage string,
|
||||
) (string, error) {
|
||||
if e == nil || reader == nil {
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
|
||||
"github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
|
||||
"github.com/LoveLosita/smartflow/backend/newAgent/tools/web"
|
||||
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
|
||||
)
|
||||
|
||||
// ToolHandler 约定所有工具的统一执行签名。
|
||||
@@ -32,7 +32,7 @@ type ToolSchemaEntry struct {
|
||||
// 2. 某些依赖即便暂未使用也允许保留,避免业务层重新到处 new;
|
||||
// 3. 具体依赖缺失时由对应工具自行返回结构化失败结果。
|
||||
type DefaultRegistryDeps struct {
|
||||
RAGRuntime infrarag.Runtime
|
||||
RAGRuntime ragservice.Runtime
|
||||
|
||||
// WebSearchProvider 为 nil 时,web_search / web_fetch 仍会注册,
|
||||
// 但 handler 会返回“暂未启用”的只读 observation,不阻断主流程。
|
||||
|
||||
@@ -3,8 +3,8 @@ package service
|
||||
import (
|
||||
"github.com/LoveLosita/smartflow/backend/dao"
|
||||
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
|
||||
"github.com/LoveLosita/smartflow/backend/inits"
|
||||
"github.com/LoveLosita/smartflow/backend/service/agentsvc"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
// AgentService 是 service 层对 agentsvc.AgentService 的兼容别名。
|
||||
@@ -20,7 +20,7 @@ type AgentService = agentsvc.AgentService
|
||||
// 2) 主动调度 session DAO 也在这里显式透传,避免聊天入口再去回查全局单例;
|
||||
// 3) 真实构造逻辑已下沉到 service/agentsvc 包。
|
||||
func NewAgentService(
|
||||
aiHub *inits.AIHub,
|
||||
llmService *llmservice.Service,
|
||||
repo *dao.AgentDAO,
|
||||
taskRepo *dao.TaskDAO,
|
||||
cacheDAO *dao.CacheDAO,
|
||||
@@ -29,7 +29,7 @@ func NewAgentService(
|
||||
activeSessionDAO *dao.ActiveScheduleSessionDAO,
|
||||
eventPublisher outboxinfra.EventPublisher,
|
||||
) *AgentService {
|
||||
return agentsvc.NewAgentService(aiHub, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
|
||||
return agentsvc.NewAgentService(llmService, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
|
||||
}
|
||||
|
||||
// NewAgentServiceWithSchedule 在基础 AgentService 上注入排程依赖。
|
||||
@@ -39,7 +39,7 @@ func NewAgentService(
|
||||
// 2) 排程依赖为可选:未注入时排程路由自动回退到普通聊天;
|
||||
// 3) 主动调度 session DAO 仍沿用统一构造注入,避免排程分支自己拼装仓储。
|
||||
func NewAgentServiceWithSchedule(
|
||||
aiHub *inits.AIHub,
|
||||
llmService *llmservice.Service,
|
||||
repo *dao.AgentDAO,
|
||||
taskRepo *dao.TaskDAO,
|
||||
cacheDAO *dao.CacheDAO,
|
||||
@@ -50,7 +50,7 @@ func NewAgentServiceWithSchedule(
|
||||
scheduleSvc *ScheduleService,
|
||||
taskSvc *TaskService,
|
||||
) *AgentService {
|
||||
svc := agentsvc.NewAgentService(aiHub, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
|
||||
svc := agentsvc.NewAgentService(llmService, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
|
||||
|
||||
// 注入排程依赖:将 service 层方法包装为函数闭包,避免循环依赖。
|
||||
if scheduleSvc != nil {
|
||||
|
||||
@@ -3,6 +3,7 @@ package agentsvc
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -11,7 +12,6 @@ import (
|
||||
"github.com/LoveLosita/smartflow/backend/conv"
|
||||
"github.com/LoveLosita/smartflow/backend/dao"
|
||||
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
|
||||
"github.com/LoveLosita/smartflow/backend/inits"
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
@@ -20,13 +20,13 @@ import (
|
||||
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
|
||||
"github.com/LoveLosita/smartflow/backend/pkg"
|
||||
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
|
||||
"github.com/cloudwego/eino-ext/components/model/ark"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
type AgentService struct {
|
||||
AIHub *inits.AIHub
|
||||
llmService *llmservice.Service
|
||||
repo *dao.AgentDAO
|
||||
taskRepo *dao.TaskDAO
|
||||
cacheDAO *dao.CacheDAO
|
||||
@@ -75,7 +75,7 @@ type AgentService struct {
|
||||
// 这里通过依赖注入把“模型、仓储、缓存、异步持久化通道”统一交给服务层管理,
|
||||
// 便于后续在单测中替换实现,或在启动流程中按环境切换配置。
|
||||
func NewAgentService(
|
||||
aiHub *inits.AIHub,
|
||||
llmService *llmservice.Service,
|
||||
repo *dao.AgentDAO,
|
||||
taskRepo *dao.TaskDAO,
|
||||
cacheDAO *dao.CacheDAO,
|
||||
@@ -90,7 +90,7 @@ func NewAgentService(
|
||||
ensureTokenMeterCallbackRegistered()
|
||||
|
||||
return &AgentService{
|
||||
AIHub: aiHub,
|
||||
llmService: llmService,
|
||||
repo: repo,
|
||||
taskRepo: taskRepo,
|
||||
cacheDAO: cacheDAO,
|
||||
@@ -123,8 +123,11 @@ func thinkingModeToBool(mode string) bool {
|
||||
// 当前约定:
|
||||
// - 旧链路已全面切到 newAgent graph,这里仅作为 runNormalChatFlow 回退时的模型选择入口;
|
||||
// - 统一返回 Pro 模型,旧 strategist 参数不再生效。
|
||||
func (s *AgentService) pickChatModel(requestModel string) (*ark.ChatModel, string) {
|
||||
return s.AIHub.Pro, "pro"
|
||||
func (s *AgentService) pickChatModel(requestModel string) (*llmservice.Client, string) {
|
||||
if s == nil || s.llmService == nil {
|
||||
return nil, "pro"
|
||||
}
|
||||
return s.llmService.ProClient(), "pro"
|
||||
}
|
||||
|
||||
// PersistChatHistory 是 Agent 聊天链路唯一的“消息持久化入口”。
|
||||
@@ -304,7 +307,7 @@ func pushErrNonBlocking(errChan chan error, err error) {
|
||||
// 2) 开启随口记进度推送后,最终判定“非随口记”时回落到普通聊天。
|
||||
func (s *AgentService) runNormalChatFlow(
|
||||
ctx context.Context,
|
||||
selectedModel *ark.ChatModel,
|
||||
selectedModel *llmservice.Client,
|
||||
resolvedModelName string,
|
||||
userMessage string,
|
||||
userPersisted bool,
|
||||
@@ -365,6 +368,12 @@ func (s *AgentService) runNormalChatFlow(
|
||||
}
|
||||
}
|
||||
|
||||
// 6.0. 没有可用模型时,直接中止普通聊天,避免写入半截用户消息后没有后续回复。
|
||||
if selectedModel == nil {
|
||||
pushErrNonBlocking(errChan, errors.New("llm client is not ready"))
|
||||
return
|
||||
}
|
||||
|
||||
// 6. 执行真正的流式聊天。
|
||||
// fullText 用于后续写 Redis/持久化,outChan 用于把流片段实时推给前端。
|
||||
fullText, _, reasoningDurationSeconds, streamUsage, streamErr := s.streamChatFallback(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan, assistantReasoningStartedAt, userID, chatID)
|
||||
|
||||
@@ -11,10 +11,8 @@ import (
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
"github.com/LoveLosita/smartflow/backend/respond"
|
||||
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
|
||||
"github.com/cloudwego/eino-ext/components/model/ark"
|
||||
einoModel "github.com/cloudwego/eino/components/model"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -253,11 +251,11 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
|
||||
}
|
||||
|
||||
// 2. 标题生成属于结构化短输出,关闭 thinking 并限制 tokens,降低延迟与发散。
|
||||
resp, err := modelInst.Generate(ctx, messages,
|
||||
ark.WithThinking(&arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}),
|
||||
einoModel.WithTemperature(0.2),
|
||||
einoModel.WithMaxTokens(40),
|
||||
)
|
||||
resp, err := modelInst.GenerateText(ctx, messages, llmservice.GenerateOptions{
|
||||
Temperature: 0.2,
|
||||
MaxTokens: 40,
|
||||
Thinking: llmservice.ThinkingModeDisabled,
|
||||
})
|
||||
if err != nil {
|
||||
return "", 0, err
|
||||
}
|
||||
@@ -267,26 +265,26 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
|
||||
|
||||
// 2.1 标题链路的 token 从模型响应 usage 中提取;缺失则按 0 处理,不影响主流程。
|
||||
titleTokens := 0
|
||||
if resp.ResponseMeta != nil && resp.ResponseMeta.Usage != nil {
|
||||
if resp.Usage != nil {
|
||||
titleTokens = normalizeUsageTotal(
|
||||
resp.ResponseMeta.Usage.TotalTokens,
|
||||
resp.ResponseMeta.Usage.PromptTokens,
|
||||
resp.ResponseMeta.Usage.CompletionTokens,
|
||||
resp.Usage.TotalTokens,
|
||||
resp.Usage.PromptTokens,
|
||||
resp.Usage.CompletionTokens,
|
||||
)
|
||||
}
|
||||
return normalizeConversationTitle(resp.Content), titleTokens, nil
|
||||
return normalizeConversationTitle(resp.Text), titleTokens, nil
|
||||
}
|
||||
|
||||
// pickTitleModel 选择用于标题生成的模型。
|
||||
// 优先 Lite(成本低、速度快);Lite 不可用时回退 Pro。
|
||||
func (s *AgentService) pickTitleModel() *ark.ChatModel {
|
||||
if s.AIHub == nil {
|
||||
func (s *AgentService) pickTitleModel() *llmservice.Client {
|
||||
if s == nil || s.llmService == nil {
|
||||
return nil
|
||||
}
|
||||
if s.AIHub.Lite != nil {
|
||||
return s.AIHub.Lite
|
||||
if client := s.llmService.LiteClient(); client != nil {
|
||||
return client
|
||||
}
|
||||
return s.AIHub.Pro
|
||||
return s.llmService.ProClient()
|
||||
}
|
||||
|
||||
// buildConversationTitleUserPrompt 把消息历史拼成可读文本供模型总结。
|
||||
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentconv "github.com/LoveLosita/smartflow/backend/newAgent/conv"
|
||||
newagentgraph "github.com/LoveLosita/smartflow/backend/newAgent/graph"
|
||||
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
|
||||
@@ -57,6 +56,11 @@ func (s *AgentService) runNewAgentGraph(
|
||||
errChan chan error,
|
||||
) {
|
||||
requestCtx, _ := withRequestTokenMeter(ctx)
|
||||
if s == nil || s.llmService == nil {
|
||||
// 0. newAgent 主链强依赖 llm-service;装配漏传时直接返回错误,避免 nil receiver panic。
|
||||
pushErrNonBlocking(errChan, errors.New("agent llm service is not initialized"))
|
||||
return
|
||||
}
|
||||
|
||||
// 1. 规范会话 ID 和模型选择。
|
||||
chatID = normalizeConversationID(chatID)
|
||||
@@ -184,14 +188,15 @@ func (s *AgentService) runNewAgentGraph(
|
||||
}
|
||||
graphRequest.Normalize()
|
||||
|
||||
// 8. 适配 LLM clients(从 AIHub 的 ark.ChatModel 转换为 newAgent LLM Client)。
|
||||
// 8. 适配 LLM clients(统一从 llm-service 取出 newAgent 图所需模型,不再直接碰 AIHub)。
|
||||
// 8.1 Chat/Deliver 使用 Pro 模型:路由分流、闲聊、交付总结属于标准复杂度。
|
||||
// 8.2 Plan/Execute 使用 Max 模型:规划和 ReAct 循环需要深度推理能力。
|
||||
chatClient := infrallm.WrapArkClient(s.AIHub.Pro)
|
||||
planClient := infrallm.WrapArkClient(s.AIHub.Max)
|
||||
executeClient := infrallm.WrapArkClient(s.AIHub.Max)
|
||||
deliverClient := infrallm.WrapArkClient(s.AIHub.Pro)
|
||||
summaryClient := infrallm.WrapArkClient(s.AIHub.Lite)
|
||||
llmClients := s.llmService.NewAgentModelClients()
|
||||
chatClient := llmClients.Chat
|
||||
planClient := llmClients.Plan
|
||||
executeClient := llmClients.Execute
|
||||
deliverClient := llmClients.Deliver
|
||||
summaryClient := llmClients.Summary
|
||||
|
||||
// 9. 适配 SSE emitter。
|
||||
sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan)
|
||||
@@ -244,8 +249,8 @@ func (s *AgentService) runNewAgentGraph(
|
||||
log.Printf("[ERROR] newAgent graph 执行失败 trace=%s chat=%s: %v", traceID, chatID, graphErr)
|
||||
pushErrNonBlocking(errChan, fmt.Errorf("graph 执行失败: %w", graphErr))
|
||||
|
||||
// Graph 出错时回退普通聊天,保证可用性。回退使用 Pro 模型。
|
||||
s.runNormalChatFlow(requestCtx, s.AIHub.Pro, resolvedModelName, userMessage, true, "", nil, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
|
||||
// Graph 出错时回退普通聊天,保证可用性。回退使用 llm-service 的 Pro 模型。
|
||||
s.runNormalChatFlow(requestCtx, chatClient, resolvedModelName, userMessage, true, "", nil, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -6,20 +6,18 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
"github.com/cloudwego/eino-ext/components/model/ark"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/google/uuid"
|
||||
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
|
||||
)
|
||||
|
||||
// streamChatFallback 是 graph 执行失败时的降级流式聊天。
|
||||
// 内联了旧 agentchat.StreamChat 的核心逻辑,不再依赖 agent/ 包。
|
||||
func (s *AgentService) streamChatFallback(
|
||||
ctx context.Context,
|
||||
llm *ark.ChatModel,
|
||||
llm *llmservice.Client,
|
||||
modelName string,
|
||||
userInput string,
|
||||
ifThinking bool,
|
||||
@@ -36,13 +34,6 @@ func (s *AgentService) streamChatFallback(
|
||||
}
|
||||
messages = append(messages, schema.UserMessage(userInput))
|
||||
|
||||
var thinking *ark.Thinking
|
||||
if ifThinking {
|
||||
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeEnabled}
|
||||
} else {
|
||||
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}
|
||||
}
|
||||
|
||||
if strings.TrimSpace(modelName) == "" {
|
||||
modelName = "smartflow-worker"
|
||||
}
|
||||
@@ -50,7 +41,11 @@ func (s *AgentService) streamChatFallback(
|
||||
created := time.Now().Unix()
|
||||
firstChunk := true
|
||||
chunkEmitter := newagentstream.NewChunkEmitter(newagentstream.NewSSEPayloadEmitter(outChan), requestID, modelName, created)
|
||||
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(infrallm.WrapArkClient(s.AIHub.Lite)))
|
||||
reasoningSummaryClient := s.llmService.LiteClient()
|
||||
if reasoningSummaryClient == nil {
|
||||
reasoningSummaryClient = s.llmService.ProClient()
|
||||
}
|
||||
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(reasoningSummaryClient))
|
||||
chunkEmitter.SetExtraEventHook(func(extra *newagentstream.OpenAIChunkExtra) {
|
||||
s.persistNewAgentTimelineExtraEvent(context.Background(), userID, chatID, extra)
|
||||
})
|
||||
@@ -75,7 +70,14 @@ func (s *AgentService) streamChatFallback(
|
||||
}
|
||||
var reasoningEndAt *time.Time
|
||||
|
||||
reader, err := llm.Stream(ctx, messages, ark.WithThinking(thinking))
|
||||
thinkingMode := llmservice.ThinkingModeDisabled
|
||||
if ifThinking {
|
||||
thinkingMode = llmservice.ThinkingModeEnabled
|
||||
}
|
||||
|
||||
reader, err := llm.Stream(ctx, messages, llmservice.GenerateOptions{
|
||||
Thinking: thinkingMode,
|
||||
})
|
||||
if err != nil {
|
||||
return "", "", 0, nil, err
|
||||
}
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
|
||||
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
const reasoningSummaryMaxTokens = 700
|
||||
@@ -24,7 +24,7 @@ type reasoningSummaryLLMResponse struct {
|
||||
// 1. service 层负责选择模型与 prompt,stream 层只负责调度和闸门;
|
||||
// 2. 这里不持久化摘要,持久化统一走 ChunkEmitter 的 extra hook;
|
||||
// 3. 摘要失败时返回 error,由 ReasoningDigestor 吞掉并等待下一次水位线/Flush 兜底。
|
||||
func (s *AgentService) makeReasoningSummaryFunc(client *infrallm.Client) newagentstream.ReasoningSummaryFunc {
|
||||
func (s *AgentService) makeReasoningSummaryFunc(client *llmservice.Client) newagentstream.ReasoningSummaryFunc {
|
||||
if client == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -47,14 +47,14 @@ func (s *AgentService) makeReasoningSummaryFunc(client *infrallm.Client) newagen
|
||||
DurationSeconds: input.DurationSeconds,
|
||||
})
|
||||
|
||||
resp, rawResult, err := infrallm.GenerateJSON[reasoningSummaryLLMResponse](
|
||||
resp, rawResult, err := llmservice.GenerateJSON[reasoningSummaryLLMResponse](
|
||||
ctx,
|
||||
client,
|
||||
messages,
|
||||
infrallm.GenerateOptions{
|
||||
llmservice.GenerateOptions{
|
||||
Temperature: 0.1,
|
||||
MaxTokens: reasoningSummaryMaxTokens,
|
||||
Thinking: infrallm.ThinkingModeDisabled,
|
||||
Thinking: llmservice.ThinkingModeDisabled,
|
||||
Metadata: map[string]any{
|
||||
"stage": "reasoning_summary",
|
||||
"candidate_seq": input.CandidateSeq,
|
||||
@@ -99,7 +99,7 @@ func limitReasoningDetailSummary(text string, maxRunes int) string {
|
||||
return string(runes[:maxRunes])
|
||||
}
|
||||
|
||||
func truncateReasoningSummaryRaw(raw *infrallm.TextResult) string {
|
||||
func truncateReasoningSummaryRaw(raw *llmservice.TextResult) string {
|
||||
if raw == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -6,16 +6,16 @@ import (
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/conv"
|
||||
"github.com/LoveLosita/smartflow/backend/dao"
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
"github.com/LoveLosita/smartflow/backend/respond"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
type CourseService struct {
|
||||
// 伸出手:准备接住 DAO
|
||||
courseDAO *dao.CourseDAO
|
||||
scheduleDAO *dao.ScheduleDAO
|
||||
courseImageResponsesClient *infrallm.ArkResponsesClient
|
||||
courseImageResponsesClient *llmservice.ArkResponsesClient
|
||||
courseImageConfig CourseImageParseConfig
|
||||
courseImageModel string
|
||||
}
|
||||
@@ -24,7 +24,7 @@ type CourseService struct {
|
||||
func NewCourseService(
|
||||
courseDAO *dao.CourseDAO,
|
||||
scheduleDAO *dao.ScheduleDAO,
|
||||
courseImageResponsesClient *infrallm.ArkResponsesClient,
|
||||
courseImageResponsesClient *llmservice.ArkResponsesClient,
|
||||
courseImageConfig CourseImageParseConfig,
|
||||
courseImageModel string,
|
||||
) *CourseService {
|
||||
|
||||
@@ -8,16 +8,20 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
|
||||
)
|
||||
|
||||
// ParseCourseTableImage 使用 Ark SDK Responses 解析课程表图片。
|
||||
func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.CourseImageParseRequest) (*model.CourseImageParseResponse, error) {
|
||||
if ss == nil || ss.courseImageResponsesClient == nil {
|
||||
modelName := ""
|
||||
if ss != nil {
|
||||
modelName = ss.courseImageModel
|
||||
}
|
||||
log.Printf(
|
||||
"[COURSE_PARSE][SERVICE] parser unavailable model_name=%q filename=%q mime=%q bytes=%d",
|
||||
ss.courseImageModel,
|
||||
modelName,
|
||||
req.Filename,
|
||||
req.MIMEType,
|
||||
len(req.ImageBytes),
|
||||
@@ -57,7 +61,7 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
|
||||
base64Chars,
|
||||
promptChars,
|
||||
base64Chars+promptChars+len(strings.TrimSpace(courseImageParseSystemPrompt)),
|
||||
infrallm.ThinkingModeDisabled,
|
||||
llmservice.ThinkingModeDisabled,
|
||||
courseImageParseTemperature,
|
||||
ss.courseImageConfig.MaxTokens,
|
||||
"json_object",
|
||||
@@ -66,10 +70,10 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
|
||||
// 1. 课程表图片识别输出体量大,显式透传 max_output_tokens,避免被默认值截断。
|
||||
// 2. text_format 固定为 json_object,降低输出混入解释文本导致解析失败的概率。
|
||||
// 3. thinking 显式关闭,优先保证课程导入链路稳定性。
|
||||
draft, rawResult, err := infrallm.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, infrallm.ArkResponsesOptions{
|
||||
draft, rawResult, err := llmservice.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, llmservice.ArkResponsesOptions{
|
||||
Temperature: courseImageParseTemperature,
|
||||
MaxOutputTokens: ss.courseImageConfig.MaxTokens,
|
||||
Thinking: infrallm.ThinkingModeDisabled,
|
||||
Thinking: llmservice.ThinkingModeDisabled,
|
||||
TextFormat: "json_object",
|
||||
})
|
||||
if err != nil {
|
||||
@@ -188,12 +192,12 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
|
||||
return normalizedDraft, nil
|
||||
}
|
||||
|
||||
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]infrallm.ArkResponsesMessage, int, int) {
|
||||
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]llmservice.ArkResponsesMessage, int, int) {
|
||||
userPrompt := fmt.Sprintf(courseImageParseUserPromptTemplate, req.Filename, req.MIMEType)
|
||||
base64Data := base64.StdEncoding.EncodeToString(req.ImageBytes)
|
||||
imageDataURL := fmt.Sprintf("data:%s;base64,%s", req.MIMEType, base64Data)
|
||||
|
||||
messages := []infrallm.ArkResponsesMessage{
|
||||
messages := []llmservice.ArkResponsesMessage{
|
||||
{
|
||||
Role: "system",
|
||||
Text: strings.TrimSpace(courseImageParseSystemPrompt),
|
||||
@@ -208,7 +212,7 @@ func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest)
|
||||
return messages, len(base64Data), len(strings.TrimSpace(userPrompt))
|
||||
}
|
||||
|
||||
func isCourseImageOutputTruncated(rawResult *infrallm.ArkResponsesResult) bool {
|
||||
func isCourseImageOutputTruncated(rawResult *llmservice.ArkResponsesResult) bool {
|
||||
if rawResult == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1,7 +1,3 @@
|
||||
// 过渡期统一 Ark 调用封装。
|
||||
//
|
||||
// 这里保留 CallArkText / CallArkJSON,方便暂时还直接持有 *ark.ChatModel 的调用点
|
||||
// 逐步迁移到统一 Client。后续 memory 也可以直接复用这套中立层。
|
||||
package llm
|
||||
|
||||
import (
|
||||
@@ -15,12 +11,7 @@ import (
|
||||
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
|
||||
)
|
||||
|
||||
// ArkCallOptions 是基于 ark.ChatModel 的通用调用选项。
|
||||
//
|
||||
// 设计目的:
|
||||
// 1. 先把 Ark 调用样板抽成公共层;
|
||||
// 2. 再由 WrapArkClient 提供统一 Client;
|
||||
// 3. 让上层尽量只关注业务 prompt 和结构化结果。
|
||||
// ArkCallOptions 是直接调用 ark.ChatModel 时使用的通用入参。
|
||||
type ArkCallOptions struct {
|
||||
Temperature float64
|
||||
MaxTokens int
|
||||
@@ -28,12 +19,6 @@ type ArkCallOptions struct {
|
||||
}
|
||||
|
||||
// CallArkText 调用 ark 模型并返回纯文本。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责拼 system + user 两段消息;
|
||||
// 2. 负责统一配置 thinking / temperature / maxTokens;
|
||||
// 3. 负责拦截空响应;
|
||||
// 4. 不负责 JSON 解析,不负责业务字段校验。
|
||||
func CallArkText(ctx context.Context, chatModel *ark.ChatModel, systemPrompt, userPrompt string, options ArkCallOptions) (string, error) {
|
||||
if chatModel == nil {
|
||||
return "", errors.New("ark model is nil")
|
||||
@@ -76,6 +61,7 @@ func buildArkOptions(options ArkCallOptions) []einoModel.Option {
|
||||
if options.Thinking == ThinkingModeEnabled {
|
||||
thinkingType = arkModel.ThinkingTypeEnabled
|
||||
}
|
||||
|
||||
opts := []einoModel.Option{
|
||||
ark.WithThinking(&arkModel.Thinking{Type: thinkingType}),
|
||||
einoModel.WithTemperature(float32(options.Temperature)),
|
||||
@@ -12,17 +12,14 @@ import (
|
||||
)
|
||||
|
||||
// WrapArkClient 将 ark.ChatModel 适配为统一 Client。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. generateText:调用 ark.ChatModel.Generate(非流式),供 GenerateJSON 使用;
|
||||
// 2. streamText:调用 ark.ChatModel.Stream(流式),供需要流式输出的场景使用;
|
||||
// 3. 两者共用同一套 options 转换。
|
||||
// 1. generateText 走 Generate,供 GenerateJSON/GenerateText 使用。
|
||||
// 2. streamText 走 Stream,供需要流式输出的场景使用。
|
||||
// 3. 两条路径共用同一套参数转换逻辑。
|
||||
func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
|
||||
if arkChatModel == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 非流式文本生成,供 GenerateJSON / GenerateText 调用路径使用。
|
||||
generateFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) {
|
||||
arkOpts := buildArkStreamOptions(options)
|
||||
msg, err := arkChatModel.Generate(ctx, messages, arkOpts...)
|
||||
@@ -47,7 +44,6 @@ func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
|
||||
}, nil
|
||||
}
|
||||
|
||||
// 流式文本生成。
|
||||
streamFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) {
|
||||
arkOpts := buildArkStreamOptions(options)
|
||||
reader, err := arkChatModel.Stream(ctx, messages, arkOpts...)
|
||||
@@ -60,11 +56,10 @@ func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
|
||||
return NewClient(generateFunc, streamFunc)
|
||||
}
|
||||
|
||||
// buildArkStreamOptions 将统一 GenerateOptions 转换为 ark 的流式调用选项。
|
||||
// buildArkStreamOptions 将统一的 GenerateOptions 转换为 ark 的流式调用参数。
|
||||
func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
|
||||
thinkingEnabled := options.Thinking == ThinkingModeEnabled
|
||||
|
||||
// Thinking
|
||||
thinkingType := arkModel.ThinkingTypeDisabled
|
||||
if thinkingEnabled {
|
||||
thinkingType = arkModel.ThinkingTypeEnabled
|
||||
@@ -73,16 +68,12 @@ func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
|
||||
ark.WithThinking(&arkModel.Thinking{Type: thinkingType}),
|
||||
}
|
||||
|
||||
// Temperature:thinking 模型强制要求 temperature=1,否则 API 静默忽略 thinking。
|
||||
if thinkingEnabled {
|
||||
opts = append(opts, einoModel.WithTemperature(1.0))
|
||||
} else if options.Temperature > 0 {
|
||||
opts = append(opts, einoModel.WithTemperature(float32(options.Temperature)))
|
||||
}
|
||||
|
||||
// MaxTokens:thinking 模式下 thinking token 占用 max_tokens 预算,
|
||||
// 调用方设定的值仅代表"期望输出长度",实际预算需留出思考空间。
|
||||
// 最低保障 16000,避免思考链被截断导致输出为空或非 JSON。
|
||||
maxTokens := options.MaxTokens
|
||||
if thinkingEnabled {
|
||||
const minThinkingBudget = 16000
|
||||
@@ -97,14 +88,12 @@ func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
|
||||
return opts
|
||||
}
|
||||
|
||||
// arkStreamReaderAdapter 适配 ark.ChatModel.Stream 返回的 reader。
|
||||
// ark.Stream 返回 schema.StreamReader[*schema.Message],其 Close() 方法无返回值
|
||||
// 而我们的 StreamReader 接口要求 Close() error
|
||||
// arkStreamReaderAdapter 把 ark 的流式 reader 转成统一的 StreamReader 接口。
|
||||
type arkStreamReaderAdapter struct {
|
||||
reader *schema.StreamReader[*schema.Message]
|
||||
}
|
||||
|
||||
// Recv 转发到 ark reader 的 Recv 方法。
|
||||
// Recv 转发到底层 reader。
|
||||
func (r *arkStreamReaderAdapter) Recv() (*schema.Message, error) {
|
||||
if r == nil || r.reader == nil {
|
||||
return nil, io.EOF
|
||||
@@ -112,8 +101,7 @@ func (r *arkStreamReaderAdapter) Recv() (*schema.Message, error) {
|
||||
return r.reader.Recv()
|
||||
}
|
||||
|
||||
// Close 转发到 ark reader 的 Close 方法。
|
||||
// ark 的 Close() 无返回值,我们适配为返回 nil
|
||||
// Close 适配 ark reader 的 Close 行为。
|
||||
func (r *arkStreamReaderAdapter) Close() error {
|
||||
if r == nil || r.reader == nil {
|
||||
return nil
|
||||
@@ -11,11 +11,6 @@ import (
|
||||
)
|
||||
|
||||
// ArkResponsesMessage 描述一次 Responses 输入消息。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责表达角色与多模态内容(文本/图片);
|
||||
// 2. 不负责业务 prompt 生成;
|
||||
// 3. 不负责输出 JSON 的字段校验。
|
||||
type ArkResponsesMessage struct {
|
||||
Role string
|
||||
Text string
|
||||
@@ -23,7 +18,7 @@ type ArkResponsesMessage struct {
|
||||
ImageDetail string
|
||||
}
|
||||
|
||||
// ArkResponsesOptions 描述 Responses 生成选项。
|
||||
// ArkResponsesOptions 描述 Responses 调用参数。
|
||||
type ArkResponsesOptions struct {
|
||||
Model string
|
||||
Temperature float64
|
||||
@@ -32,14 +27,14 @@ type ArkResponsesOptions struct {
|
||||
TextFormat string
|
||||
}
|
||||
|
||||
// ArkResponsesUsage 统一透传 token 使用量。
|
||||
// ArkResponsesUsage 统一转写 token usage。
|
||||
type ArkResponsesUsage struct {
|
||||
InputTokens int64
|
||||
OutputTokens int64
|
||||
TotalTokens int64
|
||||
}
|
||||
|
||||
// ArkResponsesResult 是 Ark Responses 的统一输出结构。
|
||||
// ArkResponsesResult 是 Responses 调用的统一输出结构。
|
||||
type ArkResponsesResult struct {
|
||||
Text string
|
||||
Status string
|
||||
@@ -56,11 +51,9 @@ type ArkResponsesClient struct {
|
||||
}
|
||||
|
||||
// NewArkResponsesClient 创建 Ark SDK Responses 客户端。
|
||||
//
|
||||
// 说明:
|
||||
// 1. model 为空时返回 nil,表示当前能力未启用;
|
||||
// 2. baseURL 为空时使用 SDK 默认地址;
|
||||
// 3. 仅负责客户端创建,不做连通性探测。
|
||||
// 1. model 为空时直接返回 nil,表示这条能力没有启用。
|
||||
// 2. baseURL 为空时使用 SDK 默认地址。
|
||||
// 3. 这里只负责本地构造,不做连通性探测。
|
||||
func NewArkResponsesClient(apiKey string, baseURL string, model string) *ArkResponsesClient {
|
||||
model = strings.TrimSpace(model)
|
||||
if model == "" {
|
||||
@@ -104,7 +97,7 @@ func (c *ArkResponsesClient) GenerateText(ctx context.Context, messages []ArkRes
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// GenerateArkResponsesJSON 先调用 Responses,再解析为 JSON 结构体。
|
||||
// GenerateArkResponsesJSON 先调用 Responses,再解析成 JSON 结构体。
|
||||
func GenerateArkResponsesJSON[T any](ctx context.Context, client *ArkResponsesClient, messages []ArkResponsesMessage, options ArkResponsesOptions) (*T, *ArkResponsesResult, error) {
|
||||
if client == nil {
|
||||
return nil, nil, errors.New("ark responses client is not ready")
|
||||
@@ -9,12 +9,7 @@ import (
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
// ThinkingMode 描述本次模型调用对 thinking 的期望。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 这里只表达“调用方希望怎样配置推理模式”;
|
||||
// 2. 不直接绑定某个具体模型厂商的参数枚举;
|
||||
// 3. 真正如何把它翻译成 ark / OpenAI / 其他 provider 的 option,由后续适配层负责。
|
||||
// ThinkingMode 描述这次模型调用对 thinking 的期望。
|
||||
type ThinkingMode string
|
||||
|
||||
const (
|
||||
@@ -23,12 +18,7 @@ const (
|
||||
ThinkingModeDisabled ThinkingMode = "disabled"
|
||||
)
|
||||
|
||||
// GenerateOptions 是统一模型调用选项。
|
||||
//
|
||||
// 设计目的:
|
||||
// 1. 先把“每个 skill / worker 都会反复传的参数”收敛成一份结构;
|
||||
// 2. 让上层以后只表达“我要什么”,不再自己重复组织 option;
|
||||
// 3. 暂时不追求覆盖所有 provider 参数,先把最常用的几个公共位抽出来。
|
||||
// GenerateOptions 统一收敛文本调用时最常见的公共参数。
|
||||
type GenerateOptions struct {
|
||||
Temperature float64
|
||||
MaxTokens int
|
||||
@@ -36,40 +26,32 @@ type GenerateOptions struct {
|
||||
Metadata map[string]any
|
||||
}
|
||||
|
||||
// TextResult 是统一文本生成结果。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. Text 保存模型最终返回的纯文本;
|
||||
// 2. Usage 保存本次调用的 token 使用量,供后续统一统计;
|
||||
// 3. 不负责 JSON 解析,不负责业务字段映射。
|
||||
// TextResult 保存一次文本生成的最终结果和 usage。
|
||||
// 1. Text 存放模型返回的纯文本。
|
||||
// 2. Usage 方便上层做统一统计。
|
||||
// 3. 这里不负责 JSON 解析,也不负责业务字段映射。
|
||||
type TextResult struct {
|
||||
Text string
|
||||
Usage *schema.TokenUsage
|
||||
// FinishReason 透传 provider 的停止原因,便于上层判断是否因 length 等原因被截断。
|
||||
FinishReason string
|
||||
}
|
||||
|
||||
// StreamReader 抽象了“可逐块 Recv 的流式返回器”。
|
||||
//
|
||||
// 之所以不直接依赖某个具体 SDK 的 reader 类型,是因为现在还处在骨架收敛阶段,
|
||||
// 后续接 ark、OpenAI 兼容层还是别的 provider,都可以往这个最小接口上适配。
|
||||
// StreamReader 抽象可以逐块读取消息的流式返回器。
|
||||
type StreamReader interface {
|
||||
Recv() (*schema.Message, error)
|
||||
Close() error
|
||||
}
|
||||
|
||||
// TextGenerateFunc 是文本生成的统一适配函数签名。
|
||||
// TextGenerateFunc 定义统一文本生成函数签名。
|
||||
type TextGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error)
|
||||
|
||||
// StreamGenerateFunc 是流式生成的统一适配函数签名。
|
||||
// StreamGenerateFunc 定义统一流式生成函数签名。
|
||||
type StreamGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error)
|
||||
|
||||
// Client 是统一模型客户端门面。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责把调用方的“模型调用意图”收敛到统一入口;
|
||||
// 2. 负责统一参数校验、空响应防御、GenerateJSON 复用;
|
||||
// 3. 不负责写 prompt,不负责业务 fallback,也不直接持有具体厂商 SDK 细节。
|
||||
// 1. 只做最小输入校验和空响应防御。
|
||||
// 2. 不负责 prompt 拼装,也不负责业务 fallback。
|
||||
// 3. 具体 provider 的细节由上层适配器收敛进来。
|
||||
type Client struct {
|
||||
generateText TextGenerateFunc
|
||||
streamText StreamGenerateFunc
|
||||
@@ -84,11 +66,6 @@ func NewClient(generateText TextGenerateFunc, streamText StreamGenerateFunc) *Cl
|
||||
}
|
||||
|
||||
// GenerateText 执行一次统一文本生成。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责做最小必要的入参校验;
|
||||
// 2. 负责统一拦截“模型空响应”这类公共问题;
|
||||
// 3. 不负责业务 prompt 拼接,也不负责把文本再映射成业务结构。
|
||||
func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) {
|
||||
if c == nil || c.generateText == nil {
|
||||
return nil, errors.New("llm client is not ready")
|
||||
@@ -111,11 +88,6 @@ func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, o
|
||||
}
|
||||
|
||||
// GenerateJSON 先走统一文本生成,再走统一 JSON 解析。
|
||||
//
|
||||
// 设计说明:
|
||||
// 1. 把“Generate -> 提取 JSON -> 反序列化”这段公共链路收敛起来;
|
||||
// 2. 上层只关心业务结构,不需要重复实现解析样板;
|
||||
// 3. 返回 parsed + rawResult,方便打点与回退时保留原文。
|
||||
func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema.Message, options GenerateOptions) (*T, *TextResult, error) {
|
||||
result, err := client.GenerateText(ctx, messages, options)
|
||||
if err != nil {
|
||||
@@ -130,11 +102,6 @@ func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema
|
||||
}
|
||||
|
||||
// Stream 打开统一流式调用入口。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只负责把“流式生成能力”暴露给上层;
|
||||
// 2. 不负责 chunk 到 OpenAI 协议的转换,那部分应放在 stream/;
|
||||
// 3. 不负责累计全文,也不负责 token 统计落库。
|
||||
func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) {
|
||||
if c == nil || c.streamText == nil {
|
||||
return nil, errors.New("llm stream client is not ready")
|
||||
@@ -145,12 +112,7 @@ func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options
|
||||
return c.streamText(ctx, messages, options)
|
||||
}
|
||||
|
||||
// BuildSystemUserMessages 构造最常见的“system + history + user”消息列表。
|
||||
//
|
||||
// 设计说明:
|
||||
// 1. 先把最稳定的消息编排方式沉淀下来,减少各业务域样板代码;
|
||||
// 2. 只做消息切片装配,不做 prompt 生成;
|
||||
// 3. 供 agent / memory 等多个能力域复用。
|
||||
// BuildSystemUserMessages 构造最常见的 system + history + user 消息列表。
|
||||
func BuildSystemUserMessages(systemPrompt string, history []*schema.Message, userPrompt string) []*schema.Message {
|
||||
messages := make([]*schema.Message, 0, len(history)+2)
|
||||
if strings.TrimSpace(systemPrompt) != "" {
|
||||
@@ -165,7 +127,7 @@ func BuildSystemUserMessages(systemPrompt string, history []*schema.Message, use
|
||||
return messages
|
||||
}
|
||||
|
||||
// CloneUsage 深拷贝 token usage,避免后续多处累加时共享同一指针。
|
||||
// CloneUsage 深拷贝 token usage,避免后续累加时共享同一个指针。
|
||||
func CloneUsage(usage *schema.TokenUsage) *schema.TokenUsage {
|
||||
if usage == nil {
|
||||
return nil
|
||||
@@ -174,12 +136,7 @@ func CloneUsage(usage *schema.TokenUsage) *schema.TokenUsage {
|
||||
return &copied
|
||||
}
|
||||
|
||||
// MergeUsage 合并两段 usage。
|
||||
//
|
||||
// 合并策略:
|
||||
// 1. 对“同一次调用不同流分片”的场景,取更大值作为最终值;
|
||||
// 2. 对“多次独立调用累计”的场景,应由上层显式做加法,而不是用这个函数;
|
||||
// 3. 该函数只适用于“同一次调用的分块 usage 收敛”。
|
||||
// MergeUsage 合并两段 usage,取各字段更大的值作为累计结果。
|
||||
func MergeUsage(base *schema.TokenUsage, incoming *schema.TokenUsage) *schema.TokenUsage {
|
||||
if incoming == nil {
|
||||
return CloneUsage(base)
|
||||
@@ -207,7 +164,7 @@ func MergeUsage(base *schema.TokenUsage, incoming *schema.TokenUsage) *schema.To
|
||||
return &merged
|
||||
}
|
||||
|
||||
// FormatEmptyResponseError 统一生成“模型返回空结果”的错误文案。
|
||||
// FormatEmptyResponseError 统一模型空结果的错误文案。
|
||||
func FormatEmptyResponseError(scene string) error {
|
||||
scene = strings.TrimSpace(scene)
|
||||
if scene == "" {
|
||||
@@ -7,12 +7,10 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseJSONObject 解析模型返回中的 JSON 对象。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责处理“模型输出前后夹杂解释文字 / markdown 代码块”的常见情况;
|
||||
// 2. 负责提取最外层 JSON object 并反序列化为目标结构;
|
||||
// 3. 不负责业务字段合法性校验,应由上层调用方自行校验。
|
||||
// ParseJSONObject 解析模型返回内容中的 JSON 对象。
|
||||
// 1. 先剥离常见的 markdown 代码块包装。
|
||||
// 2. 再从混合文本里提取最外层 JSON 对象。
|
||||
// 3. 这里只负责结构解析,不负责字段合法性校验。
|
||||
func ParseJSONObject[T any](raw string) (*T, error) {
|
||||
clean := strings.TrimSpace(raw)
|
||||
if clean == "" {
|
||||
@@ -31,12 +29,7 @@ func ParseJSONObject[T any](raw string) (*T, error) {
|
||||
return &out, nil
|
||||
}
|
||||
|
||||
// ExtractJSONObject 从混合文本里提取第一个完整 JSON 对象。
|
||||
//
|
||||
// 设计说明:
|
||||
// 1. LLM 很容易输出“这里是结果:{...}”这种半结构化文本;
|
||||
// 2. 这里用括号计数而不是正则,避免嵌套对象一多就误截断;
|
||||
// 3. 目前只提取 object,不提取 array,因为当前契约基本都是对象。
|
||||
// ExtractJSONObject 从混合文本中提取第一个完整的 JSON 对象。
|
||||
func ExtractJSONObject(text string) string {
|
||||
clean := trimMarkdownCodeFence(strings.TrimSpace(text))
|
||||
if clean == "" {
|
||||
@@ -94,9 +87,6 @@ func trimMarkdownCodeFence(text string) string {
|
||||
return trimmed
|
||||
}
|
||||
|
||||
// 1. 去掉首行 ```json / ```;
|
||||
// 2. 若末行是 ```,一并去掉;
|
||||
// 3. 中间正文保持原样,避免破坏 JSON 的换行结构。
|
||||
body := lines[1:]
|
||||
if len(body) > 0 && strings.TrimSpace(body[len(body)-1]) == "```" {
|
||||
body = body[:len(body)-1]
|
||||
109
backend/services/llm/service.go
Normal file
109
backend/services/llm/service.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/inits"
|
||||
)
|
||||
|
||||
// Service 只负责统一暴露已经构造好的模型客户端,不负责 prompt 和业务编排。
|
||||
type Service struct {
|
||||
liteClient *Client
|
||||
proClient *Client
|
||||
maxClient *Client
|
||||
courseImageResponsesClient *ArkResponsesClient
|
||||
}
|
||||
|
||||
// Options 描述 llm-service 初始化时需要接管的启动期依赖。
|
||||
// 1. AIHub 仍然是当前进程内 Ark ChatModel 的来源,但服务层只保存统一 Client。
|
||||
// 2. CourseImageResponsesClient 允许外部预先注入,便于测试或特殊启动路径复用。
|
||||
// 3. 某个字段为空时不报错,直接保留 nil,交给上层继续走兼容降级。
|
||||
type Options struct {
|
||||
AIHub *inits.AIHub
|
||||
APIKey string
|
||||
BaseURL string
|
||||
CourseVisionModel string
|
||||
CourseImageResponsesClient *ArkResponsesClient
|
||||
}
|
||||
|
||||
// AgentModelClients 一次性暴露 newAgent 图常用的模型分配结果。
|
||||
type AgentModelClients struct {
|
||||
Chat *Client
|
||||
Plan *Client
|
||||
Execute *Client
|
||||
Deliver *Client
|
||||
Summary *Client
|
||||
}
|
||||
|
||||
// New 构造 llm-service。
|
||||
// 1. 不返回 error,是为了让上层继续按 nil 客户端做逐步降级。
|
||||
// 2. 只要 AIHub 已初始化,就把其中的 ChatModel 收敛成统一 Client。
|
||||
// 3. 课程图片解析客户端在这里统一构建,避免业务层直接依赖 Responses SDK。
|
||||
func New(opts Options) *Service {
|
||||
svc := &Service{}
|
||||
|
||||
if opts.AIHub != nil {
|
||||
svc.liteClient = WrapArkClient(opts.AIHub.Lite)
|
||||
svc.proClient = WrapArkClient(opts.AIHub.Pro)
|
||||
svc.maxClient = WrapArkClient(opts.AIHub.Max)
|
||||
}
|
||||
|
||||
if opts.CourseImageResponsesClient != nil {
|
||||
svc.courseImageResponsesClient = opts.CourseImageResponsesClient
|
||||
} else {
|
||||
apiKey := strings.TrimSpace(opts.APIKey)
|
||||
baseURL := strings.TrimSpace(opts.BaseURL)
|
||||
model := strings.TrimSpace(opts.CourseVisionModel)
|
||||
if apiKey != "" && model != "" {
|
||||
svc.courseImageResponsesClient = NewArkResponsesClient(apiKey, baseURL, model)
|
||||
}
|
||||
}
|
||||
|
||||
return svc
|
||||
}
|
||||
|
||||
// LiteClient 返回低成本短输出模型客户端。
|
||||
func (s *Service) LiteClient() *Client {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
return s.liteClient
|
||||
}
|
||||
|
||||
// ProClient 返回默认复杂对话模型客户端。
|
||||
func (s *Service) ProClient() *Client {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
return s.proClient
|
||||
}
|
||||
|
||||
// MaxClient 返回深度推理模型客户端。
|
||||
func (s *Service) MaxClient() *Client {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
return s.maxClient
|
||||
}
|
||||
|
||||
// CourseImageResponsesClient 返回课程图片解析所用的 Responses 客户端。
|
||||
func (s *Service) CourseImageResponsesClient() *ArkResponsesClient {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
return s.courseImageResponsesClient
|
||||
}
|
||||
|
||||
// NewAgentModelClients 一次性返回 newAgent 图里常用的模型分配。
|
||||
func (s *Service) NewAgentModelClients() AgentModelClients {
|
||||
if s == nil {
|
||||
return AgentModelClients{}
|
||||
}
|
||||
return AgentModelClients{
|
||||
Chat: s.ProClient(),
|
||||
Plan: s.MaxClient(),
|
||||
Execute: s.MaxClient(),
|
||||
Deliver: s.ProClient(),
|
||||
Summary: s.LiteClient(),
|
||||
}
|
||||
}
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// Runtime 是 RAG Infra 对业务侧暴露的唯一稳定方法面。
|
||||
// Runtime 是 RAG service 对业务侧暴露的唯一稳定方法面。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责承接 memory/web 两类语料的统一入库与检索入口;
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
)
|
||||
|
||||
// TextChunker 是默认文本切块器。
|
||||
@@ -21,7 +21,7 @@ const (
|
||||
// ObserveEvent 描述一次统一观测事件。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只承载 RAG Infra 的结构化运行信息;
|
||||
// 1. 只承载 RAG service 的结构化运行信息;
|
||||
// 2. 不绑定具体日志系统、指标系统或 tracing 实现;
|
||||
// 3. 字段内容应尽量稳定,便于后续统一接入全局观测平台。
|
||||
type ObserveEvent struct {
|
||||
@@ -31,7 +31,7 @@ type ObserveEvent struct {
|
||||
Fields map[string]any
|
||||
}
|
||||
|
||||
// Observer 是 RAG Infra 的最小观测接口。
|
||||
// Observer 是 RAG service 的最小观测接口。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责消费结构化事件;
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
)
|
||||
|
||||
const memoryCorpusName = "memory"
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
)
|
||||
|
||||
const webCorpusName = "web"
|
||||
@@ -7,12 +7,12 @@ import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
ragchunk "github.com/LoveLosita/smartflow/backend/infra/rag/chunk"
|
||||
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
ragembed "github.com/LoveLosita/smartflow/backend/infra/rag/embed"
|
||||
ragrerank "github.com/LoveLosita/smartflow/backend/infra/rag/rerank"
|
||||
ragstore "github.com/LoveLosita/smartflow/backend/infra/rag/store"
|
||||
ragchunk "github.com/LoveLosita/smartflow/backend/services/rag/chunk"
|
||||
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
ragembed "github.com/LoveLosita/smartflow/backend/services/rag/embed"
|
||||
ragrerank "github.com/LoveLosita/smartflow/backend/services/rag/rerank"
|
||||
ragstore "github.com/LoveLosita/smartflow/backend/services/rag/store"
|
||||
)
|
||||
|
||||
// FactoryDeps 描述 Runtime 工厂所需的可选依赖。
|
||||
@@ -3,7 +3,7 @@ package rag
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
)
|
||||
|
||||
// ObserveLevel 对外暴露统一观测等级别名,避免启动层直接依赖 core 细节。
|
||||
@@ -1,11 +1,11 @@
|
||||
package rag
|
||||
|
||||
import (
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/chunk"
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/embed"
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/rerank"
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/store"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/chunk"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/embed"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/rerank"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/store"
|
||||
)
|
||||
|
||||
// NewDefaultPipeline 构造默认可运行的 RAG Pipeline。
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
)
|
||||
|
||||
// EinoReranker 是 Eino 重排器占位实现。
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"context"
|
||||
"sort"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
)
|
||||
|
||||
// NoopReranker 是默认重排器(仅按原 score 排序)。
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
)
|
||||
|
||||
// VectorRetriever 是通用检索器(embed + vector search)。
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/corpus"
|
||||
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/corpus"
|
||||
)
|
||||
|
||||
type runtime struct {
|
||||
@@ -343,7 +343,7 @@ func (r *runtime) recoverPublicPanic(
|
||||
return
|
||||
}
|
||||
|
||||
// 1. runtime 是 RAG Infra 对业务侧暴露的最终方法面,任何下层 panic 都不应再穿透到业务协程。
|
||||
// 1. runtime 是 RAG service 对业务侧暴露的最终方法面,任何下层 panic 都不应再穿透到业务协程。
|
||||
// 2. 这里统一把 panic 转成 error,并补一条结构化观测,方便继续排查是哪一层依赖失控。
|
||||
// 3. 保留 stack 是为了在“进程不崩”的前提下仍能定位根因,避免只剩一句 recovered 无法复盘。
|
||||
panicErr := fmt.Errorf("rag runtime panic recovered: corpus=%s operation=%s panic=%v", corpusName, operation, recovered)
|
||||
111
backend/services/rag/service.go
Normal file
111
backend/services/rag/service.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package rag
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
|
||||
)
|
||||
|
||||
// Options 描述 rag-service 需要持有的底层运行时。
|
||||
type Options struct {
|
||||
Runtime Runtime
|
||||
}
|
||||
|
||||
// Service 是 rag-service 对外暴露的统一入口。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责持有运行时,并把 memory / web 两条能力线统一收口到服务层。
|
||||
// 2. 负责在服务入口内完成基于配置的运行时装配。
|
||||
// 3. 不直接承载 chunk / embed / store 的实现细节,这些细节下沉到服务树内部子包。
|
||||
type Service struct {
|
||||
runtime Runtime
|
||||
}
|
||||
|
||||
// New 使用调用方传入的运行时构造服务。
|
||||
func New(opts Options) *Service {
|
||||
return &Service{runtime: opts.Runtime}
|
||||
}
|
||||
|
||||
// NewFromConfig 基于服务树内的配置与工厂能力构造自给自足的 RAG 服务。
|
||||
func NewFromConfig(ctx context.Context, cfg ragconfig.Config, deps FactoryDeps) (*Service, error) {
|
||||
if !cfg.Enabled {
|
||||
return New(Options{}), nil
|
||||
}
|
||||
runtime, err := NewRuntimeFromConfig(ctx, cfg, deps)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewWithRuntime(runtime), nil
|
||||
}
|
||||
|
||||
// Runtime 返回当前服务持有的运行时。
|
||||
func (s *Service) Runtime() Runtime {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
return s.runtime
|
||||
}
|
||||
|
||||
// IngestMemory 写入记忆语料。
|
||||
func (s *Service) IngestMemory(ctx context.Context, req MemoryIngestRequest) (*IngestResult, error) {
|
||||
if s == nil || s.runtime == nil {
|
||||
return nil, nil
|
||||
}
|
||||
return s.runtime.IngestMemory(ctx, req)
|
||||
}
|
||||
|
||||
// RetrieveMemory 检索记忆语料。
|
||||
func (s *Service) RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest) (*RetrieveResult, error) {
|
||||
if s == nil || s.runtime == nil {
|
||||
return nil, nil
|
||||
}
|
||||
return s.runtime.RetrieveMemory(ctx, req)
|
||||
}
|
||||
|
||||
// DeleteMemory 删除指定记忆文档。
|
||||
func (s *Service) DeleteMemory(ctx context.Context, documentIDs []string) error {
|
||||
if s == nil || s.runtime == nil {
|
||||
return nil
|
||||
}
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
return s.runtime.DeleteMemory(ctx, documentIDs)
|
||||
}
|
||||
|
||||
// IngestWeb 写入网页语料。
|
||||
func (s *Service) IngestWeb(ctx context.Context, req WebIngestRequest) (*IngestResult, error) {
|
||||
if s == nil || s.runtime == nil {
|
||||
return nil, nil
|
||||
}
|
||||
return s.runtime.IngestWeb(ctx, req)
|
||||
}
|
||||
|
||||
// RetrieveWeb 检索网页语料。
|
||||
func (s *Service) RetrieveWeb(ctx context.Context, req WebRetrieveRequest) (*RetrieveResult, error) {
|
||||
if s == nil || s.runtime == nil {
|
||||
return nil, nil
|
||||
}
|
||||
return s.runtime.RetrieveWeb(ctx, req)
|
||||
}
|
||||
|
||||
// EnsureRuntime 返回一个可继续向下传递的运行时引用。
|
||||
func (s *Service) EnsureRuntime() Runtime {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
return s.runtime
|
||||
}
|
||||
|
||||
// SetRuntime 允许在装配阶段延迟注入运行时。
|
||||
func (s *Service) SetRuntime(runtime Runtime) {
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
s.runtime = runtime
|
||||
}
|
||||
|
||||
// NewWithRuntime 用显式运行时构造服务。
|
||||
func NewWithRuntime(runtime Runtime) *Service {
|
||||
return New(Options{Runtime: runtime})
|
||||
}
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
)
|
||||
|
||||
// InMemoryVectorStore 是本地开发用向量存储实现。
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
"github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
)
|
||||
|
||||
// MilvusConfig 描述 Milvus REST 存储配置。
|
||||
@@ -1,6 +1,6 @@
|
||||
package store
|
||||
|
||||
import "github.com/LoveLosita/smartflow/backend/infra/rag/core"
|
||||
import "github.com/LoveLosita/smartflow/backend/services/rag/core"
|
||||
|
||||
// EnsureCompile 用于静态校验实现是否满足接口。
|
||||
func EnsureCompile() {
|
||||
@@ -36,6 +36,8 @@
|
||||
4. 消费侧已经按服务 consumer group 隔离,不再用一个 worker 吃全部事件。
|
||||
5. 当前仍是单体进程内多 worker 装配;worker 后续会跟随对应服务一起迁出,不在阶段 1 直接拆进程。
|
||||
|
||||
阶段 1.5 / 1.6 也已经先落地完毕:`backend/services/llm` 和 `backend/services/rag` 已经成为当前 canonical 入口,`backend/infra/llm` 和 `backend/infra/rag` 的 `.go` 旧实现已删除,仅保留迁移说明文档。当前仍然是单体进程内多 worker 装配,llm / rag 先完成服务化收口,还没有进入 gozero 进程拆分。
|
||||
|
||||
所以后续路线不是再补一次 outbox 基建,而是在这个阶段 1 基线上,按服务边界逐个把 gozero 服务、DAO / model / worker 和启动入口迁出去。
|
||||
|
||||
---
|
||||
@@ -78,6 +80,8 @@ gozero 服务负责领域能力:
|
||||
> 说明:`agent` 和 `memory` 都可以单独成服务,不应再被写成“公共能力”;其中 `agent` 更像对外对话编排服务,`memory` 更像其支撑服务/worker 服务。
|
||||
>
|
||||
> 说明:`llm-service` 先抽成全仓统一模型出口,`rag-service` 再抽成检索基础设施服务;`rag-service` 只能依赖 `llm-service`,不反向依赖具体业务服务。
|
||||
>
|
||||
> 当前状态:`llm-service` / `rag-service` 这两个边界已经先做成 `backend/services/*` 的服务内模块,调用仍由 `backend/cmd/start.go` 在同一进程内装配,不是 gozero 独立进程。
|
||||
|
||||
### 3.3 事件层
|
||||
|
||||
@@ -118,8 +122,8 @@ gozero 服务负责领域能力:
|
||||
| --- | --- | --- | --- |
|
||||
| 0 | 语义冻结和基线确认(已完成) | 阶段 0 已作为历史基线保存;后续只在契约变化时回看 | `go test ./...`,`api / worker / all` 启动 smoke |
|
||||
| 1 | Outbox v2 基建(已完成,当前基线) | 当前已具备阶段 1 保存点:服务级 outbox 表、topic、group 和多 worker 装配已打通 | 已完成健康检查、服务级 outbox 写入/投递/消费 smoke、Kafka group lag 核对 |
|
||||
| 1.5 | 先抽 llm-service | 统一模型调用、provider 路由、流式输出和审计后 commit | course / active-scheduler / memory 模型调用 smoke |
|
||||
| 1.6 | 再抽 rag-service | 向量化、召回、重排、检索能力跑通后 commit | memory retrieve / rerank smoke |
|
||||
| 1.5 | 先抽 llm-service(已完成) | 已完成,`backend/services/llm` 作为当前 canonical 入口 | `go test ./...` + course / active-scheduler / memory 模型调用 smoke |
|
||||
| 1.6 | 再抽 rag-service(已完成) | 已完成,`backend/services/rag` 作为当前 canonical 入口 | `go test ./...` + memory retrieve / rerank smoke |
|
||||
| 2 | 先拆 user/auth | user 路由、JWT 签发和 token 额度治理独立后 commit | 注册/登录/刷新/登出 smoke + token quota 回归 |
|
||||
| 3 | 再拆 notification | notification 服务能独立消费和重试后 commit | notification E2E smoke + worker-only smoke |
|
||||
| 4 | 再拆 active-scheduler | 预览生成和确认链路通过 gozero 服务跑通后 commit | dry-run / preview / confirm smoke |
|
||||
@@ -229,7 +233,7 @@ flowchart LR
|
||||
|
||||
---
|
||||
|
||||
### 4.4 阶段 1.5:先抽 llm-service
|
||||
### 4.4 阶段 1.5:先抽 llm-service(已完成)
|
||||
|
||||
目标:
|
||||
|
||||
@@ -237,6 +241,12 @@ flowchart LR
|
||||
2. 让 `course`、`active-scheduler`、`memory`、`agent` 对模型调用的依赖先收口到统一服务。
|
||||
3. 先把模型 provider 路由、流式输出、限流、审计这些共性收束起来,避免每个服务各写一份。
|
||||
|
||||
当前状态:
|
||||
|
||||
1. 代码已经落到 `backend/services/llm`。
|
||||
2. `backend/infra/llm` 的 `.go` 旧实现已删除,仅保留迁移说明。
|
||||
3. 仍由 `backend/cmd/start.go` 在同一进程内装配,尚未引入 gozero 独立服务进程。
|
||||
|
||||
这一步要做的事:
|
||||
|
||||
1. 把当前分散在业务服务里的模型调用入口改成统一调用 `llm-service`。
|
||||
@@ -260,7 +270,7 @@ flowchart LR
|
||||
|
||||
---
|
||||
|
||||
### 4.5 阶段 1.6:再抽 rag-service
|
||||
### 4.5 阶段 1.6:再抽 rag-service(已完成)
|
||||
|
||||
目标:
|
||||
|
||||
@@ -268,6 +278,12 @@ flowchart LR
|
||||
2. 让向量化、召回、重排、向量库读写先进入独立服务。
|
||||
3. 明确 `rag-service` 只能依赖 `llm-service` 做 embedding / rerank,不反向依赖业务服务。
|
||||
|
||||
当前状态:
|
||||
|
||||
1. 代码已经落到 `backend/services/rag`。
|
||||
2. `backend/infra/rag` 的 `.go` 旧实现已删除,仅保留迁移说明。
|
||||
3. 仍由 `backend/cmd/start.go` 在同一进程内装配,尚未引入 gozero 独立服务进程。
|
||||
|
||||
这一步要做的事:
|
||||
|
||||
1. 把当前分散在 `memory`、`agent` 里的检索逻辑改成统一调用 `rag-service`。
|
||||
@@ -474,6 +490,8 @@ flowchart LR
|
||||
|
||||
当前建议按这个顺序推进:
|
||||
|
||||
注:阶段 1.5 / 1.6 已完成,当前实际推进可从阶段 2 开始。
|
||||
|
||||
1. 以阶段 1 的服务级 outbox 为当前基线,不再回头做共享 outbox 方案。
|
||||
2. 先切 llm-service,把统一模型出口从各业务服务里抽出去。
|
||||
3. 再切 rag-service,把检索基础设施从 memory / agent 里抽出去。
|
||||
|
||||
Reference in New Issue
Block a user