Version: 0.9.65.dev.260503

Backend:
1. Phases 1.5/1.6: consolidate llm-service / rag-service as the unified model egress and the single entry point for retrieval infrastructure, and retire the legacy backend/infra/llm and backend/infra/rag implementations;
2. Update the affected call chains and the microservice migration plan document accordingly.
Losita
2026-05-03 23:21:03 +08:00
parent a6c1e5d077
commit 9902ca3563
65 changed files with 550 additions and 376 deletions
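For orientation: every hunk below converges on a single llm-service facade instead of ad-hoc ark.ChatModel wrappers. A minimal sketch of how that facade is consumed, reconstructed only from the constructors and accessors that appear in this diff (the function shape and comments are illustrative, not repository code):

import (
	"os"

	"github.com/LoveLosita/smartflow/backend/inits"
	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
	"github.com/spf13/viper"
)

// buildLLMService mirrors the cmd/start.go wiring in this commit: one facade owns
// Ark credentials and model selection, and hands typed clients to callers.
func buildLLMService(aiHub *inits.AIHub) *llmservice.Service {
	llmService := llmservice.New(llmservice.Options{
		AIHub:             aiHub,
		APIKey:            os.Getenv("ARK_API_KEY"),
		BaseURL:           viper.GetString("agent.baseURL"),
		CourseVisionModel: viper.GetString("courseImport.visionModel"),
	})
	_ = llmService.ProClient()                  // standard-complexity client (memory, active scheduler, chat fallback)
	_ = llmService.LiteClient()                 // cheap client; callers in this diff fall back to ProClient() when it is nil
	_ = llmService.CourseImageResponsesClient() // Ark Responses client for course-table image parsing
	_ = llmService.NewAgentModelClients()       // Chat / Plan / Execute / Deliver / Summary clients for the newAgent graph
	return llmService
}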

View File

@@ -11,7 +11,7 @@ import (
"github.com/LoveLosita/smartflow/backend/active_scheduler/ports"
"github.com/LoveLosita/smartflow/backend/active_scheduler/trigger"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const locateMaxTokens = 800
@@ -24,7 +24,7 @@ const locateMaxTokens = 800
// 3. 不创建新工具系统,也不直接产出 preview。
type Service struct {
reader ports.ScheduleReader
client *infrallm.Client
client *llmservice.Client
clock func() time.Time
logger *log.Logger
}
@@ -34,7 +34,7 @@ type Service struct {
// 说明:
// 1. reader / client 允许为空,方便在模型不可用或读模型暂时不可用时直接回退 ask_user。
// 2. 真正的定位能力只在 Resolve 内部按需启用。
func NewService(reader ports.ScheduleReader, client *infrallm.Client) *Service {
func NewService(reader ports.ScheduleReader, client *llmservice.Client) *Service {
return &Service{
reader: reader,
client: client,
@@ -101,15 +101,15 @@ func (s *Service) Resolve(ctx context.Context, req Request) (Result, error) {
return s.buildAskUserResult(req, "定位 prompt 构造失败"), nil
}
messages := infrallm.BuildSystemUserMessages(strings.TrimSpace(locateSystemPrompt), nil, userPrompt)
resp, rawResult, err := infrallm.GenerateJSON[llmResponse](
messages := llmservice.BuildSystemUserMessages(strings.TrimSpace(locateSystemPrompt), nil, userPrompt)
resp, rawResult, err := llmservice.GenerateJSON[llmResponse](
ctx,
s.client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 0.1,
MaxTokens: locateMaxTokens,
Thinking: infrallm.ThinkingModeDisabled,
Thinking: llmservice.ThinkingModeDisabled,
Metadata: map[string]any{
"stage": "active_scheduler_feedback_locate",
"candidate_count": len(candidates),
@@ -340,7 +340,7 @@ func cloneAndTrimStrings(values []string) []string {
return result
}
func truncateRaw(raw *infrallm.TextResult) string {
func truncateRaw(raw *llmservice.TextResult) string {
if raw == nil {
return ""
}
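This locator and the selection service in the next file share the same structured-output call shape. A self-contained sketch of that shape, using only helpers visible in these hunks (the response struct, prompts, and stage name are made up for illustration):

import (
	"context"
	"log"
	"strings"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

type exampleDecision struct {
	ScheduleID int64  `json:"schedule_id"`
	Reason     string `json:"reason"`
}

// callOnce shows the BuildSystemUserMessages + GenerateJSON pairing used above.
func callOnce(ctx context.Context, client *llmservice.Client, systemPrompt, userPrompt string) error {
	messages := llmservice.BuildSystemUserMessages(strings.TrimSpace(systemPrompt), nil, userPrompt)
	resp, rawResult, err := llmservice.GenerateJSON[exampleDecision](ctx, client, messages, llmservice.GenerateOptions{
		Temperature: 0.1,
		MaxTokens:   800,
		Thinking:    llmservice.ThinkingModeDisabled,
		Metadata:    map[string]any{"stage": "example_locate"},
	})
	if err != nil {
		// rawResult carries the raw model output; the services above truncate it for logging.
		log.Printf("locate failed: %v raw=%+v", err, rawResult)
		return err
	}
	log.Printf("locate ok: %+v", resp)
	return nil
}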

View File

@@ -10,7 +10,7 @@ import (
"time"
"github.com/LoveLosita/smartflow/backend/active_scheduler/candidate"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const selectionMaxTokens = 1200
@@ -22,7 +22,7 @@ const selectionMaxTokens = 1200
// 2. LLM 失败、输出非法或选择不存在候选时,回退到后端 fallback candidate
// 3. 不写 preview、不发通知、不修改正式日程。
type Service struct {
client *infrallm.Client
client *llmservice.Client
clock func() time.Time
logger *log.Logger
}
@@ -33,7 +33,7 @@ type Service struct {
// 1. client 允许为空;为空时选择器只走确定性 fallback便于本地测试和降级
// 2. 真正的模型接入在 cmd/start.go 中完成aiHub.Pro -> llm.Client -> selection.Service
// 3. 选择器本身不持有模型配置,只表达本业务域的 prompt 和结果校验。
func NewService(client *infrallm.Client) *Service {
func NewService(client *llmservice.Client) *Service {
return &Service{
client: client,
clock: time.Now,
@@ -70,19 +70,19 @@ func (s *Service) Select(ctx context.Context, req SelectRequest) (Result, error)
return buildFallbackResult(req, "选择器 prompt 构造失败: "+err.Error()), nil
}
messages := infrallm.BuildSystemUserMessages(
messages := llmservice.BuildSystemUserMessages(
strings.TrimSpace(selectionSystemPrompt),
nil,
userPrompt,
)
resp, rawResult, err := infrallm.GenerateJSON[llmSelectionResponse](
resp, rawResult, err := llmservice.GenerateJSON[llmSelectionResponse](
ctx,
s.client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 0.1,
MaxTokens: selectionMaxTokens,
Thinking: infrallm.ThinkingModeDisabled,
Thinking: llmservice.ThinkingModeDisabled,
Metadata: map[string]any{
"stage": "active_scheduler_select",
"candidate_count": len(req.Candidates),
@@ -275,7 +275,7 @@ func firstNonEmpty(values ...string) string {
return ""
}
func truncateRaw(raw *infrallm.TextResult) string {
func truncateRaw(raw *llmservice.TextResult) string {
if raw == nil {
return ""
}

View File

@@ -23,10 +23,7 @@ import (
"github.com/LoveLosita/smartflow/backend/api"
"github.com/LoveLosita/smartflow/backend/dao"
kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
"github.com/LoveLosita/smartflow/backend/inits"
"github.com/LoveLosita/smartflow/backend/memory"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
@@ -44,6 +41,9 @@ import (
"github.com/LoveLosita/smartflow/backend/service"
agentsvcsvc "github.com/LoveLosita/smartflow/backend/service/agentsvc"
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
"github.com/go-redis/redis/v8"
"github.com/spf13/viper"
"gorm.io/gorm"
@@ -167,17 +167,25 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
return nil, fmt.Errorf("failed to initialize Eino: %w", err)
}
ragRuntime, err := buildRAGRuntime(ctx)
llmService := llmservice.New(llmservice.Options{
AIHub: aiHub,
APIKey: os.Getenv("ARK_API_KEY"),
BaseURL: viper.GetString("agent.baseURL"),
CourseVisionModel: viper.GetString("courseImport.visionModel"),
})
ragService, err := buildRAGService(ctx)
if err != nil {
return nil, err
}
ragRuntime := ragService.Runtime()
memoryCfg := memory.LoadConfigFromViper()
memoryObserver := memoryobserve.NewLoggerObserver(log.Default())
memoryMetrics := memoryobserve.NewMetricsRegistry()
memoryModule := memory.NewModuleWithObserve(
db,
infrallm.WrapArkClient(aiHub.Pro),
llmService.ProClient(),
ragRuntime,
memoryCfg,
memory.ObserveDeps{
@@ -208,11 +216,11 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
userService := service.NewUserService(userRepo, cacheRepo)
taskSv := service.NewTaskService(taskRepo, cacheRepo, eventBus)
taskSv.SetActiveScheduleDAO(manager.ActiveSchedule)
courseService := buildCourseService(courseRepo, scheduleRepo)
courseService := buildCourseService(llmService, courseRepo, scheduleRepo)
taskClassService := service.NewTaskClassService(taskClassRepo, cacheRepo, scheduleRepo, manager)
scheduleService := service.NewScheduleService(scheduleRepo, userRepo, taskClassRepo, manager, cacheRepo)
agentService := service.NewAgentServiceWithSchedule(
aiHub,
llmService,
agentRepo,
taskRepo,
cacheRepo,
@@ -251,7 +259,7 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
}
// 1. 主动调度选择器单独复用 Pro 模型LLM 失败时由 selection 层显式回退到确定性候选;
// 2. dry-run 与 selection 通过 graph runner 串起来,避免 trigger_pipeline 再拼第二套候选逻辑。
activeScheduleLLMClient := infrallm.WrapArkClient(aiHub.Pro)
activeScheduleLLMClient := llmService.ProClient()
activeScheduleSelector := activesel.NewService(activeScheduleLLMClient)
activeScheduleFeedbackLocator := activefeedbacklocate.NewService(activeReaders, activeScheduleLLMClient)
activeScheduleGraphRunner, err := activegraph.NewRunner(activeScheduleDryRun.AsGraphDryRunFunc(), activeScheduleSelector)
@@ -323,26 +331,26 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) {
return runtime, nil
}
func buildRAGRuntime(ctx context.Context) (infrarag.Runtime, error) {
func buildRAGService(ctx context.Context) (*ragservice.Service, error) {
ragCfg := ragconfig.LoadFromViper()
if !ragCfg.Enabled {
log.Println("RAG runtime is disabled")
return nil, nil
log.Println("RAG service is disabled")
return ragservice.New(ragservice.Options{}), nil
}
// 1. 当前项目尚未完成全局观测平台建设,这里先注入一层轻量 Observer
// 2. RAG 内部只依赖 Observer 接口,后续若全项目统一日志/指标系统,只需替换这里;
// 3. 这样可以避免 RAG 单独自建一套割裂的日志基础设施。
ragLogger := log.Default()
ragRuntime, err := infrarag.NewRuntimeFromConfig(ctx, ragCfg, infrarag.FactoryDeps{
ragService, err := ragservice.NewFromConfig(ctx, ragCfg, ragservice.FactoryDeps{
Logger: ragLogger,
Observer: infrarag.NewLoggerObserver(ragLogger),
Observer: ragservice.NewLoggerObserver(ragLogger),
})
if err != nil {
return nil, fmt.Errorf("failed to initialize RAG runtime: %w", err)
return nil, fmt.Errorf("failed to initialize RAG service: %w", err)
}
log.Printf("RAG runtime initialized: store=%s embed=%s reranker=%s", ragCfg.Store, ragCfg.EmbedProvider, ragCfg.RerankerProvider)
return ragRuntime, nil
log.Printf("RAG service initialized: store=%s embed=%s reranker=%s", ragCfg.Store, ragCfg.EmbedProvider, ragCfg.RerankerProvider)
return ragService, nil
}
func buildEventBus(outboxRepo *outboxinfra.Repository) (eventsvc.OutboxBus, error) {
@@ -369,12 +377,8 @@ func buildEventBus(outboxRepo *outboxinfra.Repository) (eventsvc.OutboxBus, erro
return eventBus, nil
}
func buildCourseService(courseRepo *dao.CourseDAO, scheduleRepo *dao.ScheduleDAO) *service.CourseService {
courseImageResponsesClient := infrallm.NewArkResponsesClient(
os.Getenv("ARK_API_KEY"),
viper.GetString("agent.baseURL"),
viper.GetString("courseImport.visionModel"),
)
func buildCourseService(llmService *llmservice.Service, courseRepo *dao.CourseDAO, scheduleRepo *dao.ScheduleDAO) *service.CourseService {
courseImageResponsesClient := llmService.CourseImageResponsesClient()
return service.NewCourseService(
courseRepo,
scheduleRepo,
@@ -650,7 +654,7 @@ func containsString(values []string, target string) bool {
func configureAgentService(
agentService *service.AgentService,
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
agentRepo *dao.AgentDAO,
cacheRepo *dao.CacheDAO,
taskRepo *dao.TaskDAO,
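Note the contract change above: buildRAGRuntime used to return nil when RAG was disabled, while buildRAGService now always returns a *ragservice.Service and callers read ragService.Runtime(). A sketch of the downstream guard as a hypothetical helper next to buildRAGService, under the assumption (not stated in this hunk) that Runtime() is nil for the disabled service, which matches the nil-tolerant memory wiring later in this commit:

func wireMemory(ctx context.Context, db *gorm.DB, llmService *llmservice.Service) (*memory.Module, error) {
	ragService, err := buildRAGService(ctx)
	if err != nil {
		return nil, err
	}
	ragRuntime := ragService.Runtime()
	if ragRuntime == nil {
		// Assumption: the disabled service exposes a nil Runtime; memory read and
		// vector sync then fall back to their legacy, non-vector paths.
		log.Println("RAG service disabled; memory runs without semantic retrieval")
	}
	return memory.NewModule(db, llmService.ProClient(), ragRuntime, memory.LoadConfigFromViper()), nil
}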

View File

@@ -5,8 +5,6 @@ import (
"errors"
"log"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorycleanup "github.com/LoveLosita/smartflow/backend/memory/cleanup"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
@@ -16,6 +14,8 @@ import (
memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
memoryworker "github.com/LoveLosita/smartflow/backend/memory/worker"
"github.com/LoveLosita/smartflow/backend/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
"gorm.io/gorm"
)
@@ -28,8 +28,8 @@ import (
type Module struct {
db *gorm.DB
cfg memorymodel.Config
llmClient *infrallm.Client
ragRuntime infrarag.Runtime
llmClient *llmservice.Client
ragRuntime ragservice.Runtime
observer memoryobserve.Observer
metrics memoryobserve.MetricsRecorder
@@ -64,15 +64,15 @@ func LoadConfigFromViper() memorymodel.Config {
// 2. llmClient 允许为 nil此时写入链路会自动回退到本地 fallback 抽取;
// 3. ragRuntime 允许为 nil此时读取/向量同步自动回退旧逻辑;
// 4. 若后续接入统一 DI 容器,也应优先注册这个 Module而不是把内部 repo/service 继续向外泄漏。
func NewModule(db *gorm.DB, llmClient *infrallm.Client, ragRuntime infrarag.Runtime, cfg memorymodel.Config) *Module {
func NewModule(db *gorm.DB, llmClient *llmservice.Client, ragRuntime ragservice.Runtime, cfg memorymodel.Config) *Module {
return NewModuleWithObserve(db, llmClient, ragRuntime, cfg, ObserveDeps{})
}
// NewModuleWithObserve 创建带观测依赖的 memory 模块门面。
func NewModuleWithObserve(
db *gorm.DB,
llmClient *infrallm.Client,
ragRuntime infrarag.Runtime,
llmClient *llmservice.Client,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
deps ObserveDeps,
) *Module {
@@ -228,8 +228,8 @@ func (m *Module) StartWorker(ctx context.Context) {
func wireModule(
db *gorm.DB,
llmClient *infrallm.Client,
ragRuntime infrarag.Runtime,
llmClient *llmservice.Client,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
deps ObserveDeps,
) *Module {
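The constructor comments above spell out the degradation contract; a tiny caller-side sketch of the weakest legal construction (names from this hunk, the surrounding db/ctx are illustrative):

// Sketch: both the LLM client and the RAG runtime may be nil; the module degrades instead of failing.
cfg := memory.LoadConfigFromViper()
module := memory.NewModule(db, nil, nil, cfg) // nil client -> local fallback extraction; nil runtime -> legacy read / vector-sync path
module.StartWorker(ctx)                       // the async worker still runs on the degraded paths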

View File

@@ -6,8 +6,8 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const defaultDecisionCompareMaxTokens = 600
@@ -19,13 +19,13 @@ const defaultDecisionCompareMaxTokens = 600
// 2. LLM 只输出 relation关系类型不输出 action不输出 target ID
// 3. LLM 调用失败时返回 error由上层决定是否视为 unrelated。
type LLMDecisionOrchestrator struct {
client *infrallm.Client
client *llmservice.Client
cfg memorymodel.Config
logger *log.Logger
}
// NewLLMDecisionOrchestrator 构造决策比对编排器。
func NewLLMDecisionOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
func NewLLMDecisionOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMDecisionOrchestrator {
return &LLMDecisionOrchestrator{
client: client,
cfg: cfg,
@@ -52,14 +52,14 @@ func (o *LLMDecisionOrchestrator) Compare(
systemPrompt := buildDecisionCompareSystemPrompt()
userPrompt := buildDecisionCompareUserPrompt(fact, candidate)
messages := infrallm.BuildSystemUserMessages(systemPrompt, nil, userPrompt)
messages := llmservice.BuildSystemUserMessages(systemPrompt, nil, userPrompt)
// 2. 调用 LLM 做结构化输出,温度用低值保证判断稳定。
resp, _, err := infrallm.GenerateJSON[decisionCompareResponse](
resp, _, err := llmservice.GenerateJSON[decisionCompareResponse](
ctx,
o.client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 0.1,
MaxTokens: defaultDecisionCompareMaxTokens,
Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking),
@@ -127,9 +127,9 @@ func buildDecisionCompareUserPrompt(fact memorymodel.NormalizedFact, candidate m
}
// resolveMemoryThinkingMode 根据配置布尔值返回对应的 ThinkingMode。
func resolveMemoryThinkingMode(enabled bool) infrallm.ThinkingMode {
func resolveMemoryThinkingMode(enabled bool) llmservice.ThinkingMode {
if enabled {
return infrallm.ThinkingModeEnabled
return llmservice.ThinkingModeEnabled
}
return infrallm.ThinkingModeDisabled
return llmservice.ThinkingModeDisabled
}

View File

@@ -7,9 +7,9 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -24,13 +24,13 @@ const (
// 2. 不负责落库,不负责任务状态机推进;
// 3. 当 LLM 不可用或输出异常时,回退到保守的本地抽取,保证链路不完全断。
type LLMWriteOrchestrator struct {
client *infrallm.Client
client *llmservice.Client
cfg memorymodel.Config
logger *log.Logger
}
// NewLLMWriteOrchestrator 构造 LLM 版记忆写入编排器。
func NewLLMWriteOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
func NewLLMWriteOrchestrator(client *llmservice.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
return &LLMWriteOrchestrator{
client: client,
cfg: cfg,
@@ -54,17 +54,17 @@ func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorym
return fallbackNormalizedFacts(payload), nil
}
messages := infrallm.BuildSystemUserMessages(
messages := llmservice.BuildSystemUserMessages(
buildMemoryExtractSystemPrompt(o.cfg.ExtractPrompt),
nil,
buildMemoryExtractUserPrompt(payload),
)
resp, rawResult, err := infrallm.GenerateJSON[memoryExtractResponse](
resp, rawResult, err := llmservice.GenerateJSON[memoryExtractResponse](
ctx,
o.client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: clampTemperature(o.cfg.LLMTemperature),
MaxTokens: defaultMemoryExtractMaxTokens,
Thinking: resolveMemoryThinkingMode(o.cfg.LLMThinking),
@@ -319,7 +319,7 @@ func isSkipIntent(intent string) bool {
}
}
func truncateForLog(raw *infrallm.TextResult) string {
func truncateForLog(raw *llmservice.TextResult) string {
if raw == nil {
return ""
}

View File

@@ -3,8 +3,8 @@ package service
import (
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
// buildReadScopedItemQuery 构造读侧统一使用的 MySQL 查询条件。
@@ -53,8 +53,8 @@ func buildReadScopedRAGRequest(
req memorymodel.RetrieveRequest,
topK int,
threshold float64,
) infrarag.MemoryRetrieveRequest {
return infrarag.MemoryRetrieveRequest{
) ragservice.MemoryRetrieveRequest {
return ragservice.MemoryRetrieveRequest{
Query: req.Query,
TopK: topK,
Threshold: threshold,

View File

@@ -8,12 +8,12 @@ import (
"strings"
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
const (
@@ -30,7 +30,7 @@ const (
type ReadService struct {
itemRepo *memoryrepo.ItemRepo
settingsRepo *memoryrepo.SettingsRepo
ragRuntime infrarag.Runtime
ragRuntime ragservice.Runtime
cfg memorymodel.Config
observer memoryobserve.Observer
metrics memoryobserve.MetricsRecorder
@@ -57,7 +57,7 @@ type semanticRetrieveTelemetry struct {
func NewReadService(
itemRepo *memoryrepo.ItemRepo,
settingsRepo *memoryrepo.SettingsRepo,
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
observer memoryobserve.Observer,
metrics memoryobserve.MetricsRecorder,
@@ -347,7 +347,7 @@ func collectMemoryIDs(items []model.MemoryItem) []int64 {
return ids
}
func buildMemoryDTOFromRetrieveHit(hit infrarag.RetrieveHit) (memorymodel.ItemDTO, int64) {
func buildMemoryDTOFromRetrieveHit(hit ragservice.RetrieveHit) (memorymodel.ItemDTO, int64) {
memoryID := parseMemoryIDFromDocumentID(hit.DocumentID)
metadata := hit.Metadata
content := strings.TrimSpace(hit.Text)

View File

@@ -6,10 +6,10 @@ import (
"log"
"strings"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
// Syncer 负责 memory_items 与向量库之间的最小桥接。
@@ -19,7 +19,7 @@ import (
// 2. 不负责决定哪些记忆该写、该删、该恢复,这些决策仍由上游 service/worker/cleanup 控制;
// 3. 同步失败时只回写 vector_status 并打观测,不反向回滚业务事务,避免把在线链路拖成强依赖。
type Syncer struct {
ragRuntime infrarag.Runtime
ragRuntime ragservice.Runtime
itemRepo *memoryrepo.ItemRepo
observer memoryobserve.Observer
metrics memoryobserve.MetricsRecorder
@@ -27,7 +27,7 @@ type Syncer struct {
}
func NewSyncer(
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
itemRepo *memoryrepo.ItemRepo,
observer memoryobserve.Observer,
metrics memoryobserve.MetricsRecorder,
@@ -53,9 +53,9 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
return
}
requestItems := make([]infrarag.MemoryIngestItem, 0, len(items))
requestItems := make([]ragservice.MemoryIngestItem, 0, len(items))
for _, item := range items {
requestItems = append(requestItems, infrarag.MemoryIngestItem{
requestItems = append(requestItems, ragservice.MemoryIngestItem{
MemoryID: item.ID,
UserID: item.UserID,
ConversationID: strValue(item.ConversationID),
@@ -76,7 +76,7 @@ func (s *Syncer) Upsert(ctx context.Context, traceID string, items []model.Memor
result, err := s.ragRuntime.IngestMemory(memoryobserve.WithFields(ctx, map[string]any{
"trace_id": traceID,
}), infrarag.MemoryIngestRequest{
}), ragservice.MemoryIngestRequest{
TraceID: traceID,
Action: "add",
Items: requestItems,

View File

@@ -4,11 +4,11 @@ import (
"context"
"fmt"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
"gorm.io/gorm"
)
@@ -192,7 +192,7 @@ func (r *Runner) recallCandidates(
) candidateRecallResult {
// 1. 优先使用 Milvus 向量语义召回。
if r.ragRuntime != nil {
retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, infrarag.MemoryRetrieveRequest{
retrieveResult, err := r.ragRuntime.RetrieveMemory(ctx, ragservice.MemoryRetrieveRequest{
Query: fact.Content,
TopK: r.cfg.DecisionCandidateTopK,
Threshold: r.cfg.DecisionCandidateMinScore,
@@ -235,7 +235,7 @@ func (r *Runner) recallCandidates(
// 1. 从 DocumentID(格式 memory:{id})解析出 mysql_id
// 2. 从 metadata 提取 title 和 memory_type
// 3. 跳过无法解析 DocumentID 的结果。
func (r *Runner) buildCandidatesFromRAG(hits []infrarag.RetrieveHit) []memorymodel.CandidateSnapshot {
func (r *Runner) buildCandidatesFromRAG(hits []ragservice.RetrieveHit) []memorymodel.CandidateSnapshot {
candidates := make([]memorymodel.CandidateSnapshot, 0, len(hits))
for _, hit := range hits {
memoryID := parseMemoryID(hit.DocumentID)

View File

@@ -9,7 +9,6 @@ import (
"strings"
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
memoryorchestrator "github.com/LoveLosita/smartflow/backend/memory/orchestrator"
@@ -17,6 +16,7 @@ import (
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
memoryvectorsync "github.com/LoveLosita/smartflow/backend/memory/vectorsync"
"github.com/LoveLosita/smartflow/backend/model"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
"gorm.io/gorm"
)
@@ -41,7 +41,7 @@ type Runner struct {
auditRepo *memoryrepo.AuditRepo
settingsRepo *memoryrepo.SettingsRepo
extractor Extractor
ragRuntime infrarag.Runtime
ragRuntime ragservice.Runtime
logger *log.Logger
vectorSyncer *memoryvectorsync.Syncer
observer memoryobserve.Observer
@@ -63,7 +63,7 @@ func NewRunner(
auditRepo *memoryrepo.AuditRepo,
settingsRepo *memoryrepo.SettingsRepo,
extractor Extractor,
ragRuntime infrarag.Runtime,
ragRuntime ragservice.Runtime,
cfg memorymodel.Config,
decisionOrchestrator *memoryorchestrator.LLMDecisionOrchestrator,
vectorSyncer *memoryvectorsync.Syncer,

View File

@@ -5,10 +5,10 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
schedule "github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -71,10 +71,10 @@ type PersistVisibleMessageFunc func(ctx context.Context, state *CommonState, msg
// 2. Chat/Plan/Execute/Deliver 允许分别挂不同 client但也允许先复用同一个 client
// 3. ChunkEmitter 统一承接阶段提示、正文、工具事件、确认请求等 SSE 输出。
type AgentGraphDeps struct {
ChatClient *infrallm.Client
PlanClient *infrallm.Client
ExecuteClient *infrallm.Client
DeliverClient *infrallm.Client
ChatClient *llmservice.Client
PlanClient *llmservice.Client
ExecuteClient *llmservice.Client
DeliverClient *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
StateStore AgentStateStore
ToolRegistry *newagenttools.ToolRegistry
@@ -141,7 +141,7 @@ func (d *AgentGraphDeps) EnsureChunkEmitter() *newagentstream.ChunkEmitter {
}
// ResolveChatClient 返回 chat 阶段可用的模型客户端。
func (d *AgentGraphDeps) ResolveChatClient() *infrallm.Client {
func (d *AgentGraphDeps) ResolveChatClient() *llmservice.Client {
if d == nil {
return nil
}
@@ -154,7 +154,7 @@ func (d *AgentGraphDeps) ResolveChatClient() *infrallm.Client {
// 1. 优先使用显式注入的 PlanClient
// 2. 若未单独注入,则回退到 ChatClient
// 3. 这样在骨架期可先用一套 client 跑通,再按需拆分 strategist / worker。
func (d *AgentGraphDeps) ResolvePlanClient() *infrallm.Client {
func (d *AgentGraphDeps) ResolvePlanClient() *llmservice.Client {
if d == nil {
return nil
}
@@ -165,7 +165,7 @@ func (d *AgentGraphDeps) ResolvePlanClient() *infrallm.Client {
}
// ResolveExecuteClient 返回 execute 阶段可用的模型客户端。
func (d *AgentGraphDeps) ResolveExecuteClient() *infrallm.Client {
func (d *AgentGraphDeps) ResolveExecuteClient() *llmservice.Client {
if d == nil {
return nil
}
@@ -179,7 +179,7 @@ func (d *AgentGraphDeps) ResolveExecuteClient() *infrallm.Client {
}
// ResolveDeliverClient 返回 deliver 阶段可用的模型客户端。
func (d *AgentGraphDeps) ResolveDeliverClient() *infrallm.Client {
func (d *AgentGraphDeps) ResolveDeliverClient() *llmservice.Client {
if d == nil {
return nil
}
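The Resolve* accessors above encode the "start with one client, split later" fallback described in the comments. A short usage sketch, written as if from inside the same package, where only ChatClient is injected (llmService as wired in cmd/start.go):

// Sketch: skeleton-phase wiring with a single client.
deps := &AgentGraphDeps{
	ChatClient: llmService.ProClient(),
}
chatClient := deps.ResolveChatClient() // the injected Pro client
planClient := deps.ResolvePlanClient() // PlanClient is nil, so this falls back to ChatClient per the comment above
_ = chatClient
_ = planClient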

View File

@@ -11,11 +11,11 @@ import (
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -50,7 +50,7 @@ type ChatNodeInput struct {
UserInput string
ConfirmAction string
ResumeInteractionID string
Client *infrallm.Client
Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
CompactionStore newagentmodel.CompactionStore // 上下文压缩持久化
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
@@ -107,9 +107,9 @@ func RunChatNode(ctx context.Context, input ChatNodeInput) error {
})
logNodeLLMContext(chatStageName, "routing", flowState, messages)
reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
Temperature: 0.7,
Thinking: infrallm.ThinkingModeDisabled,
Thinking: llmservice.ThinkingModeDisabled,
Metadata: map[string]any{
"stage": chatStageName,
"phase": "routing",
@@ -172,7 +172,7 @@ func isExecuteLoopClosedMarker(msg *schema.Message) bool {
// 3. 控制码解析超时或流异常结束 → fallback 到 plan。
func streamAndDispatch(
ctx context.Context,
reader infrallm.StreamReader,
reader llmservice.StreamReader,
parser *newagentrouter.StreamRouteParser,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
@@ -292,7 +292,7 @@ func resolveEffectiveThinking(mode string, route newagentmodel.ChatRoute, decisi
// 2. thinking=true关闭路由流发起第二次 thinking 流式调用。
func handleDirectReplyStream(
ctx context.Context,
reader infrallm.StreamReader,
reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -309,7 +309,7 @@ func handleDirectReplyStream(
// handleThinkingReplyStream 处理需要思考的回复:关闭路由流 → 第二次 thinking 流式调用。
func handleThinkingReplyStream(
ctx context.Context,
reader infrallm.StreamReader,
reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -327,10 +327,10 @@ func handleThinkingReplyStream(
StatusBlockID: chatStatusBlockID,
})
logNodeLLMContext(chatStageName, "direct_reply_thinking", flowState, deepMessages)
deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
Temperature: 0.5,
MaxTokens: 2000,
Thinking: infrallm.ThinkingModeEnabled,
Thinking: llmservice.ThinkingModeEnabled,
Metadata: map[string]any{
"stage": chatStageName,
"phase": "direct_reply_thinking",
@@ -363,7 +363,7 @@ func handleThinkingReplyStream(
// handleDirectReplyContinueStream 处理无思考的闲聊:同一流续传。
func handleDirectReplyContinueStream(
ctx context.Context,
reader infrallm.StreamReader,
reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -419,7 +419,7 @@ func handleDirectReplyContinueStream(
// 2. 推送轻量状态通知;
// 3. 设置流程状态,进入 Execute 或 RoughBuild。
func handleRouteExecuteStream(
reader infrallm.StreamReader,
reader llmservice.StreamReader,
emitter *newagentstream.ChunkEmitter,
flowState *newagentmodel.CommonState,
decision *newagentmodel.ChatRoutingDecision,
@@ -674,7 +674,7 @@ func isExplicitNoRefineAfterRoughBuildRequest(userInput string) bool {
// 4. 完整回复写入 history。
func handleDeepAnswerStream(
ctx context.Context,
reader infrallm.StreamReader,
reader llmservice.StreamReader,
input ChatNodeInput,
emitter *newagentstream.ChunkEmitter,
conversationContext *newagentmodel.ConversationContext,
@@ -685,9 +685,9 @@ func handleDeepAnswerStream(
_ = reader.Close()
// 2. 第二次流式调用。
thinkingOpt := infrallm.ThinkingModeDisabled
thinkingOpt := llmservice.ThinkingModeDisabled
if effectiveThinking {
thinkingOpt = infrallm.ThinkingModeEnabled
thinkingOpt = llmservice.ThinkingModeEnabled
}
deepMessages := newagentprompt.BuildDeepAnswerMessages(flowState, conversationContext, input.UserInput)
deepMessages = compactUnifiedMessagesIfNeeded(ctx, deepMessages, UnifiedCompactInput{
@@ -699,7 +699,7 @@ func handleDeepAnswerStream(
StatusBlockID: chatStatusBlockID,
})
logNodeLLMContext(chatStageName, "deep_answer", flowState, deepMessages)
deepReader, err := input.Client.Stream(ctx, deepMessages, infrallm.GenerateOptions{
deepReader, err := input.Client.Stream(ctx, deepMessages, llmservice.GenerateOptions{
Temperature: 0.5,
MaxTokens: 2000,
Thinking: thinkingOpt,
@@ -741,7 +741,7 @@ func handleDeepAnswerStream(
// handleRoutePlanStream 处理规划路由:推送状态确认 → 设 PhasePlanning。
func handleRoutePlanStream(
reader infrallm.StreamReader,
reader llmservice.StreamReader,
emitter *newagentstream.ChunkEmitter,
flowState *newagentmodel.CommonState,
effectiveThinking bool,

View File

@@ -9,10 +9,10 @@ import (
"github.com/cloudwego/eino/schema"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -31,7 +31,7 @@ const (
type DeliverNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
Client *infrallm.Client
Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
ThinkingEnabled bool // 是否开启 thinking由 config.yaml 的 agent.thinking.deliver 注入
CompactionStore newagentmodel.CompactionStore // 上下文压缩持久化
@@ -128,7 +128,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error {
// - streamedtrue 表示文本已通过 EmitStreamAssistantText 真流式推送到前端,调用方无需再伪流式。
func generateDeliverSummary(
ctx context.Context,
client *infrallm.Client,
client *llmservice.Client,
flowState *newagentmodel.CommonState,
conversationContext *newagentmodel.ConversationContext,
thinkingEnabled bool,
@@ -162,7 +162,7 @@ func generateDeliverSummary(
reader, err := client.Stream(
ctx,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 0.5,
MaxTokens: 800,
Thinking: resolveThinkingMode(thinkingEnabled),

View File

@@ -8,11 +8,11 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
)
@@ -38,7 +38,7 @@ func collectExecuteDecisionFromLLM(
reader, err := input.Client.Stream(
ctx,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 1.0,
MaxTokens: 131072,
Thinking: newagentshared.ResolveThinkingMode(input.ThinkingEnabled),
@@ -123,7 +123,7 @@ func collectExecuteDecisionFromLLM(
return nil, nil
}
decision, parseErr := infrallm.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
decision, parseErr := llmservice.ParseJSONObject[newagentmodel.ExecuteDecision](result.DecisionJSON)
if parseErr != nil {
log.Printf(
"[DEBUG] execute LLM JSON 解析失败 chat=%s round=%d json=%s raw=%s",

View File

@@ -5,12 +5,12 @@ import (
"fmt"
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
"github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const (
@@ -29,7 +29,7 @@ type ExecuteNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
UserInput string
Client *infrallm.Client
Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
ResumeNode string
ToolRegistry *newagenttools.ToolRegistry

View File

@@ -10,11 +10,11 @@ import (
"github.com/google/uuid"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -34,7 +34,7 @@ type PlanNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
UserInput string
Client *infrallm.Client
Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
ResumeNode string
AlwaysExecute bool // true 时计划生成后自动确认,不进入 confirm 节点
@@ -87,7 +87,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
reader, err := input.Client.Stream(
ctx,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 0.2,
// 显式设置上限,避免依赖框架默认值(默认 4096)导致长决策被截断。
// 注意:当前模型接口 max_tokens 上限为 131072超过会 400。
@@ -149,7 +149,7 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
return fmt.Errorf("规划解析失败,原始输出=%s", result.RawBuffer)
}
decision, parseErr := infrallm.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
decision, parseErr := llmservice.ParseJSONObject[newagentmodel.PlanDecision](result.DecisionJSON)
if parseErr != nil {
return fmt.Errorf("规划决策 JSON 解析失败: %w (raw=%s)", parseErr, result.RawBuffer)
}
@@ -390,9 +390,9 @@ func buildPinnedPlanText(steps []newagentmodel.PlanStep) string {
// resolveThinkingMode 根据配置布尔值返回对应的 ThinkingMode。
// 供 plan / execute / deliver 节点统一使用。
func resolveThinkingMode(enabled bool) infrallm.ThinkingMode {
func resolveThinkingMode(enabled bool) llmservice.ThinkingMode {
if enabled {
return infrallm.ThinkingModeEnabled
return llmservice.ThinkingModeEnabled
}
return infrallm.ThinkingModeDisabled
return llmservice.ThinkingModeDisabled
}

View File

@@ -8,13 +8,13 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
taskmodel "github.com/LoveLosita/smartflow/backend/model"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentrouter "github.com/LoveLosita/smartflow/backend/newAgent/router"
newagentshared "github.com/LoveLosita/smartflow/backend/newAgent/shared"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -30,7 +30,7 @@ type QuickTaskNodeInput struct {
RuntimeState *newagentmodel.AgentRuntimeState
ConversationContext *newagentmodel.ConversationContext
UserInput string
Client *infrallm.Client
Client *llmservice.Client
ChunkEmitter *newagentstream.ChunkEmitter
QuickTaskDeps newagentmodel.QuickTaskDeps
PersistVisibleMessage newagentmodel.PersistVisibleMessageFunc
@@ -77,7 +77,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
messages := newagentprompt.BuildQuickTaskMessagesSimple(input.UserInput)
// 2. 真流式调用 LLM。
reader, err := input.Client.Stream(ctx, messages, infrallm.GenerateOptions{
reader, err := input.Client.Stream(ctx, messages, llmservice.GenerateOptions{
Temperature: 0.3,
MaxTokens: 512,
})
@@ -130,7 +130,7 @@ func RunQuickTaskNode(ctx context.Context, input QuickTaskNodeInput) error {
// 解析 JSON。
log.Printf("[DEBUG] quick_task: LLM 原始决策 JSON chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
var parseErr error
decision, parseErr = infrallm.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
decision, parseErr = llmservice.ParseJSONObject[quickTaskDecision](result.DecisionJSON)
if parseErr != nil {
log.Printf("[DEBUG] quick_task: JSON 解析失败 chat=%s json=%s", flowState.ConversationID, result.DecisionJSON)
if result.RawBuffer != "" {

View File

@@ -6,11 +6,11 @@ import (
"fmt"
"log"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
"github.com/LoveLosita/smartflow/backend/pkg"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -22,7 +22,7 @@ import (
// 3. StageName 和 StatusBlockID 用于区分日志来源和 SSE 状态推送。
type UnifiedCompactInput struct {
// Client 用于调用 LLM 压缩 msg1/msg2。
Client *infrallm.Client
Client *llmservice.Client
// CompactionStore 用于持久化压缩摘要和 token 统计,为 nil 时跳过持久化。
CompactionStore newagentmodel.CompactionStore
// FlowState 提供 userID / chatID / roundUsed 等定位信息。

View File

@@ -4,7 +4,7 @@ import (
"context"
"fmt"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -24,7 +24,7 @@ const compactMsg1SystemPrompt = `你是一个对话压缩助手。你的任务
// existingSummary 不为空时表示已有旧摘要,需要合并压缩。
func CompactMsg1(
ctx context.Context,
client *infrallm.Client,
client *llmservice.Client,
historyText string,
existingSummary string,
) (string, error) {
@@ -49,7 +49,7 @@ func CompactMsg1(
schema.UserMessage(userContent),
}
result, err := client.GenerateText(ctx, messages, infrallm.GenerateOptions{
result, err := client.GenerateText(ctx, messages, llmservice.GenerateOptions{
MaxTokens: 4000,
})
if err != nil {

View File

@@ -4,7 +4,7 @@ import (
"context"
"fmt"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -23,7 +23,7 @@ const compactMsg2SystemPrompt = `你是一个执行记录压缩助手。你的
// recentText 是保留的近期记录原文,不参与压缩。
func CompactMsg2(
ctx context.Context,
client *infrallm.Client,
client *llmservice.Client,
earlyLoopText string,
) (string, error) {
userContent := fmt.Sprintf(`早期的 ReAct 执行记录:
@@ -36,7 +36,7 @@ func CompactMsg2(
schema.UserMessage(userContent),
}
result, err := client.GenerateText(ctx, messages, infrallm.GenerateOptions{
result, err := client.GenerateText(ctx, messages, llmservice.GenerateOptions{
MaxTokens: 4000,
})
if err != nil {

View File

@@ -26,7 +26,7 @@ var (
// StreamDecisionResult 描述解析器的最终输出状态。
type StreamDecisionResult struct {
// DecisionJSON 是标签内提取的完整 JSON 字符串。
// 调用方应使用 infrallm.ParseJSONObject[T] 将其解析为具体决策类型。
// 调用方应使用 llmservice.ParseJSONObject[T] 将其解析为具体决策类型。
DecisionJSON string
// BeforeText 是 <SMARTFLOW_DECISION> 标签之前的自然语言前言。
@@ -179,7 +179,7 @@ func (p *StreamDecisionParser) Result() *StreamDecisionResult {
}
// extractJSONFromTag 从标签内文本中提取第一个完整 JSON 对象。
// 复用括号计数逻辑,与 infrallm.ExtractJSONObject 一致。
// 复用括号计数逻辑,与 llmservice.ExtractJSONObject 一致。
func extractJSONFromTag(text string) string {
clean := strings.TrimSpace(text)
if clean == "" {

View File

@@ -1,10 +1,10 @@
package newagentshared
import infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
import llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
func ResolveThinkingMode(enabled bool) infrallm.ThinkingMode {
func ResolveThinkingMode(enabled bool) llmservice.ThinkingMode {
if enabled {
return infrallm.ThinkingModeEnabled
return llmservice.ThinkingModeEnabled
}
return infrallm.ThinkingModeDisabled
return llmservice.ThinkingModeDisabled
}

View File

@@ -6,11 +6,11 @@ import (
"fmt"
"log"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
"github.com/LoveLosita/smartflow/backend/pkg"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
)
@@ -22,7 +22,7 @@ import (
// 3. StageName 和 StatusBlockID 用于区分日志来源与 SSE 状态推送目标。
type UnifiedCompactInput struct {
// Client 用于调用 LLM 压缩 msg1/msg2。
Client *infrallm.Client
Client *llmservice.Client
// CompactionStore 用于持久化压缩摘要和 token 统计,为 nil 时跳过持久化。
CompactionStore newagentmodel.CompactionStore
// FlowState 提供 userID / conversationID / roundUsed 等定位信息。

View File

@@ -8,7 +8,7 @@ import (
"sync"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
// PayloadEmitter 是真正向外层 SSE 管道写 chunk 的最小接口。
@@ -540,7 +540,7 @@ func (e *ChunkEmitter) EmitDone() error {
// 3. 不负责打开/关闭 StreamReader调用方负责生命周期管理。
func (e *ChunkEmitter) EmitStreamAssistantText(
ctx context.Context,
reader infrallm.StreamReader,
reader llmservice.StreamReader,
blockID, stage string,
) (string, error) {
if e == nil || reader == nil {
@@ -598,7 +598,7 @@ func (e *ChunkEmitter) EmitStreamAssistantText(
// 用于只需展示思考过程而无需展示正文的场景。
func (e *ChunkEmitter) EmitStreamReasoningText(
ctx context.Context,
reader infrallm.StreamReader,
reader llmservice.StreamReader,
blockID, stage string,
) (string, error) {
if e == nil || reader == nil {

View File

@@ -5,9 +5,9 @@ import (
"sort"
"strings"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
"github.com/LoveLosita/smartflow/backend/newAgent/tools/schedule"
"github.com/LoveLosita/smartflow/backend/newAgent/tools/web"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
)
// ToolHandler 约定所有工具的统一执行签名。
@@ -32,7 +32,7 @@ type ToolSchemaEntry struct {
// 2. 某些依赖即便暂未使用也允许保留,避免业务层重新到处 new
// 3. 具体依赖缺失时由对应工具自行返回结构化失败结果。
type DefaultRegistryDeps struct {
RAGRuntime infrarag.Runtime
RAGRuntime ragservice.Runtime
// WebSearchProvider 为 nil 时web_search / web_fetch 仍会注册,
// 但 handler 会返回“暂未启用”的只读 observation不阻断主流程。

View File

@@ -3,8 +3,8 @@ package service
import (
"github.com/LoveLosita/smartflow/backend/dao"
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
"github.com/LoveLosita/smartflow/backend/inits"
"github.com/LoveLosita/smartflow/backend/service/agentsvc"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
// AgentService 是 service 层对 agentsvc.AgentService 的兼容别名。
@@ -20,7 +20,7 @@ type AgentService = agentsvc.AgentService
// 2) 主动调度 session DAO 也在这里显式透传,避免聊天入口再去回查全局单例;
// 3) 真实构造逻辑已下沉到 service/agentsvc 包。
func NewAgentService(
aiHub *inits.AIHub,
llmService *llmservice.Service,
repo *dao.AgentDAO,
taskRepo *dao.TaskDAO,
cacheDAO *dao.CacheDAO,
@@ -29,7 +29,7 @@ func NewAgentService(
activeSessionDAO *dao.ActiveScheduleSessionDAO,
eventPublisher outboxinfra.EventPublisher,
) *AgentService {
return agentsvc.NewAgentService(aiHub, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
return agentsvc.NewAgentService(llmService, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
}
// NewAgentServiceWithSchedule 在基础 AgentService 上注入排程依赖。
@@ -39,7 +39,7 @@ func NewAgentService(
// 2) 排程依赖为可选:未注入时排程路由自动回退到普通聊天;
// 3) 主动调度 session DAO 仍沿用统一构造注入,避免排程分支自己拼装仓储。
func NewAgentServiceWithSchedule(
aiHub *inits.AIHub,
llmService *llmservice.Service,
repo *dao.AgentDAO,
taskRepo *dao.TaskDAO,
cacheDAO *dao.CacheDAO,
@@ -50,7 +50,7 @@ func NewAgentServiceWithSchedule(
scheduleSvc *ScheduleService,
taskSvc *TaskService,
) *AgentService {
svc := agentsvc.NewAgentService(aiHub, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
svc := agentsvc.NewAgentService(llmService, repo, taskRepo, cacheDAO, agentRedis, activeScheduleDAO, activeSessionDAO, eventPublisher)
// 注入排程依赖:将 service 层方法包装为函数闭包,避免循环依赖。
if scheduleSvc != nil {

View File

@@ -3,6 +3,7 @@ package agentsvc
import (
"context"
"encoding/json"
"errors"
"log"
"strconv"
"strings"
@@ -11,7 +12,6 @@ import (
"github.com/LoveLosita/smartflow/backend/conv"
"github.com/LoveLosita/smartflow/backend/dao"
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
"github.com/LoveLosita/smartflow/backend/inits"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/memory/observe"
"github.com/LoveLosita/smartflow/backend/model"
@@ -20,13 +20,13 @@ import (
newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
"github.com/LoveLosita/smartflow/backend/pkg"
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
"github.com/cloudwego/eino-ext/components/model/ark"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
)
type AgentService struct {
AIHub *inits.AIHub
llmService *llmservice.Service
repo *dao.AgentDAO
taskRepo *dao.TaskDAO
cacheDAO *dao.CacheDAO
@@ -75,7 +75,7 @@ type AgentService struct {
// 这里通过依赖注入把“模型、仓储、缓存、异步持久化通道”统一交给服务层管理,
// 便于后续在单测中替换实现,或在启动流程中按环境切换配置。
func NewAgentService(
aiHub *inits.AIHub,
llmService *llmservice.Service,
repo *dao.AgentDAO,
taskRepo *dao.TaskDAO,
cacheDAO *dao.CacheDAO,
@@ -90,7 +90,7 @@ func NewAgentService(
ensureTokenMeterCallbackRegistered()
return &AgentService{
AIHub: aiHub,
llmService: llmService,
repo: repo,
taskRepo: taskRepo,
cacheDAO: cacheDAO,
@@ -123,8 +123,11 @@ func thinkingModeToBool(mode string) bool {
// 当前约定:
// - 旧链路已全面切到 newAgent graph这里仅作为 runNormalChatFlow 回退时的模型选择入口;
// - 统一返回 Pro 模型,旧 strategist 参数不再生效。
func (s *AgentService) pickChatModel(requestModel string) (*ark.ChatModel, string) {
return s.AIHub.Pro, "pro"
func (s *AgentService) pickChatModel(requestModel string) (*llmservice.Client, string) {
if s == nil || s.llmService == nil {
return nil, "pro"
}
return s.llmService.ProClient(), "pro"
}
// PersistChatHistory 是 Agent 聊天链路唯一的“消息持久化入口”。
@@ -304,7 +307,7 @@ func pushErrNonBlocking(errChan chan error, err error) {
// 2) 开启随口记进度推送后,最终判定“非随口记”时回落到普通聊天。
func (s *AgentService) runNormalChatFlow(
ctx context.Context,
selectedModel *ark.ChatModel,
selectedModel *llmservice.Client,
resolvedModelName string,
userMessage string,
userPersisted bool,
@@ -365,6 +368,12 @@ func (s *AgentService) runNormalChatFlow(
}
}
// 6.0. 没有可用模型时,直接中止普通聊天,避免写入半截用户消息后没有后续回复。
if selectedModel == nil {
pushErrNonBlocking(errChan, errors.New("llm client is not ready"))
return
}
// 6. 执行真正的流式聊天。
// fullText 用于后续写 Redis/持久化outChan 用于把流片段实时推给前端。
fullText, _, reasoningDurationSeconds, streamUsage, streamErr := s.streamChatFallback(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan, assistantReasoningStartedAt, userID, chatID)

View File

@@ -11,10 +11,8 @@ import (
"github.com/LoveLosita/smartflow/backend/model"
"github.com/LoveLosita/smartflow/backend/respond"
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
"github.com/cloudwego/eino-ext/components/model/ark"
einoModel "github.com/cloudwego/eino/components/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
)
const (
@@ -253,11 +251,11 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
}
// 2. 标题生成属于结构化短输出,关闭 thinking 并限制 tokens降低延迟与发散。
resp, err := modelInst.Generate(ctx, messages,
ark.WithThinking(&arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}),
einoModel.WithTemperature(0.2),
einoModel.WithMaxTokens(40),
)
resp, err := modelInst.GenerateText(ctx, messages, llmservice.GenerateOptions{
Temperature: 0.2,
MaxTokens: 40,
Thinking: llmservice.ThinkingModeDisabled,
})
if err != nil {
return "", 0, err
}
@@ -267,26 +265,26 @@ func (s *AgentService) generateConversationTitle(ctx context.Context, history []
// 2.1 标题链路的 token 从模型响应 usage 中提取;缺失则按 0 处理,不影响主流程。
titleTokens := 0
if resp.ResponseMeta != nil && resp.ResponseMeta.Usage != nil {
if resp.Usage != nil {
titleTokens = normalizeUsageTotal(
resp.ResponseMeta.Usage.TotalTokens,
resp.ResponseMeta.Usage.PromptTokens,
resp.ResponseMeta.Usage.CompletionTokens,
resp.Usage.TotalTokens,
resp.Usage.PromptTokens,
resp.Usage.CompletionTokens,
)
}
return normalizeConversationTitle(resp.Content), titleTokens, nil
return normalizeConversationTitle(resp.Text), titleTokens, nil
}
// pickTitleModel 选择用于标题生成的模型。
// 优先 Lite成本低、速度快Lite 不可用时回退 Pro。
func (s *AgentService) pickTitleModel() *ark.ChatModel {
if s.AIHub == nil {
func (s *AgentService) pickTitleModel() *llmservice.Client {
if s == nil || s.llmService == nil {
return nil
}
if s.AIHub.Lite != nil {
return s.AIHub.Lite
if client := s.llmService.LiteClient(); client != nil {
return client
}
return s.AIHub.Pro
return s.llmService.ProClient()
}
// buildConversationTitleUserPrompt 把消息历史拼成可读文本供模型总结。

View File

@@ -8,7 +8,6 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentconv "github.com/LoveLosita/smartflow/backend/newAgent/conv"
newagentgraph "github.com/LoveLosita/smartflow/backend/newAgent/graph"
newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
@@ -57,6 +56,11 @@ func (s *AgentService) runNewAgentGraph(
errChan chan error,
) {
requestCtx, _ := withRequestTokenMeter(ctx)
if s == nil || s.llmService == nil {
// 0. newAgent 主链强依赖 llm-service装配漏传时直接返回错误避免 nil receiver panic。
pushErrNonBlocking(errChan, errors.New("agent llm service is not initialized"))
return
}
// 1. 规范会话 ID 和模型选择。
chatID = normalizeConversationID(chatID)
@@ -184,14 +188,15 @@ func (s *AgentService) runNewAgentGraph(
}
graphRequest.Normalize()
// 8. 适配 LLM clients(从 AIHub 的 ark.ChatModel 转换为 newAgent LLM Client)。
// 8. 适配 LLM clients(统一从 llm-service 取出 newAgent 图所需模型,不再直接碰 AIHub)。
// 8.1 Chat/Deliver 使用 Pro 模型:路由分流、闲聊、交付总结属于标准复杂度。
// 8.2 Plan/Execute 使用 Max 模型:规划和 ReAct 循环需要深度推理能力。
chatClient := infrallm.WrapArkClient(s.AIHub.Pro)
planClient := infrallm.WrapArkClient(s.AIHub.Max)
executeClient := infrallm.WrapArkClient(s.AIHub.Max)
deliverClient := infrallm.WrapArkClient(s.AIHub.Pro)
summaryClient := infrallm.WrapArkClient(s.AIHub.Lite)
llmClients := s.llmService.NewAgentModelClients()
chatClient := llmClients.Chat
planClient := llmClients.Plan
executeClient := llmClients.Execute
deliverClient := llmClients.Deliver
summaryClient := llmClients.Summary
// 9. 适配 SSE emitter。
sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan)
@@ -244,8 +249,8 @@ func (s *AgentService) runNewAgentGraph(
log.Printf("[ERROR] newAgent graph 执行失败 trace=%s chat=%s: %v", traceID, chatID, graphErr)
pushErrNonBlocking(errChan, fmt.Errorf("graph 执行失败: %w", graphErr))
// Graph 出错时回退普通聊天,保证可用性。回退使用 Pro 模型。
s.runNormalChatFlow(requestCtx, s.AIHub.Pro, resolvedModelName, userMessage, true, "", nil, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
// Graph 出错时回退普通聊天,保证可用性。回退使用 llm-service 的 Pro 模型。
s.runNormalChatFlow(requestCtx, chatClient, resolvedModelName, userMessage, true, "", nil, thinkingModeToBool(thinkingMode), userID, chatID, traceID, requestStart, outChan, errChan)
return
}

View File

@@ -6,20 +6,18 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
"github.com/cloudwego/eino-ext/components/model/ark"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
)
// streamChatFallback 是 graph 执行失败时的降级流式聊天。
// 内联了旧 agentchat.StreamChat 的核心逻辑,不再依赖 agent/ 包。
func (s *AgentService) streamChatFallback(
ctx context.Context,
llm *ark.ChatModel,
llm *llmservice.Client,
modelName string,
userInput string,
ifThinking bool,
@@ -36,13 +34,6 @@ func (s *AgentService) streamChatFallback(
}
messages = append(messages, schema.UserMessage(userInput))
var thinking *ark.Thinking
if ifThinking {
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeEnabled}
} else {
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}
}
if strings.TrimSpace(modelName) == "" {
modelName = "smartflow-worker"
}
@@ -50,7 +41,11 @@ func (s *AgentService) streamChatFallback(
created := time.Now().Unix()
firstChunk := true
chunkEmitter := newagentstream.NewChunkEmitter(newagentstream.NewSSEPayloadEmitter(outChan), requestID, modelName, created)
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(infrallm.WrapArkClient(s.AIHub.Lite)))
reasoningSummaryClient := s.llmService.LiteClient()
if reasoningSummaryClient == nil {
reasoningSummaryClient = s.llmService.ProClient()
}
chunkEmitter.SetReasoningSummaryFunc(s.makeReasoningSummaryFunc(reasoningSummaryClient))
chunkEmitter.SetExtraEventHook(func(extra *newagentstream.OpenAIChunkExtra) {
s.persistNewAgentTimelineExtraEvent(context.Background(), userID, chatID, extra)
})
@@ -75,7 +70,14 @@ func (s *AgentService) streamChatFallback(
}
var reasoningEndAt *time.Time
reader, err := llm.Stream(ctx, messages, ark.WithThinking(thinking))
thinkingMode := llmservice.ThinkingModeDisabled
if ifThinking {
thinkingMode = llmservice.ThinkingModeEnabled
}
reader, err := llm.Stream(ctx, messages, llmservice.GenerateOptions{
Thinking: thinkingMode,
})
if err != nil {
return "", "", 0, nil, err
}

View File

@@ -6,9 +6,9 @@ import (
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
const reasoningSummaryMaxTokens = 700
@@ -24,7 +24,7 @@ type reasoningSummaryLLMResponse struct {
// 1. service 层负责选择模型与 promptstream 层只负责调度和闸门;
// 2. 这里不持久化摘要,持久化统一走 ChunkEmitter 的 extra hook
// 3. 摘要失败时返回 error由 ReasoningDigestor 吞掉并等待下一次水位线/Flush 兜底。
func (s *AgentService) makeReasoningSummaryFunc(client *infrallm.Client) newagentstream.ReasoningSummaryFunc {
func (s *AgentService) makeReasoningSummaryFunc(client *llmservice.Client) newagentstream.ReasoningSummaryFunc {
if client == nil {
return nil
}
@@ -47,14 +47,14 @@ func (s *AgentService) makeReasoningSummaryFunc(client *infrallm.Client) newagen
DurationSeconds: input.DurationSeconds,
})
resp, rawResult, err := infrallm.GenerateJSON[reasoningSummaryLLMResponse](
resp, rawResult, err := llmservice.GenerateJSON[reasoningSummaryLLMResponse](
ctx,
client,
messages,
infrallm.GenerateOptions{
llmservice.GenerateOptions{
Temperature: 0.1,
MaxTokens: reasoningSummaryMaxTokens,
Thinking: infrallm.ThinkingModeDisabled,
Thinking: llmservice.ThinkingModeDisabled,
Metadata: map[string]any{
"stage": "reasoning_summary",
"candidate_seq": input.CandidateSeq,
@@ -99,7 +99,7 @@ func limitReasoningDetailSummary(text string, maxRunes int) string {
return string(runes[:maxRunes])
}
func truncateReasoningSummaryRaw(raw *infrallm.TextResult) string {
func truncateReasoningSummaryRaw(raw *llmservice.TextResult) string {
if raw == nil {
return ""
}

View File

@@ -6,16 +6,16 @@ import (
"github.com/LoveLosita/smartflow/backend/conv"
"github.com/LoveLosita/smartflow/backend/dao"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
"github.com/LoveLosita/smartflow/backend/model"
"github.com/LoveLosita/smartflow/backend/respond"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
type CourseService struct {
// 伸出手:准备接住 DAO
courseDAO *dao.CourseDAO
scheduleDAO *dao.ScheduleDAO
courseImageResponsesClient *infrallm.ArkResponsesClient
courseImageResponsesClient *llmservice.ArkResponsesClient
courseImageConfig CourseImageParseConfig
courseImageModel string
}
@@ -24,7 +24,7 @@ type CourseService struct {
func NewCourseService(
courseDAO *dao.CourseDAO,
scheduleDAO *dao.ScheduleDAO,
courseImageResponsesClient *infrallm.ArkResponsesClient,
courseImageResponsesClient *llmservice.ArkResponsesClient,
courseImageConfig CourseImageParseConfig,
courseImageModel string,
) *CourseService {

View File

@@ -8,16 +8,20 @@ import (
"strings"
"time"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
"github.com/LoveLosita/smartflow/backend/model"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)
// ParseCourseTableImage 使用 Ark SDK Responses 解析课程表图片。
func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.CourseImageParseRequest) (*model.CourseImageParseResponse, error) {
if ss == nil || ss.courseImageResponsesClient == nil {
modelName := ""
if ss != nil {
modelName = ss.courseImageModel
}
log.Printf(
"[COURSE_PARSE][SERVICE] parser unavailable model_name=%q filename=%q mime=%q bytes=%d",
ss.courseImageModel,
modelName,
req.Filename,
req.MIMEType,
len(req.ImageBytes),
@@ -57,7 +61,7 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
base64Chars,
promptChars,
base64Chars+promptChars+len(strings.TrimSpace(courseImageParseSystemPrompt)),
infrallm.ThinkingModeDisabled,
llmservice.ThinkingModeDisabled,
courseImageParseTemperature,
ss.courseImageConfig.MaxTokens,
"json_object",
@@ -66,10 +70,10 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
// 1. 课程表图片识别输出体量大,显式透传 max_output_tokens,避免被默认值截断。
// 2. text_format 固定为 json_object,降低输出混入解释文本导致解析失败的概率。
// 3. thinking 显式关闭,优先保证课程导入链路稳定性。
draft, rawResult, err := infrallm.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, infrallm.ArkResponsesOptions{
draft, rawResult, err := llmservice.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, llmservice.ArkResponsesOptions{
Temperature: courseImageParseTemperature,
MaxOutputTokens: ss.courseImageConfig.MaxTokens,
Thinking: infrallm.ThinkingModeDisabled,
Thinking: llmservice.ThinkingModeDisabled,
TextFormat: "json_object",
})
if err != nil {
@@ -188,12 +192,12 @@ func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.Co
return normalizedDraft, nil
}
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]infrallm.ArkResponsesMessage, int, int) {
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]llmservice.ArkResponsesMessage, int, int) {
userPrompt := fmt.Sprintf(courseImageParseUserPromptTemplate, req.Filename, req.MIMEType)
base64Data := base64.StdEncoding.EncodeToString(req.ImageBytes)
imageDataURL := fmt.Sprintf("data:%s;base64,%s", req.MIMEType, base64Data)
messages := []infrallm.ArkResponsesMessage{
messages := []llmservice.ArkResponsesMessage{
{
Role: "system",
Text: strings.TrimSpace(courseImageParseSystemPrompt),
@@ -208,7 +212,7 @@ func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest)
return messages, len(base64Data), len(strings.TrimSpace(userPrompt))
}
func isCourseImageOutputTruncated(rawResult *infrallm.ArkResponsesResult) bool {
func isCourseImageOutputTruncated(rawResult *llmservice.ArkResponsesResult) bool {
if rawResult == nil {
return false
}

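课程表解析把图片编码成 data URL 再交给 Responses 入口,日志里也按 base64 字符数估算请求体量。下面是编辑补充的示意代码,只演示这一步编码,helper 名称 buildImageDataURL 为假设:

```go
package example

import (
	"encoding/base64"
	"fmt"
)

// buildImageDataURL 为示意函数:把原始图片字节编码为 data URL,
// 并返回 base64 字符数,便于像 ParseCourseTableImage 一样在日志里估算请求体量。
func buildImageDataURL(mimeType string, imageBytes []byte) (string, int) {
	base64Data := base64.StdEncoding.EncodeToString(imageBytes)
	dataURL := fmt.Sprintf("data:%s;base64,%s", mimeType, base64Data)
	return dataURL, len(base64Data)
}
```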
View File

@@ -1,7 +1,3 @@
// 过渡期统一 Ark 调用封装。
//
// 这里保留 CallArkText / CallArkJSON,方便暂时还直接持有 *ark.ChatModel 的调用点
// 逐步迁移到统一 Client。后续 memory 也可以直接复用这套中立层。
package llm
import (
@@ -15,12 +11,7 @@ import (
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
)
// ArkCallOptions 是基于 ark.ChatModel 的通用调用选项
//
// 设计目的:
// 1. 先把 Ark 调用样板抽成公共层;
// 2. 再由 WrapArkClient 提供统一 Client;
// 3. 让上层尽量只关注业务 prompt 和结构化结果。
// ArkCallOptions 是直接调用 ark.ChatModel 时使用的通用入参
type ArkCallOptions struct {
Temperature float64
MaxTokens int
@@ -28,12 +19,6 @@ type ArkCallOptions struct {
}
// CallArkText 调用 ark 模型并返回纯文本。
//
// 职责边界:
// 1. 负责拼 system + user 两段消息;
// 2. 负责统一配置 thinking / temperature / maxTokens;
// 3. 负责拦截空响应;
// 4. 不负责 JSON 解析,不负责业务字段校验。
func CallArkText(ctx context.Context, chatModel *ark.ChatModel, systemPrompt, userPrompt string, options ArkCallOptions) (string, error) {
if chatModel == nil {
return "", errors.New("ark model is nil")
@@ -76,6 +61,7 @@ func buildArkOptions(options ArkCallOptions) []einoModel.Option {
if options.Thinking == ThinkingModeEnabled {
thinkingType = arkModel.ThinkingTypeEnabled
}
opts := []einoModel.Option{
ark.WithThinking(&arkModel.Thinking{Type: thinkingType}),
einoModel.WithTemperature(float32(options.Temperature)),

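对仍直接持有 *ark.ChatModel 的调用点,CallArkText 把 system/user 消息拼装和 thinking/temperature/maxTokens 配置收在一处。下面是编辑补充的示意调用,prompt 内容为虚构,ark 的导入路径按 eino-ext 的常见路径假设:

```go
package example

import (
	"context"

	"github.com/cloudwego/eino-ext/components/model/ark"
	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// summarizeWithArk 为示意函数:直接基于 ark.ChatModel 发起一次非流式文本调用。
func summarizeWithArk(ctx context.Context, chatModel *ark.ChatModel, content string) (string, error) {
	return llmservice.CallArkText(ctx, chatModel,
		"你是一个日程摘要助手,只输出一句话结论。", // system prompt,示例内容
		content, // user prompt
		llmservice.ArkCallOptions{
			Temperature: 0.2,
			MaxTokens:   200,
			Thinking:    llmservice.ThinkingModeDisabled,
		})
}
```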
View File

@@ -12,17 +12,14 @@ import (
)
// WrapArkClient 将 ark.ChatModel 适配为统一 Client。
//
// 职责边界:
// 1. generateText:调用 ark.ChatModel.Generate(非流式),供 GenerateJSON 使用;
// 2. streamText:调用 ark.ChatModel.Stream(流式),供需要流式输出的场景使用;
// 3. 两者共用同一套 options 转换。
// 1. generateText 走 Generate,供 GenerateJSON/GenerateText 使用。
// 2. streamText 走 Stream,供需要流式输出的场景使用。
// 3. 两条路径共用同一套参数转换逻辑。
func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
if arkChatModel == nil {
return nil
}
// 非流式文本生成,供 GenerateJSON / GenerateText 调用路径使用。
generateFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) {
arkOpts := buildArkStreamOptions(options)
msg, err := arkChatModel.Generate(ctx, messages, arkOpts...)
@@ -47,7 +44,6 @@ func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
}, nil
}
// 流式文本生成。
streamFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) {
arkOpts := buildArkStreamOptions(options)
reader, err := arkChatModel.Stream(ctx, messages, arkOpts...)
@@ -60,11 +56,10 @@ func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
return NewClient(generateFunc, streamFunc)
}
// buildArkStreamOptions 将统一 GenerateOptions 转换为 ark 的流式调用选项
// buildArkStreamOptions 将统一 GenerateOptions 转换为 ark 的流式调用参数
func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
thinkingEnabled := options.Thinking == ThinkingModeEnabled
// Thinking
thinkingType := arkModel.ThinkingTypeDisabled
if thinkingEnabled {
thinkingType = arkModel.ThinkingTypeEnabled
@@ -73,16 +68,12 @@ func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
ark.WithThinking(&arkModel.Thinking{Type: thinkingType}),
}
// Temperature:thinking 模型强制要求 temperature=1,否则 API 静默忽略 thinking。
if thinkingEnabled {
opts = append(opts, einoModel.WithTemperature(1.0))
} else if options.Temperature > 0 {
opts = append(opts, einoModel.WithTemperature(float32(options.Temperature)))
}
// MaxTokens:thinking 模式下 thinking token 占用 max_tokens 预算,
// 调用方设定的值仅代表"期望输出长度",实际预算需留出思考空间。
// 最低保障 16000,避免思考链被截断导致输出为空或非 JSON。
maxTokens := options.MaxTokens
if thinkingEnabled {
const minThinkingBudget = 16000
@@ -97,14 +88,12 @@ func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
return opts
}
// arkStreamReaderAdapter 适配 ark.ChatModel.Stream 返回的 reader。
// ark.Stream 返回 schema.StreamReader[*schema.Message],其 Close() 方法无返回值
// 而我们的 StreamReader 接口要求 Close() error
// arkStreamReaderAdapter 把 ark 的流式 reader 转成统一的 StreamReader 接口。
type arkStreamReaderAdapter struct {
reader *schema.StreamReader[*schema.Message]
}
// Recv 转发到 ark reader 的 Recv 方法
// Recv 转发到底层 reader。
func (r *arkStreamReaderAdapter) Recv() (*schema.Message, error) {
if r == nil || r.reader == nil {
return nil, io.EOF
@@ -112,8 +101,7 @@ func (r *arkStreamReaderAdapter) Recv() (*schema.Message, error) {
return r.reader.Recv()
}
// Close 转发到 ark reader 的 Close 方法
// ark 的 Close() 无返回值,我们适配为返回 nil
// Close 适配 ark reader 的 Close 行为
func (r *arkStreamReaderAdapter) Close() error {
if r == nil || r.reader == nil {
return nil

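buildArkStreamOptions 对 thinking 模式有两条强约束:temperature 固定为 1,max_tokens 至少 16000。下面是编辑补充的示意调用,用来说明调用方传入的 Temperature / MaxTokens 在开启 thinking 时会被这一层改写,具体数值以上面实现为准:

```go
package example

import (
	"context"

	"github.com/cloudwego/eino-ext/components/model/ark"
	"github.com/cloudwego/eino/schema"
	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// generateWithThinking 为示意函数:经 WrapArkClient 得到统一 Client 后发起一次开启 thinking 的调用。
// 注意:即使这里写了 Temperature=0.3、MaxTokens=800,适配层也会按 thinking 规则改写成 1.0 / 16000。
func generateWithThinking(ctx context.Context, arkChatModel *ark.ChatModel, messages []*schema.Message) (*llmservice.TextResult, error) {
	client := llmservice.WrapArkClient(arkChatModel)
	return client.GenerateText(ctx, messages, llmservice.GenerateOptions{
		Temperature: 0.3,
		MaxTokens:   800,
		Thinking:    llmservice.ThinkingModeEnabled,
	})
}
```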
View File

@@ -11,11 +11,6 @@ import (
)
// ArkResponsesMessage 描述一次 Responses 输入消息。
//
// 职责边界:
// 1. 负责表达角色与多模态内容(文本/图片);
// 2. 不负责业务 prompt 生成;
// 3. 不负责输出 JSON 的字段校验。
type ArkResponsesMessage struct {
Role string
Text string
@@ -23,7 +18,7 @@ type ArkResponsesMessage struct {
ImageDetail string
}
// ArkResponsesOptions 描述 Responses 生成选项
// ArkResponsesOptions 描述 Responses 调用参数
type ArkResponsesOptions struct {
Model string
Temperature float64
@@ -32,14 +27,14 @@ type ArkResponsesOptions struct {
TextFormat string
}
// ArkResponsesUsage 统一透传 token 使用量
// ArkResponsesUsage 统一转写 token usage
type ArkResponsesUsage struct {
InputTokens int64
OutputTokens int64
TotalTokens int64
}
// ArkResponsesResult 是 Ark Responses 的统一输出结构。
// ArkResponsesResult 是 Responses 调用的统一输出结构。
type ArkResponsesResult struct {
Text string
Status string
@@ -56,11 +51,9 @@ type ArkResponsesClient struct {
}
// NewArkResponsesClient 创建 Ark SDK Responses 客户端。
//
// 说明:
// 1. model 为空时返回 nil,表示当前能力未启用;
// 2. baseURL 为空时使用 SDK 默认地址;
// 3. 仅负责客户端创建,不做连通性探测。
// 1. model 为空时直接返回 nil,表示这条能力没有启用。
// 2. baseURL 为空时使用 SDK 默认地址。
// 3. 这里只负责本地构造,不做连通性探测。
func NewArkResponsesClient(apiKey string, baseURL string, model string) *ArkResponsesClient {
model = strings.TrimSpace(model)
if model == "" {
@@ -104,7 +97,7 @@ func (c *ArkResponsesClient) GenerateText(ctx context.Context, messages []ArkRes
return result, nil
}
// GenerateArkResponsesJSON 先调用 Responses,再解析 JSON 结构体。
// GenerateArkResponsesJSON 先调用 Responses,再解析 JSON 结构体。
func GenerateArkResponsesJSON[T any](ctx context.Context, client *ArkResponsesClient, messages []ArkResponsesMessage, options ArkResponsesOptions) (*T, *ArkResponsesResult, error) {
if client == nil {
return nil, nil, errors.New("ark responses client is not ready")

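GenerateArkResponsesJSON 的用法与聊天侧的 GenerateJSON 一致:先拿统一文本结果,再按业务结构体反序列化。下面是编辑补充的示意调用,结构体 demoDraft 与 prompt 均为虚构,ArkResponsesResult 只用到了上文出现过的 Status 字段:

```go
package example

import (
	"context"
	"fmt"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// demoDraft 为示意结构体,对应模型约定输出的 JSON 字段。
type demoDraft struct {
	Title string `json:"title"`
}

func parseDraft(ctx context.Context, client *llmservice.ArkResponsesClient, userText string) (*demoDraft, error) {
	messages := []llmservice.ArkResponsesMessage{
		{Role: "system", Text: "只输出 JSON 对象,字段为 title。"},
		{Role: "user", Text: userText},
	}
	draft, rawResult, err := llmservice.GenerateArkResponsesJSON[demoDraft](ctx, client, messages, llmservice.ArkResponsesOptions{
		Temperature:     0.1,
		MaxOutputTokens: 400,
		Thinking:        llmservice.ThinkingModeDisabled,
		TextFormat:      "json_object",
	})
	if err != nil {
		return nil, err
	}
	if rawResult != nil && rawResult.Status != "" {
		// Status 可用于判断本次 Responses 调用是否正常完成。
		fmt.Println("responses status:", rawResult.Status)
	}
	return draft, nil
}
```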
View File

@@ -9,12 +9,7 @@ import (
"github.com/cloudwego/eino/schema"
)
// ThinkingMode 描述单次模型调用对 thinking 的期望。
//
// 职责边界:
// 1. 这里只表达“调用方希望怎样配置推理模式”;
// 2. 不直接绑定某个具体模型厂商的参数枚举;
// 3. 真正如何把它翻译成 ark / OpenAI / 其他 provider 的 option由后续适配层负责。
// ThinkingMode 描述单次模型调用对 thinking 的期望。
type ThinkingMode string
const (
@@ -23,12 +18,7 @@ const (
ThinkingModeDisabled ThinkingMode = "disabled"
)
// GenerateOptions 统一模型调用选项
//
// 设计目的:
// 1. 先把“每个 skill / worker 都会反复传的参数”收敛成一份结构;
// 2. 让上层以后只表达“我要什么”,不再自己重复组织 option;
// 3. 暂时不追求覆盖所有 provider 参数,先把最常用的几个公共位抽出来。
// GenerateOptions 统一收敛文本调用时最常见的公共参数
type GenerateOptions struct {
Temperature float64
MaxTokens int
@@ -36,40 +26,32 @@ type GenerateOptions struct {
Metadata map[string]any
}
// TextResult 是统一文本生成结果
//
// 职责边界:
// 1. Text 保存模型最终返回的纯文本;
// 2. Usage 保存本次调用的 token 使用量,供后续统一统计;
// 3. 不负责 JSON 解析,不负责业务字段映射。
// TextResult 保存一次文本生成的最终结果和 usage。
// 1. Text 存放模型返回的纯文本。
// 2. Usage 方便上层做统一统计。
// 3. 这里不负责 JSON 解析,也不负责业务字段映射。
type TextResult struct {
Text string
Usage *schema.TokenUsage
// FinishReason 透传 provider 的停止原因,便于上层判断是否因 length 等原因被截断。
Text string
Usage *schema.TokenUsage
FinishReason string
}
// StreamReader 抽象了“可逐块 Recv 的流式返回器”。
//
// 之所以不直接依赖某个具体 SDK 的 reader 类型,是因为现在还处在骨架收敛阶段,
// 后续接 ark、OpenAI 兼容层还是别的 provider都可以往这个最小接口上适配。
// StreamReader 抽象可以逐块读取消息的流式返回器。
type StreamReader interface {
Recv() (*schema.Message, error)
Close() error
}
// TextGenerateFunc 是文本生成的统一适配函数签名。
// TextGenerateFunc 定义统一文本生成函数签名。
type TextGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error)
// StreamGenerateFunc 是流式生成的统一适配函数签名。
// StreamGenerateFunc 定义统一流式生成函数签名。
type StreamGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error)
// Client 是统一模型客户端门面。
//
// 职责边界:
// 1. 负责把调用方的“模型调用意图”收敛到统一入口;
// 2. 负责统一参数校验、空响应防御、GenerateJSON 复用;
// 3. 不负责写 prompt不负责业务 fallback也不直接持有具体厂商 SDK 细节。
// 1. 只做最小输入校验和空响应防御。
// 2. 不负责 prompt 拼装,也不负责业务 fallback。
// 3. 具体 provider 的细节由上层适配器收敛进来。
type Client struct {
generateText TextGenerateFunc
streamText StreamGenerateFunc
@@ -84,11 +66,6 @@ func NewClient(generateText TextGenerateFunc, streamText StreamGenerateFunc) *Cl
}
// GenerateText 执行一次统一文本生成。
//
// 职责边界:
// 1. 负责做最小必要的入参校验;
// 2. 负责统一拦截“模型空响应”这类公共问题;
// 3. 不负责业务 prompt 拼接,也不负责把文本再映射成业务结构。
func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) {
if c == nil || c.generateText == nil {
return nil, errors.New("llm client is not ready")
@@ -111,11 +88,6 @@ func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, o
}
// GenerateJSON 先走统一文本生成,再走统一 JSON 解析。
//
// 设计说明:
// 1. 把“Generate -> 提取 JSON -> 反序列化”这段公共链路收敛起来;
// 2. 上层只关心业务结构,不需要重复实现解析样板;
// 3. 返回 parsed + rawResult方便打点与回退时保留原文。
func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema.Message, options GenerateOptions) (*T, *TextResult, error) {
result, err := client.GenerateText(ctx, messages, options)
if err != nil {
@@ -130,11 +102,6 @@ func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema
}
// Stream 打开统一流式调用入口。
//
// 职责边界:
// 1. 只负责把“流式生成能力”暴露给上层;
// 2. 不负责 chunk 到 OpenAI 协议的转换,那部分应放在 stream/
// 3. 不负责累计全文,也不负责 token 统计落库。
func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) {
if c == nil || c.streamText == nil {
return nil, errors.New("llm stream client is not ready")
@@ -145,12 +112,7 @@ func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options
return c.streamText(ctx, messages, options)
}
// BuildSystemUserMessages 构造最常见的(system + history + user)消息列表。
//
// 设计说明:
// 1. 先把最稳定的消息编排方式沉淀下来,减少各业务域样板代码;
// 2. 只做消息切片装配,不做 prompt 生成;
// 3. 供 agent / memory 等多个能力域复用。
// BuildSystemUserMessages 构造最常见的 system + history + user 消息列表。
func BuildSystemUserMessages(systemPrompt string, history []*schema.Message, userPrompt string) []*schema.Message {
messages := make([]*schema.Message, 0, len(history)+2)
if strings.TrimSpace(systemPrompt) != "" {
@@ -165,7 +127,7 @@ func BuildSystemUserMessages(systemPrompt string, history []*schema.Message, use
return messages
}
// CloneUsage 深拷贝 token usage,避免后续多处累加时共享同一指针。
// CloneUsage 深拷贝 token usage,避免后续累加时共享同一指针。
func CloneUsage(usage *schema.TokenUsage) *schema.TokenUsage {
if usage == nil {
return nil
@@ -174,12 +136,7 @@ func CloneUsage(usage *schema.TokenUsage) *schema.TokenUsage {
return &copied
}
// MergeUsage 合并两段 usage。
//
// 合并策略:
// 1. 对“同一次调用不同流分片”的场景,取更大值作为最终值;
// 2. 对“多次独立调用累计”的场景,应由上层显式做加法,而不是用这个函数;
// 3. 该函数只适用于“同一次调用的分块 usage 收敛”。
// MergeUsage 合并两段 usage,取各字段更大的值作为累计结果。
func MergeUsage(base *schema.TokenUsage, incoming *schema.TokenUsage) *schema.TokenUsage {
if incoming == nil {
return CloneUsage(base)
@@ -207,7 +164,7 @@ func MergeUsage(base *schema.TokenUsage, incoming *schema.TokenUsage) *schema.To
return &merged
}
// FormatEmptyResponseError 统一生成“模型返回空结果”的错误文案。
// FormatEmptyResponseError 统一模型空结果的错误文案。
func FormatEmptyResponseError(scene string) error {
scene = strings.TrimSpace(scene)
if scene == "" {

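把 BuildSystemUserMessages 与 GenerateJSON 串起来,就是各业务域目前最常见的调用形态。下面是编辑补充的示意写法,结构体 demoAnswer 与 prompt 为虚构:

```go
package example

import (
	"context"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// demoAnswer 为示意结构体,描述模型约定返回的 JSON。
type demoAnswer struct {
	Summary string `json:"summary"`
}

func askForSummary(ctx context.Context, client *llmservice.Client, userPrompt string) (*demoAnswer, *llmservice.TextResult, error) {
	messages := llmservice.BuildSystemUserMessages(
		"你是摘要助手,只输出 JSON,字段为 summary。", // system prompt,示例内容
		nil, // history
		userPrompt,
	)
	return llmservice.GenerateJSON[demoAnswer](ctx, client, messages, llmservice.GenerateOptions{
		Temperature: 0.1,
		MaxTokens:   300,
		Thinking:    llmservice.ThinkingModeDisabled,
		Metadata:    map[string]any{"stage": "example_summary"},
	})
}
```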
View File

@@ -7,12 +7,10 @@ import (
"strings"
)
// ParseJSONObject 解析模型返回中的 JSON 对象。
//
// 职责边界:
// 1. 负责处理“模型输出前后夹杂解释文字 / markdown 代码块”的常见情况;
// 2. 负责提取最外层 JSON object 并反序列化为目标结构;
// 3. 不负责业务字段合法性校验,应由上层调用方自行校验。
// ParseJSONObject 解析模型返回内容中的 JSON 对象。
// 1. 先剥离常见的 markdown 代码块包装。
// 2. 再从混合文本里提取最外层 JSON 对象。
// 3. 这里只负责结构解析,不负责字段合法性校验。
func ParseJSONObject[T any](raw string) (*T, error) {
clean := strings.TrimSpace(raw)
if clean == "" {
@@ -31,12 +29,7 @@ func ParseJSONObject[T any](raw string) (*T, error) {
return &out, nil
}
// ExtractJSONObject 从混合文本提取第一个完整 JSON 对象。
//
// 设计说明:
// 1. LLM 很容易输出“这里是结果:{...}”这种半结构化文本;
// 2. 这里用括号计数而不是正则,避免嵌套对象一多就误截断;
// 3. 目前只提取 object不提取 array因为当前契约基本都是对象。
// ExtractJSONObject 从混合文本提取第一个完整 JSON 对象。
func ExtractJSONObject(text string) string {
clean := trimMarkdownCodeFence(strings.TrimSpace(text))
if clean == "" {
@@ -94,9 +87,6 @@ func trimMarkdownCodeFence(text string) string {
return trimmed
}
// 1. 去掉首行 ```json / ```
// 2. 若末行是 ```,一并去掉;
// 3. 中间正文保持原样,避免破坏 JSON 的换行结构。
body := lines[1:]
if len(body) > 0 && strings.TrimSpace(body[len(body)-1]) == "```" {
body = body[:len(body)-1]

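ParseJSONObject 针对的正是“解释文字夹着 JSON”的输出形态。下面是编辑补充的示意用法,输入字符串为虚构:

```go
package example

import (
	"fmt"

	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// demoResult 为示意结构体,对应模型约定输出的 JSON 字段。
type demoResult struct {
	OK bool `json:"ok"`
}

func demoParse() error {
	// 模型常见输出:前后夹杂解释文字,中间才是 JSON 对象。
	raw := "这里是结果:{\"ok\": true},请查收。"
	parsed, err := llmservice.ParseJSONObject[demoResult](raw)
	if err != nil {
		return err
	}
	fmt.Println(parsed.OK) // true
	return nil
}
```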
View File

@@ -0,0 +1,109 @@
package llm
import (
"strings"
"github.com/LoveLosita/smartflow/backend/inits"
)
// Service 只负责统一暴露已经构造好的模型客户端,不负责 prompt 和业务编排。
type Service struct {
liteClient *Client
proClient *Client
maxClient *Client
courseImageResponsesClient *ArkResponsesClient
}
// Options 描述 llm-service 初始化时需要接管的启动期依赖。
// 1. AIHub 仍然是当前进程内 Ark ChatModel 的来源,但服务层只保存统一 Client。
// 2. CourseImageResponsesClient 允许外部预先注入,便于测试或特殊启动路径复用。
// 3. 某个字段为空时不报错,直接保留 nil,交给上层继续走兼容降级。
type Options struct {
AIHub *inits.AIHub
APIKey string
BaseURL string
CourseVisionModel string
CourseImageResponsesClient *ArkResponsesClient
}
// AgentModelClients 一次性暴露 newAgent 图常用的模型分配结果。
type AgentModelClients struct {
Chat *Client
Plan *Client
Execute *Client
Deliver *Client
Summary *Client
}
// New 构造 llm-service。
// 1. 不返回 error,是为了让上层继续按 nil 客户端做逐步降级。
// 2. 只要 AIHub 已初始化,就把其中的 ChatModel 收敛成统一 Client。
// 3. 课程图片解析客户端在这里统一构建,避免业务层直接依赖 Responses SDK。
func New(opts Options) *Service {
svc := &Service{}
if opts.AIHub != nil {
svc.liteClient = WrapArkClient(opts.AIHub.Lite)
svc.proClient = WrapArkClient(opts.AIHub.Pro)
svc.maxClient = WrapArkClient(opts.AIHub.Max)
}
if opts.CourseImageResponsesClient != nil {
svc.courseImageResponsesClient = opts.CourseImageResponsesClient
} else {
apiKey := strings.TrimSpace(opts.APIKey)
baseURL := strings.TrimSpace(opts.BaseURL)
model := strings.TrimSpace(opts.CourseVisionModel)
if apiKey != "" && model != "" {
svc.courseImageResponsesClient = NewArkResponsesClient(apiKey, baseURL, model)
}
}
return svc
}
// LiteClient 返回低成本短输出模型客户端。
func (s *Service) LiteClient() *Client {
if s == nil {
return nil
}
return s.liteClient
}
// ProClient 返回默认复杂对话模型客户端。
func (s *Service) ProClient() *Client {
if s == nil {
return nil
}
return s.proClient
}
// MaxClient 返回深度推理模型客户端。
func (s *Service) MaxClient() *Client {
if s == nil {
return nil
}
return s.maxClient
}
// CourseImageResponsesClient 返回课程图片解析所用的 Responses 客户端。
func (s *Service) CourseImageResponsesClient() *ArkResponsesClient {
if s == nil {
return nil
}
return s.courseImageResponsesClient
}
// NewAgentModelClients 一次性返回 newAgent 图里常用的模型分配。
func (s *Service) NewAgentModelClients() AgentModelClients {
if s == nil {
return AgentModelClients{}
}
return AgentModelClients{
Chat: s.ProClient(),
Plan: s.MaxClient(),
Execute: s.MaxClient(),
Deliver: s.ProClient(),
Summary: s.LiteClient(),
}
}

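llm-service 的装配就是一次 New 加上按场景取客户端。下面是编辑补充的示意装配,参数来源(apiKey / baseURL / 视觉模型名)按启动层的常见形态假设:

```go
package example

import (
	"github.com/LoveLosita/smartflow/backend/inits"
	llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
)

// buildLLMService 为示意函数:把 AIHub 与课程图片解析配置收敛为 llm-service,
// 再按 newAgent 的模型分配一次性取出各节点客户端。
func buildLLMService(aiHub *inits.AIHub, apiKey, baseURL, courseVisionModel string) (*llmservice.Service, llmservice.AgentModelClients) {
	svc := llmservice.New(llmservice.Options{
		AIHub:             aiHub,
		APIKey:            apiKey,
		BaseURL:           baseURL,
		CourseVisionModel: courseVisionModel,
	})
	return svc, svc.NewAgentModelClients()
}
```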
View File

@@ -5,7 +5,7 @@ import (
"time"
)
// Runtime 是 RAG Infra 对业务侧暴露的唯一稳定方法面。
// Runtime 是 RAG service 对业务侧暴露的唯一稳定方法面。
//
// 职责边界:
// 1. 负责承接 memory/web 两类语料的统一入库与检索入口;

View File

@@ -5,7 +5,7 @@ import (
"fmt"
"strings"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
// TextChunker 是默认文本切块器。

View File

@@ -21,7 +21,7 @@ const (
// ObserveEvent 描述一次统一观测事件。
//
// 职责边界:
// 1. 只承载 RAG Infra 的结构化运行信息;
// 1. 只承载 RAG service 的结构化运行信息;
// 2. 不绑定具体日志系统、指标系统或 tracing 实现;
// 3. 字段内容应尽量稳定,便于后续统一接入全局观测平台。
type ObserveEvent struct {
@@ -31,7 +31,7 @@ type ObserveEvent struct {
Fields map[string]any
}
// Observer 是 RAG Infra 的最小观测接口。
// Observer 是 RAG service 的最小观测接口。
//
// 职责边界:
// 1. 负责消费结构化事件;

View File

@@ -7,7 +7,7 @@ import (
"strings"
"time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
const memoryCorpusName = "memory"

View File

@@ -7,7 +7,7 @@ import (
"strings"
"time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
const webCorpusName = "web"

View File

@@ -7,12 +7,12 @@ import (
"os"
"strings"
ragchunk "github.com/LoveLosita/smartflow/backend/infra/rag/chunk"
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
ragembed "github.com/LoveLosita/smartflow/backend/infra/rag/embed"
ragrerank "github.com/LoveLosita/smartflow/backend/infra/rag/rerank"
ragstore "github.com/LoveLosita/smartflow/backend/infra/rag/store"
ragchunk "github.com/LoveLosita/smartflow/backend/services/rag/chunk"
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
ragembed "github.com/LoveLosita/smartflow/backend/services/rag/embed"
ragrerank "github.com/LoveLosita/smartflow/backend/services/rag/rerank"
ragstore "github.com/LoveLosita/smartflow/backend/services/rag/store"
)
// FactoryDeps 描述 Runtime 工厂所需的可选依赖。

View File

@@ -3,7 +3,7 @@ package rag
import (
"log"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
// ObserveLevel 对外暴露统一观测等级别名,避免启动层直接依赖 core 细节。

View File

@@ -1,11 +1,11 @@
package rag
import (
"github.com/LoveLosita/smartflow/backend/infra/rag/chunk"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/infra/rag/embed"
"github.com/LoveLosita/smartflow/backend/infra/rag/rerank"
"github.com/LoveLosita/smartflow/backend/infra/rag/store"
"github.com/LoveLosita/smartflow/backend/services/rag/chunk"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/embed"
"github.com/LoveLosita/smartflow/backend/services/rag/rerank"
"github.com/LoveLosita/smartflow/backend/services/rag/store"
)
// NewDefaultPipeline 构造默认可运行的 RAG Pipeline。

View File

@@ -4,7 +4,7 @@ import (
"context"
"errors"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
// EinoReranker 是 Eino 重排器占位实现。

View File

@@ -4,7 +4,7 @@ import (
"context"
"sort"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
// NoopReranker 是默认重排器(仅按原 score 排序)。

View File

@@ -5,7 +5,7 @@ import (
"fmt"
"strings"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
// VectorRetriever 是通用检索器(embed + vector search)。

View File

@@ -7,9 +7,9 @@ import (
"strings"
"time"
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/infra/rag/corpus"
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/corpus"
)
type runtime struct {
@@ -343,7 +343,7 @@ func (r *runtime) recoverPublicPanic(
return
}
// 1. runtime 是 RAG Infra 对业务侧暴露的最终方法面,任何下层 panic 都不应再穿透到业务协程。
// 1. runtime 是 RAG service 对业务侧暴露的最终方法面,任何下层 panic 都不应再穿透到业务协程。
// 2. 这里统一把 panic 转成 error,并补一条结构化观测,方便继续排查是哪一层依赖失控。
// 3. 保留 stack 是为了在“进程不崩”的前提下仍能定位根因,避免只剩一句 recovered 无法复盘。
panicErr := fmt.Errorf("rag runtime panic recovered: corpus=%s operation=%s panic=%v", corpusName, operation, recovered)

View File

@@ -0,0 +1,111 @@
package rag
import (
"context"
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
)
// Options 描述 rag-service 需要持有的底层运行时。
type Options struct {
Runtime Runtime
}
// Service 是 rag-service 对外暴露的统一入口。
//
// 职责边界:
// 1. 负责持有运行时,并把 memory / web 两条能力线统一收口到服务层。
// 2. 负责在服务入口内完成基于配置的运行时装配。
// 3. 不直接承载 chunk / embed / store 的实现细节,这些细节下沉到服务树内部子包。
type Service struct {
runtime Runtime
}
// New 使用调用方传入的运行时构造服务。
func New(opts Options) *Service {
return &Service{runtime: opts.Runtime}
}
// NewFromConfig 基于服务树内的配置与工厂能力构造自给自足的 RAG 服务。
func NewFromConfig(ctx context.Context, cfg ragconfig.Config, deps FactoryDeps) (*Service, error) {
if !cfg.Enabled {
return New(Options{}), nil
}
runtime, err := NewRuntimeFromConfig(ctx, cfg, deps)
if err != nil {
return nil, err
}
return NewWithRuntime(runtime), nil
}
// Runtime 返回当前服务持有的运行时。
func (s *Service) Runtime() Runtime {
if s == nil {
return nil
}
return s.runtime
}
// IngestMemory 写入记忆语料。
func (s *Service) IngestMemory(ctx context.Context, req MemoryIngestRequest) (*IngestResult, error) {
if s == nil || s.runtime == nil {
return nil, nil
}
return s.runtime.IngestMemory(ctx, req)
}
// RetrieveMemory 检索记忆语料。
func (s *Service) RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest) (*RetrieveResult, error) {
if s == nil || s.runtime == nil {
return nil, nil
}
return s.runtime.RetrieveMemory(ctx, req)
}
// DeleteMemory 删除指定记忆文档。
func (s *Service) DeleteMemory(ctx context.Context, documentIDs []string) error {
if s == nil || s.runtime == nil {
return nil
}
if ctx == nil {
ctx = context.Background()
}
return s.runtime.DeleteMemory(ctx, documentIDs)
}
// IngestWeb 写入网页语料。
func (s *Service) IngestWeb(ctx context.Context, req WebIngestRequest) (*IngestResult, error) {
if s == nil || s.runtime == nil {
return nil, nil
}
return s.runtime.IngestWeb(ctx, req)
}
// RetrieveWeb 检索网页语料。
func (s *Service) RetrieveWeb(ctx context.Context, req WebRetrieveRequest) (*RetrieveResult, error) {
if s == nil || s.runtime == nil {
return nil, nil
}
return s.runtime.RetrieveWeb(ctx, req)
}
// EnsureRuntime 返回一个可继续向下传递的运行时引用。
func (s *Service) EnsureRuntime() Runtime {
if s == nil {
return nil
}
return s.runtime
}
// SetRuntime 允许在装配阶段延迟注入运行时。
func (s *Service) SetRuntime(runtime Runtime) {
if s == nil {
return
}
s.runtime = runtime
}
// NewWithRuntime 用显式运行时构造服务。
func NewWithRuntime(runtime Runtime) *Service {
return New(Options{Runtime: runtime})
}

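rag-service 的调用方需要同时容忍“未启用”的形态:cfg.Enabled 为 false 时 NewFromConfig 返回 runtime 为空的服务,读写方法都会直接返回 nil。下面是编辑补充的示意用法,MemoryRetrieveRequest 的字段这里按零值省略,仅演示调用与判空:

```go
package example

import (
	"context"

	ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
	ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
)

// retrieveMemoryIfEnabled 为示意函数:构造 rag-service 后检索记忆语料,
// 并对"未启用 / runtime 为空"时返回的 (nil, nil) 做显式判空。
func retrieveMemoryIfEnabled(ctx context.Context, cfg ragconfig.Config) error {
	svc, err := ragservice.NewFromConfig(ctx, cfg, ragservice.FactoryDeps{})
	if err != nil {
		return err
	}
	// 字段省略:零值请求只演示调用形态,真实调用需按业务填充查询条件。
	result, err := svc.RetrieveMemory(ctx, ragservice.MemoryRetrieveRequest{})
	if err != nil {
		return err
	}
	if result == nil {
		// rag 未启用或 runtime 尚未注入,业务侧按"无检索结果"降级。
		return nil
	}
	return nil
}
```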
View File

@@ -10,7 +10,7 @@ import (
"sync"
"time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
// InMemoryVectorStore 是本地开发用向量存储实现。

View File

@@ -14,7 +14,7 @@ import (
"sync"
"time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/services/rag/core"
)
// MilvusConfig 描述 Milvus REST 存储配置。

View File

@@ -1,6 +1,6 @@
package store
import "github.com/LoveLosita/smartflow/backend/infra/rag/core"
import "github.com/LoveLosita/smartflow/backend/services/rag/core"
// EnsureCompile 用于静态校验实现是否满足接口。
func EnsureCompile() {

View File

@@ -36,6 +36,8 @@
4. 消费侧已经按服务 consumer group 隔离,不再用一个 worker 吃全部事件。
5. 当前仍是单体进程内多 worker 装配worker 后续会跟随对应服务一起迁出,不在阶段 1 直接拆进程。
阶段 1.5 / 1.6 也已经先落地完毕:`backend/services/llm` 与 `backend/services/rag` 已经成为当前 canonical 入口,`backend/infra/llm` 与 `backend/infra/rag` 的 `.go` 旧实现已删除,仅保留迁移说明文档。当前仍然是单体进程内多 worker 装配,llm / rag 先完成服务化收口,还没有进入 gozero 进程拆分。
所以后续路线不是再补一次 outbox 基建,而是在这个阶段 1 基线上,按服务边界逐个把 gozero 服务、DAO / model / worker 和启动入口迁出去。
---
@@ -78,6 +80,8 @@ gozero 服务负责领域能力:
> 说明:`agent` 和 `memory` 都可以单独成服务,不应再被写成“公共能力”;其中 `agent` 更像对外对话编排服务,`memory` 更像其支撑服务/worker 服务。
>
> 说明:`llm-service` 先抽成全仓统一模型出口,`rag-service` 再抽成检索基础设施服务;`rag-service` 只能依赖 `llm-service`,不反向依赖具体业务服务。
>
> 当前状态:`llm-service` / `rag-service` 这两个边界已经先做成 `backend/services/*` 的服务内模块,调用仍由 `backend/cmd/start.go` 在同一进程内装配,不是 gozero 独立进程。
### 3.3 事件层
@@ -118,8 +122,8 @@ gozero 服务负责领域能力:
| --- | --- | --- | --- |
| 0 | 语义冻结和基线确认(已完成) | 阶段 0 已作为历史基线保存;后续只在契约变化时回看 | `go test ./...`、`api / worker / all` 启动 smoke |
| 1 | Outbox v2 基建(已完成,当前基线) | 当前已具备阶段 1 保存点:服务级 outbox 表、topic、group 和多 worker 装配已打通 | 已完成健康检查、服务级 outbox 写入/投递/消费 smoke、Kafka group lag 核对 |
| 1.5 | 先抽 llm-service | 统一模型调用、provider 路由、流式输出和审计后 commit | course / active-scheduler / memory 模型调用 smoke |
| 1.6 | 再抽 rag-service | 向量化、召回、重排、检索能力跑通后 commit | memory retrieve / rerank smoke |
| 1.5 | 先抽 llm-service(已完成) | 已完成,`backend/services/llm` 作为当前 canonical 入口 | `go test ./...` + course / active-scheduler / memory 模型调用 smoke |
| 1.6 | 再抽 rag-service(已完成) | 已完成,`backend/services/rag` 作为当前 canonical 入口 | `go test ./...` + memory retrieve / rerank smoke |
| 2 | 先拆 user/auth | user 路由、JWT 签发和 token 额度治理独立后 commit | 注册/登录/刷新/登出 smoke + token quota 回归 |
| 3 | 再拆 notification | notification 服务能独立消费和重试后 commit | notification E2E smoke + worker-only smoke |
| 4 | 再拆 active-scheduler | 预览生成和确认链路通过 gozero 服务跑通后 commit | dry-run / preview / confirm smoke |
@@ -229,7 +233,7 @@ flowchart LR
---
### 4.4 阶段 1.5:先抽 llm-service
### 4.4 阶段 1.5:先抽 llm-service(已完成)
目标:
@@ -237,6 +241,12 @@ flowchart LR
2. `course`、`active-scheduler`、`memory`、`agent` 对模型调用的依赖先收口到统一服务。
3. 先把模型 provider 路由、流式输出、限流、审计这些共性收束起来,避免每个服务各写一份。
当前状态:
1. 代码已经落到 `backend/services/llm`。
2. `backend/infra/llm``.go` 旧实现已删除,仅保留迁移说明。
3. 仍由 `backend/cmd/start.go` 在同一进程内装配,尚未引入 gozero 独立服务进程。
这一步要做的事:
1. 把当前分散在业务服务里的模型调用入口改成统一调用 `llm-service`
@@ -260,7 +270,7 @@ flowchart LR
---
### 4.5 阶段 1.6:再抽 rag-service
### 4.5 阶段 1.6:再抽 rag-service(已完成)
目标:
@@ -268,6 +278,12 @@ flowchart LR
2. 让向量化、召回、重排、向量库读写先进入独立服务。
3. 明确 `rag-service` 只能依赖 `llm-service` 做 embedding / rerank,不反向依赖业务服务。
当前状态:
1. 代码已经落到 `backend/services/rag`。
2. `backend/infra/rag``.go` 旧实现已删除,仅保留迁移说明。
3. 仍由 `backend/cmd/start.go` 在同一进程内装配,尚未引入 gozero 独立服务进程。
这一步要做的事:
1. 把当前分散在 `memory`、`agent` 里的检索逻辑改成统一调用 `rag-service`
@@ -474,6 +490,8 @@ flowchart LR
当前建议按这个顺序推进:
注:阶段 1.5 / 1.6 已完成,当前实际推进可从阶段 2 开始。
1. 以阶段 1 的服务级 outbox 为当前基线,不再回头做共享 outbox 方案。
2. 先切 llm-service,把统一模型出口从各业务服务里抽出去。
3. 再切 rag-service,把检索基础设施从 memory / agent 里抽出去。