Version: 0.9.76.dev.260505

后端:
1.阶段 6 agent / memory 服务化收口
- 新增 cmd/agent 独立进程入口,承载 agent zrpc server、agent outbox relay / consumer 和运行时依赖初始化
- 补齐 services/agent/rpc 的 Chat stream 与 conversation meta/list/timeline、schedule-preview、context-stats、schedule-state unary RPC
- 新增 gateway/client/agent 与 shared/contracts/agent,将 /api/v1/agent chat 和非 chat 门面切到 agent zrpc
- 收缩 gateway 本地 AgentService 装配,双 RPC 开关开启时不再初始化本地 agent 编排、LLM、RAG 和 memory reader fallback
- 将 backend/memory 物理迁入 services/memory,私有实现收入 internal,保留 module/model/observe 作为 memory 服务门面
- 调整 memory outbox、memory reader 和 agent 记忆渲染链路的 import 与服务边界,cmd/memory 独占 memory worker / consumer
- 关闭 gateway 侧 agent outbox worker 所有权,agent relay / consumer 由 cmd/agent 独占,gateway 仅保留 HTTP/SSE 门面与迁移期开关回退
- 更新阶段 6 文档,记录 agent / memory 当前切流点、smoke 结果,以及 backend/client 与 gateway/shared 的目录收口口径
This commit is contained in:
Losita
2026-05-05 19:31:39 +08:00
parent d7184b776b
commit 2a96f4c6f9
72 changed files with 2775 additions and 291 deletions

View File

@@ -0,0 +1,527 @@
package main
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"strings"
rootdao "github.com/LoveLosita/smartflow/backend/dao"
gatewaymemory "github.com/LoveLosita/smartflow/backend/gateway/client/memory"
gatewayschedule "github.com/LoveLosita/smartflow/backend/gateway/client/schedule"
gatewaytask "github.com/LoveLosita/smartflow/backend/gateway/client/task"
gatewaytaskclass "github.com/LoveLosita/smartflow/backend/gateway/client/taskclass"
gatewayuserauth "github.com/LoveLosita/smartflow/backend/gateway/client/userauth"
kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka"
outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
"github.com/LoveLosita/smartflow/backend/inits"
rootmiddleware "github.com/LoveLosita/smartflow/backend/middleware"
"github.com/LoveLosita/smartflow/backend/model"
rootsvc "github.com/LoveLosita/smartflow/backend/service"
eventsvc "github.com/LoveLosita/smartflow/backend/service/events"
activeadapters "github.com/LoveLosita/smartflow/backend/services/active_scheduler/core/adapters"
activefeedbacklocate "github.com/LoveLosita/smartflow/backend/services/active_scheduler/core/feedbacklocate"
activegraph "github.com/LoveLosita/smartflow/backend/services/active_scheduler/core/graph"
activesel "github.com/LoveLosita/smartflow/backend/services/active_scheduler/core/selection"
activesvc "github.com/LoveLosita/smartflow/backend/services/active_scheduler/core/service"
agentsv "github.com/LoveLosita/smartflow/backend/services/agent/sv"
agenttools "github.com/LoveLosita/smartflow/backend/services/agent/tools"
"github.com/LoveLosita/smartflow/backend/services/agent/tools/web"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
memorymodule "github.com/LoveLosita/smartflow/backend/services/memory"
memorymodel "github.com/LoveLosita/smartflow/backend/services/memory/model"
memoryobserve "github.com/LoveLosita/smartflow/backend/services/memory/observe"
ragservice "github.com/LoveLosita/smartflow/backend/services/rag"
ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config"
"github.com/LoveLosita/smartflow/backend/shared/ports"
"github.com/go-redis/redis/v8"
"github.com/spf13/viper"
"gorm.io/gorm"
)
// agentRuntime bundles the long-lived handles produced by buildAgentRuntime
// for the standalone agent process: the resources that close() releases, the
// dependencies startWorkers passes to outbox handler registration, and the
// assembled AgentService.
type agentRuntime struct {
// redisClient is the Redis connection; close() closes it.
redisClient *redis.Client
// eventBus is the agent outbox bus. It is nil when the bus is disabled, in
// which case startWorkers logs and does nothing.
eventBus eventsvc.OutboxBus
// outboxRepo, repoManager, agentRepo and cacheRepo are handed to
// eventsvc.RegisterCoreOutboxHandlers by startWorkers.
outboxRepo *outboxinfra.Repository
repoManager *rootdao.RepoManager
agentRepo *rootdao.AgentDAO
cacheRepo *rootdao.CacheDAO
// userAuthClient is only created when eventBus is non-nil; startWorkers
// refuses to start the consumer without it.
userAuthClient *gatewayuserauth.Client
// service is the fully configured agent service.
service *agentsv.AgentService
// workersStarted is set once startWorkers has started the bus, making
// later calls a no-op.
workersStarted bool
}
// buildAgentRuntime assembles every dependency of the standalone agent process
// and returns them bundled in an agentRuntime.
//
// It opens the agent database and Redis, installs the gorm cache plugin,
// builds the LLM and RAG services, the DAOs, the outbox bus and publishers,
// the task / task-class / schedule / memory zrpc clients, the AgentService
// itself, and finally the active-schedule rerun pipeline.
//
// ctx is forwarded to RAG initialization only.
//
// On any failure after Redis has been opened, the resources this function
// already owns (the outbox event bus, once built, and the Redis client) are
// released before the error is returned. The returned runtime is nil whenever
// the error is non-nil.
func buildAgentRuntime(ctx context.Context) (*agentRuntime, error) {
	db, err := openAgentDBFromConfig()
	if err != nil {
		return nil, fmt.Errorf("connect agent database failed: %w", err)
	}
	redisClient, err := inits.OpenRedisFromConfig()
	if err != nil {
		return nil, fmt.Errorf("connect agent redis failed: %w", err)
	}

	// eventBus is declared ahead of fail so the closure captures the variable
	// and can release the bus on error paths that occur after it was built.
	// This mirrors agentRuntime.close, which also closes a bus that was never
	// started.
	var eventBus eventsvc.OutboxBus
	fail := func(cause error) (*agentRuntime, error) {
		if eventBus != nil {
			eventBus.Close()
		}
		// Best-effort cleanup: the original cause is what the caller needs.
		_ = redisClient.Close()
		return nil, cause
	}

	cacheRepo := rootdao.NewCacheDAO(redisClient)
	if err = db.Use(rootmiddleware.NewGormCachePlugin(cacheRepo)); err != nil {
		return fail(fmt.Errorf("initialize agent cache deleter failed: %w", err))
	}

	// Notes:
	// 1. For this round the startup wiring is duplicated inside cmd/agent
	//    instead of touching the legacy gateway-local path in cmd/start.go.
	// 2. That stabilizes the standalone process entry first and avoids
	//    overlapping edits with the rpc/pb changes being made in parallel.
	// 3. Once the stage-6 agent/memory startup boundaries have settled,
	//    re-evaluate extracting LLM/RAG/bootstrap into a shared layer.
	llmService, err := buildAgentLLMService()
	if err != nil {
		return fail(fmt.Errorf("initialize agent llm service failed: %w", err))
	}
	ragService, err := buildAgentRAGService(ctx)
	if err != nil {
		return fail(err)
	}
	ragRuntime := ragService.Runtime()

	memoryCfg := memorymodule.LoadConfigFromViper()
	memoryObserver := memoryobserve.NewLoggerObserver(log.Default())
	memoryMetrics := memoryobserve.NewMetricsRegistry()

	manager := rootdao.NewManager(db)
	agentRepo := rootdao.NewAgentDAO(db)
	taskRepo := rootdao.NewTaskDAO(db)
	taskClassRepo := rootdao.NewTaskClassDAO(db)
	scheduleRepo := rootdao.NewScheduleDAO(db)
	agentCacheRepo := rootdao.NewAgentCache(redisClient)
	outboxRepo := outboxinfra.NewRepository(db)

	eventBus, err = buildAgentEventBus(outboxRepo)
	if err != nil {
		return fail(err)
	}
	if err = eventsvc.RegisterTaskUrgencyPromoteRoute(); err != nil {
		return fail(fmt.Errorf("register task outbox route failed: %w", err))
	}
	eventPublisher := buildAgentOutboxPublisher(outboxRepo)
	taskOutboxPublisher := buildTaskOutboxPublisher(outboxRepo)

	// The userauth client is only needed by the outbox consumer, so it is
	// skipped entirely when the bus is disabled.
	var userAuthClient *gatewayuserauth.Client
	if eventBus != nil {
		userAuthClient, err = gatewayuserauth.NewClient(gatewayuserauth.ClientConfig{
			Endpoints: viper.GetStringSlice("userauth.rpc.endpoints"),
			Target:    viper.GetString("userauth.rpc.target"),
			Timeout:   viper.GetDuration("userauth.rpc.timeout"),
		})
		if err != nil {
			return fail(fmt.Errorf("initialize userauth zrpc client failed: %w", err))
		}
	}

	taskClient, err := gatewaytask.NewClient(gatewaytask.ClientConfig{
		Endpoints: viper.GetStringSlice("task.rpc.endpoints"),
		Target:    viper.GetString("task.rpc.target"),
		Timeout:   viper.GetDuration("task.rpc.timeout"),
	})
	if err != nil {
		return fail(fmt.Errorf("initialize task zrpc client failed: %w", err))
	}
	taskClassClient, err := gatewaytaskclass.NewClient(gatewaytaskclass.ClientConfig{
		Endpoints: viper.GetStringSlice("taskClass.rpc.endpoints"),
		Target:    viper.GetString("taskClass.rpc.target"),
		Timeout:   viper.GetDuration("taskClass.rpc.timeout"),
	})
	if err != nil {
		return fail(fmt.Errorf("initialize task-class zrpc client failed: %w", err))
	}
	scheduleClient, err := gatewayschedule.NewClient(gatewayschedule.ClientConfig{
		Endpoints: viper.GetStringSlice("schedule.rpc.endpoints"),
		Target:    viper.GetString("schedule.rpc.target"),
		Timeout:   viper.GetDuration("schedule.rpc.timeout"),
	})
	if err != nil {
		return fail(fmt.Errorf("initialize schedule zrpc client failed: %w", err))
	}
	memoryClient, err := gatewaymemory.NewClient(gatewaymemory.ClientConfig{
		Endpoints: viper.GetStringSlice("memory.rpc.endpoints"),
		Target:    viper.GetString("memory.rpc.target"),
		Timeout:   viper.GetDuration("memory.rpc.timeout"),
	})
	if err != nil {
		return fail(fmt.Errorf("initialize memory zrpc client failed: %w", err))
	}

	taskService := rootsvc.NewTaskService(taskRepo, cacheRepo, taskOutboxPublisher)
	taskService.SetActiveScheduleDAO(manager.ActiveSchedule)
	scheduleService := rootsvc.NewScheduleService(scheduleRepo, taskClassRepo, manager, cacheRepo)

	agentService := agentsv.NewAgentService(
		llmService,
		agentRepo,
		taskRepo,
		cacheRepo,
		agentCacheRepo,
		manager.ActiveSchedule,
		manager.ActiveScheduleSession,
		eventPublisher,
	)
	// 1. During the migration the standalone entry still injects the legacy
	//    schedule/task domain capabilities, so agent/sv does not need to
	//    import the old service package and create an import cycle.
	// 2. Remove these injection points once stage 6 has converted the
	//    remaining DAO adapters to RPC/read-model.
	agentService.SmartPlanningMultiRawFunc = scheduleService.SmartPlanningMultiRaw
	agentService.HybridScheduleWithPlanMultiFunc = scheduleService.HybridScheduleWithPlanMulti
	agentService.ResolvePlanningWindowFunc = scheduleService.ResolvePlanningWindowByTaskClasses
	agentService.GetTasksWithUrgencyPromotionFunc = taskService.GetTasksWithUrgencyPromotion
	configureAgentService(
		agentService,
		ragRuntime,
		agentRepo,
		cacheRepo,
		taskClient,
		taskClassClient,
		scheduleClient,
		memoryClient,
		memoryCfg,
		memoryObserver,
		memoryMetrics,
	)

	activeTaskAdapter, err := activeadapters.NewTaskRPCAdapter(activeadapters.TaskRPCConfig{
		Endpoints: viper.GetStringSlice("task.rpc.endpoints"),
		Target:    viper.GetString("task.rpc.target"),
		Timeout:   viper.GetDuration("task.rpc.timeout"),
	})
	if err != nil {
		return fail(fmt.Errorf("initialize task rpc adapter for agent rerun failed: %w", err))
	}
	activeScheduleAdapter, err := activeadapters.NewScheduleRPCAdapter(activeadapters.ScheduleRPCConfig{
		Endpoints: viper.GetStringSlice("schedule.rpc.endpoints"),
		Target:    viper.GetString("schedule.rpc.target"),
		Timeout:   viper.GetDuration("schedule.rpc.timeout"),
	})
	if err != nil {
		return fail(fmt.Errorf("initialize schedule rpc adapter for agent rerun failed: %w", err))
	}
	activeScheduleDryRun, err := activesvc.NewDryRunService(activeadapters.ReadersWithScheduleRPC(activeTaskAdapter, activeScheduleAdapter))
	if err != nil {
		return fail(err)
	}
	activeSchedulePreviewConfirm, err := buildActiveSchedulePreviewConfirmService(manager.ActiveSchedule, activeScheduleDryRun, activeScheduleAdapter)
	if err != nil {
		return fail(err)
	}
	activeScheduleLLMClient := llmService.ProClient()
	activeScheduleSelector := activesel.NewService(activeScheduleLLMClient)
	activeScheduleFeedbackLocator := activefeedbacklocate.NewService(activeScheduleAdapter, activeScheduleLLMClient)
	activeScheduleGraphRunner, err := activegraph.NewRunner(activeScheduleDryRun.AsGraphDryRunFunc(), activeScheduleSelector)
	if err != nil {
		return fail(err)
	}
	agentService.SetActiveScheduleSessionRerunFunc(buildActiveScheduleSessionRerunFunc(
		manager.ActiveSchedule,
		activeScheduleGraphRunner,
		activeSchedulePreviewConfirm,
		activeScheduleFeedbackLocator,
	))

	return &agentRuntime{
		redisClient:    redisClient,
		eventBus:       eventBus,
		outboxRepo:     outboxRepo,
		repoManager:    manager,
		agentRepo:      agentRepo,
		cacheRepo:      cacheRepo,
		userAuthClient: userAuthClient,
		service:        agentService,
	}, nil
}
// startWorkers registers the agent outbox handlers and then starts the event
// bus with ctx.
//
// It is a no-op (returning nil) when the receiver is nil, when the workers
// were already started, or when the event bus is disabled. It returns an error
// when the bus is enabled but no userauth zrpc client was built, or when
// handler registration fails.
func (r *agentRuntime) startWorkers(ctx context.Context) error {
	switch {
	case r == nil:
		return nil
	case r.workersStarted:
		return nil
	case r.eventBus == nil:
		log.Println("Agent outbox consumer is disabled")
		return nil
	case r.userAuthClient == nil:
		return fmt.Errorf("agent outbox consumer requires userauth zrpc client")
	}

	// 1. Register the handlers that agent itself consumes first, and also fill
	//    in the service route for memory.extract.requested.
	// 2. Only the agent boundary is attached here: memory consumption stays
	//    with cmd/memory, and task events remain publish-only writes into the
	//    task outbox.
	// 3. The bus is started only after registration, so the service cannot
	//    begin consuming messages that have no handler attached yet.
	registerErr := eventsvc.RegisterCoreOutboxHandlers(
		r.eventBus,
		r.outboxRepo,
		r.repoManager,
		r.agentRepo,
		r.cacheRepo,
		nil,
		r.userAuthClient,
	)
	if registerErr != nil {
		return fmt.Errorf("register agent outbox handlers failed: %w", registerErr)
	}

	r.eventBus.Start(ctx)
	r.workersStarted = true
	log.Println("Agent outbox consumer started")
	return nil
}
// close releases the resources owned by the runtime: the outbox event bus
// first, then the Redis client. Calling it on a nil runtime is safe, and
// either resource is skipped when it was never created.
func (r *agentRuntime) close() {
	if r == nil {
		return
	}
	if bus := r.eventBus; bus != nil {
		bus.Close()
	}
	if rdb := r.redisClient; rdb != nil {
		// Shutdown path: a Redis close error is deliberately ignored.
		_ = rdb.Close()
	}
}
// openAgentDBFromConfig opens the database via inits.OpenDBFromConfig and then
// runs the agent-specific preparation steps in order: migrate agent-owned
// tables, migrate the agent outbox table, and verify that the tables owned by
// other services exist. The first failing step aborts and its error is
// returned unchanged.
func openAgentDBFromConfig() (*gorm.DB, error) {
	conn, err := inits.OpenDBFromConfig()
	if err != nil {
		return nil, err
	}

	steps := []func(*gorm.DB) error{
		autoMigrateAgentOwnedTables,
		autoMigrateAgentOutboxTable,
		ensureAgentRuntimeDependencyTables,
	}
	for _, step := range steps {
		if stepErr := step(conn); stepErr != nil {
			return nil, stepErr
		}
	}
	return conn, nil
}
// autoMigrateAgentOwnedTables runs gorm AutoMigrate for the tables owned by
// the agent service, with foreign-key constraint creation disabled for the
// duration of the migration. It returns an error when db is nil or when the
// migration fails.
func autoMigrateAgentOwnedTables(db *gorm.DB) error {
	if db == nil {
		return fmt.Errorf("agent database is not initialized")
	}

	// 1. On startup the standalone agent process only fills in its own table
	//    structure; it does not force foreign keys onto a historical database.
	// 2. Historical data (online or local) may contain old chat_history rows
	//    whose agent_chat is missing; forcing the FK would block startup.
	// 3. During the migration the application keeps associating rows by
	//    chat_id; cleaning up orphans and adding the FK belongs in a separate
	//    data-governance script.
	previous := db.Config.DisableForeignKeyConstraintWhenMigrating
	db.Config.DisableForeignKeyConstraintWhenMigrating = true
	// Restore the caller's setting no matter how the migration ends.
	defer func() { db.Config.DisableForeignKeyConstraintWhenMigrating = previous }()

	ownedModels := []any{
		&model.AgentChat{},
		&model.ChatHistory{},
		&model.AgentTimelineEvent{},
		&model.AgentScheduleState{},
		&model.ActiveScheduleSession{},
		&model.AgentStateSnapshotRecord{},
	}
	migrateErr := db.AutoMigrate(ownedModels...)
	if migrateErr == nil {
		return nil
	}
	return fmt.Errorf("auto migrate agent owned tables failed: %w", migrateErr)
}
// autoMigrateAgentOutboxTable resolves the outbox configuration registered for
// the agent service and auto-migrates model.AgentOutboxMessage into the table
// that configuration names. It fails when no configuration can be resolved or
// when the migration itself fails.
func autoMigrateAgentOutboxTable(db *gorm.DB) error {
	svcCfg, found := outboxinfra.ResolveServiceConfig(outboxinfra.ServiceAgent)
	if !found {
		return fmt.Errorf("resolve agent outbox config failed")
	}

	migrateErr := db.Table(svcCfg.TableName).AutoMigrate(&model.AgentOutboxMessage{})
	if migrateErr == nil {
		return nil
	}
	return fmt.Errorf("auto migrate agent outbox table failed for %s (%s): %w", svcCfg.Name, svcCfg.TableName, migrateErr)
}
// ensureAgentRuntimeDependencyTables verifies that each table the agent
// process reads or writes but does not own is present, and returns an error
// naming the first one that is missing.
func ensureAgentRuntimeDependencyTables(db *gorm.DB) error {
	// 1. The standalone agent process still reuses some task / schedule /
	//    active-scheduler tables; it must not overstep by migrating them here.
	// 2. Only existence is checked. A missing table fails fast so that chat
	//    requests never run against a half-initialized state.
	// 3. When stage 6 later replaces these direct accesses with
	//    RPC/read-model, this dependency list should shrink accordingly.
	type requiredTable struct {
		label string
		probe any
	}
	required := []requiredTable{
		{label: "tasks", probe: &model.Task{}},
		{label: "task_classes", probe: &model.TaskClass{}},
		{label: "task_items", probe: &model.TaskClassItem{}},
		{label: "schedules", probe: &model.Schedule{}},
		{label: "schedule_events", probe: &model.ScheduleEvent{}},
		{label: "active_schedule_triggers", probe: &model.ActiveScheduleTrigger{}},
		{label: "active_schedule_previews", probe: &model.ActiveSchedulePreview{}},
	}

	for i := range required {
		if db.Migrator().HasTable(required[i].probe) {
			continue
		}
		return fmt.Errorf("agent runtime dependency table missing: %s", required[i].label)
	}
	return nil
}
// buildAgentLLMService initializes the Eino hub and wraps it in an LLM
// service. The API key is read from the ARK_API_KEY environment variable; the
// base URL and the course vision model come from viper configuration.
func buildAgentLLMService() (*llmservice.Service, error) {
	hub, hubErr := inits.InitEino()
	if hubErr != nil {
		return nil, hubErr
	}

	opts := llmservice.Options{
		AIHub:             hub,
		APIKey:            os.Getenv("ARK_API_KEY"),
		BaseURL:           viper.GetString("agent.baseURL"),
		CourseVisionModel: viper.GetString("courseImport.visionModel"),
	}
	return llmservice.New(opts), nil
}
// buildAgentRAGService builds the RAG service for the agent process from
// viper configuration. When RAG is disabled it logs that fact and returns a
// service constructed from empty options; otherwise it builds the service
// from the loaded configuration using the default logger.
func buildAgentRAGService(ctx context.Context) (*ragservice.Service, error) {
	cfg := ragconfig.LoadFromViper()
	if !cfg.Enabled {
		log.Println("RAG service is disabled for agent")
		return ragservice.New(ragservice.Options{}), nil
	}

	logger := log.Default()
	deps := ragservice.FactoryDeps{
		Logger:   logger,
		Observer: ragservice.NewLoggerObserver(logger),
	}
	svc, err := ragservice.NewFromConfig(ctx, cfg, deps)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize agent RAG service: %w", err)
	}

	log.Printf("Agent RAG runtime initialized: store=%s embed=%s reranker=%s", cfg.Store, cfg.EmbedProvider, cfg.RerankerProvider)
	return svc, nil
}
// buildAgentEventBus creates the outbox bus for the agent service from the
// Kafka configuration and wraps it in a routed bus keyed by service name.
// When the routed bus comes back nil, that is logged and (nil, nil) is
// returned so the caller can treat the bus as disabled.
func buildAgentEventBus(outboxRepo *outboxinfra.Repository) (eventsvc.OutboxBus, error) {
	agentBus, err := eventsvc.NewServiceOutboxBus(outboxRepo, kafkabus.LoadConfig(), outboxinfra.ServiceAgent)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize outbox event bus for service %s: %w", outboxinfra.ServiceAgent, err)
	}

	// Only a usable bus is registered; an empty map is still passed on.
	buses := make(map[string]eventsvc.OutboxBus, 1)
	if agentBus != nil {
		buses[outboxinfra.ServiceAgent] = agentBus
	}

	routed := eventsvc.NewRoutedOutboxBus(buses)
	if routed == nil {
		log.Println("Agent outbox event bus is disabled")
	}
	return routed, nil
}
// buildAgentOutboxPublisher returns a publisher that writes agent events into
// outboxRepo, using the retry limit from the Kafka configuration. It returns
// nil when Kafka is disabled or outboxRepo is nil.
func buildAgentOutboxPublisher(outboxRepo *outboxinfra.Repository) outboxinfra.EventPublisher {
	cfg := kafkabus.LoadConfig()
	if cfg.Enabled && outboxRepo != nil {
		return &repositoryOutboxPublisher{repo: outboxRepo, maxRetry: cfg.MaxRetry}
	}
	// A literal nil keeps the returned interface value itself nil.
	return nil
}
// buildTaskOutboxPublisher returns the publisher handed to the task service.
// It is backed by outboxRepo and the Kafka retry limit, and is nil when Kafka
// is disabled or outboxRepo is nil.
func buildTaskOutboxPublisher(outboxRepo *outboxinfra.Repository) outboxinfra.EventPublisher {
	// Stays a nil interface unless both preconditions hold.
	var publisher outboxinfra.EventPublisher

	cfg := kafkabus.LoadConfig()
	if cfg.Enabled && outboxRepo != nil {
		publisher = &repositoryOutboxPublisher{
			repo:     outboxRepo,
			maxRetry: cfg.MaxRetry,
		}
	}
	return publisher
}
// repositoryOutboxPublisher is the publisher returned by
// buildAgentOutboxPublisher and buildTaskOutboxPublisher. Its Publish method
// persists each event through the outbox repository.
type repositoryOutboxPublisher struct {
// repo receives the messages; Publish fails when it is nil.
repo *outboxinfra.Repository
// maxRetry is passed to repo.CreateMessage with every message.
maxRetry int
}
// Publish normalizes req and stores it as an outbox message.
//
// String fields are whitespace-trimmed. An empty event type is rejected, an
// empty event version falls back to outboxinfra.DefaultEventVersion, and an
// empty aggregate ID falls back to the message key. The payload is
// JSON-encoded before being handed to the repository. An error is returned
// when the publisher is not initialized, the event type is empty, the payload
// cannot be encoded, or the repository write fails.
func (p *repositoryOutboxPublisher) Publish(ctx context.Context, req outboxinfra.PublishRequest) error {
	if p == nil || p.repo == nil {
		return fmt.Errorf("outbox publisher is not initialized")
	}

	kind := strings.TrimSpace(req.EventType)
	if kind == "" {
		return fmt.Errorf("eventType is empty")
	}

	version := strings.TrimSpace(req.EventVersion)
	if version == "" {
		version = outboxinfra.DefaultEventVersion
	}

	key := strings.TrimSpace(req.MessageKey)
	aggregate := strings.TrimSpace(req.AggregateID)
	if aggregate == "" {
		aggregate = key
	}

	encoded, err := json.Marshal(req.Payload)
	if err != nil {
		return err
	}

	envelope := outboxinfra.OutboxEventPayload{
		EventID:      strings.TrimSpace(req.EventID),
		EventType:    kind,
		EventVersion: version,
		AggregateID:  aggregate,
		Payload:      encoded,
	}
	if _, err = p.repo.CreateMessage(ctx, kind, key, envelope, p.maxRetry); err != nil {
		return err
	}
	return nil
}
// configureAgentService attaches the optional collaborators to agentService:
// the agent state store, the tool registry (RAG runtime, web search provider,
// task-class upsert), the schedule provider, the compaction store, the
// quick-task dependencies and the memory reader. It does nothing when
// agentService is nil.
//
// The web search provider is selected by the "websearch.provider" setting:
// "bocha" uses the Bocha provider when "websearch.apiKey" is non-empty; every
// other case (an empty key, "mock", an empty name, or an unrecognized name)
// uses the mock provider.
func configureAgentService(
	agentService *agentsv.AgentService,
	ragRuntime ragservice.Runtime,
	agentRepo *rootdao.AgentDAO,
	cacheRepo *rootdao.CacheDAO,
	taskClient agentsv.TaskRPCClient,
	taskClassClient agentsv.TaskClassAgentRPCClient,
	scheduleClient agentsv.ScheduleAgentRPCClient,
	memoryReaderClient ports.MemoryReaderClient,
	memoryCfg memorymodel.Config,
	memoryObserver memoryobserve.Observer,
	memoryMetrics memoryobserve.MetricsRecorder,
) {
	if agentService == nil {
		return
	}

	agentService.SetAgentStateStore(rootdao.NewAgentStateStoreAdapter(cacheRepo))

	// Start from the mock provider; only a fully configured "bocha" setup
	// replaces it. Each branch logs which provider ends up in use.
	var searchProvider web.SearchProvider = &web.MockProvider{}
	switch providerName := viper.GetString("websearch.provider"); {
	case providerName == "bocha":
		if apiKey := viper.GetString("websearch.apiKey"); apiKey != "" {
			searchProvider = web.NewBochaProvider(apiKey, "")
			log.Println("WebSearch provider: bocha")
		} else {
			log.Println("WebSearch: 博查 API Key 为空,降级为 mock")
		}
	case providerName == "mock" || providerName == "":
		log.Println("WebSearch provider: mock模拟模式")
	default:
		log.Printf("WebSearch provider %q 未识别,降级为 mock", providerName)
	}

	registryDeps := agenttools.DefaultRegistryDeps{
		RAGRuntime:        ragRuntime,
		WebSearchProvider: searchProvider,
		TaskClassWriteDeps: agenttools.TaskClassWriteDeps{
			UpsertTaskClass: agentsv.NewTaskClassRPCUpsertFunc(taskClassClient),
		},
	}
	agentService.SetToolRegistry(agenttools.NewDefaultRegistryWithDeps(registryDeps))

	agentService.SetScheduleProvider(agentsv.NewScheduleRPCProvider(scheduleClient, taskClassClient))
	agentService.SetCompactionStore(agentRepo)
	agentService.SetQuickTaskDeps(agentsv.NewTaskRPCQuickTaskDeps(taskClient))

	memoryReader := agentsv.NewMemoryRPCReader(memoryReaderClient, memoryObserver, memoryMetrics)
	agentService.SetMemoryReader(memoryReader, memoryCfg)
}