Version: 0.9.64.dev.260503
后端: 1. 服务级 outbox 基础设施全量落地——新增 service route / service catalog / route registry,重构 outbox engine、repository、event bus 和 model,按 `event_type -> service -> table/topic/group` 统一写入与投递,保留 `agent` 兼容壳但不再依赖共享 outbox 2. Kafka 投递、消费与启动装配同步切换——更新 kafka config、consumer、envelope,接入服务级 topic 与 consumer group,并同步调整 mysql 初始化、start/main/router 装配,保证各服务 relay / consumer 独立装配 3. 业务事件处理器按服务归属重接新 bus——`active-scheduler` 触发链路,以及 `agent` / `memory` / `notification` / `task` 相关 outbox handler 统一切到新路由注册与服务目录,避免新流量回流共享表 4. 同步更新《微服务四步迁移与第二阶段并行开发计划》,把阶段 1 改成当前基线并补齐结构图、阶段快照、风险回退和多代理执行口径
This commit is contained in:
@@ -19,6 +19,8 @@ type Config struct {
|
||||
Brokers []string
|
||||
Topic string
|
||||
GroupID string
|
||||
// ServiceName 表示当前进程所属的 outbox 服务;为空时保持单体全量模式。
|
||||
ServiceName string
|
||||
// RetryScanInterval/RetryBatchSize/MaxRetry 作用于 outbox 扫描与失败重试。
|
||||
RetryScanInterval time.Duration
|
||||
RetryBatchSize int
|
||||
@@ -40,10 +42,14 @@ func LoadConfig() Config {
|
||||
Brokers: brokers,
|
||||
Topic: strings.TrimSpace(viper.GetString("kafka.topic")),
|
||||
GroupID: strings.TrimSpace(viper.GetString("kafka.groupID")),
|
||||
ServiceName: strings.TrimSpace(viper.GetString("outbox.serviceName")),
|
||||
RetryScanInterval: viper.GetDuration("kafka.retryScanInterval"),
|
||||
RetryBatchSize: viper.GetInt("kafka.retryBatchSize"),
|
||||
MaxRetry: viper.GetInt("kafka.maxRetry"),
|
||||
}
|
||||
if cfg.ServiceName == "" {
|
||||
cfg.ServiceName = strings.TrimSpace(viper.GetString("kafka.serviceName"))
|
||||
}
|
||||
if cfg.Topic == "" {
|
||||
cfg.Topic = DefaultTopic
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package kafka
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
segmentkafka "github.com/segmentio/kafka-go"
|
||||
)
|
||||
@@ -13,7 +14,13 @@ type Consumer struct {
|
||||
|
||||
func NewConsumer(cfg Config) (*Consumer, error) {
|
||||
if len(cfg.Brokers) == 0 {
|
||||
return nil, errors.New("kafka brokers 未配置")
|
||||
return nil, errors.New("kafka brokers not configured")
|
||||
}
|
||||
if strings.TrimSpace(cfg.Topic) == "" {
|
||||
return nil, errors.New("kafka topic not configured")
|
||||
}
|
||||
if strings.TrimSpace(cfg.GroupID) == "" {
|
||||
return nil, errors.New("kafka groupID not configured")
|
||||
}
|
||||
reader := segmentkafka.NewReader(segmentkafka.ReaderConfig{
|
||||
Brokers: cfg.Brokers,
|
||||
@@ -30,14 +37,14 @@ func NewConsumer(cfg Config) (*Consumer, error) {
|
||||
// Dequeue 从 Kafka 拉取一条消息(不自动提交 offset)。
|
||||
func (c *Consumer) Dequeue(ctx context.Context) (segmentkafka.Message, error) {
|
||||
if c == nil || c.reader == nil {
|
||||
return segmentkafka.Message{}, errors.New("kafka consumer 未初始化")
|
||||
return segmentkafka.Message{}, errors.New("kafka consumer not initialized")
|
||||
}
|
||||
return c.reader.FetchMessage(ctx)
|
||||
}
|
||||
|
||||
func (c *Consumer) Commit(ctx context.Context, msg segmentkafka.Message) error {
|
||||
if c == nil || c.reader == nil {
|
||||
return errors.New("kafka consumer 未初始化")
|
||||
return errors.New("kafka consumer not initialized")
|
||||
}
|
||||
return c.reader.CommitMessages(ctx, msg)
|
||||
}
|
||||
|
||||
@@ -18,6 +18,8 @@ type Envelope struct {
|
||||
EventType string `json:"event_type"`
|
||||
// EventVersion 是事件版本号(默认 v1)。
|
||||
EventVersion string `json:"event_version,omitempty"`
|
||||
// ServiceName 是事件归属服务;空值通常表示旧兼容消息或全量模式。
|
||||
ServiceName string `json:"service_name,omitempty"`
|
||||
// AggregateID 是聚合主键(例如 conversation_id),用于追踪同一业务对象事件流。
|
||||
AggregateID string `json:"aggregate_id,omitempty"`
|
||||
|
||||
|
||||
@@ -40,19 +40,19 @@ type PublishRequest struct {
|
||||
Payload any
|
||||
}
|
||||
|
||||
// Engine 是 Outbox + Kafka 通用异步引擎。
|
||||
// Engine 是单个服务的 Outbox + Kafka 异步引擎。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责 outbox 扫描、kafka 投递、kafka 消费、状态机推进;
|
||||
// 1. 负责一个服务目录下的 outbox 扫描、Kafka 投递、Kafka 消费、状态机推进;
|
||||
// 2. 负责 event_type -> handler 路由;
|
||||
// 3. 不负责任何业务语义(业务由 handler 承担)。
|
||||
// 3. 不负责任何跨服务路由决策,跨服务分发由 EventBus 门面完成。
|
||||
type Engine struct {
|
||||
repo *Repository
|
||||
producer *kafkabus.Producer
|
||||
consumer *kafkabus.Consumer
|
||||
|
||||
brokers []string
|
||||
topic string
|
||||
route ServiceRoute
|
||||
maxRetry int
|
||||
scanEvery time.Duration
|
||||
scanBatch int
|
||||
@@ -61,11 +61,12 @@ type Engine struct {
|
||||
handlers map[string]MessageHandler
|
||||
}
|
||||
|
||||
// NewEngine 创建异步引擎。
|
||||
// NewEngine 创建单服务异步引擎。
|
||||
//
|
||||
// 规则:
|
||||
// 1. kafka.enabled=false 时返回 nil,调用方可降级同步;
|
||||
// 2. producer/consumer 任一步失败都会回收已创建资源。
|
||||
// 2. serviceName 非空时优先使用服务级默认目录,topic/group/table 不再沿用共享终态;
|
||||
// 3. producer/consumer 任一步失败都会回收已创建资源。
|
||||
func NewEngine(repo *Repository, cfg kafkabus.Config) (*Engine, error) {
|
||||
if !cfg.Enabled {
|
||||
return nil, nil
|
||||
@@ -74,6 +75,11 @@ func NewEngine(repo *Repository, cfg kafkabus.Config) (*Engine, error) {
|
||||
return nil, errors.New("outbox repository is nil")
|
||||
}
|
||||
|
||||
route := resolveEngineRoute(repo, cfg)
|
||||
cfg.Topic = route.Topic
|
||||
cfg.GroupID = route.GroupID
|
||||
|
||||
serviceRepo := repo.WithRoute(route)
|
||||
producer, err := kafkabus.NewProducer(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -85,11 +91,11 @@ func NewEngine(repo *Repository, cfg kafkabus.Config) (*Engine, error) {
|
||||
}
|
||||
|
||||
return &Engine{
|
||||
repo: repo,
|
||||
repo: serviceRepo,
|
||||
producer: producer,
|
||||
consumer: consumer,
|
||||
brokers: cfg.Brokers,
|
||||
topic: cfg.Topic,
|
||||
route: route,
|
||||
maxRetry: cfg.MaxRetry,
|
||||
scanEvery: cfg.RetryScanInterval,
|
||||
scanBatch: cfg.RetryBatchSize,
|
||||
@@ -118,7 +124,7 @@ func (e *Engine) RegisterEventHandler(eventType string, handler MessageHandler)
|
||||
e.handlersMu.Lock()
|
||||
defer e.handlersMu.Unlock()
|
||||
if _, exists := e.handlers[eventType]; exists {
|
||||
log.Printf("outbox handler 覆盖注册: event_type=%s", eventType)
|
||||
log.Printf("outbox handler 覆盖注册: service=%s event_type=%s", e.route.ServiceName, eventType)
|
||||
}
|
||||
e.handlers[eventType] = handler
|
||||
return nil
|
||||
@@ -137,11 +143,20 @@ func (e *Engine) Start(ctx context.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("outbox engine starting: topic=%s brokers=%v retry_scan=%s batch=%d", e.topic, e.brokers, e.scanEvery, e.scanBatch)
|
||||
if err := kafkabus.WaitTopicReady(ctx, e.brokers, e.topic, 30*time.Second); err != nil {
|
||||
log.Printf(
|
||||
"outbox engine starting: service=%s table=%s topic=%s group=%s brokers=%v retry_scan=%s batch=%d",
|
||||
e.route.ServiceName,
|
||||
e.route.TableName,
|
||||
e.route.Topic,
|
||||
e.route.GroupID,
|
||||
e.brokers,
|
||||
e.scanEvery,
|
||||
e.scanBatch,
|
||||
)
|
||||
if err := kafkabus.WaitTopicReady(ctx, e.brokers, e.route.Topic, 30*time.Second); err != nil {
|
||||
log.Printf("Kafka topic not ready before consume loop start: %v", err)
|
||||
} else {
|
||||
log.Printf("Kafka topic is ready: %s", e.topic)
|
||||
log.Printf("Kafka topic is ready: %s", e.route.Topic)
|
||||
}
|
||||
|
||||
e.StartDispatch(ctx)
|
||||
@@ -149,11 +164,6 @@ func (e *Engine) Start(ctx context.Context) {
|
||||
}
|
||||
|
||||
// StartDispatch 单独启动 outbox -> Kafka 的投递循环。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只负责启动 dispatch 后台 goroutine,不负责启动 Kafka 消费;
|
||||
// 2. 不重复执行 Start 中的 topic readiness 等待,避免改变原 Start(ctx) 的启动语义;
|
||||
// 3. ctx 取消后由内部循环自行退出,调用方无需额外停止 goroutine。
|
||||
func (e *Engine) StartDispatch(ctx context.Context) {
|
||||
if e == nil {
|
||||
return
|
||||
@@ -162,11 +172,6 @@ func (e *Engine) StartDispatch(ctx context.Context) {
|
||||
}
|
||||
|
||||
// StartConsume 单独启动 Kafka -> handler 的消费循环。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只负责启动 consume 后台 goroutine,不负责扫描或投递 outbox;
|
||||
// 2. 不注册业务 handler,handler 仍由 RegisterEventHandler 显式注入;
|
||||
// 3. ctx 取消或 consumer 返回 context.Canceled 时,内部循环按既有逻辑退出。
|
||||
func (e *Engine) StartConsume(ctx context.Context) {
|
||||
if e == nil {
|
||||
return
|
||||
@@ -202,7 +207,7 @@ func (e *Engine) Enqueue(ctx context.Context, eventType, messageKey string, payl
|
||||
// 步骤:
|
||||
// 1. 标准化 event_type/version/key;
|
||||
// 2. payload 序列化;
|
||||
// 3. 写入 outbox(仅本地写库,不做 kafka 网络 IO)。
|
||||
// 3. 写入当前服务的 outbox 表,不再由调用方手传 topic。
|
||||
func (e *Engine) Publish(ctx context.Context, req PublishRequest) error {
|
||||
if e == nil {
|
||||
return errors.New("outbox engine is nil")
|
||||
@@ -227,7 +232,7 @@ func (e *Engine) Publish(ctx context.Context, req PublishRequest) error {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = e.repo.CreateMessage(ctx, eventType, e.topic, messageKey, OutboxEventPayload{
|
||||
_, err = e.repo.CreateMessage(ctx, eventType, messageKey, OutboxEventPayload{
|
||||
EventID: strings.TrimSpace(req.EventID),
|
||||
EventType: eventType,
|
||||
EventVersion: eventVersion,
|
||||
@@ -246,13 +251,13 @@ func (e *Engine) startDispatchLoop(ctx context.Context) {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
pendingMessages, err := e.repo.ListDueMessages(ctx, e.scanBatch)
|
||||
pendingMessages, err := e.repo.ListDueMessages(ctx, e.route.ServiceName, e.scanBatch)
|
||||
if err != nil {
|
||||
log.Printf("扫描 outbox 失败: %v", err)
|
||||
continue
|
||||
}
|
||||
if len(pendingMessages) > 0 {
|
||||
log.Printf("outbox due messages=%d, start dispatch", len(pendingMessages))
|
||||
log.Printf("outbox due messages=%d, service=%s start dispatch", len(pendingMessages), e.route.ServiceName)
|
||||
}
|
||||
|
||||
for _, msg := range pendingMessages {
|
||||
@@ -287,18 +292,23 @@ func (e *Engine) dispatchOne(ctx context.Context, outboxID int64) error {
|
||||
if eventPayload.EventID == "" {
|
||||
eventPayload.EventID = strconv.FormatInt(outboxMsg.ID, 10)
|
||||
}
|
||||
serviceName := strings.TrimSpace(outboxMsg.ServiceName)
|
||||
if serviceName == "" {
|
||||
serviceName = e.route.ServiceName
|
||||
}
|
||||
|
||||
envelope := kafkabus.Envelope{
|
||||
OutboxID: outboxMsg.ID,
|
||||
EventID: eventPayload.EventID,
|
||||
EventType: eventPayload.EventType,
|
||||
EventVersion: eventPayload.EventVersion,
|
||||
ServiceName: serviceName,
|
||||
AggregateID: eventPayload.AggregateID,
|
||||
Payload: eventPayload.PayloadJSON,
|
||||
}
|
||||
raw, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
markErr := e.repo.MarkDead(ctx, outboxMsg.ID, "序列化 outbox 包装失败: "+err.Error())
|
||||
markErr := e.repo.MarkDead(ctx, outboxMsg.ID, "序列化 outbox 封装失败: "+err.Error())
|
||||
if markErr != nil {
|
||||
log.Printf("标记 outbox 死信失败(id=%d): %v", outboxMsg.ID, markErr)
|
||||
}
|
||||
@@ -329,7 +339,7 @@ func (e *Engine) startConsumeLoop(ctx context.Context) {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return
|
||||
}
|
||||
log.Printf("Kafka 消费拉取失败(topic=%s): %v", e.topic, err)
|
||||
log.Printf("Kafka 消费拉取失败(topic=%s): %v", e.route.Topic, err)
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
@@ -344,11 +354,11 @@ func (e *Engine) handleMessage(ctx context.Context, msg segmentkafka.Message) er
|
||||
var envelope kafkabus.Envelope
|
||||
if err := json.Unmarshal(msg.Value, &envelope); err != nil {
|
||||
_ = e.consumer.Commit(ctx, msg)
|
||||
return fmt.Errorf("解析 Kafka 包装失败: %w", err)
|
||||
return fmt.Errorf("解析 Kafka 封装失败: %w", err)
|
||||
}
|
||||
if envelope.OutboxID <= 0 {
|
||||
_ = e.consumer.Commit(ctx, msg)
|
||||
return errors.New("Kafka 包装缺少 outbox_id")
|
||||
return errors.New("Kafka 封装缺少 outbox_id")
|
||||
}
|
||||
|
||||
eventType := strings.TrimSpace(envelope.EventType)
|
||||
@@ -360,9 +370,36 @@ func (e *Engine) handleMessage(ctx context.Context, msg segmentkafka.Message) er
|
||||
return nil
|
||||
}
|
||||
|
||||
runtimeServiceName := strings.TrimSpace(e.route.ServiceName)
|
||||
if runtimeServiceName != "" {
|
||||
messageServiceName := strings.TrimSpace(envelope.ServiceName)
|
||||
if messageServiceName == "" {
|
||||
if resolvedServiceName, ok := ResolveEventService(eventType); ok {
|
||||
messageServiceName = resolvedServiceName
|
||||
}
|
||||
}
|
||||
if messageServiceName == "" || messageServiceName != runtimeServiceName {
|
||||
log.Printf(
|
||||
"跳过非本服务事件: runtime_service=%s message_service=%s event_type=%s outbox_id=%d",
|
||||
runtimeServiceName,
|
||||
messageServiceName,
|
||||
eventType,
|
||||
envelope.OutboxID,
|
||||
)
|
||||
if err := e.consumer.Commit(ctx, msg); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
handler, ok := e.getHandler(eventType)
|
||||
if !ok {
|
||||
_ = e.repo.MarkDead(ctx, envelope.OutboxID, "未知事件类型: "+eventType)
|
||||
if runtimeServiceName == "" {
|
||||
_ = e.repo.MarkDead(ctx, envelope.OutboxID, "未知事件类型: "+eventType)
|
||||
} else {
|
||||
_ = e.repo.MarkDead(ctx, envelope.OutboxID, "本服务未注册 handler: "+eventType)
|
||||
}
|
||||
if err := e.consumer.Commit(ctx, msg); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -381,3 +418,51 @@ func (e *Engine) handleMessage(ctx context.Context, msg segmentkafka.Message) er
|
||||
|
||||
return e.consumer.Commit(ctx, msg)
|
||||
}
|
||||
|
||||
func resolveEngineRoute(repo *Repository, cfg kafkabus.Config) ServiceRoute {
|
||||
route := ServiceRoute{
|
||||
ServiceName: strings.TrimSpace(cfg.ServiceName),
|
||||
Topic: strings.TrimSpace(cfg.Topic),
|
||||
GroupID: strings.TrimSpace(cfg.GroupID),
|
||||
}
|
||||
if repo != nil {
|
||||
repoRoute := normalizeServiceRoute(repo.route)
|
||||
if route.ServiceName == "" {
|
||||
route.ServiceName = repoRoute.ServiceName
|
||||
}
|
||||
if route.TableName == "" {
|
||||
route.TableName = repoRoute.TableName
|
||||
}
|
||||
if route.Topic == "" {
|
||||
route.Topic = repoRoute.Topic
|
||||
}
|
||||
if route.GroupID == "" {
|
||||
route.GroupID = repoRoute.GroupID
|
||||
}
|
||||
}
|
||||
|
||||
if route.ServiceName != "" {
|
||||
defaultRoute := DefaultServiceRoute(route.ServiceName)
|
||||
if route.TableName == "" {
|
||||
route.TableName = defaultRoute.TableName
|
||||
}
|
||||
if route.Topic == "" {
|
||||
route.Topic = defaultRoute.Topic
|
||||
}
|
||||
if route.GroupID == "" {
|
||||
route.GroupID = defaultRoute.GroupID
|
||||
}
|
||||
return normalizeServiceRoute(route)
|
||||
}
|
||||
|
||||
if route.TableName == "" {
|
||||
route.TableName = DefaultServiceRoute(ServiceNameAgent).TableName
|
||||
}
|
||||
if route.Topic == "" {
|
||||
route.Topic = kafkabus.DefaultTopic
|
||||
}
|
||||
if route.GroupID == "" {
|
||||
route.GroupID = kafkabus.DefaultGroup
|
||||
}
|
||||
return normalizeServiceRoute(route)
|
||||
}
|
||||
|
||||
@@ -3,111 +3,175 @@ package outbox
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka"
|
||||
)
|
||||
|
||||
// EventPublisher 是通用事件发布能力接口。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只暴露“发布事件”这一件事,隐藏底层 outbox/kafka 实现细节;
|
||||
// 2. 业务层只依赖该接口,避免直接耦合具体引擎结构体;
|
||||
// 3. 该接口不承诺“立即消费成功”,只承诺“事件已入队或返回错误”。
|
||||
type EventPublisher interface {
|
||||
Publish(ctx context.Context, req PublishRequest) error
|
||||
}
|
||||
|
||||
// EventBus 是 outbox 异步总线的门面对象。
|
||||
// EventBus 是 outbox 多服务引擎的门面。
|
||||
//
|
||||
// 设计目的:
|
||||
// 1. 对外提供“发布 + 注册处理器 + 启停”三类最小能力;
|
||||
// 2. 对内复用 Engine,不重复实现状态机和调度逻辑;
|
||||
// 3. 为后续引入更多事件类型提供统一扩展点。
|
||||
// 职责边界:
|
||||
// 1. 对外只暴露“发布、注册 handler、启动、关闭”四类能力;
|
||||
// 2. 内部按事件归属把调用路由到对应 service engine;
|
||||
// 3. 不再把共享 topic 当主路径,服务级路由始终优先。
|
||||
type EventBus struct {
|
||||
engine *Engine
|
||||
repo *Repository
|
||||
cfg kafkabus.Config
|
||||
|
||||
mu sync.RWMutex
|
||||
engines map[string]*Engine
|
||||
}
|
||||
|
||||
// NewEventBus 创建通用事件总线。
|
||||
// NewEventBus 创建多服务事件门面。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 当 kafka.enabled=false 时返回 nil,调用方可直接降级为同步模式;
|
||||
// 2. 该方法只创建基础设施对象,不自动注册任何业务事件处理器;
|
||||
// 3. 业务事件处理器注册应由上层在启动阶段显式完成,避免隐式副作用。
|
||||
// 1. kafka.enabled=false 时返回 nil,调用方可直接降级;
|
||||
// 2. 实际 service engine 在需要时按服务目录懒加载;
|
||||
// 3. 懒加载不会改变既有事件契约,只是把物理资源拆到各自服务。
|
||||
func NewEventBus(repo *Repository, cfg kafkabus.Config) (*EventBus, error) {
|
||||
engine, err := NewEngine(repo, cfg)
|
||||
if !cfg.Enabled {
|
||||
return nil, nil
|
||||
}
|
||||
if repo == nil {
|
||||
return nil, errors.New("outbox repository is nil")
|
||||
}
|
||||
return &EventBus{
|
||||
repo: repo,
|
||||
cfg: cfg,
|
||||
engines: make(map[string]*Engine),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// RegisterEventHandler 注册事件处理器。
|
||||
func (b *EventBus) RegisterEventHandler(eventType string, handler MessageHandler) error {
|
||||
if b == nil {
|
||||
return errors.New("event bus is not initialized")
|
||||
}
|
||||
|
||||
route, err := b.routeForEvent(eventType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
engine, err := b.ensureEngine(route)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return engine.RegisterEventHandler(eventType, handler)
|
||||
}
|
||||
|
||||
// Publish 把事件路由到对应服务的 outbox 表与 Kafka 资源。
|
||||
func (b *EventBus) Publish(ctx context.Context, req PublishRequest) error {
|
||||
if b == nil {
|
||||
return errors.New("event bus is not initialized")
|
||||
}
|
||||
|
||||
route, err := b.routeForEvent(req.EventType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
engine, err := b.ensureEngine(route)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return engine.Publish(ctx, req)
|
||||
}
|
||||
|
||||
// Start 启动所有已创建的 service engine。
|
||||
func (b *EventBus) Start(ctx context.Context) {
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
for _, engine := range b.snapshotEngines() {
|
||||
go engine.Start(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// StartDispatch 只启动所有已创建 engine 的 dispatch 循环。
|
||||
func (b *EventBus) StartDispatch(ctx context.Context) {
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
for _, engine := range b.snapshotEngines() {
|
||||
go engine.StartDispatch(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// StartConsume 只启动所有已创建 engine 的消费循环。
|
||||
func (b *EventBus) StartConsume(ctx context.Context) {
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
for _, engine := range b.snapshotEngines() {
|
||||
go engine.StartConsume(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// Close 关闭所有 service engine 的 Kafka 资源。
|
||||
func (b *EventBus) Close() {
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
for _, engine := range b.snapshotEngines() {
|
||||
engine.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func (b *EventBus) routeForEvent(eventType string) (ServiceRoute, error) {
|
||||
route, ok := ResolveEventRoute(eventType)
|
||||
if !ok {
|
||||
return ServiceRoute{}, fmt.Errorf("outbox route not registered: eventType=%s", strings.TrimSpace(eventType))
|
||||
}
|
||||
return route, nil
|
||||
}
|
||||
|
||||
func (b *EventBus) ensureEngine(route ServiceRoute) (*Engine, error) {
|
||||
serviceName := route.ServiceName
|
||||
if serviceName == "" {
|
||||
return nil, errors.New("serviceName is empty")
|
||||
}
|
||||
|
||||
b.mu.RLock()
|
||||
if engine, ok := b.engines[serviceName]; ok {
|
||||
b.mu.RUnlock()
|
||||
return engine, nil
|
||||
}
|
||||
b.mu.RUnlock()
|
||||
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
if engine, ok := b.engines[serviceName]; ok {
|
||||
return engine, nil
|
||||
}
|
||||
|
||||
cfg := b.cfg
|
||||
cfg.ServiceName = serviceName
|
||||
cfg.Topic = route.Topic
|
||||
cfg.GroupID = route.GroupID
|
||||
|
||||
engine, err := NewEngine(b.repo, cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if engine == nil {
|
||||
return nil, nil
|
||||
}
|
||||
return &EventBus{engine: engine}, nil
|
||||
b.engines[serviceName] = engine
|
||||
return engine, nil
|
||||
}
|
||||
|
||||
// RegisterEventHandler 注册事件处理器。
|
||||
//
|
||||
// 失败语义:
|
||||
// 1. bus 未初始化时直接返回错误;
|
||||
// 2. event_type 为空或 handler 为空时返回错误;
|
||||
// 3. 重复注册时采用“后者覆盖前者”并打日志(由 Engine 负责)。
|
||||
func (b *EventBus) RegisterEventHandler(eventType string, handler MessageHandler) error {
|
||||
if b == nil || b.engine == nil {
|
||||
return errors.New("event bus is not initialized")
|
||||
func (b *EventBus) snapshotEngines() []*Engine {
|
||||
b.mu.RLock()
|
||||
defer b.mu.RUnlock()
|
||||
engines := make([]*Engine, 0, len(b.engines))
|
||||
for _, engine := range b.engines {
|
||||
engines = append(engines, engine)
|
||||
}
|
||||
return b.engine.RegisterEventHandler(eventType, handler)
|
||||
}
|
||||
|
||||
// Publish 发布事件到 outbox 队列。
|
||||
//
|
||||
// 关键语义:
|
||||
// 1. 返回 nil 仅表示“已写入 outbox 成功”;
|
||||
// 2. 真正 Kafka 投递与业务消费由后台异步循环完成;
|
||||
// 3. 若返回 error,表示本次入队失败,调用方应按业务策略决定是否重试/降级。
|
||||
func (b *EventBus) Publish(ctx context.Context, req PublishRequest) error {
|
||||
if b == nil || b.engine == nil {
|
||||
return errors.New("event bus is not initialized")
|
||||
}
|
||||
return b.engine.Publish(ctx, req)
|
||||
}
|
||||
|
||||
// Start 启动事件总线后台循环(dispatch + consume)。
|
||||
func (b *EventBus) Start(ctx context.Context) {
|
||||
if b == nil || b.engine == nil {
|
||||
return
|
||||
}
|
||||
b.engine.Start(ctx)
|
||||
}
|
||||
|
||||
// StartDispatch 单独启动事件总线的 outbox 投递循环。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只暴露 relay/dispatch 运行职责,便于独立进程只负责投递;
|
||||
// 2. 不启动消费循环,避免与独立 consumer 进程争抢职责;
|
||||
// 3. 不改变 Start(ctx) 的既有组合启动行为。
|
||||
func (b *EventBus) StartDispatch(ctx context.Context) {
|
||||
if b == nil || b.engine == nil {
|
||||
return
|
||||
}
|
||||
b.engine.StartDispatch(ctx)
|
||||
}
|
||||
|
||||
// StartConsume 单独启动事件总线的 Kafka 消费循环。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只暴露 consumer 运行职责,便于独立进程只负责消费;
|
||||
// 2. 不扫描 outbox、不投递 Kafka,状态推进仍复用 Engine 既有逻辑;
|
||||
// 3. handler 注册仍由调用方在启动前显式完成。
|
||||
func (b *EventBus) StartConsume(ctx context.Context) {
|
||||
if b == nil || b.engine == nil {
|
||||
return
|
||||
}
|
||||
b.engine.StartConsume(ctx)
|
||||
}
|
||||
|
||||
// Close 关闭事件总线资源(producer/consumer)。
|
||||
func (b *EventBus) Close() {
|
||||
if b == nil || b.engine == nil {
|
||||
return
|
||||
}
|
||||
b.engine.Close()
|
||||
return engines
|
||||
}
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
@@ -15,10 +17,11 @@ import (
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只负责 outbox 状态流转与通用事务编排;
|
||||
// 2. 不负责任何业务语义(例如聊天/任务/标题等具体落库);
|
||||
// 3. 消费成功时通过回调把业务动作注入同一事务,保证原子一致。
|
||||
// 2. 不负责聊天、任务、通知等具体业务语义;
|
||||
// 3. 同一仓储实例只面向一个服务级 outbox 目录,避免把共享表当成终态。
|
||||
type Repository struct {
|
||||
db *gorm.DB
|
||||
db *gorm.DB
|
||||
route ServiceRoute
|
||||
}
|
||||
|
||||
func NewRepository(db *gorm.DB) *Repository {
|
||||
@@ -27,20 +30,51 @@ func NewRepository(db *gorm.DB) *Repository {
|
||||
|
||||
// WithTx 用外部事务句柄构造同事务仓储实例。
|
||||
func (d *Repository) WithTx(tx *gorm.DB) *Repository {
|
||||
return &Repository{db: tx}
|
||||
if d == nil {
|
||||
return &Repository{db: tx}
|
||||
}
|
||||
return &Repository{db: tx, route: d.route}
|
||||
}
|
||||
|
||||
// CreateMessage 把事件写入 outbox(入队)。
|
||||
// WithRoute 用指定服务目录构造服务级仓储。
|
||||
//
|
||||
// 步骤:
|
||||
// 1. 序列化 payload;
|
||||
// 2. 初始化 pending 状态;
|
||||
// 3. 写入 outbox 并返回 outbox_id。
|
||||
func (d *Repository) CreateMessage(ctx context.Context, eventType, topic, messageKey string, payload any, maxRetry int) (int64, error) {
|
||||
// 职责边界:
|
||||
// 1. 只切换 outbox 物理目录,不改变事务句柄;
|
||||
// 2. 适合多个 service engine 共享同一 DB 连接;
|
||||
// 3. 保留 route 的 table/topic/group,避免回落到共享 topic。
|
||||
func (d *Repository) WithRoute(route ServiceRoute) *Repository {
|
||||
route = normalizeServiceRoute(route)
|
||||
if d == nil {
|
||||
return &Repository{route: route}
|
||||
}
|
||||
return &Repository{db: d.db, route: route}
|
||||
}
|
||||
|
||||
// CreateMessage 把事件写入 outbox。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只接受 eventType、messageKey、payload 和 maxRetry,不再允许业务侧显式传 topic;
|
||||
// 2. table/topic/group 统一由 eventType -> service -> route 解析,确保服务级路由是唯一入口;
|
||||
// 3. eventType 未注册时直接返回 error,避免消息静默落到默认表或默认 topic。
|
||||
func (d *Repository) CreateMessage(ctx context.Context, eventType string, messageKey string, payload any, maxRetry int) (int64, error) {
|
||||
if d == nil || d.db == nil {
|
||||
return 0, errors.New("outbox repository is nil")
|
||||
}
|
||||
|
||||
eventType = strings.TrimSpace(eventType)
|
||||
if eventType == "" {
|
||||
return 0, errors.New("eventType is empty")
|
||||
}
|
||||
messageKey = strings.TrimSpace(messageKey)
|
||||
if maxRetry <= 0 {
|
||||
maxRetry = 20
|
||||
}
|
||||
|
||||
route, err := d.resolvePublishRoute(eventType)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
raw, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
@@ -49,7 +83,8 @@ func (d *Repository) CreateMessage(ctx context.Context, eventType, topic, messag
|
||||
now := time.Now()
|
||||
msg := model.AgentOutboxMessage{
|
||||
EventType: eventType,
|
||||
Topic: topic,
|
||||
ServiceName: route.ServiceName,
|
||||
Topic: route.Topic,
|
||||
MessageKey: messageKey,
|
||||
Payload: string(raw),
|
||||
Status: model.OutboxStatusPending,
|
||||
@@ -58,39 +93,48 @@ func (d *Repository) CreateMessage(ctx context.Context, eventType, topic, messag
|
||||
NextRetryAt: &now,
|
||||
}
|
||||
|
||||
if err = d.db.WithContext(ctx).Create(&msg).Error; err != nil {
|
||||
if err = d.db.WithContext(ctx).Table(route.TableName).Create(&msg).Error; err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return msg.ID, nil
|
||||
}
|
||||
|
||||
// GetByID 从当前仓储绑定的 outbox 表读取指定消息。
|
||||
func (d *Repository) GetByID(ctx context.Context, id int64) (*model.AgentOutboxMessage, error) {
|
||||
var msg model.AgentOutboxMessage
|
||||
if err := d.db.WithContext(ctx).Where("id = ?", id).First(&msg).Error; err != nil {
|
||||
if err := d.scopedDB(ctx).Where("id = ?", id).First(&msg).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &msg, nil
|
||||
}
|
||||
|
||||
// ListDueMessages 拉取到期可投递消息。
|
||||
func (d *Repository) ListDueMessages(ctx context.Context, limit int) ([]model.AgentOutboxMessage, error) {
|
||||
//
|
||||
// 说明:
|
||||
// 1. serviceName 为空时保持当前仓储目录内的扫描语义;
|
||||
// 2. serviceName 非空时只扫描对应服务的消息;
|
||||
// 3. 这样既能支持单服务 relay,也能支持后续多服务 relay。
|
||||
func (d *Repository) ListDueMessages(ctx context.Context, serviceName string, limit int) ([]model.AgentOutboxMessage, error) {
|
||||
if limit <= 0 {
|
||||
limit = 100
|
||||
}
|
||||
now := time.Now()
|
||||
var messages []model.AgentOutboxMessage
|
||||
err := d.db.WithContext(ctx).
|
||||
query := d.scopedDB(ctx).
|
||||
Where("status = ? AND next_retry_at IS NOT NULL AND next_retry_at <= ?", model.OutboxStatusPending, now).
|
||||
Order("next_retry_at ASC, id ASC").
|
||||
Limit(limit).
|
||||
Find(&messages).Error
|
||||
if err != nil {
|
||||
Limit(limit)
|
||||
serviceName = strings.TrimSpace(serviceName)
|
||||
if serviceName != "" {
|
||||
query = query.Where("service_name = ?", serviceName)
|
||||
}
|
||||
if err := query.Find(&messages).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return messages, nil
|
||||
}
|
||||
|
||||
// MarkPublished 标记为已投递 Kafka。
|
||||
// MarkPublished 标记消息已成功投递到 Kafka。
|
||||
func (d *Repository) MarkPublished(ctx context.Context, id int64) error {
|
||||
now := time.Now()
|
||||
updates := map[string]interface{}{
|
||||
@@ -99,14 +143,14 @@ func (d *Repository) MarkPublished(ctx context.Context, id int64) error {
|
||||
"last_error": nil,
|
||||
"next_retry_at": nil,
|
||||
}
|
||||
result := d.db.WithContext(ctx).
|
||||
result := d.scopedDB(ctx).
|
||||
Model(&model.AgentOutboxMessage{}).
|
||||
Where("id = ? AND status NOT IN (?, ?)", id, model.OutboxStatusConsumed, model.OutboxStatusDead).
|
||||
Updates(updates)
|
||||
return result.Error
|
||||
}
|
||||
|
||||
// MarkDead 标记为死信。
|
||||
// MarkDead 把消息标记为死信。
|
||||
func (d *Repository) MarkDead(ctx context.Context, id int64, reason string) error {
|
||||
now := time.Now()
|
||||
lastErr := truncateError(reason)
|
||||
@@ -116,21 +160,20 @@ func (d *Repository) MarkDead(ctx context.Context, id int64, reason string) erro
|
||||
"next_retry_at": nil,
|
||||
"updated_at": now,
|
||||
}
|
||||
return d.db.WithContext(ctx).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
|
||||
return d.scopedDB(ctx).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
|
||||
}
|
||||
|
||||
// MarkFailedForRetry 记录一次可重试失败并推进重试窗口。
|
||||
//
|
||||
// 步骤:
|
||||
// 1. 行级锁读取当前状态;
|
||||
// 2. 最终态幂等短路;
|
||||
// 3. retry_count+1;
|
||||
// 4. 计算 next_retry_at 或 dead;
|
||||
// 5. 写回状态快照。
|
||||
// 1. 行级锁读取当前消息状态;
|
||||
// 2. 已进入 consumed/dead 时幂等短路;
|
||||
// 3. retry_count+1,并根据最大次数决定继续 pending 还是转 dead;
|
||||
// 4. 写回 last_error 和 next_retry_at,交给下一轮扫描继续投递。
|
||||
func (d *Repository) MarkFailedForRetry(ctx context.Context, id int64, reason string) error {
|
||||
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
|
||||
var msg model.AgentOutboxMessage
|
||||
err := tx.Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", id).First(&msg).Error
|
||||
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", id).First(&msg).Error
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -159,7 +202,7 @@ func (d *Repository) MarkFailedForRetry(ctx context.Context, id int64, reason st
|
||||
"next_retry_at": nextRetryAt,
|
||||
"updated_at": now,
|
||||
}
|
||||
return tx.Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
|
||||
return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
|
||||
})
|
||||
}
|
||||
|
||||
@@ -167,13 +210,13 @@ func (d *Repository) MarkFailedForRetry(ctx context.Context, id int64, reason st
|
||||
//
|
||||
// 步骤:
|
||||
// 1. 事务内锁定 outbox 记录;
|
||||
// 2. 已 consumed/dead 时幂等返回;
|
||||
// 3. 执行业务回调 fn(tx);
|
||||
// 2. consumed/dead 状态幂等返回;
|
||||
// 3. 执行业务回调 fn(tx),让业务落库和 outbox 状态共用同一事务;
|
||||
// 4. 业务成功后统一标记 consumed。
|
||||
func (d *Repository) ConsumeAndMarkConsumed(ctx context.Context, outboxID int64, fn func(tx *gorm.DB) error) error {
|
||||
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
|
||||
var outboxMsg model.AgentOutboxMessage
|
||||
err := tx.Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
|
||||
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
|
||||
if err != nil {
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
return nil
|
||||
@@ -198,10 +241,46 @@ func (d *Repository) ConsumeAndMarkConsumed(ctx context.Context, outboxID int64,
|
||||
"next_retry_at": nil,
|
||||
"updated_at": now,
|
||||
}
|
||||
return tx.Model(&model.AgentOutboxMessage{}).Where("id = ?", outboxID).Updates(updates).Error
|
||||
return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", outboxID).Updates(updates).Error
|
||||
})
|
||||
}
|
||||
|
||||
func (d *Repository) scopedDB(ctx context.Context) *gorm.DB {
|
||||
return d.db.WithContext(ctx).Table(d.tableName())
|
||||
}
|
||||
|
||||
func (d *Repository) tableName() string {
|
||||
if d == nil {
|
||||
return DefaultServiceRoute(ServiceNameAgent).TableName
|
||||
}
|
||||
|
||||
route := normalizeServiceRoute(d.route)
|
||||
if route.TableName != "" {
|
||||
return route.TableName
|
||||
}
|
||||
return DefaultServiceRoute(ServiceNameAgent).TableName
|
||||
}
|
||||
|
||||
func (d *Repository) resolvePublishRoute(eventType string) (ServiceRoute, error) {
|
||||
if d == nil {
|
||||
return ServiceRoute{}, errors.New("outbox repository is nil")
|
||||
}
|
||||
|
||||
eventType = strings.TrimSpace(eventType)
|
||||
if eventType == "" {
|
||||
return ServiceRoute{}, errors.New("eventType is empty")
|
||||
}
|
||||
|
||||
route, ok := ResolveEventRoute(eventType)
|
||||
if !ok {
|
||||
return ServiceRoute{}, fmt.Errorf("outbox route not registered: eventType=%s", eventType)
|
||||
}
|
||||
if d.route.ServiceName != "" && route.ServiceName != d.route.ServiceName {
|
||||
return ServiceRoute{}, fmt.Errorf("eventType %s belongs to service %s, current repo service %s", eventType, route.ServiceName, d.route.ServiceName)
|
||||
}
|
||||
return normalizeServiceRoute(route), nil
|
||||
}
|
||||
|
||||
func calcRetryBackoff(retryCount int) time.Duration {
|
||||
if retryCount <= 0 {
|
||||
return time.Second
|
||||
|
||||
136
backend/infra/outbox/route_registry.go
Normal file
136
backend/infra/outbox/route_registry.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// outboxRouteRegistry is the process-wide routing table for service-level
// outboxes. One RWMutex guards two independent mappings:
//   - eventToService: which service an event_type belongs to;
//   - serviceRoutes:  explicitly registered per-service table/topic/group.
var outboxRouteRegistry = struct {
	sync.RWMutex
	eventToService map[string]string
	serviceRoutes  map[string]ServiceRoute
}{
	eventToService: make(map[string]string),
	serviceRoutes:  make(map[string]ServiceRoute),
}
|
||||
|
||||
// RegisterServiceRoute 注册或覆盖某个服务的物理 outbox 路由。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只登记“服务 -> table/topic/group”目录,不登记事件归属;
|
||||
// 2. 同服务重复注册时以后者覆盖前者,方便显式配置覆盖默认目录;
|
||||
// 3. 空服务名直接报错,避免把共享 topic 误当成新终态。
|
||||
func RegisterServiceRoute(route ServiceRoute) error {
|
||||
route = normalizeServiceRoute(route)
|
||||
if route.ServiceName == "" {
|
||||
return errors.New("serviceName is empty")
|
||||
}
|
||||
|
||||
outboxRouteRegistry.Lock()
|
||||
defer outboxRouteRegistry.Unlock()
|
||||
|
||||
outboxRouteRegistry.serviceRoutes[route.ServiceName] = route
|
||||
return nil
|
||||
}
|
||||
|
||||
// RegisterEventService records the global "event type -> owning service"
// route.
//
// Responsibilities:
//  1. Only records event ownership that every process must agree on; it
//     carries no handler logic.
//  2. Each event_type may belong to exactly one service: re-registering
//     the same pair is idempotent, a conflicting pair is an error.
//  3. NOTE(review): the service's physical route is NOT written into the
//     catalog here; lookups fall back to the default catalog until
//     RegisterServiceRoute is called explicitly.
func RegisterEventService(eventType, serviceName string) error {
	eventType = strings.TrimSpace(eventType)
	if eventType == "" {
		return errors.New("eventType is empty")
	}
	serviceName = normalizeServiceName(serviceName)
	if serviceName == "" {
		return errors.New("serviceName is empty")
	}

	outboxRouteRegistry.Lock()
	defer outboxRouteRegistry.Unlock()

	// Same pair again is a no-op; a different owner is a hard conflict.
	if existing, ok := outboxRouteRegistry.eventToService[eventType]; ok {
		if existing != serviceName {
			return fmt.Errorf("eventType %s already registered to service %s", eventType, existing)
		}
		return nil
	}

	outboxRouteRegistry.eventToService[eventType] = serviceName
	return nil
}
|
||||
|
||||
// ResolveEventService 查询某个事件类型的归属服务。
|
||||
//
|
||||
// 返回值说明:
|
||||
// 1. serviceName 为登记结果;
|
||||
// 2. ok=false 表示当前路由表里还没有这个事件类型的归属信息。
|
||||
func ResolveEventService(eventType string) (serviceName string, ok bool) {
|
||||
eventType = strings.TrimSpace(eventType)
|
||||
if eventType == "" {
|
||||
return "", false
|
||||
}
|
||||
|
||||
outboxRouteRegistry.RLock()
|
||||
defer outboxRouteRegistry.RUnlock()
|
||||
|
||||
serviceName, ok = outboxRouteRegistry.eventToService[eventType]
|
||||
return serviceName, ok
|
||||
}
|
||||
|
||||
// ResolveServiceRoute 查询某个服务的物理 outbox 配置。
|
||||
//
|
||||
// 返回值说明:
|
||||
// 1. route 始终返回一个可执行的目录结果,未显式注册时回退默认目录;
|
||||
// 2. ok=true 表示命中显式注册目录,ok=false 表示走默认目录;
|
||||
// 3. 这样既能支持显式配置覆盖,也能让基础设施在启动初期就有稳定默认值。
|
||||
func ResolveServiceRoute(serviceName string) (route ServiceRoute, ok bool) {
|
||||
serviceName = normalizeServiceName(serviceName)
|
||||
if serviceName == "" {
|
||||
return DefaultServiceRoute(""), false
|
||||
}
|
||||
|
||||
outboxRouteRegistry.RLock()
|
||||
route, ok = outboxRouteRegistry.serviceRoutes[serviceName]
|
||||
outboxRouteRegistry.RUnlock()
|
||||
if ok {
|
||||
return normalizeServiceRoute(route), true
|
||||
}
|
||||
if route, ok = configuredServiceRoute(serviceName); ok {
|
||||
return route, true
|
||||
}
|
||||
return DefaultServiceRoute(serviceName), false
|
||||
}
|
||||
|
||||
// ResolveEventRoute 先按事件查服务,再按服务查物理目录。
|
||||
//
|
||||
// 返回值说明:
|
||||
// 1. route 包含事件所在服务的 table/topic/group;
|
||||
// 2. ok=true 只表示“事件 -> 服务归属”已登记;
|
||||
// 3. 服务目录若未显式注册,会自动回退到默认目录。
|
||||
func ResolveEventRoute(eventType string) (route ServiceRoute, ok bool) {
|
||||
serviceName, ok := ResolveEventService(eventType)
|
||||
if !ok {
|
||||
return ServiceRoute{}, false
|
||||
}
|
||||
route, _ = ResolveServiceRoute(serviceName)
|
||||
return route, true
|
||||
}
|
||||
|
||||
func configuredServiceRoute(serviceName string) (ServiceRoute, bool) {
|
||||
cfg, ok := ResolveServiceConfig(serviceName)
|
||||
if !ok {
|
||||
return ServiceRoute{}, false
|
||||
}
|
||||
return normalizeServiceRoute(ServiceRoute{
|
||||
ServiceName: cfg.Name,
|
||||
TableName: cfg.TableName,
|
||||
Topic: cfg.Topic,
|
||||
GroupID: cfg.GroupID,
|
||||
}), true
|
||||
}
|
||||
168
backend/infra/outbox/service_catalog.go
Normal file
168
backend/infra/outbox/service_catalog.go
Normal file
@@ -0,0 +1,168 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/spf13/viper"
|
||||
)
|
||||
|
||||
// Canonical service names used as keys in the service-level outbox
// catalog. They must stay in sync with the ServiceNameXxx constants
// declared alongside ServiceRoute.
const (
	ServiceAgent           = "agent"
	ServiceTask            = "task"
	ServiceMemory          = "memory"
	ServiceActiveScheduler = "active-scheduler"
	ServiceNotification    = "notification"
)
|
||||
|
||||
// ServiceConfig describes the fixed ownership of one service-level
// outbox.
//
// Responsibilities:
//  1. Only states which service an event belongs to, which table it is
//     written to, which topic it is published on, and which consumer
//     group reads it.
//  2. Carries no business handlers and no Kafka message format.
//  3. Service-level writers, scanners and consumers should all read this
//     single mapping to avoid configuration drift.
type ServiceConfig struct {
	Name      string
	Topic     string
	GroupID   string
	TableName string
}
|
||||
|
||||
// serviceCatalogCache memoizes the result of LoadServiceConfigs so the
// config-backed overrides are only evaluated once per process.
var serviceCatalogCache = struct {
	sync.RWMutex
	loaded  bool
	entries map[string]ServiceConfig
}{
	entries: make(map[string]ServiceConfig),
}
|
||||
|
||||
// LoadServiceConfigs 读取服务级 outbox 目录。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 先给出默认终态映射,再允许通过配置中心覆盖 topic/groupID/table;
|
||||
// 2. 该目录只负责服务级 outbox 基础设施,不混入业务逻辑;
|
||||
// 3. 若某个服务配置缺失,直接使用默认值,避免启动期因为非关键配置崩掉。
|
||||
func LoadServiceConfigs() map[string]ServiceConfig {
|
||||
serviceCatalogCache.Lock()
|
||||
defer serviceCatalogCache.Unlock()
|
||||
|
||||
if serviceCatalogCache.loaded {
|
||||
return cloneServiceConfigs(serviceCatalogCache.entries)
|
||||
}
|
||||
|
||||
entries := map[string]ServiceConfig{
|
||||
ServiceAgent: {
|
||||
Name: ServiceAgent,
|
||||
Topic: "smartflow.agent.outbox",
|
||||
GroupID: "smartflow-agent-outbox-consumer",
|
||||
TableName: "agent_outbox_messages",
|
||||
},
|
||||
ServiceTask: {
|
||||
Name: ServiceTask,
|
||||
Topic: "smartflow.task.outbox",
|
||||
GroupID: "smartflow-task-outbox-consumer",
|
||||
TableName: "task_outbox_messages",
|
||||
},
|
||||
ServiceMemory: {
|
||||
Name: ServiceMemory,
|
||||
Topic: "smartflow.memory.outbox",
|
||||
GroupID: "smartflow-memory-outbox-consumer",
|
||||
TableName: "memory_outbox_messages",
|
||||
},
|
||||
ServiceActiveScheduler: {
|
||||
Name: ServiceActiveScheduler,
|
||||
Topic: "smartflow.active-scheduler.outbox",
|
||||
GroupID: "smartflow-active-scheduler-outbox-consumer",
|
||||
TableName: "active_scheduler_outbox_messages",
|
||||
},
|
||||
ServiceNotification: {
|
||||
Name: ServiceNotification,
|
||||
Topic: "smartflow.notification.outbox",
|
||||
GroupID: "smartflow-notification-outbox-consumer",
|
||||
TableName: "notification_outbox_messages",
|
||||
},
|
||||
}
|
||||
|
||||
for name, entry := range entries {
|
||||
entries[name] = overrideServiceConfig(entry)
|
||||
}
|
||||
|
||||
serviceCatalogCache.entries = entries
|
||||
serviceCatalogCache.loaded = true
|
||||
return cloneServiceConfigs(entries)
|
||||
}
|
||||
|
||||
// ResolveServiceConfig 查询某个服务的 outbox 目录。
|
||||
func ResolveServiceConfig(serviceName string) (ServiceConfig, bool) {
|
||||
serviceName = strings.TrimSpace(serviceName)
|
||||
if serviceName == "" {
|
||||
return ServiceConfig{}, false
|
||||
}
|
||||
|
||||
entries := LoadServiceConfigs()
|
||||
cfg, ok := entries[serviceName]
|
||||
return cfg, ok
|
||||
}
|
||||
|
||||
// ResolveEventServiceConfig 先解析事件归属服务,再返回该服务的 outbox 目录。
|
||||
func ResolveEventServiceConfig(eventType string) (ServiceConfig, bool) {
|
||||
serviceName, ok := ResolveEventService(eventType)
|
||||
if !ok {
|
||||
return ServiceConfig{}, false
|
||||
}
|
||||
return ResolveServiceConfig(serviceName)
|
||||
}
|
||||
|
||||
// ServiceTables 返回当前目录中的所有 outbox 表名。
|
||||
func ServiceTables() []string {
|
||||
entries := LoadServiceConfigs()
|
||||
tables := make([]string, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
tables = append(tables, entry.TableName)
|
||||
}
|
||||
sort.Strings(tables)
|
||||
return tables
|
||||
}
|
||||
|
||||
// ServiceNames 返回当前目录中的所有服务名。
|
||||
func ServiceNames() []string {
|
||||
entries := LoadServiceConfigs()
|
||||
names := make([]string, 0, len(entries))
|
||||
for name := range entries {
|
||||
names = append(names, name)
|
||||
}
|
||||
sort.Strings(names)
|
||||
return names
|
||||
}
|
||||
|
||||
func overrideServiceConfig(entry ServiceConfig) ServiceConfig {
|
||||
upperName := strings.TrimSpace(entry.Name)
|
||||
if upperName == "" {
|
||||
return entry
|
||||
}
|
||||
|
||||
topicKey := fmt.Sprintf("outbox.services.%s.topic", upperName)
|
||||
groupKey := fmt.Sprintf("outbox.services.%s.groupID", upperName)
|
||||
tableKey := fmt.Sprintf("outbox.services.%s.table", upperName)
|
||||
|
||||
if topic := strings.TrimSpace(viper.GetString(topicKey)); topic != "" {
|
||||
entry.Topic = topic
|
||||
}
|
||||
if groupID := strings.TrimSpace(viper.GetString(groupKey)); groupID != "" {
|
||||
entry.GroupID = groupID
|
||||
}
|
||||
if tableName := strings.TrimSpace(viper.GetString(tableKey)); tableName != "" {
|
||||
entry.TableName = tableName
|
||||
}
|
||||
return entry
|
||||
}
|
||||
|
||||
func cloneServiceConfigs(entries map[string]ServiceConfig) map[string]ServiceConfig {
|
||||
cloned := make(map[string]ServiceConfig, len(entries))
|
||||
for name, entry := range entries {
|
||||
cloned[name] = entry
|
||||
}
|
||||
return cloned
|
||||
}
|
||||
145
backend/infra/outbox/service_route.go
Normal file
145
backend/infra/outbox/service_route.go
Normal file
@@ -0,0 +1,145 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Canonical service names for the service-level outbox routes. They must
// stay in sync with the ServiceXxx constants used by the service catalog.
const (
	ServiceNameAgent           = "agent"
	ServiceNameTask            = "task"
	ServiceNameMemory          = "memory"
	ServiceNameActiveScheduler = "active-scheduler"
	ServiceNameNotification    = "notification"
)
|
||||
|
||||
// ServiceRoute describes the end-state routing information of one outbox
// service.
//
// Responsibilities:
//  1. Only carries the table/topic/group catalog data of a service-level
//     outbox;
//  2. holds no handlers, transactions or Kafka connection objects;
//  3. lets callers resolve an event to its service first, then resolve
//     the service to its own physical resources.
type ServiceRoute struct {
	ServiceName string
	TableName   string
	Topic       string
	GroupID     string
}
|
||||
|
||||
// builtinServiceRoutes is the hard-coded end-state catalog mapping each
// known service to its dedicated outbox table, Kafka topic and consumer
// group. Explicit registrations and config overrides take precedence over
// these values at runtime.
var builtinServiceRoutes = map[string]ServiceRoute{
	ServiceNameAgent: {
		ServiceName: ServiceNameAgent,
		TableName:   "agent_outbox_messages",
		Topic:       "smartflow.agent.outbox",
		GroupID:     "smartflow-agent-outbox-consumer",
	},
	ServiceNameTask: {
		ServiceName: ServiceNameTask,
		TableName:   "task_outbox_messages",
		Topic:       "smartflow.task.outbox",
		GroupID:     "smartflow-task-outbox-consumer",
	},
	ServiceNameMemory: {
		ServiceName: ServiceNameMemory,
		TableName:   "memory_outbox_messages",
		Topic:       "smartflow.memory.outbox",
		GroupID:     "smartflow-memory-outbox-consumer",
	},
	ServiceNameActiveScheduler: {
		ServiceName: ServiceNameActiveScheduler,
		TableName:   "active_scheduler_outbox_messages",
		Topic:       "smartflow.active-scheduler.outbox",
		GroupID:     "smartflow-active-scheduler-outbox-consumer",
	},
	ServiceNameNotification: {
		ServiceName: ServiceNameNotification,
		TableName:   "notification_outbox_messages",
		Topic:       "smartflow.notification.outbox",
		GroupID:     "smartflow-notification-outbox-consumer",
	},
}
|
||||
|
||||
// DefaultServiceRoutes 返回当前已知服务的默认路由清单。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 这里是“目录初始值”,用于自动建表和首次注册时兜底;
|
||||
// 2. 运行时若显式注册了服务路由,会以显式注册结果为准;
|
||||
// 3. 返回值是拷贝,调用方可安全遍历,不会污染全局目录。
|
||||
func DefaultServiceRoutes() []ServiceRoute {
|
||||
return []ServiceRoute{
|
||||
builtinServiceRoutes[ServiceNameAgent],
|
||||
builtinServiceRoutes[ServiceNameTask],
|
||||
builtinServiceRoutes[ServiceNameMemory],
|
||||
builtinServiceRoutes[ServiceNameActiveScheduler],
|
||||
builtinServiceRoutes[ServiceNameNotification],
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultServiceRoute 根据服务名生成终态路由。
|
||||
//
|
||||
// 规则:
|
||||
// 1. 已知服务直接返回约定映射;
|
||||
// 2. 未知服务按命名约定生成 table/topic/group,避免继续落回共享 topic;
|
||||
// 3. 空服务名回退到 agent 兼容路径,保住历史单体模式。
|
||||
func DefaultServiceRoute(serviceName string) ServiceRoute {
|
||||
serviceName = normalizeServiceName(serviceName)
|
||||
if serviceName == "" {
|
||||
serviceName = ServiceNameAgent
|
||||
}
|
||||
if route, ok := builtinServiceRoutes[serviceName]; ok {
|
||||
return route
|
||||
}
|
||||
|
||||
tablePrefix := strings.NewReplacer("-", "_").Replace(serviceName)
|
||||
if tablePrefix == "" {
|
||||
tablePrefix = ServiceNameAgent
|
||||
}
|
||||
|
||||
return ServiceRoute{
|
||||
ServiceName: serviceName,
|
||||
TableName: tablePrefix + "_outbox_messages",
|
||||
Topic: "smartflow." + serviceName + ".outbox",
|
||||
GroupID: "smartflow-" + serviceName + "-outbox-consumer",
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeServiceName trims surrounding whitespace; an all-whitespace
// name normalizes to the empty string.
func normalizeServiceName(serviceName string) string {
	return strings.TrimSpace(serviceName)
}
|
||||
|
||||
// normalizeServiceRoute 把空字段补成可执行的默认值。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 只做字符串裁剪和缺省补齐,不做注册副作用;
|
||||
// 2. 服务名为空时只保留历史兼容路径,不强行把它当成新服务;
|
||||
// 3. 这一步是 route 目录的最后一道兜底,避免上层拿到半成品路由。
|
||||
func normalizeServiceRoute(route ServiceRoute) ServiceRoute {
|
||||
route.ServiceName = normalizeServiceName(route.ServiceName)
|
||||
route.TableName = strings.TrimSpace(route.TableName)
|
||||
route.Topic = strings.TrimSpace(route.Topic)
|
||||
route.GroupID = strings.TrimSpace(route.GroupID)
|
||||
|
||||
if route.ServiceName == "" {
|
||||
if route.TableName == "" {
|
||||
route.TableName = builtinServiceRoutes[ServiceNameAgent].TableName
|
||||
}
|
||||
if route.Topic == "" {
|
||||
route.Topic = builtinServiceRoutes[ServiceNameAgent].Topic
|
||||
}
|
||||
if route.GroupID == "" {
|
||||
route.GroupID = builtinServiceRoutes[ServiceNameAgent].GroupID
|
||||
}
|
||||
return route
|
||||
}
|
||||
|
||||
defaultRoute := DefaultServiceRoute(route.ServiceName)
|
||||
if route.TableName == "" {
|
||||
route.TableName = defaultRoute.TableName
|
||||
}
|
||||
if route.Topic == "" {
|
||||
route.Topic = defaultRoute.Topic
|
||||
}
|
||||
if route.GroupID == "" {
|
||||
route.GroupID = defaultRoute.GroupID
|
||||
}
|
||||
return route
|
||||
}
|
||||
Reference in New Issue
Block a user