Version: 0.9.77.dev.260505
后端:
1.阶段 6 CP4/CP5 目录收口与共享边界纯化
- 将 backend 根目录收口为 services、client、gateway、cmd、shared 五个一级目录
- 收拢 bootstrap、inits、infra/kafka、infra/outbox、conv、respond、pkg、middleware,移除根目录旧实现与空目录
- 将 utils 下沉到 services/userauth/internal/auth,将 logic 下沉到 services/schedule/core/planning
- 将迁移期 runtime 桥接实现统一收拢到 services/runtime/{conv,dao,eventsvc,model},删除 shared/legacy 与未再被 import 的旧 service 实现
- 将 gateway/shared/respond 收口为 HTTP/Gin 错误写回适配,shared/respond 仅保留共享错误语义与状态映射
- 将 HTTP IdempotencyMiddleware 与 RateLimitMiddleware 收口到 gateway/middleware
- 将 GormCachePlugin 下沉到 shared/infra/gormcache,将共享 RateLimiter 下沉到 shared/infra/ratelimit,将 agent token budget 下沉到 services/agent/shared
- 删除 InitEino 兼容壳,收缩 cmd/internal/coreinit 仅保留旧组合壳残留域初始化语义
- 更新微服务迁移计划与桌面 checklist,补齐 CP4/CP5 当前切流点、目录终态与验证结果
- 完成 go test ./...、git diff --check 与最终真实 smoke;health、register/login、task/create+get、schedule/today、task-class/list、memory/items、agent chat/meta/timeline/context-stats 全部 200,SSE 合并结果为 CP5_OK 且 [DONE] 只有 1 个
This commit is contained in:
487
backend/shared/infra/outbox/engine.go
Normal file
487
backend/shared/infra/outbox/engine.go
Normal file
@@ -0,0 +1,487 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/services/runtime/model"
|
||||
kafkabus "github.com/LoveLosita/smartflow/backend/shared/infra/kafka"
|
||||
segmentkafka "github.com/segmentio/kafka-go"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
const defaultDispatchTimeout = 10 * time.Second
|
||||
|
||||
// MessageHandler 是事件消费处理器。
|
||||
//
|
||||
// 语义约束:
|
||||
// 1. 入参 envelope 已完成最外层解析;
|
||||
// 2. 返回 nil 表示处理成功,框架提交 offset;
|
||||
// 3. 返回 error 表示可重试失败,框架回写 retry 后提交 offset。
|
||||
type MessageHandler func(ctx context.Context, envelope kafkabus.Envelope) error
|
||||
|
||||
// PublishRequest 是通用事件发布入参。
|
||||
//
|
||||
// 设计目标:
|
||||
// 1. 业务只描述“要发什么事件”,不关心 outbox/kafka 细节;
|
||||
// 2. 统一收敛事件元数据(event_type/version/aggregate_id);
|
||||
// 3. payload 支持任意 DTO,由 infra 统一 JSON 序列化。
|
||||
type PublishRequest struct {
|
||||
EventType string
|
||||
EventVersion string
|
||||
MessageKey string
|
||||
AggregateID string
|
||||
EventID string
|
||||
Payload any
|
||||
}
|
||||
|
||||
// Engine 是单个服务的 Outbox + Kafka 异步引擎。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 负责一个服务目录下的 outbox 扫描、Kafka 投递、Kafka 消费、状态机推进;
|
||||
// 2. 负责 event_type -> handler 路由;
|
||||
// 3. 不负责任何跨服务路由决策,跨服务分发由 EventBus 门面完成。
|
||||
type Engine struct {
|
||||
repo *Repository
|
||||
producer *kafkabus.Producer
|
||||
consumer *kafkabus.Consumer
|
||||
|
||||
brokers []string
|
||||
route ServiceRoute
|
||||
maxRetry int
|
||||
scanEvery time.Duration
|
||||
scanBatch int
|
||||
|
||||
handlersMu sync.RWMutex
|
||||
handlers map[string]MessageHandler
|
||||
}
|
||||
|
||||
// NewEngine 创建单服务异步引擎。
|
||||
//
|
||||
// 规则:
|
||||
// 1. kafka.enabled=false 时返回 nil,调用方可降级同步;
|
||||
// 2. serviceName 非空时优先使用服务级默认目录,topic/group/table 不再沿用共享终态;
|
||||
// 3. producer/consumer 任一步失败都会回收已创建资源。
|
||||
func NewEngine(repo *Repository, cfg kafkabus.Config) (*Engine, error) {
|
||||
if !cfg.Enabled {
|
||||
return nil, nil
|
||||
}
|
||||
if repo == nil {
|
||||
return nil, errors.New("outbox repository is nil")
|
||||
}
|
||||
|
||||
route := resolveEngineRoute(repo, cfg)
|
||||
cfg.Topic = route.Topic
|
||||
cfg.GroupID = route.GroupID
|
||||
|
||||
serviceRepo := repo.WithRoute(route)
|
||||
producer, err := kafkabus.NewProducer(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
consumer, err := kafkabus.NewConsumer(cfg)
|
||||
if err != nil {
|
||||
_ = producer.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Engine{
|
||||
repo: serviceRepo,
|
||||
producer: producer,
|
||||
consumer: consumer,
|
||||
brokers: cfg.Brokers,
|
||||
route: route,
|
||||
maxRetry: cfg.MaxRetry,
|
||||
scanEvery: cfg.RetryScanInterval,
|
||||
scanBatch: cfg.RetryBatchSize,
|
||||
handlers: make(map[string]MessageHandler),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// RegisterHandler 是历史别名(等价 RegisterEventHandler)。
|
||||
func (e *Engine) RegisterHandler(eventType string, handler MessageHandler) error {
|
||||
return e.RegisterEventHandler(eventType, handler)
|
||||
}
|
||||
|
||||
// RegisterEventHandler 注册事件处理器。
|
||||
func (e *Engine) RegisterEventHandler(eventType string, handler MessageHandler) error {
|
||||
if e == nil {
|
||||
return errors.New("outbox engine is nil")
|
||||
}
|
||||
eventType = strings.TrimSpace(eventType)
|
||||
if eventType == "" {
|
||||
return errors.New("eventType is empty")
|
||||
}
|
||||
if handler == nil {
|
||||
return errors.New("handler is nil")
|
||||
}
|
||||
|
||||
e.handlersMu.Lock()
|
||||
defer e.handlersMu.Unlock()
|
||||
if _, exists := e.handlers[eventType]; exists {
|
||||
log.Printf("outbox handler 覆盖注册: service=%s event_type=%s", e.route.ServiceName, eventType)
|
||||
}
|
||||
e.handlers[eventType] = handler
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Engine) getHandler(eventType string) (MessageHandler, bool) {
|
||||
e.handlersMu.RLock()
|
||||
defer e.handlersMu.RUnlock()
|
||||
h, ok := e.handlers[eventType]
|
||||
return h, ok
|
||||
}
|
||||
|
||||
// Start 启动 dispatch + consume 两个后台循环。
|
||||
func (e *Engine) Start(ctx context.Context) {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf(
|
||||
"outbox engine starting: service=%s table=%s topic=%s group=%s brokers=%v retry_scan=%s batch=%d",
|
||||
e.route.ServiceName,
|
||||
e.route.TableName,
|
||||
e.route.Topic,
|
||||
e.route.GroupID,
|
||||
e.brokers,
|
||||
e.scanEvery,
|
||||
e.scanBatch,
|
||||
)
|
||||
// 1. dispatch 先启动,保证已到期的 outbox 不会被 topic 探测阻塞在 pending。
|
||||
// 2. consume 仍等待 topic 探测,降低启动期消费者空转与 metadata 抖动。
|
||||
// 3. 若探测失败,继续启动消费者;真实错误交给消费循环记录并由运维日志暴露。
|
||||
e.StartDispatch(ctx)
|
||||
|
||||
if err := kafkabus.WaitTopicReady(ctx, e.brokers, e.route.Topic, 30*time.Second); err != nil {
|
||||
log.Printf("Kafka topic not ready before consume loop start: %v", err)
|
||||
} else {
|
||||
log.Printf("Kafka topic is ready: %s", e.route.Topic)
|
||||
}
|
||||
e.StartConsume(ctx)
|
||||
}
|
||||
|
||||
// StartDispatch 单独启动 outbox -> Kafka 的投递循环。
|
||||
func (e *Engine) StartDispatch(ctx context.Context) {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
go e.startDispatchLoop(ctx)
|
||||
}
|
||||
|
||||
// StartConsume 单独启动 Kafka -> handler 的消费循环。
|
||||
func (e *Engine) StartConsume(ctx context.Context) {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
go e.startConsumeLoop(ctx)
|
||||
}
|
||||
|
||||
// Close 关闭 kafka 资源。
|
||||
func (e *Engine) Close() {
|
||||
if e == nil {
|
||||
return
|
||||
}
|
||||
if err := e.producer.Close(); err != nil {
|
||||
log.Printf("关闭 Kafka producer 失败: %v", err)
|
||||
}
|
||||
if err := e.consumer.Close(); err != nil {
|
||||
log.Printf("关闭 Kafka consumer 失败: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Enqueue 是历史别名(等价 Publish)。
|
||||
func (e *Engine) Enqueue(ctx context.Context, eventType, messageKey string, payload any) error {
|
||||
return e.Publish(ctx, PublishRequest{
|
||||
EventType: eventType,
|
||||
MessageKey: messageKey,
|
||||
AggregateID: messageKey,
|
||||
Payload: payload,
|
||||
})
|
||||
}
|
||||
|
||||
// Publish 发布事件到 outbox。
|
||||
//
|
||||
// 步骤:
|
||||
// 1. 标准化 event_type/version/key;
|
||||
// 2. payload 序列化;
|
||||
// 3. 写入当前服务的 outbox 表,不再由调用方手传 topic。
|
||||
func (e *Engine) Publish(ctx context.Context, req PublishRequest) error {
|
||||
if e == nil {
|
||||
return errors.New("outbox engine is nil")
|
||||
}
|
||||
|
||||
eventType := strings.TrimSpace(req.EventType)
|
||||
if eventType == "" {
|
||||
return errors.New("eventType is empty")
|
||||
}
|
||||
eventVersion := strings.TrimSpace(req.EventVersion)
|
||||
if eventVersion == "" {
|
||||
eventVersion = DefaultEventVersion
|
||||
}
|
||||
messageKey := strings.TrimSpace(req.MessageKey)
|
||||
aggregateID := strings.TrimSpace(req.AggregateID)
|
||||
if aggregateID == "" {
|
||||
aggregateID = messageKey
|
||||
}
|
||||
|
||||
payloadJSON, err := json.Marshal(req.Payload)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = e.repo.CreateMessage(ctx, eventType, messageKey, OutboxEventPayload{
|
||||
EventID: strings.TrimSpace(req.EventID),
|
||||
EventType: eventType,
|
||||
EventVersion: eventVersion,
|
||||
AggregateID: aggregateID,
|
||||
Payload: payloadJSON,
|
||||
}, e.maxRetry)
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Engine) startDispatchLoop(ctx context.Context) {
|
||||
ticker := time.NewTicker(e.scanEvery)
|
||||
defer ticker.Stop()
|
||||
|
||||
log.Printf("outbox dispatch loop started: service=%s scan=%s batch=%d", e.route.ServiceName, e.scanEvery, e.scanBatch)
|
||||
e.scanAndDispatchDueMessages(ctx)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
e.scanAndDispatchDueMessages(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) scanAndDispatchDueMessages(ctx context.Context) {
|
||||
// 1. 每轮只拉取当前服务到期消息,避免独立微服务误扫其它服务的 outbox 表。
|
||||
// 2. 单条投递失败只记录并进入 retry,不阻断本轮剩余消息。
|
||||
// 3. 启动时也会执行一次本函数,避免重启后必须等待下一次 ticker 才能推进历史 pending。
|
||||
pendingMessages, err := e.repo.ListDueMessages(ctx, e.route.ServiceName, e.scanBatch)
|
||||
if err != nil {
|
||||
log.Printf("扫描 outbox 失败: %v", err)
|
||||
return
|
||||
}
|
||||
if len(pendingMessages) > 0 {
|
||||
log.Printf("outbox due messages=%d, service=%s start dispatch", len(pendingMessages), e.route.ServiceName)
|
||||
}
|
||||
|
||||
for _, msg := range pendingMessages {
|
||||
if err = e.dispatchOne(ctx, msg.ID); err != nil {
|
||||
log.Printf("重试投递 outbox 消息失败(id=%d): %v", msg.ID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) dispatchOne(ctx context.Context, outboxID int64) error {
|
||||
outboxMsg, err := e.repo.GetByID(ctx, outboxID)
|
||||
if err != nil {
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
|
||||
return nil
|
||||
}
|
||||
|
||||
eventPayload, payloadErr := parseOutboxEventPayload(outboxMsg.Payload)
|
||||
if payloadErr != nil {
|
||||
markErr := e.repo.MarkDead(ctx, outboxMsg.ID, "解析 outbox 事件包失败: "+payloadErr.Error())
|
||||
if markErr != nil {
|
||||
log.Printf("标记 outbox 死信失败(id=%d): %v", outboxMsg.ID, markErr)
|
||||
}
|
||||
return payloadErr
|
||||
}
|
||||
if eventPayload.EventID == "" {
|
||||
eventPayload.EventID = strconv.FormatInt(outboxMsg.ID, 10)
|
||||
}
|
||||
serviceName := strings.TrimSpace(outboxMsg.ServiceName)
|
||||
if serviceName == "" {
|
||||
serviceName = e.route.ServiceName
|
||||
}
|
||||
|
||||
envelope := kafkabus.Envelope{
|
||||
OutboxID: outboxMsg.ID,
|
||||
EventID: eventPayload.EventID,
|
||||
EventType: eventPayload.EventType,
|
||||
EventVersion: eventPayload.EventVersion,
|
||||
ServiceName: serviceName,
|
||||
AggregateID: eventPayload.AggregateID,
|
||||
Payload: eventPayload.PayloadJSON,
|
||||
}
|
||||
raw, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
markErr := e.repo.MarkDead(ctx, outboxMsg.ID, "序列化 outbox 封装失败: "+err.Error())
|
||||
if markErr != nil {
|
||||
log.Printf("标记 outbox 死信失败(id=%d): %v", outboxMsg.ID, markErr)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// 1. Kafka 写入使用单条超时,避免 broker/metadata 卡住时消息长期停留在 pending。
|
||||
// 2. 超时失败后仍走统一 retry 状态机,由下一轮扫描继续补偿。
|
||||
dispatchCtx, cancel := context.WithTimeout(ctx, defaultDispatchTimeout)
|
||||
defer cancel()
|
||||
if err = e.producer.Enqueue(dispatchCtx, outboxMsg.Topic, outboxMsg.MessageKey, raw); err != nil {
|
||||
_ = e.repo.MarkFailedForRetry(ctx, outboxMsg.ID, "投递 Kafka 失败: "+err.Error())
|
||||
return err
|
||||
}
|
||||
if err = e.repo.MarkPublished(ctx, outboxMsg.ID); err != nil {
|
||||
_ = e.repo.MarkFailedForRetry(ctx, outboxMsg.ID, "更新已投递状态失败: "+err.Error())
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Engine) startConsumeLoop(ctx context.Context) {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
msg, err := e.consumer.Dequeue(ctx)
|
||||
if err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return
|
||||
}
|
||||
log.Printf("Kafka 消费拉取失败(topic=%s): %v", e.route.Topic, err)
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
|
||||
if err = e.handleMessage(ctx, msg); err != nil {
|
||||
log.Printf("处理 Kafka 消息失败(topic=%s, partition=%d, offset=%d): %v", msg.Topic, msg.Partition, msg.Offset, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) handleMessage(ctx context.Context, msg segmentkafka.Message) error {
|
||||
var envelope kafkabus.Envelope
|
||||
if err := json.Unmarshal(msg.Value, &envelope); err != nil {
|
||||
_ = e.consumer.Commit(ctx, msg)
|
||||
return fmt.Errorf("解析 Kafka 封装失败: %w", err)
|
||||
}
|
||||
if envelope.OutboxID <= 0 {
|
||||
_ = e.consumer.Commit(ctx, msg)
|
||||
return errors.New("Kafka 封装缺少 outbox_id")
|
||||
}
|
||||
|
||||
eventType := strings.TrimSpace(envelope.EventType)
|
||||
if eventType == "" {
|
||||
_ = e.repo.MarkDead(ctx, envelope.OutboxID, "消息缺少事件类型")
|
||||
if err := e.consumer.Commit(ctx, msg); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
runtimeServiceName := strings.TrimSpace(e.route.ServiceName)
|
||||
if runtimeServiceName != "" {
|
||||
messageServiceName := strings.TrimSpace(envelope.ServiceName)
|
||||
if messageServiceName == "" {
|
||||
if resolvedServiceName, ok := ResolveEventService(eventType); ok {
|
||||
messageServiceName = resolvedServiceName
|
||||
}
|
||||
}
|
||||
if messageServiceName == "" || messageServiceName != runtimeServiceName {
|
||||
log.Printf(
|
||||
"跳过非本服务事件: runtime_service=%s message_service=%s event_type=%s outbox_id=%d",
|
||||
runtimeServiceName,
|
||||
messageServiceName,
|
||||
eventType,
|
||||
envelope.OutboxID,
|
||||
)
|
||||
if err := e.consumer.Commit(ctx, msg); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
handler, ok := e.getHandler(eventType)
|
||||
if !ok {
|
||||
if runtimeServiceName == "" {
|
||||
_ = e.repo.MarkDead(ctx, envelope.OutboxID, "未知事件类型: "+eventType)
|
||||
} else {
|
||||
_ = e.repo.MarkDead(ctx, envelope.OutboxID, "本服务未注册 handler: "+eventType)
|
||||
}
|
||||
if err := e.consumer.Commit(ctx, msg); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := handler(ctx, envelope); err != nil {
|
||||
if markErr := e.repo.MarkFailedForRetry(ctx, envelope.OutboxID, "消费处理失败: "+err.Error()); markErr != nil {
|
||||
return markErr
|
||||
}
|
||||
if commitErr := e.consumer.Commit(ctx, msg); commitErr != nil {
|
||||
return commitErr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
return e.consumer.Commit(ctx, msg)
|
||||
}
|
||||
|
||||
func resolveEngineRoute(repo *Repository, cfg kafkabus.Config) ServiceRoute {
|
||||
route := ServiceRoute{
|
||||
ServiceName: strings.TrimSpace(cfg.ServiceName),
|
||||
Topic: strings.TrimSpace(cfg.Topic),
|
||||
GroupID: strings.TrimSpace(cfg.GroupID),
|
||||
}
|
||||
if repo != nil {
|
||||
repoRoute := repo.route
|
||||
if route.ServiceName == "" {
|
||||
route.ServiceName = strings.TrimSpace(repoRoute.ServiceName)
|
||||
}
|
||||
if route.TableName == "" && strings.TrimSpace(repoRoute.TableName) != "" {
|
||||
route.TableName = strings.TrimSpace(repoRoute.TableName)
|
||||
}
|
||||
if route.Topic == "" && strings.TrimSpace(repoRoute.Topic) != "" {
|
||||
route.Topic = strings.TrimSpace(repoRoute.Topic)
|
||||
}
|
||||
if route.GroupID == "" && strings.TrimSpace(repoRoute.GroupID) != "" {
|
||||
route.GroupID = strings.TrimSpace(repoRoute.GroupID)
|
||||
}
|
||||
}
|
||||
|
||||
if route.ServiceName != "" {
|
||||
defaultRoute := DefaultServiceRoute(route.ServiceName)
|
||||
if route.TableName == "" {
|
||||
route.TableName = defaultRoute.TableName
|
||||
}
|
||||
if route.Topic == "" {
|
||||
route.Topic = defaultRoute.Topic
|
||||
}
|
||||
if route.GroupID == "" {
|
||||
route.GroupID = defaultRoute.GroupID
|
||||
}
|
||||
return normalizeServiceRoute(route)
|
||||
}
|
||||
|
||||
if route.TableName == "" {
|
||||
route.TableName = DefaultServiceRoute(ServiceNameAgent).TableName
|
||||
}
|
||||
if route.Topic == "" {
|
||||
route.Topic = kafkabus.DefaultTopic
|
||||
}
|
||||
if route.GroupID == "" {
|
||||
route.GroupID = kafkabus.DefaultGroup
|
||||
}
|
||||
return normalizeServiceRoute(route)
|
||||
}
|
||||
177
backend/shared/infra/outbox/event_bus.go
Normal file
177
backend/shared/infra/outbox/event_bus.go
Normal file
@@ -0,0 +1,177 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
kafkabus "github.com/LoveLosita/smartflow/backend/shared/infra/kafka"
|
||||
)
|
||||
|
||||
// EventPublisher 是通用事件发布能力接口。
|
||||
type EventPublisher interface {
|
||||
Publish(ctx context.Context, req PublishRequest) error
|
||||
}
|
||||
|
||||
// EventBus 是 outbox 多服务引擎的门面。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 对外只暴露“发布、注册 handler、启动、关闭”四类能力;
|
||||
// 2. 内部按事件归属把调用路由到对应 service engine;
|
||||
// 3. 不再把共享 topic 当主路径,服务级路由始终优先。
|
||||
type EventBus struct {
|
||||
repo *Repository
|
||||
cfg kafkabus.Config
|
||||
|
||||
mu sync.RWMutex
|
||||
engines map[string]*Engine
|
||||
}
|
||||
|
||||
// NewEventBus 创建多服务事件门面。
|
||||
//
|
||||
// 说明:
|
||||
// 1. kafka.enabled=false 时返回 nil,调用方可直接降级;
|
||||
// 2. 实际 service engine 在需要时按服务目录懒加载;
|
||||
// 3. 懒加载不会改变既有事件契约,只是把物理资源拆到各自服务。
|
||||
func NewEventBus(repo *Repository, cfg kafkabus.Config) (*EventBus, error) {
|
||||
if !cfg.Enabled {
|
||||
return nil, nil
|
||||
}
|
||||
if repo == nil {
|
||||
return nil, errors.New("outbox repository is nil")
|
||||
}
|
||||
return &EventBus{
|
||||
repo: repo,
|
||||
cfg: cfg,
|
||||
engines: make(map[string]*Engine),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// RegisterEventHandler 注册事件处理器。
|
||||
func (b *EventBus) RegisterEventHandler(eventType string, handler MessageHandler) error {
|
||||
if b == nil {
|
||||
return errors.New("event bus is not initialized")
|
||||
}
|
||||
|
||||
route, err := b.routeForEvent(eventType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
engine, err := b.ensureEngine(route)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return engine.RegisterEventHandler(eventType, handler)
|
||||
}
|
||||
|
||||
// Publish 把事件路由到对应服务的 outbox 表与 Kafka 资源。
|
||||
func (b *EventBus) Publish(ctx context.Context, req PublishRequest) error {
|
||||
if b == nil {
|
||||
return errors.New("event bus is not initialized")
|
||||
}
|
||||
|
||||
route, err := b.routeForEvent(req.EventType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
engine, err := b.ensureEngine(route)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return engine.Publish(ctx, req)
|
||||
}
|
||||
|
||||
// Start 启动所有已创建的 service engine。
|
||||
func (b *EventBus) Start(ctx context.Context) {
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
for _, engine := range b.snapshotEngines() {
|
||||
go engine.Start(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// StartDispatch 只启动所有已创建 engine 的 dispatch 循环。
|
||||
func (b *EventBus) StartDispatch(ctx context.Context) {
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
for _, engine := range b.snapshotEngines() {
|
||||
go engine.StartDispatch(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// StartConsume 只启动所有已创建 engine 的消费循环。
|
||||
func (b *EventBus) StartConsume(ctx context.Context) {
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
for _, engine := range b.snapshotEngines() {
|
||||
go engine.StartConsume(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// Close 关闭所有 service engine 的 Kafka 资源。
|
||||
func (b *EventBus) Close() {
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
for _, engine := range b.snapshotEngines() {
|
||||
engine.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func (b *EventBus) routeForEvent(eventType string) (ServiceRoute, error) {
|
||||
route, ok := ResolveEventRoute(eventType)
|
||||
if !ok {
|
||||
return ServiceRoute{}, fmt.Errorf("outbox route not registered: eventType=%s", strings.TrimSpace(eventType))
|
||||
}
|
||||
return route, nil
|
||||
}
|
||||
|
||||
func (b *EventBus) ensureEngine(route ServiceRoute) (*Engine, error) {
|
||||
serviceName := route.ServiceName
|
||||
if serviceName == "" {
|
||||
return nil, errors.New("serviceName is empty")
|
||||
}
|
||||
|
||||
b.mu.RLock()
|
||||
if engine, ok := b.engines[serviceName]; ok {
|
||||
b.mu.RUnlock()
|
||||
return engine, nil
|
||||
}
|
||||
b.mu.RUnlock()
|
||||
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
if engine, ok := b.engines[serviceName]; ok {
|
||||
return engine, nil
|
||||
}
|
||||
|
||||
cfg := b.cfg
|
||||
cfg.ServiceName = serviceName
|
||||
cfg.Topic = route.Topic
|
||||
cfg.GroupID = route.GroupID
|
||||
|
||||
engine, err := NewEngine(b.repo, cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if engine == nil {
|
||||
return nil, nil
|
||||
}
|
||||
b.engines[serviceName] = engine
|
||||
return engine, nil
|
||||
}
|
||||
|
||||
func (b *EventBus) snapshotEngines() []*Engine {
|
||||
b.mu.RLock()
|
||||
defer b.mu.RUnlock()
|
||||
engines := make([]*Engine, 0, len(b.engines))
|
||||
for _, engine := range b.engines {
|
||||
engines = append(engines, engine)
|
||||
}
|
||||
return engines
|
||||
}
|
||||
64
backend/shared/infra/outbox/event_contract.go
Normal file
64
backend/shared/infra/outbox/event_contract.go
Normal file
@@ -0,0 +1,64 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultEventVersion 是通用事件协议默认版本。
|
||||
DefaultEventVersion = "v1"
|
||||
)
|
||||
|
||||
// OutboxEventPayload 是 outbox.payload 的统一事件外壳。
|
||||
type OutboxEventPayload struct {
|
||||
EventID string `json:"event_id,omitempty"`
|
||||
EventType string `json:"event_type"`
|
||||
EventVersion string `json:"event_version,omitempty"`
|
||||
AggregateID string `json:"aggregate_id,omitempty"`
|
||||
Payload json.RawMessage `json:"payload"`
|
||||
}
|
||||
|
||||
// ParsedOutboxEventPayload 是 dispatch 阶段使用的标准化结构。
|
||||
type ParsedOutboxEventPayload struct {
|
||||
EventID string
|
||||
EventType string
|
||||
EventVersion string
|
||||
AggregateID string
|
||||
PayloadJSON json.RawMessage
|
||||
}
|
||||
|
||||
// parseOutboxEventPayload 解析 outbox.payload。
|
||||
//
|
||||
// 当前策略(极致清理版):
|
||||
// 1. 只接受“统一事件外壳”格式;
|
||||
// 2. 不再支持旧格式纯业务 JSON 回退;
|
||||
// 3. event_type 缺失时直接报错,交由上层标 dead。
|
||||
func parseOutboxEventPayload(rawPayload string) (*ParsedOutboxEventPayload, error) {
|
||||
var wrapped OutboxEventPayload
|
||||
if err := json.Unmarshal([]byte(rawPayload), &wrapped); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
eventType := strings.TrimSpace(wrapped.EventType)
|
||||
if eventType == "" {
|
||||
return nil, errors.New("event type is empty")
|
||||
}
|
||||
if len(wrapped.Payload) == 0 {
|
||||
return nil, errors.New("payload is empty")
|
||||
}
|
||||
|
||||
eventVersion := strings.TrimSpace(wrapped.EventVersion)
|
||||
if eventVersion == "" {
|
||||
eventVersion = DefaultEventVersion
|
||||
}
|
||||
|
||||
return &ParsedOutboxEventPayload{
|
||||
EventID: strings.TrimSpace(wrapped.EventID),
|
||||
EventType: eventType,
|
||||
EventVersion: eventVersion,
|
||||
AggregateID: strings.TrimSpace(wrapped.AggregateID),
|
||||
PayloadJSON: wrapped.Payload,
|
||||
}, nil
|
||||
}
|
||||
327
backend/shared/infra/outbox/repository.go
Normal file
327
backend/shared/infra/outbox/repository.go
Normal file
@@ -0,0 +1,327 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/services/runtime/model"
|
||||
"gorm.io/gorm"
|
||||
"gorm.io/gorm/clause"
|
||||
)
|
||||
|
||||
// Repository 是 outbox 状态仓储。
|
||||
// 职责边界:
|
||||
// 1. 只负责 outbox 状态流转与通用事务编排;
|
||||
// 2. 不负责聊天、任务、通知等具体业务语义;
|
||||
// 3. 同一仓储实例只面向一个服务级 outbox 目录。
|
||||
type Repository struct {
|
||||
db *gorm.DB
|
||||
route ServiceRoute
|
||||
}
|
||||
|
||||
func NewRepository(db *gorm.DB) *Repository {
|
||||
return &Repository{db: db}
|
||||
}
|
||||
|
||||
// WithTx 使用外部事务句柄构造同事务仓储实例。
|
||||
func (d *Repository) WithTx(tx *gorm.DB) *Repository {
|
||||
if d == nil {
|
||||
return &Repository{db: tx}
|
||||
}
|
||||
return &Repository{db: tx, route: d.route}
|
||||
}
|
||||
|
||||
// WithRoute 使用指定服务路由构造仓储实例。
|
||||
// 1. 只切换 outbox 物理目录,不改变业务事务语义;
|
||||
// 2. 适合多个 service engine 共用同一 DB 连接时分别绑定各自 route;
|
||||
// 3. 保留 route 的 table/topic/group,避免回落到共享 topic。
|
||||
func (d *Repository) WithRoute(route ServiceRoute) *Repository {
|
||||
route = normalizeServiceRoute(route)
|
||||
if d == nil {
|
||||
return &Repository{route: route}
|
||||
}
|
||||
return &Repository{db: d.db, route: route}
|
||||
}
|
||||
|
||||
// CreateMessage 将事件写入 outbox。
|
||||
// 1. 只接收 eventType、messageKey、payload 和 maxRetry,不再允许业务侧显式传 topic;
|
||||
// 2. table/topic/group 统一由 eventType -> service -> route 解析,保证服务级路由是唯一入口;
|
||||
// 3. eventType 未注册时直接返回 error,避免消息静默落到默认表或默认 topic。
|
||||
func (d *Repository) CreateMessage(ctx context.Context, eventType string, messageKey string, payload any, maxRetry int) (int64, error) {
|
||||
if d == nil || d.db == nil {
|
||||
return 0, errors.New("outbox repository is nil")
|
||||
}
|
||||
|
||||
eventType = strings.TrimSpace(eventType)
|
||||
if eventType == "" {
|
||||
return 0, errors.New("eventType is empty")
|
||||
}
|
||||
messageKey = strings.TrimSpace(messageKey)
|
||||
if maxRetry <= 0 {
|
||||
maxRetry = 20
|
||||
}
|
||||
|
||||
route, err := d.resolvePublishRoute(eventType)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
raw, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
msg := model.AgentOutboxMessage{
|
||||
EventType: eventType,
|
||||
ServiceName: route.ServiceName,
|
||||
Topic: route.Topic,
|
||||
MessageKey: messageKey,
|
||||
Payload: string(raw),
|
||||
Status: model.OutboxStatusPending,
|
||||
RetryCount: 0,
|
||||
MaxRetry: maxRetry,
|
||||
NextRetryAt: &now,
|
||||
}
|
||||
|
||||
if err = d.db.WithContext(ctx).Table(route.TableName).Create(&msg).Error; err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return msg.ID, nil
|
||||
}
|
||||
|
||||
// GetByID 从当前仓储绑定的 outbox 表读取指定消息。
|
||||
func (d *Repository) GetByID(ctx context.Context, id int64) (*model.AgentOutboxMessage, error) {
|
||||
var msg model.AgentOutboxMessage
|
||||
if err := d.scopedDB(ctx).Where("id = ?", id).First(&msg).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &msg, nil
|
||||
}
|
||||
|
||||
// ListDueMessages 拉取到期可投递消息。
|
||||
// 1. serviceName 为空时保持当前仓储目录内的扫描语义;
|
||||
// 2. serviceName 非空时只扫描对应服务的消息;
|
||||
// 3. 这样既能支持单服务 relay,也能支持后续多服务 relay。
|
||||
func (d *Repository) ListDueMessages(ctx context.Context, serviceName string, limit int) ([]model.AgentOutboxMessage, error) {
|
||||
if limit <= 0 {
|
||||
limit = 100
|
||||
}
|
||||
now := time.Now()
|
||||
var messages []model.AgentOutboxMessage
|
||||
query := d.scopedDB(ctx).
|
||||
Where("status = ? AND next_retry_at IS NOT NULL AND next_retry_at <= ?", model.OutboxStatusPending, now).
|
||||
Order("next_retry_at ASC, id ASC").
|
||||
Limit(limit)
|
||||
serviceName = strings.TrimSpace(serviceName)
|
||||
if serviceName != "" {
|
||||
query = query.Where("service_name = ?", serviceName)
|
||||
}
|
||||
if err := query.Find(&messages).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return messages, nil
|
||||
}
|
||||
|
||||
// MarkPublished 标记消息已经成功投递到 Kafka。
|
||||
func (d *Repository) MarkPublished(ctx context.Context, id int64) error {
|
||||
now := time.Now()
|
||||
updates := map[string]interface{}{
|
||||
"status": model.OutboxStatusPublished,
|
||||
"published_at": &now,
|
||||
"last_error": nil,
|
||||
"next_retry_at": nil,
|
||||
}
|
||||
result := d.scopedDB(ctx).
|
||||
Model(&model.AgentOutboxMessage{}).
|
||||
Where("id = ? AND status NOT IN (?, ?)", id, model.OutboxStatusConsumed, model.OutboxStatusDead).
|
||||
Updates(updates)
|
||||
return result.Error
|
||||
}
|
||||
|
||||
// MarkConsumed 标记消息已经在处理侧成功完成。
|
||||
func (d *Repository) MarkConsumed(ctx context.Context, id int64) error {
|
||||
now := time.Now()
|
||||
updates := map[string]interface{}{
|
||||
"status": model.OutboxStatusConsumed,
|
||||
"consumed_at": &now,
|
||||
"last_error": nil,
|
||||
"next_retry_at": nil,
|
||||
"updated_at": now,
|
||||
}
|
||||
return d.scopedDB(ctx).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
|
||||
}
|
||||
|
||||
// MarkDead 将消息标记为死信。
|
||||
func (d *Repository) MarkDead(ctx context.Context, id int64, reason string) error {
|
||||
now := time.Now()
|
||||
lastErr := truncateError(reason)
|
||||
updates := map[string]interface{}{
|
||||
"status": model.OutboxStatusDead,
|
||||
"last_error": &lastErr,
|
||||
"next_retry_at": nil,
|
||||
"updated_at": now,
|
||||
}
|
||||
return d.scopedDB(ctx).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
|
||||
}
|
||||
|
||||
// MarkFailedForRetry 记录一次可重试失败并推进重试窗口。
|
||||
// 1. 行级锁读取当前消息状态;
|
||||
// 2. consumed/dead 状态直接短路;
|
||||
// 3. retry_count + 1,并根据最大次数决定继续 pending 还是转 dead;
|
||||
// 4. 写回 last_error 与 next_retry_at,交给下一轮扫描继续投递。
|
||||
func (d *Repository) MarkFailedForRetry(ctx context.Context, id int64, reason string) error {
|
||||
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
|
||||
var msg model.AgentOutboxMessage
|
||||
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", id).First(&msg).Error
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if msg.Status == model.OutboxStatusConsumed || msg.Status == model.OutboxStatusDead {
|
||||
return nil
|
||||
}
|
||||
|
||||
nextRetryCount := msg.RetryCount + 1
|
||||
now := time.Now()
|
||||
status := model.OutboxStatusPending
|
||||
var nextRetryAt *time.Time
|
||||
if nextRetryCount >= msg.MaxRetry {
|
||||
status = model.OutboxStatusDead
|
||||
nextRetryAt = nil
|
||||
} else {
|
||||
t := now.Add(calcRetryBackoff(nextRetryCount))
|
||||
nextRetryAt = &t
|
||||
}
|
||||
|
||||
lastErr := truncateError(reason)
|
||||
updates := map[string]interface{}{
|
||||
"status": status,
|
||||
"retry_count": nextRetryCount,
|
||||
"last_error": &lastErr,
|
||||
"next_retry_at": nextRetryAt,
|
||||
"updated_at": now,
|
||||
}
|
||||
return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
|
||||
})
|
||||
}
|
||||
|
||||
// ConsumeAndMarkConsumed 是通用“消费成功事务入口”。
|
||||
// 1. 在事务内锁定 outbox 记录;
|
||||
// 2. consumed/dead 状态直接返回;
|
||||
// 3. 执行业务回调 fn(tx),让业务落库和 outbox 状态共享同一事务;
|
||||
// 4. 业务成功后统一标记 consumed。
|
||||
func (d *Repository) ConsumeAndMarkConsumed(ctx context.Context, outboxID int64, fn func(tx *gorm.DB) error) error {
|
||||
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
|
||||
var outboxMsg model.AgentOutboxMessage
|
||||
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
|
||||
if err != nil {
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
|
||||
return nil
|
||||
}
|
||||
|
||||
if fn != nil {
|
||||
if err = fn(tx); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
updates := map[string]interface{}{
|
||||
"status": model.OutboxStatusConsumed,
|
||||
"consumed_at": &now,
|
||||
"last_error": nil,
|
||||
"next_retry_at": nil,
|
||||
"updated_at": now,
|
||||
}
|
||||
return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", outboxID).Updates(updates).Error
|
||||
})
|
||||
}
|
||||
|
||||
// ConsumeInTx 执行 outbox 业务事务,但不负责标记 consumed。
|
||||
// 1. 先锁定当前 outbox 记录,避免并发消费者同时处理同一条消息;
|
||||
// 2. 只要业务函数返回错误,就保持消息为 pending,交给上层 retry;
|
||||
// 3. 业务成功后再由上层单独标记 consumed,这样可以把远端 RPC 移到事务外。
|
||||
func (d *Repository) ConsumeInTx(ctx context.Context, outboxID int64, fn func(tx *gorm.DB) error) error {
|
||||
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
|
||||
var outboxMsg model.AgentOutboxMessage
|
||||
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
|
||||
if err != nil {
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
|
||||
return nil
|
||||
}
|
||||
if fn != nil {
|
||||
if err = fn(tx); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func (d *Repository) scopedDB(ctx context.Context) *gorm.DB {
|
||||
return d.db.WithContext(ctx).Table(d.tableName())
|
||||
}
|
||||
|
||||
func (d *Repository) tableName() string {
|
||||
if d == nil {
|
||||
return DefaultServiceRoute(ServiceNameAgent).TableName
|
||||
}
|
||||
|
||||
route := normalizeServiceRoute(d.route)
|
||||
if route.TableName != "" {
|
||||
return route.TableName
|
||||
}
|
||||
return DefaultServiceRoute(ServiceNameAgent).TableName
|
||||
}
|
||||
|
||||
func (d *Repository) resolvePublishRoute(eventType string) (ServiceRoute, error) {
|
||||
if d == nil {
|
||||
return ServiceRoute{}, errors.New("outbox repository is nil")
|
||||
}
|
||||
|
||||
eventType = strings.TrimSpace(eventType)
|
||||
if eventType == "" {
|
||||
return ServiceRoute{}, errors.New("eventType is empty")
|
||||
}
|
||||
|
||||
route, ok := ResolveEventRoute(eventType)
|
||||
if !ok {
|
||||
return ServiceRoute{}, fmt.Errorf("outbox route not registered: eventType=%s", eventType)
|
||||
}
|
||||
if d.route.ServiceName != "" && route.ServiceName != d.route.ServiceName {
|
||||
return ServiceRoute{}, fmt.Errorf("eventType %s belongs to service %s, current repo service %s", eventType, route.ServiceName, d.route.ServiceName)
|
||||
}
|
||||
return normalizeServiceRoute(route), nil
|
||||
}
|
||||
|
||||
func calcRetryBackoff(retryCount int) time.Duration {
|
||||
if retryCount <= 0 {
|
||||
return time.Second
|
||||
}
|
||||
if retryCount > 10 {
|
||||
retryCount = 10
|
||||
}
|
||||
return time.Duration(retryCount*retryCount) * time.Second
|
||||
}
|
||||
|
||||
func truncateError(reason string) string {
|
||||
if len(reason) <= 2000 {
|
||||
return reason
|
||||
}
|
||||
return reason[:2000]
|
||||
}
|
||||
136
backend/shared/infra/outbox/route_registry.go
Normal file
136
backend/shared/infra/outbox/route_registry.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var outboxRouteRegistry = struct {
|
||||
sync.RWMutex
|
||||
eventToService map[string]string
|
||||
serviceRoutes map[string]ServiceRoute
|
||||
}{
|
||||
eventToService: make(map[string]string),
|
||||
serviceRoutes: make(map[string]ServiceRoute),
|
||||
}
|
||||
|
||||
// RegisterServiceRoute 注册或覆盖某个服务的物理 outbox 路由。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只登记“服务 -> table/topic/group”目录,不登记事件归属;
|
||||
// 2. 同服务重复注册时以后者覆盖前者,方便显式配置覆盖默认目录;
|
||||
// 3. 空服务名直接报错,避免把共享 topic 误当成新终态。
|
||||
func RegisterServiceRoute(route ServiceRoute) error {
|
||||
route = normalizeServiceRoute(route)
|
||||
if route.ServiceName == "" {
|
||||
return errors.New("serviceName is empty")
|
||||
}
|
||||
|
||||
outboxRouteRegistry.Lock()
|
||||
defer outboxRouteRegistry.Unlock()
|
||||
|
||||
outboxRouteRegistry.serviceRoutes[route.ServiceName] = route
|
||||
return nil
|
||||
}
|
||||
|
||||
// RegisterEventService 记录“事件类型 -> 服务归属”的全局路由。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只登记跨进程都要识别的事件归属,不承载 handler 逻辑;
|
||||
// 2. 同一 event_type 只能归属一个服务,重复登记同值视为幂等;
|
||||
// 3. 若该服务还没有显式路由,则先写入默认服务目录,保证后续能查到 table/topic/group。
|
||||
func RegisterEventService(eventType, serviceName string) error {
|
||||
eventType = strings.TrimSpace(eventType)
|
||||
if eventType == "" {
|
||||
return errors.New("eventType is empty")
|
||||
}
|
||||
serviceName = normalizeServiceName(serviceName)
|
||||
if serviceName == "" {
|
||||
return errors.New("serviceName is empty")
|
||||
}
|
||||
|
||||
outboxRouteRegistry.Lock()
|
||||
defer outboxRouteRegistry.Unlock()
|
||||
|
||||
if existing, ok := outboxRouteRegistry.eventToService[eventType]; ok {
|
||||
if existing != serviceName {
|
||||
return fmt.Errorf("eventType %s already registered to service %s", eventType, existing)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
outboxRouteRegistry.eventToService[eventType] = serviceName
|
||||
return nil
|
||||
}
|
||||
|
||||
// ResolveEventService 查询某个事件类型的归属服务。
|
||||
//
|
||||
// 返回值说明:
|
||||
// 1. serviceName 为登记结果;
|
||||
// 2. ok=false 表示当前路由表里还没有这个事件类型的归属信息。
|
||||
func ResolveEventService(eventType string) (serviceName string, ok bool) {
|
||||
eventType = strings.TrimSpace(eventType)
|
||||
if eventType == "" {
|
||||
return "", false
|
||||
}
|
||||
|
||||
outboxRouteRegistry.RLock()
|
||||
defer outboxRouteRegistry.RUnlock()
|
||||
|
||||
serviceName, ok = outboxRouteRegistry.eventToService[eventType]
|
||||
return serviceName, ok
|
||||
}
|
||||
|
||||
// ResolveServiceRoute 查询某个服务的物理 outbox 配置。
|
||||
//
|
||||
// 返回值说明:
|
||||
// 1. route 始终返回一个可执行的目录结果,未显式注册时回退默认目录;
|
||||
// 2. ok=true 表示命中显式注册目录,ok=false 表示走默认目录;
|
||||
// 3. 这样既能支持显式配置覆盖,也能让基础设施在启动初期就有稳定默认值。
|
||||
func ResolveServiceRoute(serviceName string) (route ServiceRoute, ok bool) {
|
||||
serviceName = normalizeServiceName(serviceName)
|
||||
if serviceName == "" {
|
||||
return DefaultServiceRoute(""), false
|
||||
}
|
||||
|
||||
outboxRouteRegistry.RLock()
|
||||
route, ok = outboxRouteRegistry.serviceRoutes[serviceName]
|
||||
outboxRouteRegistry.RUnlock()
|
||||
if ok {
|
||||
return normalizeServiceRoute(route), true
|
||||
}
|
||||
if route, ok = configuredServiceRoute(serviceName); ok {
|
||||
return route, true
|
||||
}
|
||||
return DefaultServiceRoute(serviceName), false
|
||||
}
|
||||
|
||||
// ResolveEventRoute 先按事件查服务,再按服务查物理目录。
|
||||
//
|
||||
// 返回值说明:
|
||||
// 1. route 包含事件所在服务的 table/topic/group;
|
||||
// 2. ok=true 只表示“事件 -> 服务归属”已登记;
|
||||
// 3. 服务目录若未显式注册,会自动回退到默认目录。
|
||||
func ResolveEventRoute(eventType string) (route ServiceRoute, ok bool) {
|
||||
serviceName, ok := ResolveEventService(eventType)
|
||||
if !ok {
|
||||
return ServiceRoute{}, false
|
||||
}
|
||||
route, _ = ResolveServiceRoute(serviceName)
|
||||
return route, true
|
||||
}
|
||||
|
||||
func configuredServiceRoute(serviceName string) (ServiceRoute, bool) {
|
||||
cfg, ok := ResolveServiceConfig(serviceName)
|
||||
if !ok {
|
||||
return ServiceRoute{}, false
|
||||
}
|
||||
return normalizeServiceRoute(ServiceRoute{
|
||||
ServiceName: cfg.Name,
|
||||
TableName: cfg.TableName,
|
||||
Topic: cfg.Topic,
|
||||
GroupID: cfg.GroupID,
|
||||
}), true
|
||||
}
|
||||
168
backend/shared/infra/outbox/service_catalog.go
Normal file
168
backend/shared/infra/outbox/service_catalog.go
Normal file
@@ -0,0 +1,168 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/spf13/viper"
|
||||
)
|
||||
|
||||
const (
|
||||
ServiceAgent = "agent"
|
||||
ServiceTask = "task"
|
||||
ServiceMemory = "memory"
|
||||
ServiceActiveScheduler = "active-scheduler"
|
||||
ServiceNotification = "notification"
|
||||
)
|
||||
|
||||
// ServiceConfig 描述一个服务级 outbox 的固定归属。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只描述“事件属于哪个服务、写哪张表、发哪个 topic、用哪个 group”。
|
||||
// 2. 不承载具体业务 handler,也不承载 Kafka 消息体格式。
|
||||
// 3. 服务级写入、扫描和消费都应从这里读取同一份映射,避免配置漂移。
|
||||
type ServiceConfig struct {
|
||||
Name string
|
||||
Topic string
|
||||
GroupID string
|
||||
TableName string
|
||||
}
|
||||
|
||||
var serviceCatalogCache = struct {
|
||||
sync.RWMutex
|
||||
loaded bool
|
||||
entries map[string]ServiceConfig
|
||||
}{
|
||||
entries: make(map[string]ServiceConfig),
|
||||
}
|
||||
|
||||
// LoadServiceConfigs 读取服务级 outbox 目录。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 先给出默认终态映射,再允许通过配置中心覆盖 topic/groupID/table;
|
||||
// 2. 该目录只负责服务级 outbox 基础设施,不混入业务逻辑;
|
||||
// 3. 若某个服务配置缺失,直接使用默认值,避免启动期因为非关键配置崩掉。
|
||||
func LoadServiceConfigs() map[string]ServiceConfig {
|
||||
serviceCatalogCache.Lock()
|
||||
defer serviceCatalogCache.Unlock()
|
||||
|
||||
if serviceCatalogCache.loaded {
|
||||
return cloneServiceConfigs(serviceCatalogCache.entries)
|
||||
}
|
||||
|
||||
entries := map[string]ServiceConfig{
|
||||
ServiceAgent: {
|
||||
Name: ServiceAgent,
|
||||
Topic: "smartflow.agent.outbox",
|
||||
GroupID: "smartflow-agent-outbox-consumer",
|
||||
TableName: "agent_outbox_messages",
|
||||
},
|
||||
ServiceTask: {
|
||||
Name: ServiceTask,
|
||||
Topic: "smartflow.task.outbox",
|
||||
GroupID: "smartflow-task-outbox-consumer",
|
||||
TableName: "task_outbox_messages",
|
||||
},
|
||||
ServiceMemory: {
|
||||
Name: ServiceMemory,
|
||||
Topic: "smartflow.memory.outbox",
|
||||
GroupID: "smartflow-memory-outbox-consumer",
|
||||
TableName: "memory_outbox_messages",
|
||||
},
|
||||
ServiceActiveScheduler: {
|
||||
Name: ServiceActiveScheduler,
|
||||
Topic: "smartflow.active-scheduler.outbox",
|
||||
GroupID: "smartflow-active-scheduler-outbox-consumer",
|
||||
TableName: "active_scheduler_outbox_messages",
|
||||
},
|
||||
ServiceNotification: {
|
||||
Name: ServiceNotification,
|
||||
Topic: "smartflow.notification.outbox",
|
||||
GroupID: "smartflow-notification-outbox-consumer",
|
||||
TableName: "notification_outbox_messages",
|
||||
},
|
||||
}
|
||||
|
||||
for name, entry := range entries {
|
||||
entries[name] = overrideServiceConfig(entry)
|
||||
}
|
||||
|
||||
serviceCatalogCache.entries = entries
|
||||
serviceCatalogCache.loaded = true
|
||||
return cloneServiceConfigs(entries)
|
||||
}
|
||||
|
||||
// ResolveServiceConfig 查询某个服务的 outbox 目录。
|
||||
func ResolveServiceConfig(serviceName string) (ServiceConfig, bool) {
|
||||
serviceName = strings.TrimSpace(serviceName)
|
||||
if serviceName == "" {
|
||||
return ServiceConfig{}, false
|
||||
}
|
||||
|
||||
entries := LoadServiceConfigs()
|
||||
cfg, ok := entries[serviceName]
|
||||
return cfg, ok
|
||||
}
|
||||
|
||||
// ResolveEventServiceConfig 先解析事件归属服务,再返回该服务的 outbox 目录。
|
||||
func ResolveEventServiceConfig(eventType string) (ServiceConfig, bool) {
|
||||
serviceName, ok := ResolveEventService(eventType)
|
||||
if !ok {
|
||||
return ServiceConfig{}, false
|
||||
}
|
||||
return ResolveServiceConfig(serviceName)
|
||||
}
|
||||
|
||||
// ServiceTables 返回当前目录中的所有 outbox 表名。
|
||||
func ServiceTables() []string {
|
||||
entries := LoadServiceConfigs()
|
||||
tables := make([]string, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
tables = append(tables, entry.TableName)
|
||||
}
|
||||
sort.Strings(tables)
|
||||
return tables
|
||||
}
|
||||
|
||||
// ServiceNames 返回当前目录中的所有服务名。
|
||||
func ServiceNames() []string {
|
||||
entries := LoadServiceConfigs()
|
||||
names := make([]string, 0, len(entries))
|
||||
for name := range entries {
|
||||
names = append(names, name)
|
||||
}
|
||||
sort.Strings(names)
|
||||
return names
|
||||
}
|
||||
|
||||
func overrideServiceConfig(entry ServiceConfig) ServiceConfig {
|
||||
upperName := strings.TrimSpace(entry.Name)
|
||||
if upperName == "" {
|
||||
return entry
|
||||
}
|
||||
|
||||
topicKey := fmt.Sprintf("outbox.services.%s.topic", upperName)
|
||||
groupKey := fmt.Sprintf("outbox.services.%s.groupID", upperName)
|
||||
tableKey := fmt.Sprintf("outbox.services.%s.table", upperName)
|
||||
|
||||
if topic := strings.TrimSpace(viper.GetString(topicKey)); topic != "" {
|
||||
entry.Topic = topic
|
||||
}
|
||||
if groupID := strings.TrimSpace(viper.GetString(groupKey)); groupID != "" {
|
||||
entry.GroupID = groupID
|
||||
}
|
||||
if tableName := strings.TrimSpace(viper.GetString(tableKey)); tableName != "" {
|
||||
entry.TableName = tableName
|
||||
}
|
||||
return entry
|
||||
}
|
||||
|
||||
func cloneServiceConfigs(entries map[string]ServiceConfig) map[string]ServiceConfig {
|
||||
cloned := make(map[string]ServiceConfig, len(entries))
|
||||
for name, entry := range entries {
|
||||
cloned[name] = entry
|
||||
}
|
||||
return cloned
|
||||
}
|
||||
145
backend/shared/infra/outbox/service_route.go
Normal file
145
backend/shared/infra/outbox/service_route.go
Normal file
@@ -0,0 +1,145 @@
|
||||
package outbox
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
ServiceNameAgent = "agent"
|
||||
ServiceNameTask = "task"
|
||||
ServiceNameMemory = "memory"
|
||||
ServiceNameActiveScheduler = "active-scheduler"
|
||||
ServiceNameNotification = "notification"
|
||||
)
|
||||
|
||||
// ServiceRoute 描述一个 outbox 服务的终态路由信息。
|
||||
//
|
||||
// 职责边界:
|
||||
// 1. 只承载服务级 outbox 的 table/topic/group 目录信息;
|
||||
// 2. 不承载 handler、事务或 Kafka 连接对象;
|
||||
// 3. 允许上层按事件类型先查服务,再由服务查到自己的物理资源。
|
||||
type ServiceRoute struct {
|
||||
ServiceName string
|
||||
TableName string
|
||||
Topic string
|
||||
GroupID string
|
||||
}
|
||||
|
||||
var builtinServiceRoutes = map[string]ServiceRoute{
|
||||
ServiceNameAgent: {
|
||||
ServiceName: ServiceNameAgent,
|
||||
TableName: "agent_outbox_messages",
|
||||
Topic: "smartflow.agent.outbox",
|
||||
GroupID: "smartflow-agent-outbox-consumer",
|
||||
},
|
||||
ServiceNameTask: {
|
||||
ServiceName: ServiceNameTask,
|
||||
TableName: "task_outbox_messages",
|
||||
Topic: "smartflow.task.outbox",
|
||||
GroupID: "smartflow-task-outbox-consumer",
|
||||
},
|
||||
ServiceNameMemory: {
|
||||
ServiceName: ServiceNameMemory,
|
||||
TableName: "memory_outbox_messages",
|
||||
Topic: "smartflow.memory.outbox",
|
||||
GroupID: "smartflow-memory-outbox-consumer",
|
||||
},
|
||||
ServiceNameActiveScheduler: {
|
||||
ServiceName: ServiceNameActiveScheduler,
|
||||
TableName: "active_scheduler_outbox_messages",
|
||||
Topic: "smartflow.active-scheduler.outbox",
|
||||
GroupID: "smartflow-active-scheduler-outbox-consumer",
|
||||
},
|
||||
ServiceNameNotification: {
|
||||
ServiceName: ServiceNameNotification,
|
||||
TableName: "notification_outbox_messages",
|
||||
Topic: "smartflow.notification.outbox",
|
||||
GroupID: "smartflow-notification-outbox-consumer",
|
||||
},
|
||||
}
|
||||
|
||||
// DefaultServiceRoutes 返回当前已知服务的默认路由清单。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 这里是“目录初始值”,用于自动建表和首次注册时兜底;
|
||||
// 2. 运行时若显式注册了服务路由,会以显式注册结果为准;
|
||||
// 3. 返回值是拷贝,调用方可安全遍历,不会污染全局目录。
|
||||
func DefaultServiceRoutes() []ServiceRoute {
|
||||
return []ServiceRoute{
|
||||
builtinServiceRoutes[ServiceNameAgent],
|
||||
builtinServiceRoutes[ServiceNameTask],
|
||||
builtinServiceRoutes[ServiceNameMemory],
|
||||
builtinServiceRoutes[ServiceNameActiveScheduler],
|
||||
builtinServiceRoutes[ServiceNameNotification],
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultServiceRoute 根据服务名生成终态路由。
|
||||
//
|
||||
// 规则:
|
||||
// 1. 已知服务直接返回约定映射;
|
||||
// 2. 未知服务按命名约定生成 table/topic/group,避免继续落回共享 topic;
|
||||
// 3. 空服务名回退到 agent 兼容路径,保住历史单体模式。
|
||||
func DefaultServiceRoute(serviceName string) ServiceRoute {
|
||||
serviceName = normalizeServiceName(serviceName)
|
||||
if serviceName == "" {
|
||||
serviceName = ServiceNameAgent
|
||||
}
|
||||
if route, ok := builtinServiceRoutes[serviceName]; ok {
|
||||
return route
|
||||
}
|
||||
|
||||
tablePrefix := strings.NewReplacer("-", "_").Replace(serviceName)
|
||||
if tablePrefix == "" {
|
||||
tablePrefix = ServiceNameAgent
|
||||
}
|
||||
|
||||
return ServiceRoute{
|
||||
ServiceName: serviceName,
|
||||
TableName: tablePrefix + "_outbox_messages",
|
||||
Topic: "smartflow." + serviceName + ".outbox",
|
||||
GroupID: "smartflow-" + serviceName + "-outbox-consumer",
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeServiceName(serviceName string) string {
|
||||
return strings.TrimSpace(serviceName)
|
||||
}
|
||||
|
||||
// normalizeServiceRoute 把空字段补成可执行的默认值。
|
||||
//
|
||||
// 说明:
|
||||
// 1. 只做字符串裁剪和缺省补齐,不做注册副作用;
|
||||
// 2. 服务名为空时只保留历史兼容路径,不强行把它当成新服务;
|
||||
// 3. 这一步是 route 目录的最后一道兜底,避免上层拿到半成品路由。
|
||||
func normalizeServiceRoute(route ServiceRoute) ServiceRoute {
|
||||
route.ServiceName = normalizeServiceName(route.ServiceName)
|
||||
route.TableName = strings.TrimSpace(route.TableName)
|
||||
route.Topic = strings.TrimSpace(route.Topic)
|
||||
route.GroupID = strings.TrimSpace(route.GroupID)
|
||||
|
||||
if route.ServiceName == "" {
|
||||
if route.TableName == "" {
|
||||
route.TableName = builtinServiceRoutes[ServiceNameAgent].TableName
|
||||
}
|
||||
if route.Topic == "" {
|
||||
route.Topic = builtinServiceRoutes[ServiceNameAgent].Topic
|
||||
}
|
||||
if route.GroupID == "" {
|
||||
route.GroupID = builtinServiceRoutes[ServiceNameAgent].GroupID
|
||||
}
|
||||
return route
|
||||
}
|
||||
|
||||
defaultRoute := DefaultServiceRoute(route.ServiceName)
|
||||
if route.TableName == "" {
|
||||
route.TableName = defaultRoute.TableName
|
||||
}
|
||||
if route.Topic == "" {
|
||||
route.Topic = defaultRoute.Topic
|
||||
}
|
||||
if route.GroupID == "" {
|
||||
route.GroupID = defaultRoute.GroupID
|
||||
}
|
||||
return route
|
||||
}
|
||||
Reference in New Issue
Block a user