Version: 0.9.77.dev.260505

后端:
1.阶段 6 CP4/CP5 目录收口与共享边界纯化
- 将 backend 根目录收口为 services、client、gateway、cmd、shared 五个一级目录
- 收拢 bootstrap、inits、infra/kafka、infra/outbox、conv、respond、pkg、middleware,移除根目录旧实现与空目录
- 将 utils 下沉到 services/userauth/internal/auth,将 logic 下沉到 services/schedule/core/planning
- 将迁移期 runtime 桥接实现统一收拢到 services/runtime/{conv,dao,eventsvc,model},删除 shared/legacy 与未再被 import 的旧 service 实现
- 将 gateway/shared/respond 收口为 HTTP/Gin 错误写回适配,shared/respond 仅保留共享错误语义与状态映射
- 将 HTTP IdempotencyMiddleware 与 RateLimitMiddleware 收口到 gateway/middleware
- 将 GormCachePlugin 下沉到 shared/infra/gormcache,将共享 RateLimiter 下沉到 shared/infra/ratelimit,将 agent token budget 下沉到 services/agent/shared
- 删除 InitEino 兼容壳,收缩 cmd/internal/coreinit 仅保留旧组合壳残留域初始化语义
- 更新微服务迁移计划与桌面 checklist,补齐 CP4/CP5 当前切流点、目录终态与验证结果
- 完成 go test ./...、git diff --check 与最终真实 smoke;health、register/login、task/create+get、schedule/today、task-class/list、memory/items、agent chat/meta/timeline/context-stats 全部 200,SSE 合并结果为 CP5_OK 且 [DONE] 只有 1 个
This commit is contained in:
Losita
2026-05-05 23:25:07 +08:00
parent 2a96f4c6f9
commit 3b6fca44a6
226 changed files with 731 additions and 3497 deletions

View File

@@ -0,0 +1,487 @@
package outbox
import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"strconv"
"strings"
"sync"
"time"
"github.com/LoveLosita/smartflow/backend/services/runtime/model"
kafkabus "github.com/LoveLosita/smartflow/backend/shared/infra/kafka"
segmentkafka "github.com/segmentio/kafka-go"
"gorm.io/gorm"
)
const defaultDispatchTimeout = 10 * time.Second
// MessageHandler 是事件消费处理器。
//
// 语义约束:
// 1. 入参 envelope 已完成最外层解析;
// 2. 返回 nil 表示处理成功,框架提交 offset
// 3. 返回 error 表示可重试失败,框架回写 retry 后提交 offset。
type MessageHandler func(ctx context.Context, envelope kafkabus.Envelope) error
// PublishRequest 是通用事件发布入参。
//
// 设计目标:
// 1. 业务只描述“要发什么事件”,不关心 outbox/kafka 细节;
// 2. 统一收敛事件元数据event_type/version/aggregate_id
// 3. payload 支持任意 DTO由 infra 统一 JSON 序列化。
type PublishRequest struct {
EventType string
EventVersion string
MessageKey string
AggregateID string
EventID string
Payload any
}
// Engine 是单个服务的 Outbox + Kafka 异步引擎。
//
// 职责边界:
// 1. 负责一个服务目录下的 outbox 扫描、Kafka 投递、Kafka 消费、状态机推进;
// 2. 负责 event_type -> handler 路由;
// 3. 不负责任何跨服务路由决策,跨服务分发由 EventBus 门面完成。
type Engine struct {
repo *Repository
producer *kafkabus.Producer
consumer *kafkabus.Consumer
brokers []string
route ServiceRoute
maxRetry int
scanEvery time.Duration
scanBatch int
handlersMu sync.RWMutex
handlers map[string]MessageHandler
}
// NewEngine 创建单服务异步引擎。
//
// 规则:
// 1. kafka.enabled=false 时返回 nil调用方可降级同步
// 2. serviceName 非空时优先使用服务级默认目录topic/group/table 不再沿用共享终态;
// 3. producer/consumer 任一步失败都会回收已创建资源。
func NewEngine(repo *Repository, cfg kafkabus.Config) (*Engine, error) {
if !cfg.Enabled {
return nil, nil
}
if repo == nil {
return nil, errors.New("outbox repository is nil")
}
route := resolveEngineRoute(repo, cfg)
cfg.Topic = route.Topic
cfg.GroupID = route.GroupID
serviceRepo := repo.WithRoute(route)
producer, err := kafkabus.NewProducer(cfg)
if err != nil {
return nil, err
}
consumer, err := kafkabus.NewConsumer(cfg)
if err != nil {
_ = producer.Close()
return nil, err
}
return &Engine{
repo: serviceRepo,
producer: producer,
consumer: consumer,
brokers: cfg.Brokers,
route: route,
maxRetry: cfg.MaxRetry,
scanEvery: cfg.RetryScanInterval,
scanBatch: cfg.RetryBatchSize,
handlers: make(map[string]MessageHandler),
}, nil
}
// RegisterHandler 是历史别名(等价 RegisterEventHandler
func (e *Engine) RegisterHandler(eventType string, handler MessageHandler) error {
return e.RegisterEventHandler(eventType, handler)
}
// RegisterEventHandler 注册事件处理器。
func (e *Engine) RegisterEventHandler(eventType string, handler MessageHandler) error {
if e == nil {
return errors.New("outbox engine is nil")
}
eventType = strings.TrimSpace(eventType)
if eventType == "" {
return errors.New("eventType is empty")
}
if handler == nil {
return errors.New("handler is nil")
}
e.handlersMu.Lock()
defer e.handlersMu.Unlock()
if _, exists := e.handlers[eventType]; exists {
log.Printf("outbox handler 覆盖注册: service=%s event_type=%s", e.route.ServiceName, eventType)
}
e.handlers[eventType] = handler
return nil
}
func (e *Engine) getHandler(eventType string) (MessageHandler, bool) {
e.handlersMu.RLock()
defer e.handlersMu.RUnlock()
h, ok := e.handlers[eventType]
return h, ok
}
// Start 启动 dispatch + consume 两个后台循环。
func (e *Engine) Start(ctx context.Context) {
if e == nil {
return
}
log.Printf(
"outbox engine starting: service=%s table=%s topic=%s group=%s brokers=%v retry_scan=%s batch=%d",
e.route.ServiceName,
e.route.TableName,
e.route.Topic,
e.route.GroupID,
e.brokers,
e.scanEvery,
e.scanBatch,
)
// 1. dispatch 先启动,保证已到期的 outbox 不会被 topic 探测阻塞在 pending。
// 2. consume 仍等待 topic 探测,降低启动期消费者空转与 metadata 抖动。
// 3. 若探测失败,继续启动消费者;真实错误交给消费循环记录并由运维日志暴露。
e.StartDispatch(ctx)
if err := kafkabus.WaitTopicReady(ctx, e.brokers, e.route.Topic, 30*time.Second); err != nil {
log.Printf("Kafka topic not ready before consume loop start: %v", err)
} else {
log.Printf("Kafka topic is ready: %s", e.route.Topic)
}
e.StartConsume(ctx)
}
// StartDispatch 单独启动 outbox -> Kafka 的投递循环。
func (e *Engine) StartDispatch(ctx context.Context) {
if e == nil {
return
}
go e.startDispatchLoop(ctx)
}
// StartConsume 单独启动 Kafka -> handler 的消费循环。
func (e *Engine) StartConsume(ctx context.Context) {
if e == nil {
return
}
go e.startConsumeLoop(ctx)
}
// Close 关闭 kafka 资源。
func (e *Engine) Close() {
if e == nil {
return
}
if err := e.producer.Close(); err != nil {
log.Printf("关闭 Kafka producer 失败: %v", err)
}
if err := e.consumer.Close(); err != nil {
log.Printf("关闭 Kafka consumer 失败: %v", err)
}
}
// Enqueue 是历史别名(等价 Publish
func (e *Engine) Enqueue(ctx context.Context, eventType, messageKey string, payload any) error {
return e.Publish(ctx, PublishRequest{
EventType: eventType,
MessageKey: messageKey,
AggregateID: messageKey,
Payload: payload,
})
}
// Publish 发布事件到 outbox。
//
// 步骤:
// 1. 标准化 event_type/version/key
// 2. payload 序列化;
// 3. 写入当前服务的 outbox 表,不再由调用方手传 topic。
func (e *Engine) Publish(ctx context.Context, req PublishRequest) error {
if e == nil {
return errors.New("outbox engine is nil")
}
eventType := strings.TrimSpace(req.EventType)
if eventType == "" {
return errors.New("eventType is empty")
}
eventVersion := strings.TrimSpace(req.EventVersion)
if eventVersion == "" {
eventVersion = DefaultEventVersion
}
messageKey := strings.TrimSpace(req.MessageKey)
aggregateID := strings.TrimSpace(req.AggregateID)
if aggregateID == "" {
aggregateID = messageKey
}
payloadJSON, err := json.Marshal(req.Payload)
if err != nil {
return err
}
_, err = e.repo.CreateMessage(ctx, eventType, messageKey, OutboxEventPayload{
EventID: strings.TrimSpace(req.EventID),
EventType: eventType,
EventVersion: eventVersion,
AggregateID: aggregateID,
Payload: payloadJSON,
}, e.maxRetry)
return err
}
func (e *Engine) startDispatchLoop(ctx context.Context) {
ticker := time.NewTicker(e.scanEvery)
defer ticker.Stop()
log.Printf("outbox dispatch loop started: service=%s scan=%s batch=%d", e.route.ServiceName, e.scanEvery, e.scanBatch)
e.scanAndDispatchDueMessages(ctx)
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
e.scanAndDispatchDueMessages(ctx)
}
}
}
func (e *Engine) scanAndDispatchDueMessages(ctx context.Context) {
// 1. 每轮只拉取当前服务到期消息,避免独立微服务误扫其它服务的 outbox 表。
// 2. 单条投递失败只记录并进入 retry不阻断本轮剩余消息。
// 3. 启动时也会执行一次本函数,避免重启后必须等待下一次 ticker 才能推进历史 pending。
pendingMessages, err := e.repo.ListDueMessages(ctx, e.route.ServiceName, e.scanBatch)
if err != nil {
log.Printf("扫描 outbox 失败: %v", err)
return
}
if len(pendingMessages) > 0 {
log.Printf("outbox due messages=%d, service=%s start dispatch", len(pendingMessages), e.route.ServiceName)
}
for _, msg := range pendingMessages {
if err = e.dispatchOne(ctx, msg.ID); err != nil {
log.Printf("重试投递 outbox 消息失败(id=%d): %v", msg.ID, err)
}
}
}
func (e *Engine) dispatchOne(ctx context.Context, outboxID int64) error {
outboxMsg, err := e.repo.GetByID(ctx, outboxID)
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil
}
return err
}
if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
return nil
}
eventPayload, payloadErr := parseOutboxEventPayload(outboxMsg.Payload)
if payloadErr != nil {
markErr := e.repo.MarkDead(ctx, outboxMsg.ID, "解析 outbox 事件包失败: "+payloadErr.Error())
if markErr != nil {
log.Printf("标记 outbox 死信失败(id=%d): %v", outboxMsg.ID, markErr)
}
return payloadErr
}
if eventPayload.EventID == "" {
eventPayload.EventID = strconv.FormatInt(outboxMsg.ID, 10)
}
serviceName := strings.TrimSpace(outboxMsg.ServiceName)
if serviceName == "" {
serviceName = e.route.ServiceName
}
envelope := kafkabus.Envelope{
OutboxID: outboxMsg.ID,
EventID: eventPayload.EventID,
EventType: eventPayload.EventType,
EventVersion: eventPayload.EventVersion,
ServiceName: serviceName,
AggregateID: eventPayload.AggregateID,
Payload: eventPayload.PayloadJSON,
}
raw, err := json.Marshal(envelope)
if err != nil {
markErr := e.repo.MarkDead(ctx, outboxMsg.ID, "序列化 outbox 封装失败: "+err.Error())
if markErr != nil {
log.Printf("标记 outbox 死信失败(id=%d): %v", outboxMsg.ID, markErr)
}
return err
}
// 1. Kafka 写入使用单条超时,避免 broker/metadata 卡住时消息长期停留在 pending。
// 2. 超时失败后仍走统一 retry 状态机,由下一轮扫描继续补偿。
dispatchCtx, cancel := context.WithTimeout(ctx, defaultDispatchTimeout)
defer cancel()
if err = e.producer.Enqueue(dispatchCtx, outboxMsg.Topic, outboxMsg.MessageKey, raw); err != nil {
_ = e.repo.MarkFailedForRetry(ctx, outboxMsg.ID, "投递 Kafka 失败: "+err.Error())
return err
}
if err = e.repo.MarkPublished(ctx, outboxMsg.ID); err != nil {
_ = e.repo.MarkFailedForRetry(ctx, outboxMsg.ID, "更新已投递状态失败: "+err.Error())
return err
}
return nil
}
func (e *Engine) startConsumeLoop(ctx context.Context) {
for {
select {
case <-ctx.Done():
return
default:
}
msg, err := e.consumer.Dequeue(ctx)
if err != nil {
if errors.Is(err, context.Canceled) {
return
}
log.Printf("Kafka 消费拉取失败(topic=%s): %v", e.route.Topic, err)
time.Sleep(300 * time.Millisecond)
continue
}
if err = e.handleMessage(ctx, msg); err != nil {
log.Printf("处理 Kafka 消息失败(topic=%s, partition=%d, offset=%d): %v", msg.Topic, msg.Partition, msg.Offset, err)
}
}
}
func (e *Engine) handleMessage(ctx context.Context, msg segmentkafka.Message) error {
var envelope kafkabus.Envelope
if err := json.Unmarshal(msg.Value, &envelope); err != nil {
_ = e.consumer.Commit(ctx, msg)
return fmt.Errorf("解析 Kafka 封装失败: %w", err)
}
if envelope.OutboxID <= 0 {
_ = e.consumer.Commit(ctx, msg)
return errors.New("Kafka 封装缺少 outbox_id")
}
eventType := strings.TrimSpace(envelope.EventType)
if eventType == "" {
_ = e.repo.MarkDead(ctx, envelope.OutboxID, "消息缺少事件类型")
if err := e.consumer.Commit(ctx, msg); err != nil {
return err
}
return nil
}
runtimeServiceName := strings.TrimSpace(e.route.ServiceName)
if runtimeServiceName != "" {
messageServiceName := strings.TrimSpace(envelope.ServiceName)
if messageServiceName == "" {
if resolvedServiceName, ok := ResolveEventService(eventType); ok {
messageServiceName = resolvedServiceName
}
}
if messageServiceName == "" || messageServiceName != runtimeServiceName {
log.Printf(
"跳过非本服务事件: runtime_service=%s message_service=%s event_type=%s outbox_id=%d",
runtimeServiceName,
messageServiceName,
eventType,
envelope.OutboxID,
)
if err := e.consumer.Commit(ctx, msg); err != nil {
return err
}
return nil
}
}
handler, ok := e.getHandler(eventType)
if !ok {
if runtimeServiceName == "" {
_ = e.repo.MarkDead(ctx, envelope.OutboxID, "未知事件类型: "+eventType)
} else {
_ = e.repo.MarkDead(ctx, envelope.OutboxID, "本服务未注册 handler: "+eventType)
}
if err := e.consumer.Commit(ctx, msg); err != nil {
return err
}
return nil
}
if err := handler(ctx, envelope); err != nil {
if markErr := e.repo.MarkFailedForRetry(ctx, envelope.OutboxID, "消费处理失败: "+err.Error()); markErr != nil {
return markErr
}
if commitErr := e.consumer.Commit(ctx, msg); commitErr != nil {
return commitErr
}
return err
}
return e.consumer.Commit(ctx, msg)
}
func resolveEngineRoute(repo *Repository, cfg kafkabus.Config) ServiceRoute {
route := ServiceRoute{
ServiceName: strings.TrimSpace(cfg.ServiceName),
Topic: strings.TrimSpace(cfg.Topic),
GroupID: strings.TrimSpace(cfg.GroupID),
}
if repo != nil {
repoRoute := repo.route
if route.ServiceName == "" {
route.ServiceName = strings.TrimSpace(repoRoute.ServiceName)
}
if route.TableName == "" && strings.TrimSpace(repoRoute.TableName) != "" {
route.TableName = strings.TrimSpace(repoRoute.TableName)
}
if route.Topic == "" && strings.TrimSpace(repoRoute.Topic) != "" {
route.Topic = strings.TrimSpace(repoRoute.Topic)
}
if route.GroupID == "" && strings.TrimSpace(repoRoute.GroupID) != "" {
route.GroupID = strings.TrimSpace(repoRoute.GroupID)
}
}
if route.ServiceName != "" {
defaultRoute := DefaultServiceRoute(route.ServiceName)
if route.TableName == "" {
route.TableName = defaultRoute.TableName
}
if route.Topic == "" {
route.Topic = defaultRoute.Topic
}
if route.GroupID == "" {
route.GroupID = defaultRoute.GroupID
}
return normalizeServiceRoute(route)
}
if route.TableName == "" {
route.TableName = DefaultServiceRoute(ServiceNameAgent).TableName
}
if route.Topic == "" {
route.Topic = kafkabus.DefaultTopic
}
if route.GroupID == "" {
route.GroupID = kafkabus.DefaultGroup
}
return normalizeServiceRoute(route)
}

View File

@@ -0,0 +1,177 @@
package outbox
import (
"context"
"errors"
"fmt"
"strings"
"sync"
kafkabus "github.com/LoveLosita/smartflow/backend/shared/infra/kafka"
)
// EventPublisher 是通用事件发布能力接口。
type EventPublisher interface {
Publish(ctx context.Context, req PublishRequest) error
}
// EventBus 是 outbox 多服务引擎的门面。
//
// 职责边界:
// 1. 对外只暴露“发布、注册 handler、启动、关闭”四类能力
// 2. 内部按事件归属把调用路由到对应 service engine
// 3. 不再把共享 topic 当主路径,服务级路由始终优先。
type EventBus struct {
repo *Repository
cfg kafkabus.Config
mu sync.RWMutex
engines map[string]*Engine
}
// NewEventBus 创建多服务事件门面。
//
// 说明:
// 1. kafka.enabled=false 时返回 nil调用方可直接降级
// 2. 实际 service engine 在需要时按服务目录懒加载;
// 3. 懒加载不会改变既有事件契约,只是把物理资源拆到各自服务。
func NewEventBus(repo *Repository, cfg kafkabus.Config) (*EventBus, error) {
if !cfg.Enabled {
return nil, nil
}
if repo == nil {
return nil, errors.New("outbox repository is nil")
}
return &EventBus{
repo: repo,
cfg: cfg,
engines: make(map[string]*Engine),
}, nil
}
// RegisterEventHandler 注册事件处理器。
func (b *EventBus) RegisterEventHandler(eventType string, handler MessageHandler) error {
if b == nil {
return errors.New("event bus is not initialized")
}
route, err := b.routeForEvent(eventType)
if err != nil {
return err
}
engine, err := b.ensureEngine(route)
if err != nil {
return err
}
return engine.RegisterEventHandler(eventType, handler)
}
// Publish 把事件路由到对应服务的 outbox 表与 Kafka 资源。
func (b *EventBus) Publish(ctx context.Context, req PublishRequest) error {
if b == nil {
return errors.New("event bus is not initialized")
}
route, err := b.routeForEvent(req.EventType)
if err != nil {
return err
}
engine, err := b.ensureEngine(route)
if err != nil {
return err
}
return engine.Publish(ctx, req)
}
// Start 启动所有已创建的 service engine。
func (b *EventBus) Start(ctx context.Context) {
if b == nil {
return
}
for _, engine := range b.snapshotEngines() {
go engine.Start(ctx)
}
}
// StartDispatch 只启动所有已创建 engine 的 dispatch 循环。
func (b *EventBus) StartDispatch(ctx context.Context) {
if b == nil {
return
}
for _, engine := range b.snapshotEngines() {
go engine.StartDispatch(ctx)
}
}
// StartConsume 只启动所有已创建 engine 的消费循环。
func (b *EventBus) StartConsume(ctx context.Context) {
if b == nil {
return
}
for _, engine := range b.snapshotEngines() {
go engine.StartConsume(ctx)
}
}
// Close 关闭所有 service engine 的 Kafka 资源。
func (b *EventBus) Close() {
if b == nil {
return
}
for _, engine := range b.snapshotEngines() {
engine.Close()
}
}
func (b *EventBus) routeForEvent(eventType string) (ServiceRoute, error) {
route, ok := ResolveEventRoute(eventType)
if !ok {
return ServiceRoute{}, fmt.Errorf("outbox route not registered: eventType=%s", strings.TrimSpace(eventType))
}
return route, nil
}
func (b *EventBus) ensureEngine(route ServiceRoute) (*Engine, error) {
serviceName := route.ServiceName
if serviceName == "" {
return nil, errors.New("serviceName is empty")
}
b.mu.RLock()
if engine, ok := b.engines[serviceName]; ok {
b.mu.RUnlock()
return engine, nil
}
b.mu.RUnlock()
b.mu.Lock()
defer b.mu.Unlock()
if engine, ok := b.engines[serviceName]; ok {
return engine, nil
}
cfg := b.cfg
cfg.ServiceName = serviceName
cfg.Topic = route.Topic
cfg.GroupID = route.GroupID
engine, err := NewEngine(b.repo, cfg)
if err != nil {
return nil, err
}
if engine == nil {
return nil, nil
}
b.engines[serviceName] = engine
return engine, nil
}
func (b *EventBus) snapshotEngines() []*Engine {
b.mu.RLock()
defer b.mu.RUnlock()
engines := make([]*Engine, 0, len(b.engines))
for _, engine := range b.engines {
engines = append(engines, engine)
}
return engines
}

View File

@@ -0,0 +1,64 @@
package outbox
import (
"encoding/json"
"errors"
"strings"
)
const (
// DefaultEventVersion 是通用事件协议默认版本。
DefaultEventVersion = "v1"
)
// OutboxEventPayload 是 outbox.payload 的统一事件外壳。
type OutboxEventPayload struct {
EventID string `json:"event_id,omitempty"`
EventType string `json:"event_type"`
EventVersion string `json:"event_version,omitempty"`
AggregateID string `json:"aggregate_id,omitempty"`
Payload json.RawMessage `json:"payload"`
}
// ParsedOutboxEventPayload 是 dispatch 阶段使用的标准化结构。
type ParsedOutboxEventPayload struct {
EventID string
EventType string
EventVersion string
AggregateID string
PayloadJSON json.RawMessage
}
// parseOutboxEventPayload 解析 outbox.payload。
//
// 当前策略(极致清理版):
// 1. 只接受“统一事件外壳”格式;
// 2. 不再支持旧格式纯业务 JSON 回退;
// 3. event_type 缺失时直接报错,交由上层标 dead。
func parseOutboxEventPayload(rawPayload string) (*ParsedOutboxEventPayload, error) {
var wrapped OutboxEventPayload
if err := json.Unmarshal([]byte(rawPayload), &wrapped); err != nil {
return nil, err
}
eventType := strings.TrimSpace(wrapped.EventType)
if eventType == "" {
return nil, errors.New("event type is empty")
}
if len(wrapped.Payload) == 0 {
return nil, errors.New("payload is empty")
}
eventVersion := strings.TrimSpace(wrapped.EventVersion)
if eventVersion == "" {
eventVersion = DefaultEventVersion
}
return &ParsedOutboxEventPayload{
EventID: strings.TrimSpace(wrapped.EventID),
EventType: eventType,
EventVersion: eventVersion,
AggregateID: strings.TrimSpace(wrapped.AggregateID),
PayloadJSON: wrapped.Payload,
}, nil
}

View File

@@ -0,0 +1,327 @@
package outbox
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"github.com/LoveLosita/smartflow/backend/services/runtime/model"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
// Repository 是 outbox 状态仓储。
// 职责边界:
// 1. 只负责 outbox 状态流转与通用事务编排;
// 2. 不负责聊天、任务、通知等具体业务语义;
// 3. 同一仓储实例只面向一个服务级 outbox 目录。
type Repository struct {
db *gorm.DB
route ServiceRoute
}
func NewRepository(db *gorm.DB) *Repository {
return &Repository{db: db}
}
// WithTx 使用外部事务句柄构造同事务仓储实例。
func (d *Repository) WithTx(tx *gorm.DB) *Repository {
if d == nil {
return &Repository{db: tx}
}
return &Repository{db: tx, route: d.route}
}
// WithRoute 使用指定服务路由构造仓储实例。
// 1. 只切换 outbox 物理目录,不改变业务事务语义;
// 2. 适合多个 service engine 共用同一 DB 连接时分别绑定各自 route
// 3. 保留 route 的 table/topic/group避免回落到共享 topic。
func (d *Repository) WithRoute(route ServiceRoute) *Repository {
route = normalizeServiceRoute(route)
if d == nil {
return &Repository{route: route}
}
return &Repository{db: d.db, route: route}
}
// CreateMessage 将事件写入 outbox。
// 1. 只接收 eventType、messageKey、payload 和 maxRetry不再允许业务侧显式传 topic
// 2. table/topic/group 统一由 eventType -> service -> route 解析,保证服务级路由是唯一入口;
// 3. eventType 未注册时直接返回 error避免消息静默落到默认表或默认 topic。
func (d *Repository) CreateMessage(ctx context.Context, eventType string, messageKey string, payload any, maxRetry int) (int64, error) {
if d == nil || d.db == nil {
return 0, errors.New("outbox repository is nil")
}
eventType = strings.TrimSpace(eventType)
if eventType == "" {
return 0, errors.New("eventType is empty")
}
messageKey = strings.TrimSpace(messageKey)
if maxRetry <= 0 {
maxRetry = 20
}
route, err := d.resolvePublishRoute(eventType)
if err != nil {
return 0, err
}
raw, err := json.Marshal(payload)
if err != nil {
return 0, err
}
now := time.Now()
msg := model.AgentOutboxMessage{
EventType: eventType,
ServiceName: route.ServiceName,
Topic: route.Topic,
MessageKey: messageKey,
Payload: string(raw),
Status: model.OutboxStatusPending,
RetryCount: 0,
MaxRetry: maxRetry,
NextRetryAt: &now,
}
if err = d.db.WithContext(ctx).Table(route.TableName).Create(&msg).Error; err != nil {
return 0, err
}
return msg.ID, nil
}
// GetByID 从当前仓储绑定的 outbox 表读取指定消息。
func (d *Repository) GetByID(ctx context.Context, id int64) (*model.AgentOutboxMessage, error) {
var msg model.AgentOutboxMessage
if err := d.scopedDB(ctx).Where("id = ?", id).First(&msg).Error; err != nil {
return nil, err
}
return &msg, nil
}
// ListDueMessages 拉取到期可投递消息。
// 1. serviceName 为空时保持当前仓储目录内的扫描语义;
// 2. serviceName 非空时只扫描对应服务的消息;
// 3. 这样既能支持单服务 relay也能支持后续多服务 relay。
func (d *Repository) ListDueMessages(ctx context.Context, serviceName string, limit int) ([]model.AgentOutboxMessage, error) {
if limit <= 0 {
limit = 100
}
now := time.Now()
var messages []model.AgentOutboxMessage
query := d.scopedDB(ctx).
Where("status = ? AND next_retry_at IS NOT NULL AND next_retry_at <= ?", model.OutboxStatusPending, now).
Order("next_retry_at ASC, id ASC").
Limit(limit)
serviceName = strings.TrimSpace(serviceName)
if serviceName != "" {
query = query.Where("service_name = ?", serviceName)
}
if err := query.Find(&messages).Error; err != nil {
return nil, err
}
return messages, nil
}
// MarkPublished 标记消息已经成功投递到 Kafka。
func (d *Repository) MarkPublished(ctx context.Context, id int64) error {
now := time.Now()
updates := map[string]interface{}{
"status": model.OutboxStatusPublished,
"published_at": &now,
"last_error": nil,
"next_retry_at": nil,
}
result := d.scopedDB(ctx).
Model(&model.AgentOutboxMessage{}).
Where("id = ? AND status NOT IN (?, ?)", id, model.OutboxStatusConsumed, model.OutboxStatusDead).
Updates(updates)
return result.Error
}
// MarkConsumed 标记消息已经在处理侧成功完成。
func (d *Repository) MarkConsumed(ctx context.Context, id int64) error {
now := time.Now()
updates := map[string]interface{}{
"status": model.OutboxStatusConsumed,
"consumed_at": &now,
"last_error": nil,
"next_retry_at": nil,
"updated_at": now,
}
return d.scopedDB(ctx).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
}
// MarkDead 将消息标记为死信。
func (d *Repository) MarkDead(ctx context.Context, id int64, reason string) error {
now := time.Now()
lastErr := truncateError(reason)
updates := map[string]interface{}{
"status": model.OutboxStatusDead,
"last_error": &lastErr,
"next_retry_at": nil,
"updated_at": now,
}
return d.scopedDB(ctx).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
}
// MarkFailedForRetry 记录一次可重试失败并推进重试窗口。
// 1. 行级锁读取当前消息状态;
// 2. consumed/dead 状态直接短路;
// 3. retry_count + 1并根据最大次数决定继续 pending 还是转 dead
// 4. 写回 last_error 与 next_retry_at交给下一轮扫描继续投递。
func (d *Repository) MarkFailedForRetry(ctx context.Context, id int64, reason string) error {
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var msg model.AgentOutboxMessage
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", id).First(&msg).Error
if err != nil {
return err
}
if msg.Status == model.OutboxStatusConsumed || msg.Status == model.OutboxStatusDead {
return nil
}
nextRetryCount := msg.RetryCount + 1
now := time.Now()
status := model.OutboxStatusPending
var nextRetryAt *time.Time
if nextRetryCount >= msg.MaxRetry {
status = model.OutboxStatusDead
nextRetryAt = nil
} else {
t := now.Add(calcRetryBackoff(nextRetryCount))
nextRetryAt = &t
}
lastErr := truncateError(reason)
updates := map[string]interface{}{
"status": status,
"retry_count": nextRetryCount,
"last_error": &lastErr,
"next_retry_at": nextRetryAt,
"updated_at": now,
}
return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
})
}
// ConsumeAndMarkConsumed 是通用“消费成功事务入口”。
// 1. 在事务内锁定 outbox 记录;
// 2. consumed/dead 状态直接返回;
// 3. 执行业务回调 fn(tx),让业务落库和 outbox 状态共享同一事务;
// 4. 业务成功后统一标记 consumed。
func (d *Repository) ConsumeAndMarkConsumed(ctx context.Context, outboxID int64, fn func(tx *gorm.DB) error) error {
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var outboxMsg model.AgentOutboxMessage
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil
}
return err
}
if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
return nil
}
if fn != nil {
if err = fn(tx); err != nil {
return err
}
}
now := time.Now()
updates := map[string]interface{}{
"status": model.OutboxStatusConsumed,
"consumed_at": &now,
"last_error": nil,
"next_retry_at": nil,
"updated_at": now,
}
return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", outboxID).Updates(updates).Error
})
}
// ConsumeInTx 执行 outbox 业务事务,但不负责标记 consumed。
// 1. 先锁定当前 outbox 记录,避免并发消费者同时处理同一条消息;
// 2. 只要业务函数返回错误,就保持消息为 pending交给上层 retry
// 3. 业务成功后再由上层单独标记 consumed这样可以把远端 RPC 移到事务外。
func (d *Repository) ConsumeInTx(ctx context.Context, outboxID int64, fn func(tx *gorm.DB) error) error {
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var outboxMsg model.AgentOutboxMessage
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil
}
return err
}
if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
return nil
}
if fn != nil {
if err = fn(tx); err != nil {
return err
}
}
return nil
})
}
func (d *Repository) scopedDB(ctx context.Context) *gorm.DB {
return d.db.WithContext(ctx).Table(d.tableName())
}
func (d *Repository) tableName() string {
if d == nil {
return DefaultServiceRoute(ServiceNameAgent).TableName
}
route := normalizeServiceRoute(d.route)
if route.TableName != "" {
return route.TableName
}
return DefaultServiceRoute(ServiceNameAgent).TableName
}
func (d *Repository) resolvePublishRoute(eventType string) (ServiceRoute, error) {
if d == nil {
return ServiceRoute{}, errors.New("outbox repository is nil")
}
eventType = strings.TrimSpace(eventType)
if eventType == "" {
return ServiceRoute{}, errors.New("eventType is empty")
}
route, ok := ResolveEventRoute(eventType)
if !ok {
return ServiceRoute{}, fmt.Errorf("outbox route not registered: eventType=%s", eventType)
}
if d.route.ServiceName != "" && route.ServiceName != d.route.ServiceName {
return ServiceRoute{}, fmt.Errorf("eventType %s belongs to service %s, current repo service %s", eventType, route.ServiceName, d.route.ServiceName)
}
return normalizeServiceRoute(route), nil
}
func calcRetryBackoff(retryCount int) time.Duration {
if retryCount <= 0 {
return time.Second
}
if retryCount > 10 {
retryCount = 10
}
return time.Duration(retryCount*retryCount) * time.Second
}
func truncateError(reason string) string {
if len(reason) <= 2000 {
return reason
}
return reason[:2000]
}

View File

@@ -0,0 +1,136 @@
package outbox
import (
"errors"
"fmt"
"strings"
"sync"
)
var outboxRouteRegistry = struct {
sync.RWMutex
eventToService map[string]string
serviceRoutes map[string]ServiceRoute
}{
eventToService: make(map[string]string),
serviceRoutes: make(map[string]ServiceRoute),
}
// RegisterServiceRoute 注册或覆盖某个服务的物理 outbox 路由。
//
// 职责边界:
// 1. 只登记“服务 -> table/topic/group”目录不登记事件归属
// 2. 同服务重复注册时以后者覆盖前者,方便显式配置覆盖默认目录;
// 3. 空服务名直接报错,避免把共享 topic 误当成新终态。
func RegisterServiceRoute(route ServiceRoute) error {
route = normalizeServiceRoute(route)
if route.ServiceName == "" {
return errors.New("serviceName is empty")
}
outboxRouteRegistry.Lock()
defer outboxRouteRegistry.Unlock()
outboxRouteRegistry.serviceRoutes[route.ServiceName] = route
return nil
}
// RegisterEventService 记录“事件类型 -> 服务归属”的全局路由。
//
// 职责边界:
// 1. 只登记跨进程都要识别的事件归属,不承载 handler 逻辑;
// 2. 同一 event_type 只能归属一个服务,重复登记同值视为幂等;
// 3. 若该服务还没有显式路由,则先写入默认服务目录,保证后续能查到 table/topic/group。
func RegisterEventService(eventType, serviceName string) error {
eventType = strings.TrimSpace(eventType)
if eventType == "" {
return errors.New("eventType is empty")
}
serviceName = normalizeServiceName(serviceName)
if serviceName == "" {
return errors.New("serviceName is empty")
}
outboxRouteRegistry.Lock()
defer outboxRouteRegistry.Unlock()
if existing, ok := outboxRouteRegistry.eventToService[eventType]; ok {
if existing != serviceName {
return fmt.Errorf("eventType %s already registered to service %s", eventType, existing)
}
return nil
}
outboxRouteRegistry.eventToService[eventType] = serviceName
return nil
}
// ResolveEventService 查询某个事件类型的归属服务。
//
// 返回值说明:
// 1. serviceName 为登记结果;
// 2. ok=false 表示当前路由表里还没有这个事件类型的归属信息。
func ResolveEventService(eventType string) (serviceName string, ok bool) {
eventType = strings.TrimSpace(eventType)
if eventType == "" {
return "", false
}
outboxRouteRegistry.RLock()
defer outboxRouteRegistry.RUnlock()
serviceName, ok = outboxRouteRegistry.eventToService[eventType]
return serviceName, ok
}
// ResolveServiceRoute 查询某个服务的物理 outbox 配置。
//
// 返回值说明:
// 1. route 始终返回一个可执行的目录结果,未显式注册时回退默认目录;
// 2. ok=true 表示命中显式注册目录ok=false 表示走默认目录;
// 3. 这样既能支持显式配置覆盖,也能让基础设施在启动初期就有稳定默认值。
func ResolveServiceRoute(serviceName string) (route ServiceRoute, ok bool) {
serviceName = normalizeServiceName(serviceName)
if serviceName == "" {
return DefaultServiceRoute(""), false
}
outboxRouteRegistry.RLock()
route, ok = outboxRouteRegistry.serviceRoutes[serviceName]
outboxRouteRegistry.RUnlock()
if ok {
return normalizeServiceRoute(route), true
}
if route, ok = configuredServiceRoute(serviceName); ok {
return route, true
}
return DefaultServiceRoute(serviceName), false
}
// ResolveEventRoute 先按事件查服务,再按服务查物理目录。
//
// 返回值说明:
// 1. route 包含事件所在服务的 table/topic/group
// 2. ok=true 只表示“事件 -> 服务归属”已登记;
// 3. 服务目录若未显式注册,会自动回退到默认目录。
func ResolveEventRoute(eventType string) (route ServiceRoute, ok bool) {
serviceName, ok := ResolveEventService(eventType)
if !ok {
return ServiceRoute{}, false
}
route, _ = ResolveServiceRoute(serviceName)
return route, true
}
func configuredServiceRoute(serviceName string) (ServiceRoute, bool) {
cfg, ok := ResolveServiceConfig(serviceName)
if !ok {
return ServiceRoute{}, false
}
return normalizeServiceRoute(ServiceRoute{
ServiceName: cfg.Name,
TableName: cfg.TableName,
Topic: cfg.Topic,
GroupID: cfg.GroupID,
}), true
}

View File

@@ -0,0 +1,168 @@
package outbox
import (
"fmt"
"sort"
"strings"
"sync"
"github.com/spf13/viper"
)
const (
ServiceAgent = "agent"
ServiceTask = "task"
ServiceMemory = "memory"
ServiceActiveScheduler = "active-scheduler"
ServiceNotification = "notification"
)
// ServiceConfig 描述一个服务级 outbox 的固定归属。
//
// 职责边界:
// 1. 只描述“事件属于哪个服务、写哪张表、发哪个 topic、用哪个 group”。
// 2. 不承载具体业务 handler也不承载 Kafka 消息体格式。
// 3. 服务级写入、扫描和消费都应从这里读取同一份映射,避免配置漂移。
type ServiceConfig struct {
Name string
Topic string
GroupID string
TableName string
}
var serviceCatalogCache = struct {
sync.RWMutex
loaded bool
entries map[string]ServiceConfig
}{
entries: make(map[string]ServiceConfig),
}
// LoadServiceConfigs 读取服务级 outbox 目录。
//
// 说明:
// 1. 先给出默认终态映射,再允许通过配置中心覆盖 topic/groupID/table
// 2. 该目录只负责服务级 outbox 基础设施,不混入业务逻辑;
// 3. 若某个服务配置缺失,直接使用默认值,避免启动期因为非关键配置崩掉。
func LoadServiceConfigs() map[string]ServiceConfig {
serviceCatalogCache.Lock()
defer serviceCatalogCache.Unlock()
if serviceCatalogCache.loaded {
return cloneServiceConfigs(serviceCatalogCache.entries)
}
entries := map[string]ServiceConfig{
ServiceAgent: {
Name: ServiceAgent,
Topic: "smartflow.agent.outbox",
GroupID: "smartflow-agent-outbox-consumer",
TableName: "agent_outbox_messages",
},
ServiceTask: {
Name: ServiceTask,
Topic: "smartflow.task.outbox",
GroupID: "smartflow-task-outbox-consumer",
TableName: "task_outbox_messages",
},
ServiceMemory: {
Name: ServiceMemory,
Topic: "smartflow.memory.outbox",
GroupID: "smartflow-memory-outbox-consumer",
TableName: "memory_outbox_messages",
},
ServiceActiveScheduler: {
Name: ServiceActiveScheduler,
Topic: "smartflow.active-scheduler.outbox",
GroupID: "smartflow-active-scheduler-outbox-consumer",
TableName: "active_scheduler_outbox_messages",
},
ServiceNotification: {
Name: ServiceNotification,
Topic: "smartflow.notification.outbox",
GroupID: "smartflow-notification-outbox-consumer",
TableName: "notification_outbox_messages",
},
}
for name, entry := range entries {
entries[name] = overrideServiceConfig(entry)
}
serviceCatalogCache.entries = entries
serviceCatalogCache.loaded = true
return cloneServiceConfigs(entries)
}
// ResolveServiceConfig 查询某个服务的 outbox 目录。
func ResolveServiceConfig(serviceName string) (ServiceConfig, bool) {
serviceName = strings.TrimSpace(serviceName)
if serviceName == "" {
return ServiceConfig{}, false
}
entries := LoadServiceConfigs()
cfg, ok := entries[serviceName]
return cfg, ok
}
// ResolveEventServiceConfig 先解析事件归属服务,再返回该服务的 outbox 目录。
func ResolveEventServiceConfig(eventType string) (ServiceConfig, bool) {
serviceName, ok := ResolveEventService(eventType)
if !ok {
return ServiceConfig{}, false
}
return ResolveServiceConfig(serviceName)
}
// ServiceTables 返回当前目录中的所有 outbox 表名。
func ServiceTables() []string {
entries := LoadServiceConfigs()
tables := make([]string, 0, len(entries))
for _, entry := range entries {
tables = append(tables, entry.TableName)
}
sort.Strings(tables)
return tables
}
// ServiceNames 返回当前目录中的所有服务名。
func ServiceNames() []string {
entries := LoadServiceConfigs()
names := make([]string, 0, len(entries))
for name := range entries {
names = append(names, name)
}
sort.Strings(names)
return names
}
func overrideServiceConfig(entry ServiceConfig) ServiceConfig {
upperName := strings.TrimSpace(entry.Name)
if upperName == "" {
return entry
}
topicKey := fmt.Sprintf("outbox.services.%s.topic", upperName)
groupKey := fmt.Sprintf("outbox.services.%s.groupID", upperName)
tableKey := fmt.Sprintf("outbox.services.%s.table", upperName)
if topic := strings.TrimSpace(viper.GetString(topicKey)); topic != "" {
entry.Topic = topic
}
if groupID := strings.TrimSpace(viper.GetString(groupKey)); groupID != "" {
entry.GroupID = groupID
}
if tableName := strings.TrimSpace(viper.GetString(tableKey)); tableName != "" {
entry.TableName = tableName
}
return entry
}
func cloneServiceConfigs(entries map[string]ServiceConfig) map[string]ServiceConfig {
cloned := make(map[string]ServiceConfig, len(entries))
for name, entry := range entries {
cloned[name] = entry
}
return cloned
}

View File

@@ -0,0 +1,145 @@
package outbox
import (
"strings"
)
const (
ServiceNameAgent = "agent"
ServiceNameTask = "task"
ServiceNameMemory = "memory"
ServiceNameActiveScheduler = "active-scheduler"
ServiceNameNotification = "notification"
)
// ServiceRoute 描述一个 outbox 服务的终态路由信息。
//
// 职责边界:
// 1. 只承载服务级 outbox 的 table/topic/group 目录信息;
// 2. 不承载 handler、事务或 Kafka 连接对象;
// 3. 允许上层按事件类型先查服务,再由服务查到自己的物理资源。
type ServiceRoute struct {
ServiceName string
TableName string
Topic string
GroupID string
}
var builtinServiceRoutes = map[string]ServiceRoute{
ServiceNameAgent: {
ServiceName: ServiceNameAgent,
TableName: "agent_outbox_messages",
Topic: "smartflow.agent.outbox",
GroupID: "smartflow-agent-outbox-consumer",
},
ServiceNameTask: {
ServiceName: ServiceNameTask,
TableName: "task_outbox_messages",
Topic: "smartflow.task.outbox",
GroupID: "smartflow-task-outbox-consumer",
},
ServiceNameMemory: {
ServiceName: ServiceNameMemory,
TableName: "memory_outbox_messages",
Topic: "smartflow.memory.outbox",
GroupID: "smartflow-memory-outbox-consumer",
},
ServiceNameActiveScheduler: {
ServiceName: ServiceNameActiveScheduler,
TableName: "active_scheduler_outbox_messages",
Topic: "smartflow.active-scheduler.outbox",
GroupID: "smartflow-active-scheduler-outbox-consumer",
},
ServiceNameNotification: {
ServiceName: ServiceNameNotification,
TableName: "notification_outbox_messages",
Topic: "smartflow.notification.outbox",
GroupID: "smartflow-notification-outbox-consumer",
},
}
// DefaultServiceRoutes 返回当前已知服务的默认路由清单。
//
// 说明:
// 1. 这里是“目录初始值”,用于自动建表和首次注册时兜底;
// 2. 运行时若显式注册了服务路由,会以显式注册结果为准;
// 3. 返回值是拷贝,调用方可安全遍历,不会污染全局目录。
func DefaultServiceRoutes() []ServiceRoute {
return []ServiceRoute{
builtinServiceRoutes[ServiceNameAgent],
builtinServiceRoutes[ServiceNameTask],
builtinServiceRoutes[ServiceNameMemory],
builtinServiceRoutes[ServiceNameActiveScheduler],
builtinServiceRoutes[ServiceNameNotification],
}
}
// DefaultServiceRoute 根据服务名生成终态路由。
//
// 规则:
// 1. 已知服务直接返回约定映射;
// 2. 未知服务按命名约定生成 table/topic/group避免继续落回共享 topic
// 3. 空服务名回退到 agent 兼容路径,保住历史单体模式。
func DefaultServiceRoute(serviceName string) ServiceRoute {
serviceName = normalizeServiceName(serviceName)
if serviceName == "" {
serviceName = ServiceNameAgent
}
if route, ok := builtinServiceRoutes[serviceName]; ok {
return route
}
tablePrefix := strings.NewReplacer("-", "_").Replace(serviceName)
if tablePrefix == "" {
tablePrefix = ServiceNameAgent
}
return ServiceRoute{
ServiceName: serviceName,
TableName: tablePrefix + "_outbox_messages",
Topic: "smartflow." + serviceName + ".outbox",
GroupID: "smartflow-" + serviceName + "-outbox-consumer",
}
}
func normalizeServiceName(serviceName string) string {
return strings.TrimSpace(serviceName)
}
// normalizeServiceRoute 把空字段补成可执行的默认值。
//
// 说明:
// 1. 只做字符串裁剪和缺省补齐,不做注册副作用;
// 2. 服务名为空时只保留历史兼容路径,不强行把它当成新服务;
// 3. 这一步是 route 目录的最后一道兜底,避免上层拿到半成品路由。
func normalizeServiceRoute(route ServiceRoute) ServiceRoute {
route.ServiceName = normalizeServiceName(route.ServiceName)
route.TableName = strings.TrimSpace(route.TableName)
route.Topic = strings.TrimSpace(route.Topic)
route.GroupID = strings.TrimSpace(route.GroupID)
if route.ServiceName == "" {
if route.TableName == "" {
route.TableName = builtinServiceRoutes[ServiceNameAgent].TableName
}
if route.Topic == "" {
route.Topic = builtinServiceRoutes[ServiceNameAgent].Topic
}
if route.GroupID == "" {
route.GroupID = builtinServiceRoutes[ServiceNameAgent].GroupID
}
return route
}
defaultRoute := DefaultServiceRoute(route.ServiceName)
if route.TableName == "" {
route.TableName = defaultRoute.TableName
}
if route.Topic == "" {
route.Topic = defaultRoute.Topic
}
if route.GroupID == "" {
route.GroupID = defaultRoute.GroupID
}
return route
}