Version: 0.9.77.dev.260505

后端:
1.阶段 6 CP4/CP5 目录收口与共享边界纯化
- 将 backend 根目录收口为 services、client、gateway、cmd、shared 五个一级目录
- 收拢 bootstrap、inits、infra/kafka、infra/outbox、conv、respond、pkg、middleware,移除根目录旧实现与空目录
- 将 utils 下沉到 services/userauth/internal/auth,将 logic 下沉到 services/schedule/core/planning
- 将迁移期 runtime 桥接实现统一收拢到 services/runtime/{conv,dao,eventsvc,model},删除 shared/legacy 与未再被 import 的旧 service 实现
- 将 gateway/shared/respond 收口为 HTTP/Gin 错误写回适配,shared/respond 仅保留共享错误语义与状态映射
- 将 HTTP IdempotencyMiddleware 与 RateLimitMiddleware 收口到 gateway/middleware
- 将 GormCachePlugin 下沉到 shared/infra/gormcache,将共享 RateLimiter 下沉到 shared/infra/ratelimit,将 agent token budget 下沉到 services/agent/shared
- 删除 InitEino 兼容壳,收缩 cmd/internal/coreinit 仅保留旧组合壳残留域初始化语义
- 更新微服务迁移计划与桌面 checklist,补齐 CP4/CP5 当前切流点、目录终态与验证结果
- 完成 go test ./...、git diff --check 与最终真实 smoke;health、register/login、task/create+get、schedule/today、task-class/list、memory/items、agent chat/meta/timeline/context-stats 全部 200,SSE 合并结果为 CP5_OK 且 [DONE] 只有 1 个
This commit is contained in:
Losita
2026-05-05 23:25:07 +08:00
parent 2a96f4c6f9
commit 3b6fca44a6
226 changed files with 731 additions and 3497 deletions

View File

@@ -0,0 +1,26 @@
package bootstrap
import (
"fmt"
"log"
"github.com/spf13/viper"
)
// LoadConfig loads the unified configuration for a backend process.
//
// Scope:
//  1. Only reads config.yaml into viper; it does not interpret any
//     business-level configuration semantics.
//  2. Supports launching from both the repository root and the backend
//     directory.
//  3. Returns an error on failure; each process entry point decides whether
//     to exit.
func LoadConfig() error {
	viper.SetConfigType("yaml")
	viper.SetConfigName("config")
	// Search order: current dir first, then the backend subdirectory.
	for _, searchDir := range []string{".", "backend"} {
		viper.AddConfigPath(searchDir)
	}
	err := viper.ReadInConfig()
	if err != nil {
		return fmt.Errorf("failed to read config file: %w", err)
	}
	log.Println("Config loaded successfully")
	return nil
}

View File

@@ -0,0 +1,56 @@
package einoinfra
import (
"context"
"os"
"github.com/cloudwego/eino-ext/components/model/ark"
"github.com/spf13/viper"
)
// AIHub holds the Ark ChatModel handles reused within the current process.
type AIHub struct {
	Lite *ark.ChatModel
	Pro  *ark.ChatModel
	Max  *ark.ChatModel
}
// InitEino creates the Eino model handles reused by the current process,
// driven by the unified viper configuration.
//
// All three handles share the same base URL (agent.baseURL) and the API key
// from the ARK_API_KEY environment variable; only the model name read from
// agent.liteModel / agent.proModel / agent.maxModel differs.
func InitEino() (*AIHub, error) {
	ctx := context.Background()
	baseURL := viper.GetString("agent.baseURL")
	apiKey := os.Getenv("ARK_API_KEY")
	// newModel builds one Ark ChatModel for the model name stored under the
	// given viper key, sharing the common endpoint and credentials.
	newModel := func(configKey string) (*ark.ChatModel, error) {
		return ark.NewChatModel(ctx, &ark.ChatModelConfig{
			Model:   viper.GetString(configKey),
			BaseURL: baseURL,
			APIKey:  apiKey,
		})
	}
	lite, err := newModel("agent.liteModel")
	if err != nil {
		return nil, err
	}
	pro, err := newModel("agent.proModel")
	if err != nil {
		return nil, err
	}
	maxModel, err := newModel("agent.maxModel")
	if err != nil {
		return nil, err
	}
	return &AIHub{
		Lite: lite,
		Pro:  pro,
		Max:  maxModel,
	}, nil
}

View File

@@ -0,0 +1,171 @@
package gormcache
import (
"context"
"log"
"reflect"
"strings"
"github.com/LoveLosita/smartflow/backend/services/runtime/dao"
"github.com/LoveLosita/smartflow/backend/services/runtime/model"
"gorm.io/gorm"
)
// GormCachePlugin triggers the matching cache invalidation after a successful
// GORM write, keyed by the written model's type.
//
// Scope:
//  1. Only maps "model type -> cache deletion logic".
//  2. Does not commit business transactions and does not refill caches.
//  3. Only covers models the project actually reads from cache; models with
//     no cache should be silently ignored.
type GormCachePlugin struct {
	cacheDAO *dao.CacheDAO
}

// NewGormCachePlugin builds a plugin bound to the given cache DAO.
func NewGormCachePlugin(dao *dao.CacheDAO) *GormCachePlugin {
	return &GormCachePlugin{
		cacheDAO: dao,
	}
}
// Name reports the GORM plugin name.
func (p *GormCachePlugin) Name() string {
	return "GormCachePlugin"
}

// Initialize registers the shared invalidation hook to run after the built-in
// create/update/delete callbacks, so it only fires for successful writes.
func (p *GormCachePlugin) Initialize(db *gorm.DB) error {
	callbacks := db.Callback()
	_ = callbacks.Create().After("gorm:create").Register("clear_related_cache_after_create", p.afterWrite)
	_ = callbacks.Update().After("gorm:update").Register("clear_related_cache_after_update", p.afterWrite)
	_ = callbacks.Delete().After("gorm:delete").Register("clear_related_cache_after_delete", p.afterWrite)
	return nil
}
// afterWrite runs after every successful create/update/delete and forwards
// the written model value to the cache-invalidation dispatcher.
func (p *GormCachePlugin) afterWrite(db *gorm.DB) {
	// Skip failed statements, statements without a parsed schema, and
	// statements that carry no model value: reflect.ValueOf(nil) yields an
	// invalid Value whose Interface() call would panic below.
	if db.Error != nil || db.Statement.Schema == nil || db.Statement.Model == nil {
		return
	}
	// 1. Strip all pointer layers to reach the concrete model value.
	// 2. For batch writes, dispatch based on the first slice element; the
	//    element is also dereferenced so []*model.Task batches invalidate the
	//    same caches as []model.Task batches.
	val := reflect.Indirect(reflect.ValueOf(db.Statement.Model))
	if val.Kind() == reflect.Slice {
		if val.Len() > 0 {
			elem := reflect.Indirect(val.Index(0))
			if elem.IsValid() {
				p.dispatchCacheLogic(elem.Interface())
			}
		}
		return
	}
	p.dispatchCacheLogic(val.Interface())
}
// dispatchCacheLogic decides per model type whether a cache invalidation is
// required.
//
// Steps:
//  1. Models with a real read-side cache dependency are matched first and run
//     their dedicated deletion logic.
//  2. Models confirmed to need no invalidation are explicitly and silently
//     ignored, so the normal write path does not spam the log.
//  3. Only unknown models are logged, to surface missing cache strategies.
func (p *GormCachePlugin) dispatchCacheLogic(modelObj interface{}) {
	switch m := modelObj.(type) {
	case model.Schedule:
		p.invalidScheduleCache(m.UserID, m.Week)
	case model.TaskClass:
		// UserID is a pointer on TaskClass; guard against nil before
		// dereferencing so a write without an owning user cannot panic the
		// GORM callback.
		if m.UserID != nil {
			p.invalidTaskClassCache(*m.UserID)
		}
	case model.Task:
		p.invalidTaskCache(m.UserID)
	case model.AgentScheduleState:
		p.invalidSchedulePlanPreviewCache(m.UserID, m.ConversationID)
	case model.MemoryItem:
		// 1. Admin-side delete/update/restore/create of memory items expires
		//    the prefetch cache of every conversation of that user.
		// 2. Repo methods carry userID via Model(&model.MemoryItem{UserID: userID});
		//    the UserID extracted here allows precise invalidation.
		// 3. When UserID is 0 (repo methods without a userID argument), the
		//    guard inside invalidMemoryPrefetchCache simply skips.
		p.invalidMemoryPrefetchCache(m.UserID)
	case model.AgentOutboxMessage,
		model.ChatHistory,
		model.AgentChat,
		model.AgentTimelineEvent,
		model.AgentStateSnapshotRecord,
		model.MemoryJob,
		model.MemoryAuditLog,
		model.MemoryUserSetting:
		// These models currently have no read-side cache dependency; ignore
		// them on purpose.
		return
	default:
		log.Printf("[GORM-Cache] No logic defined for model: %T", modelObj)
	}
}
// invalidScheduleCache drops the weekly view plus the derived today /
// recently-completed / ongoing entries for one user.
func (p *GormCachePlugin) invalidScheduleCache(userID int, week int) {
	// Zero IDs mean the write carried no usable key; nothing to invalidate.
	if userID == 0 || week == 0 {
		return
	}
	// 1. Deletion runs asynchronously so the main transaction commit is
	//    never blocked.
	// 2. All derived views are cleared together with the week view to keep
	//    them consistent.
	go func() {
		ctx := context.Background()
		_ = p.cacheDAO.DeleteUserWeeklyScheduleFromCache(ctx, userID, week)
		_ = p.cacheDAO.DeleteUserTodayScheduleFromCache(ctx, userID)
		_ = p.cacheDAO.DeleteUserRecentCompletedSchedulesFromCache(ctx, userID)
		_ = p.cacheDAO.DeleteUserOngoingScheduleFromCache(ctx, userID)
		log.Printf("[GORM-Cache] Invalidated cache for user %d, week %d", userID, week)
	}()
}

// invalidTaskClassCache drops the task-class list cache of one user.
func (p *GormCachePlugin) invalidTaskClassCache(userID int) {
	if userID == 0 {
		return
	}
	go func() {
		_ = p.cacheDAO.DeleteTaskClassList(context.Background(), userID)
		log.Printf("[GORM-Cache] Invalidated task class list cache for user %d", userID)
	}()
}

// invalidTaskCache drops the task list cache of one user.
func (p *GormCachePlugin) invalidTaskCache(userID int) {
	if userID == 0 {
		return
	}
	go func() {
		_ = p.cacheDAO.DeleteUserTasksFromCache(context.Background(), userID)
		log.Printf("[GORM-Cache] Invalidated task list cache for user %d", userID)
	}()
}

// invalidSchedulePlanPreviewCache drops the plan-preview cache for one
// user/conversation pair.
func (p *GormCachePlugin) invalidSchedulePlanPreviewCache(userID int, conversationID string) {
	normalizedConversationID := strings.TrimSpace(conversationID)
	if userID == 0 || normalizedConversationID == "" {
		return
	}
	go func() {
		// 1. Once the schedule snapshot is overwritten the preview cache must
		//    go with it, so Redis never keeps serving the stale result.
		// 2. A failed delete is only logged; the cache is always a
		//    re-derivable copy, so the main transaction is unaffected.
		err := p.cacheDAO.DeleteSchedulePlanPreviewFromCache(context.Background(), userID, normalizedConversationID)
		if err != nil {
			log.Printf("[GORM-Cache] Failed to invalidate schedule preview cache for user %d conversation %s: %v", userID, normalizedConversationID, err)
			return
		}
		log.Printf("[GORM-Cache] Invalidated schedule preview cache for user %d conversation %s", userID, normalizedConversationID)
	}()
}

// invalidMemoryPrefetchCache expires the memory prefetch cache of every
// conversation belonging to the given user.
//
// Steps:
//  1. Guard userID==0 first (repo methods without a userID, such as
//     UpdateContentByID, trigger the callback without one).
//  2. Asynchronously call DeleteMemoryPrefetchCacheByUser, which deletes by
//     the pattern smartflow:memory_prefetch:u:{userID}:c:* in bulk.
//  3. A failure is only logged and never blocks the main transaction; the
//     30-minute TTL is the natural fallback.
func (p *GormCachePlugin) invalidMemoryPrefetchCache(userID int) {
	if userID == 0 {
		return
	}
	go func() {
		err := p.cacheDAO.DeleteMemoryPrefetchCacheByUser(context.Background(), userID)
		if err != nil {
			log.Printf("[GORM-Cache] Failed to invalidate memory prefetch cache for user %d: %v", userID, err)
			return
		}
		log.Printf("[GORM-Cache] Invalidated memory prefetch cache for user %d", userID)
	}()
}

View File

@@ -0,0 +1,83 @@
package kafka
import (
"context"
"errors"
"fmt"
"time"
segmentkafka "github.com/segmentio/kafka-go"
)
// WaitTopicReady blocks until the given Kafka topic becomes available or the
// timeout elapses.
//
// Background: right after an initial deployment the broker may be up while
// the topic/partitions are not yet ready. Probing before startup shrinks the
// silent window where the application looks alive but cannot actually
// consume.
func WaitTopicReady(parent context.Context, brokers []string, topic string, timeout time.Duration) error {
	switch {
	case len(brokers) == 0:
		return errors.New("kafka brokers is empty")
	case topic == "":
		return errors.New("kafka topic is empty")
	}
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	ctx, cancel := context.WithTimeout(parent, timeout)
	defer cancel()
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	var lastErr error
	for {
		lastErr = probeTopic(ctx, brokers, topic)
		if lastErr == nil {
			return nil
		}
		// Wait for either the next probe tick or the overall deadline.
		select {
		case <-ticker.C:
		case <-ctx.Done():
			if lastErr != nil {
				return fmt.Errorf("wait topic ready timeout, topic=%s: %w", topic, lastErr)
			}
			return fmt.Errorf("wait topic ready timeout, topic=%s", topic)
		}
	}
}
// probeTopic polls every broker; the topic counts as ready as soon as any
// single broker returns non-empty partition metadata for it.
func probeTopic(ctx context.Context, brokers []string, topic string) error {
	var lastErr error
	for _, brokerAddr := range brokers {
		conn, dialErr := segmentkafka.DialContext(ctx, "tcp", brokerAddr)
		if dialErr != nil {
			lastErr = dialErr
			continue
		}
		// 1. ReadPartitions in segmentio/kafka-go does not accept a context.
		// 2. An explicit I/O deadline on the raw connection prevents a hang
		//    when the broker accepts the dial but never answers the metadata
		//    request; otherwise WaitTopicReady would block forever and the
		//    outbox dispatch/consume loops could never start.
		// 3. On deadline expiry this probe round fails and the outer ticker
		//    retries until the overall timeout is reached.
		_ = conn.SetDeadline(time.Now().Add(2 * time.Second))
		partitions, readErr := conn.ReadPartitions(topic)
		_ = conn.Close()
		switch {
		case readErr != nil:
			lastErr = readErr
		case len(partitions) == 0:
			lastErr = fmt.Errorf("topic %s has no partitions yet", topic)
		default:
			return nil
		}
	}
	if lastErr != nil {
		return lastErr
	}
	return errors.New("unable to probe topic readiness")
}

View File

@@ -0,0 +1,69 @@
package kafka
import (
"strings"
"time"
"github.com/spf13/viper"
)
const (
	// DefaultTopic is the fallback Kafka topic for the outbox link.
	DefaultTopic = "smartflow.agent.outbox"
	// DefaultGroup is the fallback consumer group for the outbox link.
	DefaultGroup = "smartflow-agent-outbox-consumer"
)

// Config describes the Kafka settings required by the async outbox link.
// Note: these parameters affect both the producer side and the consumer side.
type Config struct {
	Enabled bool
	Brokers []string
	Topic   string
	GroupID string
	// ServiceName is the outbox service this process belongs to; empty keeps
	// the monolith full-coverage mode.
	ServiceName string
	// RetryScanInterval/RetryBatchSize/MaxRetry govern outbox scanning and
	// failure retry.
	RetryScanInterval time.Duration
	RetryBatchSize    int
	MaxRetry          int
}
// LoadConfig reads the Kafka configuration from the config store and applies
// fallback defaults.
// Compatibility: kafka.brokers (list) is preferred; when it is empty the
// single-value kafka.broker key is used instead.
func LoadConfig() Config {
	brokerList := viper.GetStringSlice("kafka.brokers")
	if len(brokerList) == 0 {
		if single := strings.TrimSpace(viper.GetString("kafka.broker")); single != "" {
			brokerList = []string{single}
		}
	}
	cfg := Config{
		Enabled:           viper.GetBool("kafka.enabled"),
		Brokers:           brokerList,
		Topic:             strings.TrimSpace(viper.GetString("kafka.topic")),
		GroupID:           strings.TrimSpace(viper.GetString("kafka.groupID")),
		ServiceName:       strings.TrimSpace(viper.GetString("outbox.serviceName")),
		RetryScanInterval: viper.GetDuration("kafka.retryScanInterval"),
		RetryBatchSize:    viper.GetInt("kafka.retryBatchSize"),
		MaxRetry:          viper.GetInt("kafka.maxRetry"),
	}
	// outbox.serviceName wins; kafka.serviceName is the legacy fallback key.
	if cfg.ServiceName == "" {
		cfg.ServiceName = strings.TrimSpace(viper.GetString("kafka.serviceName"))
	}
	// Fill defaults for anything the config left empty or non-positive.
	if cfg.Topic == "" {
		cfg.Topic = DefaultTopic
	}
	if cfg.GroupID == "" {
		cfg.GroupID = DefaultGroup
	}
	if cfg.RetryScanInterval <= 0 {
		cfg.RetryScanInterval = time.Second
	}
	if cfg.RetryBatchSize <= 0 {
		cfg.RetryBatchSize = 100
	}
	if cfg.MaxRetry <= 0 {
		cfg.MaxRetry = 20
	}
	return cfg
}

View File

@@ -0,0 +1,57 @@
package kafka
import (
"context"
"errors"
"strings"
segmentkafka "github.com/segmentio/kafka-go"
)
// Consumer wraps a kafka-go Reader with nil-safe helper methods.
type Consumer struct {
	reader *segmentkafka.Reader
}

// NewConsumer validates cfg and builds a group consumer that starts from the
// earliest offset and never auto-commits (CommitInterval 0 means synchronous
// commits).
func NewConsumer(cfg Config) (*Consumer, error) {
	switch {
	case len(cfg.Brokers) == 0:
		return nil, errors.New("kafka brokers not configured")
	case strings.TrimSpace(cfg.Topic) == "":
		return nil, errors.New("kafka topic not configured")
	case strings.TrimSpace(cfg.GroupID) == "":
		return nil, errors.New("kafka groupID not configured")
	}
	return &Consumer{
		reader: segmentkafka.NewReader(segmentkafka.ReaderConfig{
			Brokers:        cfg.Brokers,
			Topic:          cfg.Topic,
			GroupID:        cfg.GroupID,
			MinBytes:       1,
			MaxBytes:       10e6,
			CommitInterval: 0,
			StartOffset:    segmentkafka.FirstOffset,
		}),
	}, nil
}

// Dequeue fetches one message from Kafka without committing its offset.
func (c *Consumer) Dequeue(ctx context.Context) (segmentkafka.Message, error) {
	if c == nil || c.reader == nil {
		return segmentkafka.Message{}, errors.New("kafka consumer not initialized")
	}
	return c.reader.FetchMessage(ctx)
}

// Commit acknowledges a previously fetched message.
func (c *Consumer) Commit(ctx context.Context, msg segmentkafka.Message) error {
	if c == nil || c.reader == nil {
		return errors.New("kafka consumer not initialized")
	}
	return c.reader.CommitMessages(ctx, msg)
}

// Close releases the underlying reader; it is safe on a nil consumer.
func (c *Consumer) Close() error {
	if c == nil || c.reader == nil {
		return nil
	}
	return c.reader.Close()
}

View File

@@ -0,0 +1,28 @@
package kafka
import "encoding/json"
// Envelope is the unified protocol wrapper that outbox delivers to Kafka.
//
// Protocol boundary:
//  1. This is bus protocol only; it carries no business-specific fields.
//  2. Routing depends solely on event_type; the legacy biz_type compatibility
//     field is gone.
//  3. Payload is the raw business JSON; the business handler decides how to
//     deserialize it.
type Envelope struct {
	// OutboxID is the outbox state-machine primary key, used by consumers to
	// write back consumed/retry/dead.
	OutboxID int64 `json:"outbox_id"`
	// EventID uniquely identifies the event (currently defaults to the
	// outbox_id rendered as a string).
	EventID string `json:"event_id,omitempty"`
	// EventType is the sole routing key (e.g. chat.history.persist.requested).
	EventType string `json:"event_type"`
	// EventVersion is the event version (default v1).
	EventVersion string `json:"event_version,omitempty"`
	// ServiceName is the owning service; empty usually means a legacy
	// compatibility message or full-coverage mode.
	ServiceName string `json:"service_name,omitempty"`
	// AggregateID is the aggregate key (e.g. conversation_id) used to trace
	// the event stream of one business object.
	AggregateID string `json:"aggregate_id,omitempty"`
	// Payload is the business payload JSON.
	Payload json.RawMessage `json:"payload"`
}

View File

@@ -0,0 +1,45 @@
package kafka
import (
"context"
"errors"
segmentkafka "github.com/segmentio/kafka-go"
)
// Producer wraps a kafka-go Writer with nil-safe helper methods.
type Producer struct {
	writer *segmentkafka.Writer
}

// NewProducer validates cfg and builds a synchronous, hash-balanced writer
// that waits for a single broker ack per message.
func NewProducer(cfg Config) (*Producer, error) {
	if len(cfg.Brokers) == 0 {
		return nil, errors.New("kafka brokers 未配置")
	}
	return &Producer{
		writer: &segmentkafka.Writer{
			Addr:         segmentkafka.TCP(cfg.Brokers...),
			Balancer:     &segmentkafka.Hash{},
			RequiredAcks: segmentkafka.RequireOne,
			Async:        false,
		},
	}, nil
}

// Enqueue writes one keyed message to the given Kafka topic.
func (p *Producer) Enqueue(ctx context.Context, topic, key string, value []byte) error {
	if p == nil || p.writer == nil {
		return errors.New("kafka producer 未初始化")
	}
	return p.writer.WriteMessages(ctx, segmentkafka.Message{
		Topic: topic,
		Key:   []byte(key),
		Value: value,
	})
}

// Close releases the underlying writer; it is safe on a nil producer.
func (p *Producer) Close() error {
	if p == nil || p.writer == nil {
		return nil
	}
	return p.writer.Close()
}

View File

@@ -0,0 +1,34 @@
package mysqlinfra
import (
"fmt"
"github.com/spf13/viper"
"gorm.io/driver/mysql"
"gorm.io/gorm"
)
// OpenDBFromConfig creates a MySQL connection purely from the unified config.
//
// Scope:
//  1. Only translates the viper "database" section into a *gorm.DB.
//  2. Runs no AutoMigrate, no backfill, no service-private dependency checks.
//  3. The caller decides which service owns the connection and what
//     initialization follows.
func OpenDBFromConfig() (*gorm.DB, error) {
	host := viper.GetString("database.host")
	port := viper.GetString("database.port")
	user := viper.GetString("database.user")
	password := viper.GetString("database.password")
	dbname := viper.GetString("database.dbname")
	dsn := fmt.Sprintf(
		"%s:%s@tcp(%s:%s)/%s?charset=utf8mb4&parseTime=True&loc=Local",
		user, password, host, port, dbname,
	)
	db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{})
	if err != nil {
		// Include host/port/dbname (never the password) so a misconfigured
		// process fails with an actionable message.
		return nil, fmt.Errorf("open mysql %s:%s/%s: %w", host, port, dbname, err)
	}
	return db, nil
}

View File

@@ -0,0 +1,487 @@
package outbox
import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"strconv"
"strings"
"sync"
"time"
"github.com/LoveLosita/smartflow/backend/services/runtime/model"
kafkabus "github.com/LoveLosita/smartflow/backend/shared/infra/kafka"
segmentkafka "github.com/segmentio/kafka-go"
"gorm.io/gorm"
)
// defaultDispatchTimeout bounds a single outbox -> Kafka write.
const defaultDispatchTimeout = 10 * time.Second

// MessageHandler is the event consumption handler.
//
// Contract:
//  1. The envelope argument has already had its outermost layer parsed.
//  2. Returning nil means success; the framework commits the offset.
//  3. Returning an error means a retryable failure; the framework records a
//     retry and then commits the offset.
type MessageHandler func(ctx context.Context, envelope kafkabus.Envelope) error

// PublishRequest is the generic event publish input.
//
// Design goals:
//  1. Business code only describes "which event to send" and never touches
//     outbox/kafka details.
//  2. Event metadata (event_type/version/aggregate_id) is centralized here.
//  3. Payload accepts any DTO; the infra layer handles JSON serialization.
type PublishRequest struct {
	EventType    string
	EventVersion string
	MessageKey   string
	AggregateID  string
	EventID      string
	Payload      any
}
// Engine is the Outbox + Kafka async engine of a single service.
//
// Scope:
//  1. Owns outbox scanning, Kafka delivery, Kafka consumption, and state
//     machine advancement for one service directory.
//  2. Owns the event_type -> handler routing table.
//  3. Makes no cross-service routing decisions; those belong to the EventBus
//     facade.
type Engine struct {
	repo     *Repository
	producer *kafkabus.Producer
	consumer *kafkabus.Consumer
	brokers  []string
	route    ServiceRoute
	// maxRetry/scanEvery/scanBatch drive the outbox retry state machine and
	// the periodic due-message scan.
	maxRetry  int
	scanEvery time.Duration
	scanBatch int
	// handlersMu guards handlers: registration can race with lookups from
	// the consume loop.
	handlersMu sync.RWMutex
	handlers   map[string]MessageHandler
}
// NewEngine creates the async engine for one service.
//
// Rules:
//  1. Returns (nil, nil) when kafka.enabled=false so callers can degrade to
//     synchronous mode.
//  2. A non-empty serviceName prefers service-level default directories;
//     topic/group/table no longer fall back to the shared terminal state.
//  3. If producer or consumer construction fails, already-created resources
//     are released.
func NewEngine(repo *Repository, cfg kafkabus.Config) (*Engine, error) {
	if !cfg.Enabled {
		return nil, nil
	}
	if repo == nil {
		return nil, errors.New("outbox repository is nil")
	}
	// Resolve the effective route first, then pin cfg and the repository to
	// it so producer, consumer and repo all agree on topic/group/table.
	route := resolveEngineRoute(repo, cfg)
	cfg.Topic = route.Topic
	cfg.GroupID = route.GroupID
	serviceRepo := repo.WithRoute(route)
	producer, err := kafkabus.NewProducer(cfg)
	if err != nil {
		return nil, err
	}
	consumer, err := kafkabus.NewConsumer(cfg)
	if err != nil {
		// Roll back the producer so a half-built engine leaks nothing.
		_ = producer.Close()
		return nil, err
	}
	return &Engine{
		repo:      serviceRepo,
		producer:  producer,
		consumer:  consumer,
		brokers:   cfg.Brokers,
		route:     route,
		maxRetry:  cfg.MaxRetry,
		scanEvery: cfg.RetryScanInterval,
		scanBatch: cfg.RetryBatchSize,
		handlers:  make(map[string]MessageHandler),
	}, nil
}
// RegisterHandler is a historical alias for RegisterEventHandler.
func (e *Engine) RegisterHandler(eventType string, handler MessageHandler) error {
	return e.RegisterEventHandler(eventType, handler)
}

// RegisterEventHandler registers the handler for one event type, replacing
// (and logging) any previous registration.
func (e *Engine) RegisterEventHandler(eventType string, handler MessageHandler) error {
	if e == nil {
		return errors.New("outbox engine is nil")
	}
	trimmedType := strings.TrimSpace(eventType)
	switch {
	case trimmedType == "":
		return errors.New("eventType is empty")
	case handler == nil:
		return errors.New("handler is nil")
	}
	e.handlersMu.Lock()
	defer e.handlersMu.Unlock()
	if _, exists := e.handlers[trimmedType]; exists {
		log.Printf("outbox handler 覆盖注册: service=%s event_type=%s", e.route.ServiceName, trimmedType)
	}
	e.handlers[trimmedType] = handler
	return nil
}

// getHandler looks up the handler registered for eventType under a read lock.
func (e *Engine) getHandler(eventType string) (MessageHandler, bool) {
	e.handlersMu.RLock()
	defer e.handlersMu.RUnlock()
	handler, ok := e.handlers[eventType]
	return handler, ok
}
// Start launches the dispatch and consume background loops.
func (e *Engine) Start(ctx context.Context) {
	if e == nil {
		return
	}
	log.Printf(
		"outbox engine starting: service=%s table=%s topic=%s group=%s brokers=%v retry_scan=%s batch=%d",
		e.route.ServiceName,
		e.route.TableName,
		e.route.Topic,
		e.route.GroupID,
		e.brokers,
		e.scanEvery,
		e.scanBatch,
	)
	// 1. Dispatch starts first so already-due outbox rows are never stuck in
	//    pending behind the topic probe.
	// 2. Consume still waits for the topic probe, reducing startup-time
	//    consumer spinning and metadata churn.
	// 3. If the probe fails the consumer is started anyway; real errors are
	//    logged by the consume loop for operations to see.
	e.StartDispatch(ctx)
	if err := kafkabus.WaitTopicReady(ctx, e.brokers, e.route.Topic, 30*time.Second); err != nil {
		log.Printf("Kafka topic not ready before consume loop start: %v", err)
	} else {
		log.Printf("Kafka topic is ready: %s", e.route.Topic)
	}
	e.StartConsume(ctx)
}
// StartDispatch launches only the outbox -> Kafka dispatch loop.
func (e *Engine) StartDispatch(ctx context.Context) {
	if e == nil {
		return
	}
	go e.startDispatchLoop(ctx)
}

// StartConsume launches only the Kafka -> handler consume loop.
func (e *Engine) StartConsume(ctx context.Context) {
	if e == nil {
		return
	}
	go e.startConsumeLoop(ctx)
}

// Close releases the Kafka producer and consumer, logging (not returning)
// any close errors.
func (e *Engine) Close() {
	if e == nil {
		return
	}
	if closeErr := e.producer.Close(); closeErr != nil {
		log.Printf("关闭 Kafka producer 失败: %v", closeErr)
	}
	if closeErr := e.consumer.Close(); closeErr != nil {
		log.Printf("关闭 Kafka consumer 失败: %v", closeErr)
	}
}
// Enqueue is a historical alias for Publish; messageKey doubles as the
// aggregate ID.
func (e *Engine) Enqueue(ctx context.Context, eventType, messageKey string, payload any) error {
	return e.Publish(ctx, PublishRequest{
		EventType:   eventType,
		MessageKey:  messageKey,
		AggregateID: messageKey,
		Payload:     payload,
	})
}

// Publish stores an event in this service's outbox table.
//
// Steps:
//  1. Normalize event_type/version/key.
//  2. Serialize the payload to JSON.
//  3. Write into the current service's outbox table; callers no longer pass
//     a topic by hand.
func (e *Engine) Publish(ctx context.Context, req PublishRequest) error {
	if e == nil {
		return errors.New("outbox engine is nil")
	}
	eventType := strings.TrimSpace(req.EventType)
	if eventType == "" {
		return errors.New("eventType is empty")
	}
	eventVersion := strings.TrimSpace(req.EventVersion)
	if eventVersion == "" {
		eventVersion = DefaultEventVersion
	}
	messageKey := strings.TrimSpace(req.MessageKey)
	aggregateID := strings.TrimSpace(req.AggregateID)
	if aggregateID == "" {
		aggregateID = messageKey
	}
	payloadJSON, marshalErr := json.Marshal(req.Payload)
	if marshalErr != nil {
		return marshalErr
	}
	event := OutboxEventPayload{
		EventID:      strings.TrimSpace(req.EventID),
		EventType:    eventType,
		EventVersion: eventVersion,
		AggregateID:  aggregateID,
		Payload:      payloadJSON,
	}
	_, createErr := e.repo.CreateMessage(ctx, eventType, messageKey, event, e.maxRetry)
	return createErr
}
// startDispatchLoop drives periodic outbox scans until ctx is cancelled.
func (e *Engine) startDispatchLoop(ctx context.Context) {
	ticker := time.NewTicker(e.scanEvery)
	defer ticker.Stop()
	log.Printf("outbox dispatch loop started: service=%s scan=%s batch=%d", e.route.ServiceName, e.scanEvery, e.scanBatch)
	// Run once immediately so restarts do not wait a full tick before
	// pushing historical pending messages.
	e.scanAndDispatchDueMessages(ctx)
	for {
		select {
		case <-ticker.C:
			e.scanAndDispatchDueMessages(ctx)
		case <-ctx.Done():
			return
		}
	}
}

// scanAndDispatchDueMessages delivers one batch of due outbox messages.
//  1. Each round pulls only messages owned by this service, so independent
//     microservices never scan each other's outbox tables.
//  2. A single delivery failure is logged and enters retry without blocking
//     the rest of the batch.
//  3. It also runs once at startup (see startDispatchLoop) so history does
//     not wait for the next tick.
func (e *Engine) scanAndDispatchDueMessages(ctx context.Context) {
	dueMessages, listErr := e.repo.ListDueMessages(ctx, e.route.ServiceName, e.scanBatch)
	if listErr != nil {
		log.Printf("扫描 outbox 失败: %v", listErr)
		return
	}
	if len(dueMessages) > 0 {
		log.Printf("outbox due messages=%d, service=%s start dispatch", len(dueMessages), e.route.ServiceName)
	}
	for _, dueMsg := range dueMessages {
		if dispatchErr := e.dispatchOne(ctx, dueMsg.ID); dispatchErr != nil {
			log.Printf("重试投递 outbox 消息失败(id=%d): %v", dueMsg.ID, dispatchErr)
		}
	}
}
// dispatchOne delivers a single outbox row to Kafka and advances its state
// machine. Missing rows and terminal states (consumed/dead) are no-ops, so
// the scan loop can call this idempotently.
func (e *Engine) dispatchOne(ctx context.Context, outboxID int64) error {
	outboxMsg, err := e.repo.GetByID(ctx, outboxID)
	if err != nil {
		if errors.Is(err, gorm.ErrRecordNotFound) {
			return nil
		}
		return err
	}
	if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
		return nil
	}
	// A payload that cannot be parsed will never succeed on retry, so it is
	// marked dead immediately.
	eventPayload, payloadErr := parseOutboxEventPayload(outboxMsg.Payload)
	if payloadErr != nil {
		markErr := e.repo.MarkDead(ctx, outboxMsg.ID, "解析 outbox 事件包失败: "+payloadErr.Error())
		if markErr != nil {
			log.Printf("标记 outbox 死信失败(id=%d): %v", outboxMsg.ID, markErr)
		}
		return payloadErr
	}
	// Default the event ID to the outbox primary key when absent.
	if eventPayload.EventID == "" {
		eventPayload.EventID = strconv.FormatInt(outboxMsg.ID, 10)
	}
	// Legacy rows may carry no service name; inherit the engine's own.
	serviceName := strings.TrimSpace(outboxMsg.ServiceName)
	if serviceName == "" {
		serviceName = e.route.ServiceName
	}
	envelope := kafkabus.Envelope{
		OutboxID:     outboxMsg.ID,
		EventID:      eventPayload.EventID,
		EventType:    eventPayload.EventType,
		EventVersion: eventPayload.EventVersion,
		ServiceName:  serviceName,
		AggregateID:  eventPayload.AggregateID,
		Payload:      eventPayload.PayloadJSON,
	}
	raw, err := json.Marshal(envelope)
	if err != nil {
		markErr := e.repo.MarkDead(ctx, outboxMsg.ID, "序列化 outbox 封装失败: "+err.Error())
		if markErr != nil {
			log.Printf("标记 outbox 死信失败(id=%d): %v", outboxMsg.ID, markErr)
		}
		return err
	}
	// 1. The Kafka write uses a per-message timeout so a stuck broker or
	//    metadata fetch cannot pin messages in pending indefinitely.
	// 2. A timeout still flows through the unified retry state machine and is
	//    compensated by the next scan round.
	dispatchCtx, cancel := context.WithTimeout(ctx, defaultDispatchTimeout)
	defer cancel()
	if err = e.producer.Enqueue(dispatchCtx, outboxMsg.Topic, outboxMsg.MessageKey, raw); err != nil {
		_ = e.repo.MarkFailedForRetry(ctx, outboxMsg.ID, "投递 Kafka 失败: "+err.Error())
		return err
	}
	if err = e.repo.MarkPublished(ctx, outboxMsg.ID); err != nil {
		_ = e.repo.MarkFailedForRetry(ctx, outboxMsg.ID, "更新已投递状态失败: "+err.Error())
		return err
	}
	return nil
}
// startConsumeLoop pulls messages from Kafka and feeds them to handleMessage
// until ctx is cancelled. Pull failures are logged and retried after a short
// backoff.
func (e *Engine) startConsumeLoop(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		default:
		}
		msg, err := e.consumer.Dequeue(ctx)
		if err != nil {
			if errors.Is(err, context.Canceled) {
				return
			}
			log.Printf("Kafka 消费拉取失败(topic=%s): %v", e.route.Topic, err)
			// Context-aware backoff: unlike time.Sleep, this returns
			// immediately on shutdown instead of blocking up to 300ms.
			select {
			case <-ctx.Done():
				return
			case <-time.After(300 * time.Millisecond):
			}
			continue
		}
		if err = e.handleMessage(ctx, msg); err != nil {
			log.Printf("处理 Kafka 消息失败(topic=%s, partition=%d, offset=%d): %v", msg.Topic, msg.Partition, msg.Offset, err)
		}
	}
}
// handleMessage runs the full consume pipeline for one Kafka message:
// envelope decode -> service ownership filter -> handler dispatch -> outbox
// state write-back -> offset commit. Malformed messages are committed (and
// marked dead when an outbox ID is available) so a poison message cannot
// wedge the partition.
func (e *Engine) handleMessage(ctx context.Context, msg segmentkafka.Message) error {
	var envelope kafkabus.Envelope
	if err := json.Unmarshal(msg.Value, &envelope); err != nil {
		// No outbox ID is recoverable here; commit so the message is skipped.
		_ = e.consumer.Commit(ctx, msg)
		return fmt.Errorf("解析 Kafka 封装失败: %w", err)
	}
	if envelope.OutboxID <= 0 {
		_ = e.consumer.Commit(ctx, msg)
		return errors.New("Kafka 封装缺少 outbox_id")
	}
	eventType := strings.TrimSpace(envelope.EventType)
	if eventType == "" {
		// Without an event type the message can never be routed: mark dead.
		_ = e.repo.MarkDead(ctx, envelope.OutboxID, "消息缺少事件类型")
		if err := e.consumer.Commit(ctx, msg); err != nil {
			return err
		}
		return nil
	}
	// Service-scoped engines skip events owned by other services. The owning
	// service comes from the envelope, falling back to the static
	// event -> service registry for legacy messages without a service name.
	runtimeServiceName := strings.TrimSpace(e.route.ServiceName)
	if runtimeServiceName != "" {
		messageServiceName := strings.TrimSpace(envelope.ServiceName)
		if messageServiceName == "" {
			if resolvedServiceName, ok := ResolveEventService(eventType); ok {
				messageServiceName = resolvedServiceName
			}
		}
		if messageServiceName == "" || messageServiceName != runtimeServiceName {
			log.Printf(
				"跳过非本服务事件: runtime_service=%s message_service=%s event_type=%s outbox_id=%d",
				runtimeServiceName,
				messageServiceName,
				eventType,
				envelope.OutboxID,
			)
			if err := e.consumer.Commit(ctx, msg); err != nil {
				return err
			}
			return nil
		}
	}
	handler, ok := e.getHandler(eventType)
	if !ok {
		// No handler will ever appear for this event in this process: dead.
		if runtimeServiceName == "" {
			_ = e.repo.MarkDead(ctx, envelope.OutboxID, "未知事件类型: "+eventType)
		} else {
			_ = e.repo.MarkDead(ctx, envelope.OutboxID, "本服务未注册 handler: "+eventType)
		}
		if err := e.consumer.Commit(ctx, msg); err != nil {
			return err
		}
		return nil
	}
	if err := handler(ctx, envelope); err != nil {
		// Handler failure: record a retry in outbox, then commit the offset —
		// redelivery is driven by the outbox state machine, not by Kafka.
		if markErr := e.repo.MarkFailedForRetry(ctx, envelope.OutboxID, "消费处理失败: "+err.Error()); markErr != nil {
			return markErr
		}
		if commitErr := e.consumer.Commit(ctx, msg); commitErr != nil {
			return commitErr
		}
		return err
	}
	return e.consumer.Commit(ctx, msg)
}
// resolveEngineRoute merges the effective service route from three layers,
// highest priority first: explicit cfg values, the route already bound on the
// repository, and finally service-level defaults (or the legacy agent/shared
// defaults when no service name is known). The result is always normalized.
func resolveEngineRoute(repo *Repository, cfg kafkabus.Config) ServiceRoute {
	route := ServiceRoute{
		ServiceName: strings.TrimSpace(cfg.ServiceName),
		Topic:       strings.TrimSpace(cfg.Topic),
		GroupID:     strings.TrimSpace(cfg.GroupID),
	}
	// Layer 2: fill gaps from the repository's bound route.
	if repo != nil {
		repoRoute := repo.route
		if route.ServiceName == "" {
			route.ServiceName = strings.TrimSpace(repoRoute.ServiceName)
		}
		if route.TableName == "" && strings.TrimSpace(repoRoute.TableName) != "" {
			route.TableName = strings.TrimSpace(repoRoute.TableName)
		}
		if route.Topic == "" && strings.TrimSpace(repoRoute.Topic) != "" {
			route.Topic = strings.TrimSpace(repoRoute.Topic)
		}
		if route.GroupID == "" && strings.TrimSpace(repoRoute.GroupID) != "" {
			route.GroupID = strings.TrimSpace(repoRoute.GroupID)
		}
	}
	// Layer 3a: a known service name pulls that service's default directory.
	if route.ServiceName != "" {
		defaultRoute := DefaultServiceRoute(route.ServiceName)
		if route.TableName == "" {
			route.TableName = defaultRoute.TableName
		}
		if route.Topic == "" {
			route.Topic = defaultRoute.Topic
		}
		if route.GroupID == "" {
			route.GroupID = defaultRoute.GroupID
		}
		return normalizeServiceRoute(route)
	}
	// Layer 3b: no service name — fall back to the agent table and the
	// shared default topic/group.
	if route.TableName == "" {
		route.TableName = DefaultServiceRoute(ServiceNameAgent).TableName
	}
	if route.Topic == "" {
		route.Topic = kafkabus.DefaultTopic
	}
	if route.GroupID == "" {
		route.GroupID = kafkabus.DefaultGroup
	}
	return normalizeServiceRoute(route)
}

View File

@@ -0,0 +1,177 @@
package outbox
import (
"context"
"errors"
"fmt"
"strings"
"sync"
kafkabus "github.com/LoveLosita/smartflow/backend/shared/infra/kafka"
)
// EventPublisher is the generic event publishing capability.
type EventPublisher interface {
	Publish(ctx context.Context, req PublishRequest) error
}

// EventBus is the facade over the per-service outbox engines.
//
// Scope:
//  1. Exposes only four capabilities: publish, handler registration, start,
//     and close.
//  2. Internally routes each call to the service engine owning the event.
//  3. The shared topic is no longer the main path; service-level routing
//     always wins.
type EventBus struct {
	repo *Repository
	cfg  kafkabus.Config
	// mu guards engines; entries are created lazily by ensureEngine.
	mu      sync.RWMutex
	engines map[string]*Engine
}
// NewEventBus creates the multi-service event facade.
//
// Notes:
//  1. Returns nil when kafka.enabled=false so callers can degrade directly.
//  2. Service engines are lazily created per service directory when needed.
//  3. Lazy loading keeps existing event contracts unchanged; only the
//     physical resources are split per service.
func NewEventBus(repo *Repository, cfg kafkabus.Config) (*EventBus, error) {
	switch {
	case !cfg.Enabled:
		return nil, nil
	case repo == nil:
		return nil, errors.New("outbox repository is nil")
	}
	bus := &EventBus{
		repo:    repo,
		cfg:     cfg,
		engines: map[string]*Engine{},
	}
	return bus, nil
}
// RegisterEventHandler routes the registration to the engine that owns the
// event type.
func (b *EventBus) RegisterEventHandler(eventType string, handler MessageHandler) error {
	if b == nil {
		return errors.New("event bus is not initialized")
	}
	route, routeErr := b.routeForEvent(eventType)
	if routeErr != nil {
		return routeErr
	}
	engine, engineErr := b.ensureEngine(route)
	if engineErr != nil {
		return engineErr
	}
	return engine.RegisterEventHandler(eventType, handler)
}

// Publish routes the event to the outbox table and Kafka resources of its
// owning service.
func (b *EventBus) Publish(ctx context.Context, req PublishRequest) error {
	if b == nil {
		return errors.New("event bus is not initialized")
	}
	route, routeErr := b.routeForEvent(req.EventType)
	if routeErr != nil {
		return routeErr
	}
	engine, engineErr := b.ensureEngine(route)
	if engineErr != nil {
		return engineErr
	}
	return engine.Publish(ctx, req)
}
// Start launches every already-created service engine.
func (b *EventBus) Start(ctx context.Context) {
	if b == nil {
		return
	}
	for _, eng := range b.snapshotEngines() {
		go eng.Start(ctx)
	}
}

// StartDispatch launches only the dispatch loops of the created engines.
func (b *EventBus) StartDispatch(ctx context.Context) {
	if b == nil {
		return
	}
	for _, eng := range b.snapshotEngines() {
		go eng.StartDispatch(ctx)
	}
}

// StartConsume launches only the consume loops of the created engines.
func (b *EventBus) StartConsume(ctx context.Context) {
	if b == nil {
		return
	}
	for _, eng := range b.snapshotEngines() {
		go eng.StartConsume(ctx)
	}
}

// Close releases the Kafka resources of every service engine.
func (b *EventBus) Close() {
	if b == nil {
		return
	}
	for _, eng := range b.snapshotEngines() {
		eng.Close()
	}
}
// routeForEvent resolves the service route that owns eventType.
func (b *EventBus) routeForEvent(eventType string) (ServiceRoute, error) {
	if route, ok := ResolveEventRoute(eventType); ok {
		return route, nil
	}
	return ServiceRoute{}, fmt.Errorf("outbox route not registered: eventType=%s", strings.TrimSpace(eventType))
}
// ensureEngine returns the engine for the route's service, creating it on
// first use (double-checked locking: a fast read-locked lookup, then a
// re-check under the write lock before construction).
func (b *EventBus) ensureEngine(route ServiceRoute) (*Engine, error) {
	serviceName := route.ServiceName
	if serviceName == "" {
		return nil, errors.New("serviceName is empty")
	}
	// Fast path: engine already exists.
	b.mu.RLock()
	if engine, ok := b.engines[serviceName]; ok {
		b.mu.RUnlock()
		return engine, nil
	}
	b.mu.RUnlock()
	b.mu.Lock()
	defer b.mu.Unlock()
	// Re-check: another goroutine may have created it between the locks.
	if engine, ok := b.engines[serviceName]; ok {
		return engine, nil
	}
	// Copy the shared config and pin it to this service's route.
	cfg := b.cfg
	cfg.ServiceName = serviceName
	cfg.Topic = route.Topic
	cfg.GroupID = route.GroupID
	engine, err := NewEngine(b.repo, cfg)
	if err != nil {
		return nil, err
	}
	// NewEngine returns nil when Kafka is disabled; do not cache a nil entry.
	if engine == nil {
		return nil, nil
	}
	b.engines[serviceName] = engine
	return engine, nil
}
// snapshotEngines copies the current engine set under the read lock so
// callers can iterate without holding it.
func (b *EventBus) snapshotEngines() []*Engine {
	b.mu.RLock()
	defer b.mu.RUnlock()
	snapshot := make([]*Engine, 0, len(b.engines))
	for _, eng := range b.engines {
		snapshot = append(snapshot, eng)
	}
	return snapshot
}

View File

@@ -0,0 +1,64 @@
package outbox
import (
"encoding/json"
"errors"
"strings"
)
const (
	// DefaultEventVersion is the default version of the generic event
	// protocol.
	DefaultEventVersion = "v1"
)

// OutboxEventPayload is the unified event envelope stored in outbox.payload.
type OutboxEventPayload struct {
	EventID      string          `json:"event_id,omitempty"`
	EventType    string          `json:"event_type"`
	EventVersion string          `json:"event_version,omitempty"`
	AggregateID  string          `json:"aggregate_id,omitempty"`
	Payload      json.RawMessage `json:"payload"`
}

// ParsedOutboxEventPayload is the normalized form used during dispatch.
type ParsedOutboxEventPayload struct {
	EventID      string
	EventType    string
	EventVersion string
	AggregateID  string
	PayloadJSON  json.RawMessage
}

// parseOutboxEventPayload decodes outbox.payload.
//
// Current policy (strict cleanup):
//  1. Only the unified event-envelope format is accepted.
//  2. The legacy raw-business-JSON fallback is no longer supported.
//  3. A missing event_type is an error; the caller marks the message dead.
func parseOutboxEventPayload(rawPayload string) (*ParsedOutboxEventPayload, error) {
	var wrapped OutboxEventPayload
	if err := json.Unmarshal([]byte(rawPayload), &wrapped); err != nil {
		return nil, err
	}
	parsed := &ParsedOutboxEventPayload{
		EventID:      strings.TrimSpace(wrapped.EventID),
		EventType:    strings.TrimSpace(wrapped.EventType),
		EventVersion: strings.TrimSpace(wrapped.EventVersion),
		AggregateID:  strings.TrimSpace(wrapped.AggregateID),
		PayloadJSON:  wrapped.Payload,
	}
	switch {
	case parsed.EventType == "":
		return nil, errors.New("event type is empty")
	case len(parsed.PayloadJSON) == 0:
		return nil, errors.New("payload is empty")
	}
	if parsed.EventVersion == "" {
		parsed.EventVersion = DefaultEventVersion
	}
	return parsed, nil
}

View File

@@ -0,0 +1,327 @@
package outbox
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/LoveLosita/smartflow/backend/services/runtime/model"

	"gorm.io/gorm"
	"gorm.io/gorm/clause"
)
// Repository is the outbox state store.
// Responsibilities:
//  1. only handles outbox state transitions and generic transaction orchestration;
//  2. carries no chat/task/notification business semantics;
//  3. one repository instance targets exactly one service-level outbox table.
type Repository struct {
	db    *gorm.DB
	route ServiceRoute
}

// NewRepository binds a repository to db with the zero-value route; the
// zero route resolves to the agent-compatible table (see tableName). Use
// WithRoute to bind a specific service.
func NewRepository(db *gorm.DB) *Repository {
	return &Repository{db: db}
}
// WithTx builds a same-transaction repository from an external transaction
// handle, keeping the current route binding so outbox writes join the
// caller's transaction.
func (d *Repository) WithTx(tx *gorm.DB) *Repository {
	clone := &Repository{db: tx}
	if d != nil {
		clone.route = d.route
	}
	return clone
}
// WithRoute builds a repository bound to the given service route.
//  1. Only the physical outbox directory changes, never business
//     transaction semantics;
//  2. lets several service engines share one DB connection while each
//     binds its own route;
//  3. the route's table/topic/group is kept, avoiding fallback to a
//     shared topic.
func (d *Repository) WithRoute(route ServiceRoute) *Repository {
	bound := normalizeServiceRoute(route)
	if d == nil {
		return &Repository{route: bound}
	}
	return &Repository{db: d.db, route: bound}
}
// CreateMessage writes an event into the outbox.
//  1. Accepts only eventType/messageKey/payload/maxRetry — business code can
//     no longer pass a topic explicitly;
//  2. table/topic/group are resolved via eventType -> service -> route, so the
//     service-level routing table is the single source of truth;
//  3. an unregistered eventType is an error, so messages never silently land
//     in a default table or topic.
//
// Returns the new row's ID.
func (d *Repository) CreateMessage(ctx context.Context, eventType string, messageKey string, payload any, maxRetry int) (int64, error) {
	if d == nil || d.db == nil {
		return 0, errors.New("outbox repository is nil")
	}
	eventType = strings.TrimSpace(eventType)
	if eventType == "" {
		return 0, errors.New("eventType is empty")
	}
	messageKey = strings.TrimSpace(messageKey)
	if maxRetry <= 0 {
		// Default retry budget for non-positive input.
		maxRetry = 20
	}
	route, err := d.resolvePublishRoute(eventType)
	if err != nil {
		return 0, err
	}
	raw, err := json.Marshal(payload)
	if err != nil {
		return 0, err
	}
	now := time.Now()
	msg := model.AgentOutboxMessage{
		EventType:   eventType,
		ServiceName: route.ServiceName,
		Topic:       route.Topic,
		MessageKey:  messageKey,
		Payload:     string(raw),
		Status:      model.OutboxStatusPending,
		RetryCount:  0,
		MaxRetry:    maxRetry,
		// Due immediately: the relay can pick it up on its next scan.
		NextRetryAt: &now,
	}
	if err = d.db.WithContext(ctx).Table(route.TableName).Create(&msg).Error; err != nil {
		return 0, err
	}
	return msg.ID, nil
}
// GetByID reads one message from the outbox table this repository is bound to.
func (d *Repository) GetByID(ctx context.Context, id int64) (*model.AgentOutboxMessage, error) {
	msg := model.AgentOutboxMessage{}
	err := d.scopedDB(ctx).Where("id = ?", id).First(&msg).Error
	if err != nil {
		return nil, err
	}
	return &msg, nil
}
// ListDueMessages fetches pending messages whose retry window has arrived.
//  1. An empty serviceName keeps the scan scoped to the repository's own table;
//  2. a non-empty serviceName restricts the scan to that service's rows;
//  3. this supports both single-service and multi-service relays.
func (d *Repository) ListDueMessages(ctx context.Context, serviceName string, limit int) ([]model.AgentOutboxMessage, error) {
	if limit <= 0 {
		limit = 100
	}
	q := d.scopedDB(ctx).
		Where("status = ? AND next_retry_at IS NOT NULL AND next_retry_at <= ?", model.OutboxStatusPending, time.Now()).
		Order("next_retry_at ASC, id ASC").
		Limit(limit)
	if name := strings.TrimSpace(serviceName); name != "" {
		q = q.Where("service_name = ?", name)
	}
	var due []model.AgentOutboxMessage
	if err := q.Find(&due).Error; err != nil {
		return nil, err
	}
	return due, nil
}
// MarkPublished marks a message as successfully delivered to Kafka.
// Terminal states are never downgraded: rows already consumed or dead are
// excluded by the WHERE clause.
//
// Consistency fix: updated_at is now bumped explicitly, matching
// MarkConsumed/MarkDead, so the audit column does not depend on the ORM's
// implicit timestamp handling.
func (d *Repository) MarkPublished(ctx context.Context, id int64) error {
	now := time.Now()
	updates := map[string]interface{}{
		"status":        model.OutboxStatusPublished,
		"published_at":  &now,
		"last_error":    nil,
		"next_retry_at": nil,
		"updated_at":    now,
	}
	result := d.scopedDB(ctx).
		Model(&model.AgentOutboxMessage{}).
		Where("id = ? AND status NOT IN (?, ?)", id, model.OutboxStatusConsumed, model.OutboxStatusDead).
		Updates(updates)
	return result.Error
}
// MarkConsumed marks a message as fully processed on the consumer side,
// clearing its error and retry schedule.
func (d *Repository) MarkConsumed(ctx context.Context, id int64) error {
	now := time.Now()
	fields := map[string]interface{}{
		"status":        model.OutboxStatusConsumed,
		"consumed_at":   &now,
		"last_error":    nil,
		"next_retry_at": nil,
		"updated_at":    now,
	}
	return d.scopedDB(ctx).
		Model(&model.AgentOutboxMessage{}).
		Where("id = ?", id).
		Updates(fields).Error
}
// MarkDead parks a message as a dead letter, recording the (truncated)
// failure reason and clearing its retry schedule.
func (d *Repository) MarkDead(ctx context.Context, id int64, reason string) error {
	now := time.Now()
	lastErr := truncateError(reason)
	fields := map[string]interface{}{
		"status":        model.OutboxStatusDead,
		"last_error":    &lastErr,
		"next_retry_at": nil,
		"updated_at":    now,
	}
	return d.scopedDB(ctx).
		Model(&model.AgentOutboxMessage{}).
		Where("id = ?", id).
		Updates(fields).Error
}
// MarkFailedForRetry records one retriable failure and advances the retry
// window:
//  1. reads the current message state under a row-level lock;
//  2. short-circuits when the message is already consumed/dead;
//  3. increments retry_count and either stays pending or turns dead once
//     max_retry is reached;
//  4. writes back last_error and next_retry_at for the next scan to deliver.
//
// NOTE(review): unlike ConsumeInTx, a missing row surfaces here as
// gorm.ErrRecordNotFound instead of being swallowed — confirm intended.
func (d *Repository) MarkFailedForRetry(ctx context.Context, id int64, reason string) error {
	return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
		var msg model.AgentOutboxMessage
		// SELECT ... FOR UPDATE: serializes concurrent relays on this row.
		err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", id).First(&msg).Error
		if err != nil {
			return err
		}
		if msg.Status == model.OutboxStatusConsumed || msg.Status == model.OutboxStatusDead {
			return nil
		}
		nextRetryCount := msg.RetryCount + 1
		now := time.Now()
		status := model.OutboxStatusPending
		var nextRetryAt *time.Time
		if nextRetryCount >= msg.MaxRetry {
			// Retry budget exhausted: park the message as a dead letter.
			status = model.OutboxStatusDead
			nextRetryAt = nil
		} else {
			t := now.Add(calcRetryBackoff(nextRetryCount))
			nextRetryAt = &t
		}
		lastErr := truncateError(reason)
		updates := map[string]interface{}{
			"status":        status,
			"retry_count":   nextRetryCount,
			"last_error":    &lastErr,
			"next_retry_at": nextRetryAt,
			"updated_at":    now,
		}
		return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
	})
}
// ConsumeAndMarkConsumed is the generic "consume succeeded" transaction entry:
//  1. locks the outbox row inside the transaction;
//  2. returns immediately for consumed/dead (or already deleted) rows;
//  3. runs the business callback fn(tx) so business writes and the outbox
//     state share one transaction;
//  4. marks the row consumed once the callback succeeds.
func (d *Repository) ConsumeAndMarkConsumed(ctx context.Context, outboxID int64, fn func(tx *gorm.DB) error) error {
	return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
		var outboxMsg model.AgentOutboxMessage
		err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
		if err != nil {
			// A vanished row is treated as already handled, not an error.
			if errors.Is(err, gorm.ErrRecordNotFound) {
				return nil
			}
			return err
		}
		if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
			return nil
		}
		if fn != nil {
			if err = fn(tx); err != nil {
				return err
			}
		}
		now := time.Now()
		updates := map[string]interface{}{
			"status":        model.OutboxStatusConsumed,
			"consumed_at":   &now,
			"last_error":    nil,
			"next_retry_at": nil,
			"updated_at":    now,
		}
		return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", outboxID).Updates(updates).Error
	})
}
// ConsumeInTx runs the outbox business transaction without marking the row
// consumed:
//  1. locks the outbox row first so concurrent consumers cannot process the
//     same message at once;
//  2. any error from the business callback keeps the message pending, leaving
//     the retry to the upper layer;
//  3. on success the caller marks consumed separately, which lets remote RPCs
//     be moved outside this transaction.
func (d *Repository) ConsumeInTx(ctx context.Context, outboxID int64, fn func(tx *gorm.DB) error) error {
	return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
		var outboxMsg model.AgentOutboxMessage
		err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
		if err != nil {
			// A deleted row counts as already handled.
			if errors.Is(err, gorm.ErrRecordNotFound) {
				return nil
			}
			return err
		}
		if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
			return nil
		}
		if fn != nil {
			if err = fn(tx); err != nil {
				return err
			}
		}
		return nil
	})
}
// scopedDB returns a ctx-bound handle already pointed at this repository's
// outbox table.
func (d *Repository) scopedDB(ctx context.Context) *gorm.DB {
	db := d.db.WithContext(ctx)
	return db.Table(d.tableName())
}
// tableName resolves the physical outbox table for this repository, falling
// back to the agent default when no (or an incomplete) route is bound.
func (d *Repository) tableName() string {
	agentTable := DefaultServiceRoute(ServiceNameAgent).TableName
	if d == nil {
		return agentTable
	}
	if route := normalizeServiceRoute(d.route); route.TableName != "" {
		return route.TableName
	}
	return agentTable
}
// resolvePublishRoute maps eventType to its owning service route and rejects
// events that belong to a different service than the one this repository is
// bound to.
func (d *Repository) resolvePublishRoute(eventType string) (ServiceRoute, error) {
	if d == nil {
		return ServiceRoute{}, errors.New("outbox repository is nil")
	}
	trimmed := strings.TrimSpace(eventType)
	if trimmed == "" {
		return ServiceRoute{}, errors.New("eventType is empty")
	}
	route, registered := ResolveEventRoute(trimmed)
	if !registered {
		return ServiceRoute{}, fmt.Errorf("outbox route not registered: eventType=%s", trimmed)
	}
	if bound := d.route.ServiceName; bound != "" && route.ServiceName != bound {
		return ServiceRoute{}, fmt.Errorf("eventType %s belongs to service %s, current repo service %s", trimmed, route.ServiceName, bound)
	}
	return normalizeServiceRoute(route), nil
}
func calcRetryBackoff(retryCount int) time.Duration {
if retryCount <= 0 {
return time.Second
}
if retryCount > 10 {
retryCount = 10
}
return time.Duration(retryCount*retryCount) * time.Second
}
func truncateError(reason string) string {
if len(reason) <= 2000 {
return reason
}
return reason[:2000]
}

View File

@@ -0,0 +1,136 @@
package outbox
import (
"errors"
"fmt"
"strings"
"sync"
)
// outboxRouteRegistry is the process-wide routing table:
// eventToService maps event_type -> owning service, serviceRoutes maps
// service -> physical table/topic/group. Access is guarded by the embedded
// RWMutex.
var outboxRouteRegistry = struct {
	sync.RWMutex
	eventToService map[string]string
	serviceRoutes  map[string]ServiceRoute
}{
	eventToService: make(map[string]string),
	serviceRoutes:  make(map[string]ServiceRoute),
}
// RegisterServiceRoute registers (or overwrites) a service's physical outbox
// route.
//
// Responsibilities:
//  1. only records the "service -> table/topic/group" directory, never event
//     ownership;
//  2. re-registering the same service overwrites the previous entry, letting
//     explicit configuration replace the default directory;
//  3. an empty service name is rejected to avoid mistaking a shared topic for
//     the new terminal state.
func RegisterServiceRoute(route ServiceRoute) error {
	normalized := normalizeServiceRoute(route)
	if normalized.ServiceName == "" {
		return errors.New("serviceName is empty")
	}
	outboxRouteRegistry.Lock()
	outboxRouteRegistry.serviceRoutes[normalized.ServiceName] = normalized
	outboxRouteRegistry.Unlock()
	return nil
}
// RegisterEventService records the global "event type -> owning service"
// route.
//
// Responsibilities:
//  1. only records event ownership recognized across processes, no handler
//     logic;
//  2. one event_type can belong to exactly one service; re-registering the
//     same pair is idempotent, a conflicting owner is an error.
func RegisterEventService(eventType, serviceName string) error {
	eventType = strings.TrimSpace(eventType)
	if eventType == "" {
		return errors.New("eventType is empty")
	}
	serviceName = normalizeServiceName(serviceName)
	if serviceName == "" {
		return errors.New("serviceName is empty")
	}
	outboxRouteRegistry.Lock()
	defer outboxRouteRegistry.Unlock()
	if existing, registered := outboxRouteRegistry.eventToService[eventType]; registered {
		if existing != serviceName {
			return fmt.Errorf("eventType %s already registered to service %s", eventType, existing)
		}
		return nil
	}
	outboxRouteRegistry.eventToService[eventType] = serviceName
	return nil
}
// ResolveEventService looks up which service owns the given event type.
// ok=false means the routing table has no ownership entry for it yet.
func ResolveEventService(eventType string) (string, bool) {
	key := strings.TrimSpace(eventType)
	if key == "" {
		return "", false
	}
	outboxRouteRegistry.RLock()
	defer outboxRouteRegistry.RUnlock()
	owner, known := outboxRouteRegistry.eventToService[key]
	return owner, known
}
// ResolveServiceRoute looks up a service's physical outbox configuration.
//
// Return values:
//  1. route is always executable: unregistered services fall back through
//     the config catalog to the default directory;
//  2. ok=true means an explicitly registered or configured directory was
//     hit, ok=false means the default directory is used;
//  3. this lets explicit configuration win while infrastructure still has
//     stable defaults at startup.
func ResolveServiceRoute(serviceName string) (ServiceRoute, bool) {
	name := normalizeServiceName(serviceName)
	if name == "" {
		return DefaultServiceRoute(""), false
	}
	outboxRouteRegistry.RLock()
	registered, found := outboxRouteRegistry.serviceRoutes[name]
	outboxRouteRegistry.RUnlock()
	if found {
		return normalizeServiceRoute(registered), true
	}
	if cfgRoute, ok := configuredServiceRoute(name); ok {
		return cfgRoute, true
	}
	return DefaultServiceRoute(name), false
}
// ResolveEventRoute resolves the event's owning service first, then that
// service's physical directory.
//
// Return values:
//  1. route carries the table/topic/group of the owning service;
//  2. ok only reflects whether the event -> service ownership is registered;
//  3. an unregistered service directory automatically falls back to defaults.
func ResolveEventRoute(eventType string) (ServiceRoute, bool) {
	owner, known := ResolveEventService(eventType)
	if !known {
		return ServiceRoute{}, false
	}
	route, _ := ResolveServiceRoute(owner)
	return route, true
}
// configuredServiceRoute adapts a service catalog entry (if present) into a
// normalized ServiceRoute.
func configuredServiceRoute(serviceName string) (ServiceRoute, bool) {
	cfg, found := ResolveServiceConfig(serviceName)
	if !found {
		return ServiceRoute{}, false
	}
	route := ServiceRoute{
		ServiceName: cfg.Name,
		TableName:   cfg.TableName,
		Topic:       cfg.Topic,
		GroupID:     cfg.GroupID,
	}
	return normalizeServiceRoute(route), true
}

View File

@@ -0,0 +1,168 @@
package outbox
import (
"fmt"
"sort"
"strings"
"sync"
"github.com/spf13/viper"
)
// Canonical service names used as keys into the outbox catalog.
const (
	ServiceAgent           = "agent"
	ServiceTask            = "task"
	ServiceMemory          = "memory"
	ServiceActiveScheduler = "active-scheduler"
	ServiceNotification    = "notification"
)

// ServiceConfig describes the fixed ownership of one service-level outbox.
//
// Responsibilities:
//  1. only describes which service an event belongs to, which table it is
//     written to, which topic it publishes on, and which group consumes it;
//  2. carries no business handlers and no Kafka message format;
//  3. service-level writes, scans and consumers should all read this single
//     mapping to avoid configuration drift.
type ServiceConfig struct {
	Name      string
	Topic     string
	GroupID   string
	TableName string
}

// serviceCatalogCache memoizes the catalog after the first LoadServiceConfigs
// call; guarded by the embedded RWMutex.
var serviceCatalogCache = struct {
	sync.RWMutex
	loaded  bool
	entries map[string]ServiceConfig
}{
	entries: make(map[string]ServiceConfig),
}
// LoadServiceConfigs reads the service-level outbox catalog.
//
// Notes:
//  1. the terminal default mapping is declared here, then topic/groupID/table
//     may be overridden per service via configuration;
//  2. this catalog only covers service-level outbox infrastructure, no
//     business logic;
//  3. missing configuration simply keeps the defaults, so startup never fails
//     on non-critical config.
//
// The catalog is built once, cached, and returned as a copy.
func LoadServiceConfigs() map[string]ServiceConfig {
	serviceCatalogCache.Lock()
	defer serviceCatalogCache.Unlock()
	if !serviceCatalogCache.loaded {
		defaults := []ServiceConfig{
			{Name: ServiceAgent, Topic: "smartflow.agent.outbox", GroupID: "smartflow-agent-outbox-consumer", TableName: "agent_outbox_messages"},
			{Name: ServiceTask, Topic: "smartflow.task.outbox", GroupID: "smartflow-task-outbox-consumer", TableName: "task_outbox_messages"},
			{Name: ServiceMemory, Topic: "smartflow.memory.outbox", GroupID: "smartflow-memory-outbox-consumer", TableName: "memory_outbox_messages"},
			{Name: ServiceActiveScheduler, Topic: "smartflow.active-scheduler.outbox", GroupID: "smartflow-active-scheduler-outbox-consumer", TableName: "active_scheduler_outbox_messages"},
			{Name: ServiceNotification, Topic: "smartflow.notification.outbox", GroupID: "smartflow-notification-outbox-consumer", TableName: "notification_outbox_messages"},
		}
		entries := make(map[string]ServiceConfig, len(defaults))
		for _, entry := range defaults {
			entries[entry.Name] = overrideServiceConfig(entry)
		}
		serviceCatalogCache.entries = entries
		serviceCatalogCache.loaded = true
	}
	return cloneServiceConfigs(serviceCatalogCache.entries)
}
// ResolveServiceConfig looks up one service's outbox catalog entry.
func ResolveServiceConfig(serviceName string) (ServiceConfig, bool) {
	name := strings.TrimSpace(serviceName)
	if name == "" {
		return ServiceConfig{}, false
	}
	cfg, ok := LoadServiceConfigs()[name]
	return cfg, ok
}
// ResolveEventServiceConfig resolves the event's owning service first, then
// returns that service's outbox catalog entry.
func ResolveEventServiceConfig(eventType string) (ServiceConfig, bool) {
	owner, known := ResolveEventService(eventType)
	if !known {
		return ServiceConfig{}, false
	}
	return ResolveServiceConfig(owner)
}
// ServiceTables returns every outbox table name in the catalog, sorted for
// deterministic iteration.
func ServiceTables() []string {
	catalog := LoadServiceConfigs()
	tables := make([]string, 0, len(catalog))
	for _, cfg := range catalog {
		tables = append(tables, cfg.TableName)
	}
	sort.Strings(tables)
	return tables
}
// ServiceNames returns every service name in the catalog, sorted for
// deterministic iteration.
func ServiceNames() []string {
	catalog := LoadServiceConfigs()
	names := make([]string, 0, len(catalog))
	for name := range catalog {
		names = append(names, name)
	}
	sort.Strings(names)
	return names
}
// overrideServiceConfig applies per-service configuration overrides
// (outbox.services.<name>.{topic,groupID,table}) on top of the default
// entry; blank or missing values keep the defaults.
func overrideServiceConfig(entry ServiceConfig) ServiceConfig {
	name := strings.TrimSpace(entry.Name)
	if name == "" {
		return entry
	}
	if topic := strings.TrimSpace(viper.GetString(fmt.Sprintf("outbox.services.%s.topic", name))); topic != "" {
		entry.Topic = topic
	}
	if groupID := strings.TrimSpace(viper.GetString(fmt.Sprintf("outbox.services.%s.groupID", name))); groupID != "" {
		entry.GroupID = groupID
	}
	if tableName := strings.TrimSpace(viper.GetString(fmt.Sprintf("outbox.services.%s.table", name))); tableName != "" {
		entry.TableName = tableName
	}
	return entry
}
// cloneServiceConfigs returns a shallow copy so callers cannot mutate the
// cached catalog.
func cloneServiceConfigs(entries map[string]ServiceConfig) map[string]ServiceConfig {
	out := make(map[string]ServiceConfig, len(entries))
	for name, cfg := range entries {
		out[name] = cfg
	}
	return out
}

View File

@@ -0,0 +1,145 @@
package outbox
import (
"strings"
)
// Canonical service names recognized by the route directory.
const (
	ServiceNameAgent           = "agent"
	ServiceNameTask            = "task"
	ServiceNameMemory          = "memory"
	ServiceNameActiveScheduler = "active-scheduler"
	ServiceNameNotification    = "notification"
)

// ServiceRoute describes the terminal routing info of one outbox service.
//
// Responsibilities:
//  1. only carries the table/topic/group directory of a service-level outbox;
//  2. holds no handlers, transactions or Kafka connection objects;
//  3. lets upper layers resolve event -> service first, then service ->
//     physical resources.
type ServiceRoute struct {
	ServiceName string
	TableName   string
	Topic       string
	GroupID     string
}

// builtinServiceRoutes is the built-in default directory for known services;
// explicitly registered routes take precedence at runtime.
var builtinServiceRoutes = map[string]ServiceRoute{
	ServiceNameAgent: {
		ServiceName: ServiceNameAgent,
		TableName:   "agent_outbox_messages",
		Topic:       "smartflow.agent.outbox",
		GroupID:     "smartflow-agent-outbox-consumer",
	},
	ServiceNameTask: {
		ServiceName: ServiceNameTask,
		TableName:   "task_outbox_messages",
		Topic:       "smartflow.task.outbox",
		GroupID:     "smartflow-task-outbox-consumer",
	},
	ServiceNameMemory: {
		ServiceName: ServiceNameMemory,
		TableName:   "memory_outbox_messages",
		Topic:       "smartflow.memory.outbox",
		GroupID:     "smartflow-memory-outbox-consumer",
	},
	ServiceNameActiveScheduler: {
		ServiceName: ServiceNameActiveScheduler,
		TableName:   "active_scheduler_outbox_messages",
		Topic:       "smartflow.active-scheduler.outbox",
		GroupID:     "smartflow-active-scheduler-outbox-consumer",
	},
	ServiceNameNotification: {
		ServiceName: ServiceNameNotification,
		TableName:   "notification_outbox_messages",
		Topic:       "smartflow.notification.outbox",
		GroupID:     "smartflow-notification-outbox-consumer",
	},
}
// DefaultServiceRoutes returns the default route list of currently known
// services.
//
// Notes:
//  1. these are directory seed values, used for auto table creation and as a
//     first-registration fallback;
//  2. explicitly registered routes take precedence at runtime;
//  3. the returned slice is a copy, safe to iterate without polluting the
//     global directory.
func DefaultServiceRoutes() []ServiceRoute {
	names := []string{
		ServiceNameAgent,
		ServiceNameTask,
		ServiceNameMemory,
		ServiceNameActiveScheduler,
		ServiceNameNotification,
	}
	routes := make([]ServiceRoute, 0, len(names))
	for _, name := range names {
		routes = append(routes, builtinServiceRoutes[name])
	}
	return routes
}
// DefaultServiceRoute derives the terminal route for a service name.
//
// Rules:
//  1. known services return their fixed mapping;
//  2. unknown services get table/topic/group generated by naming convention,
//     avoiding any fallback to a shared topic;
//  3. an empty name falls back to the agent compatibility path, preserving
//     the historical monolith mode.
func DefaultServiceRoute(serviceName string) ServiceRoute {
	name := normalizeServiceName(serviceName)
	if name == "" {
		name = ServiceNameAgent
	}
	if known, ok := builtinServiceRoutes[name]; ok {
		return known
	}
	table := strings.ReplaceAll(name, "-", "_")
	if table == "" {
		table = ServiceNameAgent
	}
	return ServiceRoute{
		ServiceName: name,
		TableName:   table + "_outbox_messages",
		Topic:       "smartflow." + name + ".outbox",
		GroupID:     "smartflow-" + name + "-outbox-consumer",
	}
}
// normalizeServiceName trims surrounding whitespace; an all-blank name
// normalizes to "".
func normalizeServiceName(serviceName string) string {
	return strings.TrimSpace(serviceName)
}
// normalizeServiceRoute fills blank fields with executable defaults.
//
// Notes:
//  1. only trims strings and fills defaults, no registration side effects;
//  2. an empty service name keeps the historical agent compatibility path and
//     is not promoted to a named service;
//  3. this is the last safety net of the route directory, so upper layers
//     never receive a half-built route.
func normalizeServiceRoute(route ServiceRoute) ServiceRoute {
	route.ServiceName = normalizeServiceName(route.ServiceName)
	route.TableName = strings.TrimSpace(route.TableName)
	route.Topic = strings.TrimSpace(route.Topic)
	route.GroupID = strings.TrimSpace(route.GroupID)
	var fallback ServiceRoute
	if route.ServiceName == "" {
		fallback = builtinServiceRoutes[ServiceNameAgent]
	} else {
		fallback = DefaultServiceRoute(route.ServiceName)
	}
	if route.TableName == "" {
		route.TableName = fallback.TableName
	}
	if route.Topic == "" {
		route.Topic = fallback.Topic
	}
	if route.GroupID == "" {
		route.GroupID = fallback.GroupID
	}
	return route
}

View File

@@ -0,0 +1,64 @@
package ratelimit
import (
"context"
"time"
"github.com/go-redis/redis/v8"
)
// tokenBucketScript runs a token-bucket admission check atomically in Redis.
// KEYS[1] is the bucket key; ARGV is capacity, refill rate (tokens/second),
// current unix seconds, and tokens requested. It returns 1 when the request
// is allowed and 0 otherwise. Bucket state lives in a hash with fields
// last_tokens/last_refreshed and a 3600s idle TTL. The embedded Lua comments
// are part of the script string sent to Redis and are intentionally left
// untouched.
var tokenBucketScript = redis.NewScript(`-- KEYS[1]: 限流标识 (如 rate_limit:user_123)
-- ARGV[1]: 令牌桶最大容量 (Capacity)
-- ARGV[2]: 令牌填充速率 (Tokens per second)
-- ARGV[3]: 当前时间戳 (Current Unix timestamp in seconds)
-- ARGV[4]: 请求需要的令牌数 (通常为 1)
local bucket_info = redis.call("HMGET", KEYS[1], "last_tokens", "last_refreshed")
local last_tokens = tonumber(bucket_info[1])
local last_refreshed = tonumber(bucket_info[2])
local capacity = tonumber(ARGV[1])
local rate = tonumber(ARGV[2])
local now = tonumber(ARGV[3])
local requested = tonumber(ARGV[4])
-- 如果是首次访问,初始化桶
if last_tokens == nil then
last_tokens = capacity
last_refreshed = now
end
-- 💡 核心逻辑:计算这段时间新产生的令牌
local delta = math.max(0, now - last_refreshed)
local new_tokens = math.min(capacity, last_tokens + (delta * rate))
local allowed = false
if new_tokens >= requested then
new_tokens = new_tokens - requested
allowed = true
end
-- 更新 Redis 状态
redis.call("HMSET", KEYS[1], "last_tokens", new_tokens, "last_refreshed", now)
-- 设置过期时间(比如 1 小时没人访问就删掉,省内存)
redis.call("EXPIRE", KEYS[1], 3600)
return allowed and 1 or 0`)
// RateLimiter is a Redis-backed token-bucket rate limiter; state lives in
// Redis, so it is shared by every process using the same client target.
type RateLimiter struct {
	client *redis.Client
}

// NewRateLimiter wraps an existing Redis client; it does not own the
// client's lifecycle.
func NewRateLimiter(client *redis.Client) *RateLimiter {
	return &RateLimiter{client: client}
}
// Allow consumes one token from the bucket identified by key and reports
// whether the request is admitted. capacity is the bucket size; rate is the
// refill speed in tokens per second.
func (r *RateLimiter) Allow(ctx context.Context, key string, capacity, rate int) (bool, error) {
	// Script arguments: key, capacity, rate, current unix time, tokens requested.
	verdict, err := tokenBucketScript.Run(ctx, r.client, []string{key},
		capacity, rate, time.Now().Unix(), 1).Int()
	if err != nil {
		return false, err
	}
	return verdict == 1, nil
}

View File

@@ -0,0 +1,26 @@
package redisinfra
import (
"context"
"github.com/go-redis/redis/v8"
"github.com/spf13/viper"
)
// OpenRedisFromConfig creates a Redis client from the unified configuration
// and verifies connectivity.
//
// Responsibilities:
//  1. only initializes the generic Redis connection, no business key
//     semantics;
//  2. only performs a Ping check; on failure it returns the error and lets
//     the caller decide whether to fail fast;
//  3. creates, warms or cleans no cache or distributed-lock data.
func OpenRedisFromConfig() (*redis.Client, error) {
	addr := viper.GetString("redis.host") + ":" + viper.GetString("redis.port")
	client := redis.NewClient(&redis.Options{
		Addr:     addr,
		Password: viper.GetString("redis.password"),
		DB:       0,
	})
	if _, err := client.Ping(context.Background()).Result(); err != nil {
		return nil, err
	}
	return client, nil
}