Files
smartmate/backend/shared/infra/outbox/repository.go
Losita 3b6fca44a6 Version: 0.9.77.dev.260505
后端:
1.阶段 6 CP4/CP5 目录收口与共享边界纯化
- 将 backend 根目录收口为 services、client、gateway、cmd、shared 五个一级目录
- 收拢 bootstrap、inits、infra/kafka、infra/outbox、conv、respond、pkg、middleware,移除根目录旧实现与空目录
- 将 utils 下沉到 services/userauth/internal/auth,将 logic 下沉到 services/schedule/core/planning
- 将迁移期 runtime 桥接实现统一收拢到 services/runtime/{conv,dao,eventsvc,model},删除 shared/legacy 与未再被 import 的旧 service 实现
- 将 gateway/shared/respond 收口为 HTTP/Gin 错误写回适配,shared/respond 仅保留共享错误语义与状态映射
- 将 HTTP IdempotencyMiddleware 与 RateLimitMiddleware 收口到 gateway/middleware
- 将 GormCachePlugin 下沉到 shared/infra/gormcache,将共享 RateLimiter 下沉到 shared/infra/ratelimit,将 agent token budget 下沉到 services/agent/shared
- 删除 InitEino 兼容壳,收缩 cmd/internal/coreinit 仅保留旧组合壳残留域初始化语义
- 更新微服务迁移计划与桌面 checklist,补齐 CP4/CP5 当前切流点、目录终态与验证结果
- 完成 go test ./...、git diff --check 与最终真实 smoke;health、register/login、task/create+get、schedule/today、task-class/list、memory/items、agent chat/meta/timeline/context-stats 全部 200,SSE 合并结果为 CP5_OK 且 [DONE] 只有 1 个
2026-05-05 23:25:07 +08:00

328 lines
10 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package outbox
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"github.com/LoveLosita/smartflow/backend/services/runtime/model"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
// Repository 是 outbox 状态仓储。
// 职责边界:
// 1. 只负责 outbox 状态流转与通用事务编排;
// 2. 不负责聊天、任务、通知等具体业务语义;
// 3. 同一仓储实例只面向一个服务级 outbox 目录。
type Repository struct {
db *gorm.DB
route ServiceRoute
}
func NewRepository(db *gorm.DB) *Repository {
return &Repository{db: db}
}
// WithTx 使用外部事务句柄构造同事务仓储实例。
func (d *Repository) WithTx(tx *gorm.DB) *Repository {
if d == nil {
return &Repository{db: tx}
}
return &Repository{db: tx, route: d.route}
}
// WithRoute 使用指定服务路由构造仓储实例。
// 1. 只切换 outbox 物理目录,不改变业务事务语义;
// 2. 适合多个 service engine 共用同一 DB 连接时分别绑定各自 route
// 3. 保留 route 的 table/topic/group避免回落到共享 topic。
func (d *Repository) WithRoute(route ServiceRoute) *Repository {
route = normalizeServiceRoute(route)
if d == nil {
return &Repository{route: route}
}
return &Repository{db: d.db, route: route}
}
// CreateMessage 将事件写入 outbox。
// 1. 只接收 eventType、messageKey、payload 和 maxRetry不再允许业务侧显式传 topic
// 2. table/topic/group 统一由 eventType -> service -> route 解析,保证服务级路由是唯一入口;
// 3. eventType 未注册时直接返回 error避免消息静默落到默认表或默认 topic。
func (d *Repository) CreateMessage(ctx context.Context, eventType string, messageKey string, payload any, maxRetry int) (int64, error) {
if d == nil || d.db == nil {
return 0, errors.New("outbox repository is nil")
}
eventType = strings.TrimSpace(eventType)
if eventType == "" {
return 0, errors.New("eventType is empty")
}
messageKey = strings.TrimSpace(messageKey)
if maxRetry <= 0 {
maxRetry = 20
}
route, err := d.resolvePublishRoute(eventType)
if err != nil {
return 0, err
}
raw, err := json.Marshal(payload)
if err != nil {
return 0, err
}
now := time.Now()
msg := model.AgentOutboxMessage{
EventType: eventType,
ServiceName: route.ServiceName,
Topic: route.Topic,
MessageKey: messageKey,
Payload: string(raw),
Status: model.OutboxStatusPending,
RetryCount: 0,
MaxRetry: maxRetry,
NextRetryAt: &now,
}
if err = d.db.WithContext(ctx).Table(route.TableName).Create(&msg).Error; err != nil {
return 0, err
}
return msg.ID, nil
}
// GetByID 从当前仓储绑定的 outbox 表读取指定消息。
func (d *Repository) GetByID(ctx context.Context, id int64) (*model.AgentOutboxMessage, error) {
var msg model.AgentOutboxMessage
if err := d.scopedDB(ctx).Where("id = ?", id).First(&msg).Error; err != nil {
return nil, err
}
return &msg, nil
}
// ListDueMessages 拉取到期可投递消息。
// 1. serviceName 为空时保持当前仓储目录内的扫描语义;
// 2. serviceName 非空时只扫描对应服务的消息;
// 3. 这样既能支持单服务 relay也能支持后续多服务 relay。
func (d *Repository) ListDueMessages(ctx context.Context, serviceName string, limit int) ([]model.AgentOutboxMessage, error) {
if limit <= 0 {
limit = 100
}
now := time.Now()
var messages []model.AgentOutboxMessage
query := d.scopedDB(ctx).
Where("status = ? AND next_retry_at IS NOT NULL AND next_retry_at <= ?", model.OutboxStatusPending, now).
Order("next_retry_at ASC, id ASC").
Limit(limit)
serviceName = strings.TrimSpace(serviceName)
if serviceName != "" {
query = query.Where("service_name = ?", serviceName)
}
if err := query.Find(&messages).Error; err != nil {
return nil, err
}
return messages, nil
}
// MarkPublished 标记消息已经成功投递到 Kafka。
func (d *Repository) MarkPublished(ctx context.Context, id int64) error {
now := time.Now()
updates := map[string]interface{}{
"status": model.OutboxStatusPublished,
"published_at": &now,
"last_error": nil,
"next_retry_at": nil,
}
result := d.scopedDB(ctx).
Model(&model.AgentOutboxMessage{}).
Where("id = ? AND status NOT IN (?, ?)", id, model.OutboxStatusConsumed, model.OutboxStatusDead).
Updates(updates)
return result.Error
}
// MarkConsumed 标记消息已经在处理侧成功完成。
func (d *Repository) MarkConsumed(ctx context.Context, id int64) error {
now := time.Now()
updates := map[string]interface{}{
"status": model.OutboxStatusConsumed,
"consumed_at": &now,
"last_error": nil,
"next_retry_at": nil,
"updated_at": now,
}
return d.scopedDB(ctx).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
}
// MarkDead 将消息标记为死信。
func (d *Repository) MarkDead(ctx context.Context, id int64, reason string) error {
now := time.Now()
lastErr := truncateError(reason)
updates := map[string]interface{}{
"status": model.OutboxStatusDead,
"last_error": &lastErr,
"next_retry_at": nil,
"updated_at": now,
}
return d.scopedDB(ctx).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
}
// MarkFailedForRetry 记录一次可重试失败并推进重试窗口。
// 1. 行级锁读取当前消息状态;
// 2. consumed/dead 状态直接短路;
// 3. retry_count + 1并根据最大次数决定继续 pending 还是转 dead
// 4. 写回 last_error 与 next_retry_at交给下一轮扫描继续投递。
func (d *Repository) MarkFailedForRetry(ctx context.Context, id int64, reason string) error {
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var msg model.AgentOutboxMessage
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", id).First(&msg).Error
if err != nil {
return err
}
if msg.Status == model.OutboxStatusConsumed || msg.Status == model.OutboxStatusDead {
return nil
}
nextRetryCount := msg.RetryCount + 1
now := time.Now()
status := model.OutboxStatusPending
var nextRetryAt *time.Time
if nextRetryCount >= msg.MaxRetry {
status = model.OutboxStatusDead
nextRetryAt = nil
} else {
t := now.Add(calcRetryBackoff(nextRetryCount))
nextRetryAt = &t
}
lastErr := truncateError(reason)
updates := map[string]interface{}{
"status": status,
"retry_count": nextRetryCount,
"last_error": &lastErr,
"next_retry_at": nextRetryAt,
"updated_at": now,
}
return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
})
}
// ConsumeAndMarkConsumed 是通用“消费成功事务入口”。
// 1. 在事务内锁定 outbox 记录;
// 2. consumed/dead 状态直接返回;
// 3. 执行业务回调 fn(tx),让业务落库和 outbox 状态共享同一事务;
// 4. 业务成功后统一标记 consumed。
func (d *Repository) ConsumeAndMarkConsumed(ctx context.Context, outboxID int64, fn func(tx *gorm.DB) error) error {
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var outboxMsg model.AgentOutboxMessage
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil
}
return err
}
if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
return nil
}
if fn != nil {
if err = fn(tx); err != nil {
return err
}
}
now := time.Now()
updates := map[string]interface{}{
"status": model.OutboxStatusConsumed,
"consumed_at": &now,
"last_error": nil,
"next_retry_at": nil,
"updated_at": now,
}
return tx.Table(d.tableName()).Model(&model.AgentOutboxMessage{}).Where("id = ?", outboxID).Updates(updates).Error
})
}
// ConsumeInTx 执行 outbox 业务事务,但不负责标记 consumed。
// 1. 先锁定当前 outbox 记录,避免并发消费者同时处理同一条消息;
// 2. 只要业务函数返回错误,就保持消息为 pending交给上层 retry
// 3. 业务成功后再由上层单独标记 consumed这样可以把远端 RPC 移到事务外。
func (d *Repository) ConsumeInTx(ctx context.Context, outboxID int64, fn func(tx *gorm.DB) error) error {
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var outboxMsg model.AgentOutboxMessage
err := tx.Table(d.tableName()).Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil
}
return err
}
if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
return nil
}
if fn != nil {
if err = fn(tx); err != nil {
return err
}
}
return nil
})
}
func (d *Repository) scopedDB(ctx context.Context) *gorm.DB {
return d.db.WithContext(ctx).Table(d.tableName())
}
func (d *Repository) tableName() string {
if d == nil {
return DefaultServiceRoute(ServiceNameAgent).TableName
}
route := normalizeServiceRoute(d.route)
if route.TableName != "" {
return route.TableName
}
return DefaultServiceRoute(ServiceNameAgent).TableName
}
func (d *Repository) resolvePublishRoute(eventType string) (ServiceRoute, error) {
if d == nil {
return ServiceRoute{}, errors.New("outbox repository is nil")
}
eventType = strings.TrimSpace(eventType)
if eventType == "" {
return ServiceRoute{}, errors.New("eventType is empty")
}
route, ok := ResolveEventRoute(eventType)
if !ok {
return ServiceRoute{}, fmt.Errorf("outbox route not registered: eventType=%s", eventType)
}
if d.route.ServiceName != "" && route.ServiceName != d.route.ServiceName {
return ServiceRoute{}, fmt.Errorf("eventType %s belongs to service %s, current repo service %s", eventType, route.ServiceName, d.route.ServiceName)
}
return normalizeServiceRoute(route), nil
}
func calcRetryBackoff(retryCount int) time.Duration {
if retryCount <= 0 {
return time.Second
}
if retryCount > 10 {
retryCount = 10
}
return time.Duration(retryCount*retryCount) * time.Second
}
func truncateError(reason string) string {
if len(reason) <= 2000 {
return reason
}
return reason[:2000]
}