Files
smartmate/backend/dao/outbox.go
LoveLosita 1ed558b488 Version: 0.4.8.dev.260308
feat: 🏗️ 实现 Agent 消息可靠异步持久化(Outbox + Kafka)

* 新增 Outbox 数据模型与消息载荷定义,位于 `backend/model/outbox.go`
* 新增 Outbox DAO,支持创建、扫描、发布标记、失败重试与消费落库事务,位于 `backend/dao/outbox.go`
* 新增 Kafka 基础封装,包含配置、生产者、消费者与消息包装,位于 `backend/kafka` 文件夹

  * `config.go`:Kafka 配置文件
  * `producer.go`:Kafka 生产者
  * `consumer.go`:Kafka 消费者
  * `envelope.go`:消息封装处理
* 新增异步管道服务,处理扫描投递与消费落库,位于 `backend/service/agent_async_pipeline.go`
* 接入 Agent 聊天链路的可靠持久化,替换原有 goroutine 直接写库逻辑,位于 `backend/service/agent.go`
* 启动流程接入管道初始化与启动,位于 `backend/cmd/start.go`
* 增加 Kafka 配置项,更新 `backend/config.yaml` 与 `backend/config.example.yaml`
* 引入 Kafka 依赖:`github.com/segmentio/kafka-go`(见 `backend/go.mod`, `backend/go.sum`)

fix: 🐛 修复首启偶发 user 消息重复落库问题

* 解决因 Outbox 状态并发回写竞态,导致 `consumed` 被晚到的 `published` 覆盖的问题
* 在 `MarkPublished` 中增加条件,避免覆盖已标记为 `consumed` 或 `dead` 的消息,修复位置:`backend/dao/outbox.go`

perf:  更新 Docker Compose 配置与 Kafka 相关服务

* 更新 `docker-compose.yml` 文件,新增 Kafka 配置与服务

fix: 🧹 优化缓存删除逻辑

* 在 `cache deleter` 中忽略了 `model.AgentOutboxMessage`、`model.ChatHistory` 与 `model.AgentChat` 这三个结构体
* 防止这些结构体对应的表单删除缓存时,导致控制台消息爆炸
2026-03-08 12:53:54 +08:00

191 lines
5.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package dao
import (
"context"
"encoding/json"
"errors"
"time"
"github.com/LoveLosita/smartflow/backend/model"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
type OutboxDAO struct {
db *gorm.DB
}
func NewOutboxDAO(db *gorm.DB) *OutboxDAO {
return &OutboxDAO{db: db}
}
func (d *OutboxDAO) CreateChatHistoryMessage(ctx context.Context, topic, messageKey string, payload model.ChatHistoryPersistPayload, maxRetry int) (int64, error) {
if maxRetry <= 0 {
maxRetry = 20
}
raw, err := json.Marshal(payload)
if err != nil {
return 0, err
}
now := time.Now()
msg := model.AgentOutboxMessage{
BizType: model.OutboxBizTypeChatHistoryPersist,
Topic: topic,
MessageKey: messageKey,
Payload: string(raw),
Status: model.OutboxStatusPending,
RetryCount: 0,
MaxRetry: maxRetry,
NextRetryAt: &now,
}
if err = d.db.WithContext(ctx).Create(&msg).Error; err != nil {
return 0, err
}
return msg.ID, nil
}
func (d *OutboxDAO) GetByID(ctx context.Context, id int64) (*model.AgentOutboxMessage, error) {
var msg model.AgentOutboxMessage
if err := d.db.WithContext(ctx).Where("id = ?", id).First(&msg).Error; err != nil {
return nil, err
}
return &msg, nil
}
func (d *OutboxDAO) ListDueMessages(ctx context.Context, limit int) ([]model.AgentOutboxMessage, error) {
if limit <= 0 {
limit = 100
}
now := time.Now()
var messages []model.AgentOutboxMessage
err := d.db.WithContext(ctx).
Where("status = ? AND next_retry_at IS NOT NULL AND next_retry_at <= ?", model.OutboxStatusPending, now).
Order("next_retry_at ASC, id ASC").
Limit(limit).
Find(&messages).Error
if err != nil {
return nil, err
}
return messages, nil
}
// MarkPublished 仅在消息未进入最终态时更新为 published避免覆盖 consumed/dead。
func (d *OutboxDAO) MarkPublished(ctx context.Context, id int64) error {
now := time.Now()
updates := map[string]interface{}{
"status": model.OutboxStatusPublished,
"published_at": &now,
"last_error": nil,
"next_retry_at": nil,
}
result := d.db.WithContext(ctx).
Model(&model.AgentOutboxMessage{}).
Where("id = ? AND status NOT IN (?, ?)", id, model.OutboxStatusConsumed, model.OutboxStatusDead).
Updates(updates)
return result.Error
}
func (d *OutboxDAO) MarkDead(ctx context.Context, id int64, reason string) error {
now := time.Now()
lastErr := truncateError(reason)
updates := map[string]interface{}{
"status": model.OutboxStatusDead,
"last_error": &lastErr,
"next_retry_at": nil,
"updated_at": now,
}
return d.db.WithContext(ctx).Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
}
func (d *OutboxDAO) MarkFailedForRetry(ctx context.Context, id int64, reason string) error {
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var msg model.AgentOutboxMessage
err := tx.Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", id).First(&msg).Error
if err != nil {
return err
}
if msg.Status == model.OutboxStatusConsumed || msg.Status == model.OutboxStatusDead {
return nil
}
nextRetryCount := msg.RetryCount + 1
now := time.Now()
status := model.OutboxStatusPending
var nextRetryAt *time.Time
if nextRetryCount >= msg.MaxRetry {
status = model.OutboxStatusDead
nextRetryAt = nil
} else {
t := now.Add(calcRetryBackoff(nextRetryCount))
nextRetryAt = &t
}
lastErr := truncateError(reason)
updates := map[string]interface{}{
"status": status,
"retry_count": nextRetryCount,
"last_error": &lastErr,
"next_retry_at": nextRetryAt,
"updated_at": now,
}
return tx.Model(&model.AgentOutboxMessage{}).Where("id = ?", id).Updates(updates).Error
})
}
func (d *OutboxDAO) PersistChatHistoryAndMarkConsumed(ctx context.Context, outboxID int64, payload model.ChatHistoryPersistPayload) error {
return d.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var outboxMsg model.AgentOutboxMessage
err := tx.Clauses(clause.Locking{Strength: "UPDATE"}).Where("id = ?", outboxID).First(&outboxMsg).Error
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil
}
return err
}
if outboxMsg.Status == model.OutboxStatusConsumed {
return nil
}
if outboxMsg.Status == model.OutboxStatusDead {
return nil
}
chatMsg := payload.Message
chatRole := payload.Role
history := model.ChatHistory{
UserID: payload.UserID,
ChatID: payload.ConversationID,
MessageContent: &chatMsg,
Role: &chatRole,
}
if err = tx.Create(&history).Error; err != nil {
return err
}
now := time.Now()
updates := map[string]interface{}{
"status": model.OutboxStatusConsumed,
"consumed_at": &now,
"last_error": nil,
"next_retry_at": nil,
"updated_at": now,
}
return tx.Model(&model.AgentOutboxMessage{}).Where("id = ?", outboxID).Updates(updates).Error
})
}
func calcRetryBackoff(retryCount int) time.Duration {
if retryCount <= 0 {
return time.Second
}
if retryCount > 6 {
retryCount = 6
}
return time.Second * time.Duration(1<<(retryCount-1))
}
func truncateError(reason string) string {
if len(reason) <= 2000 {
return reason
}
return reason[:2000]
}