Version: 0.4.8.dev.260308

feat: 🏗️ 实现 Agent 消息可靠异步持久化(Outbox + Kafka)

* 新增 Outbox 数据模型与消息载荷定义,位于 `backend/model/outbox.go`
* 新增 Outbox DAO,支持创建、扫描、发布标记、失败重试与消费落库事务,位于 `backend/dao/outbox.go`
* 新增 Kafka 基础封装,包含配置、生产者、消费者与消息包装,位于 `backend/kafka` 文件夹

  * `config.go`:Kafka 配置文件
  * `producer.go`:Kafka 生产者
  * `consumer.go`:Kafka 消费者
  * `envelope.go`:消息封装处理
* 新增异步管道服务,处理扫描投递与消费落库,位于 `backend/service/agent_async_pipeline.go`
* 接入 Agent 聊天链路的可靠持久化,替换原有 goroutine 直接写库逻辑,位于 `backend/service/agent.go`
* 启动流程接入管道初始化与启动,位于 `backend/cmd/start.go`
* 增加 Kafka 配置项,更新 `backend/config.yaml` 与 `backend/config.example.yaml`
* 引入 Kafka 依赖:`github.com/segmentio/kafka-go`(见 `backend/go.mod`, `backend/go.sum`)

fix: 🐛 修复首启偶发 user 消息重复落库问题

* 解决因 Outbox 状态并发回写竞态,导致 `consumed` 被晚到的 `published` 覆盖的问题
* 在 `MarkPublished` 中增加条件,避免覆盖已标记为 `consumed` 或 `dead` 的消息,修复位置:`backend/dao/outbox.go`

perf:  更新 Docker Compose 配置与 Kafka 相关服务

* 更新 `docker-compose.yml` 文件,新增 Kafka 配置与服务

fix: 🧹 优化缓存删除逻辑

* 在 `cache deleter` 中忽略了 `model.AgentOutboxMessage`、`model.ChatHistory` 与 `model.AgentChat` 这三个结构体
* 防止这些结构体对应的表单删除缓存时,导致控制台消息爆炸
This commit is contained in:
LoveLosita
2026-03-08 12:53:54 +08:00
parent 4906f814fd
commit 1ed558b488
17 changed files with 800 additions and 66 deletions

View File

@@ -9,6 +9,7 @@ import (
"github.com/LoveLosita/smartflow/backend/conv"
"github.com/LoveLosita/smartflow/backend/dao"
"github.com/LoveLosita/smartflow/backend/inits"
"github.com/LoveLosita/smartflow/backend/model"
"github.com/LoveLosita/smartflow/backend/pkg"
"github.com/cloudwego/eino-ext/components/model/ark"
"github.com/cloudwego/eino/schema"
@@ -16,16 +17,18 @@ import (
)
type AgentService struct {
AIHub *inits.AIHub
repo *dao.AgentDAO
agentCache *dao.AgentCache
AIHub *inits.AIHub
repo *dao.AgentDAO
agentCache *dao.AgentCache
asyncPipeline *AgentAsyncPipeline
}
func NewAgentService(aiHub *inits.AIHub, repo *dao.AgentDAO, agentRedis *dao.AgentCache) *AgentService {
func NewAgentService(aiHub *inits.AIHub, repo *dao.AgentDAO, agentRedis *dao.AgentCache, asyncPipeline *AgentAsyncPipeline) *AgentService {
return &AgentService{
AIHub: aiHub,
repo: repo,
agentCache: agentRedis,
AIHub: aiHub,
repo: repo,
agentCache: agentRedis,
asyncPipeline: asyncPipeline,
}
}
@@ -38,19 +41,34 @@ func normalizeConversationID(chatID string) string {
}
func (s *AgentService) pickChatModel(requestModel string) (*ark.ChatModel, string) {
model := strings.TrimSpace(requestModel)
if strings.EqualFold(model, "strategist") {
modelName := strings.TrimSpace(requestModel)
if strings.EqualFold(modelName, "strategist") {
return s.AIHub.Strategist, "strategist"
}
return s.AIHub.Worker, "worker"
}
func (s *AgentService) saveChatHistoryReliable(ctx context.Context, payload model.ChatHistoryPersistPayload) error {
if s.asyncPipeline == nil {
return s.repo.SaveChatHistory(ctx, payload.UserID, payload.ConversationID, payload.Role, payload.Message)
}
return s.asyncPipeline.EnqueueChatHistoryPersist(ctx, payload)
}
func pushErrNonBlocking(errChan chan error, err error) {
select {
case errChan <- err:
default:
log.Printf("error channel is full, drop error: %v", err)
}
}
func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThinking bool, modelName string, userID int, chatID string) (<-chan string, <-chan error) {
// 1) 准备输出通道
outChan := make(chan string, 5)
errChan := make(chan error, 1)
// 2) 规范会话并选择模型
// 2) 规范会话并选择模型
chatID = normalizeConversationID(chatID)
selectedModel, resolvedModelName := s.pickChatModel(modelName)
@@ -63,9 +81,9 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
return outChan, errChan
}
if !result {
innerResult, err := s.repo.IfChatExists(ctx, userID, chatID)
if err != nil {
errChan <- err
innerResult, ifErr := s.repo.IfChatExists(ctx, userID, chatID)
if ifErr != nil {
errChan <- ifErr
close(outChan)
close(errChan)
return outChan, errChan
@@ -95,9 +113,9 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
cacheMiss := false
if chatHistory == nil {
cacheMiss = true
histories, err := s.repo.GetUserChatHistories(ctx, userID, pkg.HistoryFetchLimitByModel(resolvedModelName), chatID)
if err != nil {
errChan <- err
histories, hisErr := s.repo.GetUserChatHistories(ctx, userID, pkg.HistoryFetchLimitByModel(resolvedModelName), chatID)
if hisErr != nil {
errChan <- hisErr
close(outChan)
close(errChan)
return outChan, errChan
@@ -112,10 +130,10 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
// 6) 根据最新裁剪结果动态调整 Redis 会话窗口
targetWindow := pkg.CalcSessionWindowSize(len(chatHistory))
if err := s.agentCache.SetSessionWindowSize(ctx, chatID, targetWindow); err != nil {
if err = s.agentCache.SetSessionWindowSize(ctx, chatID, targetWindow); err != nil {
log.Printf("failed to set history window for %s: %v", chatID, err)
}
if err := s.agentCache.EnforceHistoryWindow(ctx, chatID); err != nil {
if err = s.agentCache.EnforceHistoryWindow(ctx, chatID); err != nil {
log.Printf("failed to enforce history window for %s: %v", chatID, err)
}
@@ -126,7 +144,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
// 缓存未命中时,把“裁剪后的历史”回填进缓存
if cacheMiss {
if err := s.agentCache.BackfillHistory(ctx, chatID, chatHistory); err != nil {
if err = s.agentCache.BackfillHistory(ctx, chatID, chatHistory); err != nil {
errChan <- err
close(outChan)
close(errChan)
@@ -134,37 +152,44 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
}
}
// 7) 异步落用户消息(先写缓存再写库)
go func() {
bg := context.Background()
_ = s.agentCache.PushMessage(bg, chatID, &schema.Message{
Role: schema.User,
Content: userMessage,
})
_ = s.repo.SaveChatHistory(bg, userID, chatID, "user", userMessage)
}()
// 7) 先同步写 Redis再把持久化请求交给 outbox + Kafka
if err = s.agentCache.PushMessage(ctx, chatID, &schema.Message{Role: schema.User, Content: userMessage}); err != nil {
log.Printf("failed to push user message into redis history: %v", err)
}
if err = s.saveChatHistoryReliable(ctx, model.ChatHistoryPersistPayload{
UserID: userID,
ConversationID: chatID,
Role: "user",
Message: userMessage,
}); err != nil {
errChan <- err
close(outChan)
close(errChan)
return outChan, errChan
}
// 8) 启动流式聊天
go func() {
defer close(outChan)
fullText, err := agent.StreamChat(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan)
if err != nil {
errChan <- err
fullText, streamErr := agent.StreamChat(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan)
if streamErr != nil {
pushErrNonBlocking(errChan, streamErr)
return
}
// 9) 异步落助手回复
go func() {
bg := context.Background()
_ = s.agentCache.PushMessage(bg, chatID, &schema.Message{
Role: schema.Assistant,
Content: fullText,
})
if saveErr := s.repo.SaveChatHistory(bg, userID, chatID, "assistant", fullText); saveErr != nil {
log.Printf("failed to save chat history to database: %v", saveErr)
}
}()
// 9) 回答完成后,同步写 Redis并把数据库落库交给 outbox + Kafka
if cacheErr := s.agentCache.PushMessage(context.Background(), chatID, &schema.Message{Role: schema.Assistant, Content: fullText}); cacheErr != nil {
log.Printf("failed to push assistant message into redis history: %v", cacheErr)
}
if saveErr := s.saveChatHistoryReliable(context.Background(), model.ChatHistoryPersistPayload{
UserID: userID,
ConversationID: chatID,
Role: "assistant",
Message: fullText,
}); saveErr != nil {
pushErrNonBlocking(errChan, saveErr)
}
}()
return outChan, errChan

View File

@@ -0,0 +1,214 @@
package service
import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"time"
"github.com/LoveLosita/smartflow/backend/dao"
kafkabus "github.com/LoveLosita/smartflow/backend/kafka"
"github.com/LoveLosita/smartflow/backend/model"
segmentkafka "github.com/segmentio/kafka-go"
"gorm.io/gorm"
)
// AgentAsyncPipeline 负责 outbox 扫描、Kafka 投递与消费落库。
type AgentAsyncPipeline struct {
outboxRepo *dao.OutboxDAO
producer *kafkabus.Producer
consumer *kafkabus.Consumer
topic string
maxRetry int
scanEvery time.Duration
scanBatch int
}
func NewAgentAsyncPipeline(outboxRepo *dao.OutboxDAO, cfg kafkabus.Config) (*AgentAsyncPipeline, error) {
if !cfg.Enabled {
return nil, nil
}
producer, err := kafkabus.NewProducer(cfg)
if err != nil {
return nil, err
}
consumer, err := kafkabus.NewConsumer(cfg)
if err != nil {
_ = producer.Close()
return nil, err
}
return &AgentAsyncPipeline{
outboxRepo: outboxRepo,
producer: producer,
consumer: consumer,
topic: cfg.Topic,
maxRetry: cfg.MaxRetry,
scanEvery: cfg.RetryScanInterval,
scanBatch: cfg.RetryBatchSize,
}, nil
}
func (p *AgentAsyncPipeline) Start(ctx context.Context) {
if p == nil {
return
}
go p.startDispatchLoop(ctx)
go p.startConsumeLoop(ctx)
}
func (p *AgentAsyncPipeline) Close() {
if p == nil {
return
}
if err := p.producer.Close(); err != nil {
log.Printf("关闭 Kafka producer 失败: %v", err)
}
if err := p.consumer.Close(); err != nil {
log.Printf("关闭 Kafka consumer 失败: %v", err)
}
}
func (p *AgentAsyncPipeline) EnqueueChatHistoryPersist(ctx context.Context, payload model.ChatHistoryPersistPayload) error {
if p == nil {
return errors.New("Kafka 异步链路未初始化")
}
outboxID, err := p.outboxRepo.CreateChatHistoryMessage(ctx, p.topic, payload.ConversationID, payload, p.maxRetry)
if err != nil {
return err
}
if err = p.dispatchOne(context.Background(), outboxID); err != nil {
log.Printf("outbox 消息 %d 首次投递失败,等待扫描重试: %v", outboxID, err)
}
return nil
}
func (p *AgentAsyncPipeline) startDispatchLoop(ctx context.Context) {
ticker := time.NewTicker(p.scanEvery)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
pendingMessages, err := p.outboxRepo.ListDueMessages(ctx, p.scanBatch)
if err != nil {
log.Printf("扫描 outbox 失败: %v", err)
continue
}
for _, msg := range pendingMessages {
if err = p.dispatchOne(ctx, msg.ID); err != nil {
log.Printf("重试投递 outbox 消息失败(id=%d): %v", msg.ID, err)
}
}
}
}
}
func (p *AgentAsyncPipeline) dispatchOne(ctx context.Context, outboxID int64) error {
outboxMsg, err := p.outboxRepo.GetByID(ctx, outboxID)
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil
}
return err
}
if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
return nil
}
envelope := kafkabus.Envelope{
OutboxID: outboxMsg.ID,
BizType: outboxMsg.BizType,
Payload: json.RawMessage(outboxMsg.Payload),
}
raw, err := json.Marshal(envelope)
if err != nil {
markErr := p.outboxRepo.MarkDead(ctx, outboxMsg.ID, "序列化 outbox 包裹失败: "+err.Error())
if markErr != nil {
log.Printf("标记 outbox 死信失败(id=%d): %v", outboxMsg.ID, markErr)
}
return err
}
if err = p.producer.Enqueue(ctx, outboxMsg.Topic, outboxMsg.MessageKey, raw); err != nil {
_ = p.outboxRepo.MarkFailedForRetry(ctx, outboxMsg.ID, "投递 Kafka 失败: "+err.Error())
return err
}
if err = p.outboxRepo.MarkPublished(ctx, outboxMsg.ID); err != nil {
_ = p.outboxRepo.MarkFailedForRetry(ctx, outboxMsg.ID, "更新已投递状态失败: "+err.Error())
return err
}
return nil
}
func (p *AgentAsyncPipeline) startConsumeLoop(ctx context.Context) {
for {
select {
case <-ctx.Done():
return
default:
}
msg, err := p.consumer.Dequeue(ctx)
if err != nil {
if errors.Is(err, context.Canceled) {
return
}
log.Printf("Kafka 消费拉取失败: %v", err)
time.Sleep(300 * time.Millisecond)
continue
}
if err = p.handleMessage(ctx, msg); err != nil {
log.Printf("处理 Kafka 消息失败(topic=%s, partition=%d, offset=%d): %v", msg.Topic, msg.Partition, msg.Offset, err)
}
}
}
func (p *AgentAsyncPipeline) handleMessage(ctx context.Context, msg segmentkafka.Message) error {
var envelope kafkabus.Envelope
if err := json.Unmarshal(msg.Value, &envelope); err != nil {
_ = p.consumer.Commit(ctx, msg)
return fmt.Errorf("解析 Kafka 包裹失败: %w", err)
}
if envelope.OutboxID <= 0 {
_ = p.consumer.Commit(ctx, msg)
return errors.New("Kafka 包裹缺少 outbox_id")
}
switch envelope.BizType {
case model.OutboxBizTypeChatHistoryPersist:
return p.consumeChatHistory(ctx, msg, envelope)
default:
_ = p.outboxRepo.MarkDead(ctx, envelope.OutboxID, "未知业务类型: "+envelope.BizType)
if err := p.consumer.Commit(ctx, msg); err != nil {
return err
}
return nil
}
}
func (p *AgentAsyncPipeline) consumeChatHistory(ctx context.Context, msg segmentkafka.Message, envelope kafkabus.Envelope) error {
var payload model.ChatHistoryPersistPayload
if err := json.Unmarshal(envelope.Payload, &payload); err != nil {
_ = p.outboxRepo.MarkDead(ctx, envelope.OutboxID, "解析聊天持久化载荷失败: "+err.Error())
if commitErr := p.consumer.Commit(ctx, msg); commitErr != nil {
return commitErr
}
return nil
}
if err := p.outboxRepo.PersistChatHistoryAndMarkConsumed(ctx, envelope.OutboxID, payload); err != nil {
if markErr := p.outboxRepo.MarkFailedForRetry(ctx, envelope.OutboxID, "消费并落库失败: "+err.Error()); markErr != nil {
return markErr
}
if commitErr := p.consumer.Commit(ctx, msg); commitErr != nil {
return commitErr
}
return err
}
return p.consumer.Commit(ctx, msg)
}