Version: 0.4.8.dev.260308
feat: 🏗️ 实现 Agent 消息可靠异步持久化(Outbox + Kafka) * 新增 Outbox 数据模型与消息载荷定义,位于 `backend/model/outbox.go` * 新增 Outbox DAO,支持创建、扫描、发布标记、失败重试与消费落库事务,位于 `backend/dao/outbox.go` * 新增 Kafka 基础封装,包含配置、生产者、消费者与消息包装,位于 `backend/kafka` 文件夹 * `config.go`:Kafka 配置文件 * `producer.go`:Kafka 生产者 * `consumer.go`:Kafka 消费者 * `envelope.go`:消息封装处理 * 新增异步管道服务,处理扫描投递与消费落库,位于 `backend/service/agent_async_pipeline.go` * 接入 Agent 聊天链路的可靠持久化,替换原有 goroutine 直接写库逻辑,位于 `backend/service/agent.go` * 启动流程接入管道初始化与启动,位于 `backend/cmd/start.go` * 增加 Kafka 配置项,更新 `backend/config.yaml` 与 `backend/config.example.yaml` * 引入 Kafka 依赖:`github.com/segmentio/kafka-go`(见 `backend/go.mod`, `backend/go.sum`) fix: 🐛 修复首启偶发 user 消息重复落库问题 * 解决因 Outbox 状态并发回写竞态,导致 `consumed` 被晚到的 `published` 覆盖的问题 * 在 `MarkPublished` 中增加条件,避免覆盖已标记为 `consumed` 或 `dead` 的消息,修复位置:`backend/dao/outbox.go` perf: ⚡ 更新 Docker Compose 配置与 Kafka 相关服务 * 更新 `docker-compose.yml` 文件,新增 Kafka 配置与服务 fix: 🧹 优化缓存删除逻辑 * 在 `cache deleter` 中忽略了 `model.AgentOutboxMessage`、`model.ChatHistory` 与 `model.AgentChat` 这三个结构体 * 防止这些结构体对应的表在删除缓存时,导致控制台消息爆炸
This commit is contained in:
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/LoveLosita/smartflow/backend/conv"
|
||||
"github.com/LoveLosita/smartflow/backend/dao"
|
||||
"github.com/LoveLosita/smartflow/backend/inits"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
"github.com/LoveLosita/smartflow/backend/pkg"
|
||||
"github.com/cloudwego/eino-ext/components/model/ark"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
@@ -16,16 +17,18 @@ import (
|
||||
)
|
||||
|
||||
// AgentService bundles the dependencies used by the agent chat flow:
// the AI model hub, the database access layer, the Redis history cache,
// and the reliable async persistence pipeline (outbox + Kafka).
type AgentService struct {
	AIHub         *inits.AIHub        // configured chat models (strategist / worker)
	repo          *dao.AgentDAO       // direct database access for chats and history
	agentCache    *dao.AgentCache     // Redis-backed conversation history cache
	asyncPipeline *AgentAsyncPipeline // outbox + Kafka pipeline; nil when Kafka is disabled
}
|
||||
|
||||
// NewAgentService wires an AgentService from its dependencies.
// asyncPipeline may be nil (Kafka disabled); saveChatHistoryReliable then
// falls back to a direct synchronous database write.
func NewAgentService(aiHub *inits.AIHub, repo *dao.AgentDAO, agentRedis *dao.AgentCache, asyncPipeline *AgentAsyncPipeline) *AgentService {
	return &AgentService{
		AIHub:         aiHub,
		repo:          repo,
		agentCache:    agentRedis,
		asyncPipeline: asyncPipeline,
	}
}
|
||||
|
||||
@@ -38,19 +41,34 @@ func normalizeConversationID(chatID string) string {
|
||||
}
|
||||
|
||||
func (s *AgentService) pickChatModel(requestModel string) (*ark.ChatModel, string) {
|
||||
model := strings.TrimSpace(requestModel)
|
||||
if strings.EqualFold(model, "strategist") {
|
||||
modelName := strings.TrimSpace(requestModel)
|
||||
if strings.EqualFold(modelName, "strategist") {
|
||||
return s.AIHub.Strategist, "strategist"
|
||||
}
|
||||
return s.AIHub.Worker, "worker"
|
||||
}
|
||||
|
||||
func (s *AgentService) saveChatHistoryReliable(ctx context.Context, payload model.ChatHistoryPersistPayload) error {
|
||||
if s.asyncPipeline == nil {
|
||||
return s.repo.SaveChatHistory(ctx, payload.UserID, payload.ConversationID, payload.Role, payload.Message)
|
||||
}
|
||||
return s.asyncPipeline.EnqueueChatHistoryPersist(ctx, payload)
|
||||
}
|
||||
|
||||
// pushErrNonBlocking attempts to deliver err on errChan without ever
// blocking the caller. If the channel's buffer is already full, the error
// is logged and dropped so the streaming path never stalls on a slow
// error consumer.
func pushErrNonBlocking(errChan chan error, err error) {
	select {
	case errChan <- err:
		// Delivered successfully.
	default:
		// Buffer full: drop rather than block the producer goroutine.
		log.Printf("error channel is full, drop error: %v", err)
	}
}
|
||||
|
||||
func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThinking bool, modelName string, userID int, chatID string) (<-chan string, <-chan error) {
|
||||
// 1) 准备输出通道
|
||||
outChan := make(chan string, 5)
|
||||
errChan := make(chan error, 1)
|
||||
|
||||
// 2) 规范化会话并选择模型
|
||||
// 2) 规范会话并选择模型
|
||||
chatID = normalizeConversationID(chatID)
|
||||
selectedModel, resolvedModelName := s.pickChatModel(modelName)
|
||||
|
||||
@@ -63,9 +81,9 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
|
||||
return outChan, errChan
|
||||
}
|
||||
if !result {
|
||||
innerResult, err := s.repo.IfChatExists(ctx, userID, chatID)
|
||||
if err != nil {
|
||||
errChan <- err
|
||||
innerResult, ifErr := s.repo.IfChatExists(ctx, userID, chatID)
|
||||
if ifErr != nil {
|
||||
errChan <- ifErr
|
||||
close(outChan)
|
||||
close(errChan)
|
||||
return outChan, errChan
|
||||
@@ -95,9 +113,9 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
|
||||
cacheMiss := false
|
||||
if chatHistory == nil {
|
||||
cacheMiss = true
|
||||
histories, err := s.repo.GetUserChatHistories(ctx, userID, pkg.HistoryFetchLimitByModel(resolvedModelName), chatID)
|
||||
if err != nil {
|
||||
errChan <- err
|
||||
histories, hisErr := s.repo.GetUserChatHistories(ctx, userID, pkg.HistoryFetchLimitByModel(resolvedModelName), chatID)
|
||||
if hisErr != nil {
|
||||
errChan <- hisErr
|
||||
close(outChan)
|
||||
close(errChan)
|
||||
return outChan, errChan
|
||||
@@ -112,10 +130,10 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
|
||||
|
||||
// 6) 根据最新裁剪结果动态调整 Redis 会话窗口
|
||||
targetWindow := pkg.CalcSessionWindowSize(len(chatHistory))
|
||||
if err := s.agentCache.SetSessionWindowSize(ctx, chatID, targetWindow); err != nil {
|
||||
if err = s.agentCache.SetSessionWindowSize(ctx, chatID, targetWindow); err != nil {
|
||||
log.Printf("failed to set history window for %s: %v", chatID, err)
|
||||
}
|
||||
if err := s.agentCache.EnforceHistoryWindow(ctx, chatID); err != nil {
|
||||
if err = s.agentCache.EnforceHistoryWindow(ctx, chatID); err != nil {
|
||||
log.Printf("failed to enforce history window for %s: %v", chatID, err)
|
||||
}
|
||||
|
||||
@@ -126,7 +144,7 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
|
||||
|
||||
// 缓存未命中时,把“裁剪后的历史”回填进缓存
|
||||
if cacheMiss {
|
||||
if err := s.agentCache.BackfillHistory(ctx, chatID, chatHistory); err != nil {
|
||||
if err = s.agentCache.BackfillHistory(ctx, chatID, chatHistory); err != nil {
|
||||
errChan <- err
|
||||
close(outChan)
|
||||
close(errChan)
|
||||
@@ -134,37 +152,44 @@ func (s *AgentService) AgentChat(ctx context.Context, userMessage string, ifThin
|
||||
}
|
||||
}
|
||||
|
||||
// 7) 异步落用户消息(先写缓存再写库)
|
||||
go func() {
|
||||
bg := context.Background()
|
||||
_ = s.agentCache.PushMessage(bg, chatID, &schema.Message{
|
||||
Role: schema.User,
|
||||
Content: userMessage,
|
||||
})
|
||||
_ = s.repo.SaveChatHistory(bg, userID, chatID, "user", userMessage)
|
||||
}()
|
||||
// 7) 先同步写 Redis,再把持久化请求交给 outbox + Kafka
|
||||
if err = s.agentCache.PushMessage(ctx, chatID, &schema.Message{Role: schema.User, Content: userMessage}); err != nil {
|
||||
log.Printf("failed to push user message into redis history: %v", err)
|
||||
}
|
||||
if err = s.saveChatHistoryReliable(ctx, model.ChatHistoryPersistPayload{
|
||||
UserID: userID,
|
||||
ConversationID: chatID,
|
||||
Role: "user",
|
||||
Message: userMessage,
|
||||
}); err != nil {
|
||||
errChan <- err
|
||||
close(outChan)
|
||||
close(errChan)
|
||||
return outChan, errChan
|
||||
}
|
||||
|
||||
// 8) 启动流式聊天
|
||||
go func() {
|
||||
defer close(outChan)
|
||||
|
||||
fullText, err := agent.StreamChat(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan)
|
||||
if err != nil {
|
||||
errChan <- err
|
||||
fullText, streamErr := agent.StreamChat(ctx, selectedModel, resolvedModelName, userMessage, ifThinking, chatHistory, outChan)
|
||||
if streamErr != nil {
|
||||
pushErrNonBlocking(errChan, streamErr)
|
||||
return
|
||||
}
|
||||
|
||||
// 9) 异步落助手回复
|
||||
go func() {
|
||||
bg := context.Background()
|
||||
_ = s.agentCache.PushMessage(bg, chatID, &schema.Message{
|
||||
Role: schema.Assistant,
|
||||
Content: fullText,
|
||||
})
|
||||
if saveErr := s.repo.SaveChatHistory(bg, userID, chatID, "assistant", fullText); saveErr != nil {
|
||||
log.Printf("failed to save chat history to database: %v", saveErr)
|
||||
}
|
||||
}()
|
||||
// 9) 回答完成后,同步写 Redis,并把数据库落库交给 outbox + Kafka
|
||||
if cacheErr := s.agentCache.PushMessage(context.Background(), chatID, &schema.Message{Role: schema.Assistant, Content: fullText}); cacheErr != nil {
|
||||
log.Printf("failed to push assistant message into redis history: %v", cacheErr)
|
||||
}
|
||||
if saveErr := s.saveChatHistoryReliable(context.Background(), model.ChatHistoryPersistPayload{
|
||||
UserID: userID,
|
||||
ConversationID: chatID,
|
||||
Role: "assistant",
|
||||
Message: fullText,
|
||||
}); saveErr != nil {
|
||||
pushErrNonBlocking(errChan, saveErr)
|
||||
}
|
||||
}()
|
||||
|
||||
return outChan, errChan
|
||||
|
||||
214
backend/service/agent_async_pipeline.go
Normal file
214
backend/service/agent_async_pipeline.go
Normal file
@@ -0,0 +1,214 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/dao"
|
||||
kafkabus "github.com/LoveLosita/smartflow/backend/kafka"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
segmentkafka "github.com/segmentio/kafka-go"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
// AgentAsyncPipeline owns the reliable chat-history persistence pipeline:
// it scans the outbox table, publishes due messages to Kafka, and consumes
// them back to persist chat history into the database.
type AgentAsyncPipeline struct {
	outboxRepo *dao.OutboxDAO     // outbox table access: create, scan, mark published/failed/dead/consumed
	producer   *kafkabus.Producer // Kafka producer used by the dispatch side
	consumer   *kafkabus.Consumer // Kafka consumer used by the persist side
	topic      string             // topic new outbox messages are published to
	maxRetry   int                // per-message retry budget before dead-lettering
	scanEvery  time.Duration      // interval between outbox retry scans
	scanBatch  int                // max messages fetched per scan
}
|
||||
|
||||
// NewAgentAsyncPipeline builds the pipeline from the Kafka config.
// When Kafka is disabled it returns (nil, nil) — callers must treat a nil
// pipeline as "async path unavailable". If the consumer fails to build,
// the already-created producer is closed so its connection is not leaked.
func NewAgentAsyncPipeline(outboxRepo *dao.OutboxDAO, cfg kafkabus.Config) (*AgentAsyncPipeline, error) {
	if !cfg.Enabled {
		// Feature switched off: no pipeline, and no error either.
		return nil, nil
	}
	producer, err := kafkabus.NewProducer(cfg)
	if err != nil {
		return nil, err
	}
	consumer, err := kafkabus.NewConsumer(cfg)
	if err != nil {
		// Avoid leaking the producer created above.
		_ = producer.Close()
		return nil, err
	}
	return &AgentAsyncPipeline{
		outboxRepo: outboxRepo,
		producer:   producer,
		consumer:   consumer,
		topic:      cfg.Topic,
		maxRetry:   cfg.MaxRetry,
		scanEvery:  cfg.RetryScanInterval,
		scanBatch:  cfg.RetryBatchSize,
	}, nil
}
|
||||
|
||||
// Start launches the dispatch loop (outbox -> Kafka) and the consume loop
// (Kafka -> database) as background goroutines. Both loops terminate when
// ctx is cancelled. Safe to call on a nil receiver (no-op), matching the
// nil pipeline returned when Kafka is disabled.
func (p *AgentAsyncPipeline) Start(ctx context.Context) {
	if p == nil {
		return
	}
	go p.startDispatchLoop(ctx)
	go p.startConsumeLoop(ctx)
}
|
||||
|
||||
// Close releases the Kafka producer and consumer. Errors are logged rather
// than returned since Close is expected to run during shutdown. Safe to
// call on a nil receiver (no-op).
func (p *AgentAsyncPipeline) Close() {
	if p == nil {
		return
	}
	if err := p.producer.Close(); err != nil {
		log.Printf("关闭 Kafka producer 失败: %v", err)
	}
	if err := p.consumer.Close(); err != nil {
		log.Printf("关闭 Kafka consumer 失败: %v", err)
	}
}
|
||||
|
||||
// EnqueueChatHistoryPersist records payload as an outbox row (the durable
// source of truth), then eagerly tries to dispatch it to Kafka once.
// A failed eager dispatch is only logged: the periodic scan loop retries it
// later, so the caller sees success as soon as the outbox row is written.
// The eager dispatch deliberately uses context.Background() so cancellation
// of the request ctx cannot abort delivery of an already-recorded message.
func (p *AgentAsyncPipeline) EnqueueChatHistoryPersist(ctx context.Context, payload model.ChatHistoryPersistPayload) error {
	if p == nil {
		return errors.New("Kafka 异步链路未初始化")
	}
	outboxID, err := p.outboxRepo.CreateChatHistoryMessage(ctx, p.topic, payload.ConversationID, payload, p.maxRetry)
	if err != nil {
		return err
	}
	if err = p.dispatchOne(context.Background(), outboxID); err != nil {
		// Best-effort first delivery; the scan loop will pick it up again.
		log.Printf("outbox 消息 %d 首次投递失败,等待扫描重试: %v", outboxID, err)
	}
	return nil
}
|
||||
|
||||
// startDispatchLoop periodically scans the outbox for due messages and
// re-dispatches them to Kafka until ctx is cancelled. Failures of a single
// message are logged and left for a later scan; only ctx stops the loop.
func (p *AgentAsyncPipeline) startDispatchLoop(ctx context.Context) {
	ticker := time.NewTicker(p.scanEvery)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			// Fetch at most scanBatch messages that are due for (re)delivery.
			pendingMessages, err := p.outboxRepo.ListDueMessages(ctx, p.scanBatch)
			if err != nil {
				log.Printf("扫描 outbox 失败: %v", err)
				continue
			}
			for _, msg := range pendingMessages {
				if err = p.dispatchOne(ctx, msg.ID); err != nil {
					log.Printf("重试投递 outbox 消息失败(id=%d): %v", msg.ID, err)
				}
			}
		}
	}
}
|
||||
|
||||
// dispatchOne loads a single outbox message and publishes it to Kafka,
// advancing the outbox state machine:
//   - row missing, or already consumed/dead -> nothing to do
//   - envelope cannot be marshalled         -> mark dead (unretryable)
//   - Kafka publish or status update failed -> mark for retry
func (p *AgentAsyncPipeline) dispatchOne(ctx context.Context, outboxID int64) error {
	outboxMsg, err := p.outboxRepo.GetByID(ctx, outboxID)
	if err != nil {
		if errors.Is(err, gorm.ErrRecordNotFound) {
			// Row gone (e.g. cleaned up elsewhere) — treat as done.
			return nil
		}
		return err
	}
	// Terminal states must never be re-published; this also guards against a
	// late MarkPublished overwriting a "consumed" status (see commit notes).
	if outboxMsg.Status == model.OutboxStatusConsumed || outboxMsg.Status == model.OutboxStatusDead {
		return nil
	}

	envelope := kafkabus.Envelope{
		OutboxID: outboxMsg.ID,
		BizType:  outboxMsg.BizType,
		Payload:  json.RawMessage(outboxMsg.Payload),
	}
	raw, err := json.Marshal(envelope)
	if err != nil {
		// Marshalling will never succeed on retry: dead-letter immediately.
		markErr := p.outboxRepo.MarkDead(ctx, outboxMsg.ID, "序列化 outbox 包裹失败: "+err.Error())
		if markErr != nil {
			log.Printf("标记 outbox 死信失败(id=%d): %v", outboxMsg.ID, markErr)
		}
		return err
	}

	if err = p.producer.Enqueue(ctx, outboxMsg.Topic, outboxMsg.MessageKey, raw); err != nil {
		// Transient failure: schedule for the next scan pass.
		_ = p.outboxRepo.MarkFailedForRetry(ctx, outboxMsg.ID, "投递 Kafka 失败: "+err.Error())
		return err
	}
	if err = p.outboxRepo.MarkPublished(ctx, outboxMsg.ID); err != nil {
		// Published but status update failed: retry; MarkPublished is
		// conditional so a duplicate publish cannot clobber "consumed".
		_ = p.outboxRepo.MarkFailedForRetry(ctx, outboxMsg.ID, "更新已投递状态失败: "+err.Error())
		return err
	}
	return nil
}
|
||||
|
||||
func (p *AgentAsyncPipeline) startConsumeLoop(ctx context.Context) {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
msg, err := p.consumer.Dequeue(ctx)
|
||||
if err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return
|
||||
}
|
||||
log.Printf("Kafka 消费拉取失败: %v", err)
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
if err = p.handleMessage(ctx, msg); err != nil {
|
||||
log.Printf("处理 Kafka 消息失败(topic=%s, partition=%d, offset=%d): %v", msg.Topic, msg.Partition, msg.Offset, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleMessage decodes one Kafka message into an Envelope and dispatches
// it by business type. Structurally-invalid messages (bad JSON, missing
// outbox_id, unknown biz type) are committed anyway — redelivery could
// never succeed — and, where an outbox id is known, dead-lettered.
func (p *AgentAsyncPipeline) handleMessage(ctx context.Context, msg segmentkafka.Message) error {
	var envelope kafkabus.Envelope
	if err := json.Unmarshal(msg.Value, &envelope); err != nil {
		// Unparseable payload: commit so the offset advances past it.
		_ = p.consumer.Commit(ctx, msg)
		return fmt.Errorf("解析 Kafka 包裹失败: %w", err)
	}
	if envelope.OutboxID <= 0 {
		// No outbox row to correlate with: commit and report.
		_ = p.consumer.Commit(ctx, msg)
		return errors.New("Kafka 包裹缺少 outbox_id")
	}

	switch envelope.BizType {
	case model.OutboxBizTypeChatHistoryPersist:
		return p.consumeChatHistory(ctx, msg, envelope)
	default:
		// Unknown type: dead-letter the outbox row and skip the message.
		_ = p.outboxRepo.MarkDead(ctx, envelope.OutboxID, "未知业务类型: "+envelope.BizType)
		if err := p.consumer.Commit(ctx, msg); err != nil {
			return err
		}
		return nil
	}
}
|
||||
|
||||
// consumeChatHistory persists one chat-history payload and marks the outbox
// row consumed in a single DAO call. On a persist failure the row is marked
// for retry and the Kafka message is still committed: redelivery is driven
// by the outbox scan loop, not by Kafka offset replay. An unparseable
// payload is dead-lettered (it can never succeed) and committed.
func (p *AgentAsyncPipeline) consumeChatHistory(ctx context.Context, msg segmentkafka.Message, envelope kafkabus.Envelope) error {
	var payload model.ChatHistoryPersistPayload
	if err := json.Unmarshal(envelope.Payload, &payload); err != nil {
		_ = p.outboxRepo.MarkDead(ctx, envelope.OutboxID, "解析聊天持久化载荷失败: "+err.Error())
		if commitErr := p.consumer.Commit(ctx, msg); commitErr != nil {
			return commitErr
		}
		return nil
	}

	if err := p.outboxRepo.PersistChatHistoryAndMarkConsumed(ctx, envelope.OutboxID, payload); err != nil {
		// Schedule the outbox row for redelivery; surface the mark error
		// first so an uncommitted message is retried by Kafka itself.
		if markErr := p.outboxRepo.MarkFailedForRetry(ctx, envelope.OutboxID, "消费并落库失败: "+err.Error()); markErr != nil {
			return markErr
		}
		if commitErr := p.consumer.Commit(ctx, msg); commitErr != nil {
			return commitErr
		}
		return err
	}

	// Success: acknowledge the Kafka message.
	return p.consumer.Commit(ctx, msg)
}
|
||||
Reference in New Issue
Block a user