diff --git a/backend/active_scheduler/service/trigger_outbox.go b/backend/active_scheduler/service/trigger_outbox.go index 388b181..2a9232d 100644 --- a/backend/active_scheduler/service/trigger_outbox.go +++ b/backend/active_scheduler/service/trigger_outbox.go @@ -13,8 +13,6 @@ import ( sharedevents "github.com/LoveLosita/smartflow/backend/shared/events" ) -const requestedNotificationDedupeWindow = 30 * time.Minute - // EnqueueActiveScheduleTriggeredInTx 在事务内写入 active_schedule.triggered outbox 消息。 // // 职责边界: @@ -53,6 +51,9 @@ func EnqueueNotificationFeishuRequestedInTx( maxRetry int, payload sharedevents.FeishuNotificationRequestedPayload, ) error { + if err := ensureNotificationFeishuOutboxRoute(); err != nil { + return err + } return enqueueContractEventInTx( ctx, outboxRepo, @@ -73,6 +74,16 @@ func EnqueueNotificationFeishuRequestedInTx( // 1. 只做 model -> contract DTO 映射; // 2. 不校验 trigger 是否应该被处理,业务真值判断由 scanner / worker 完成; // 3. 若 payload_json 不是合法 JSON,返回 error,让调用方回滚本次触发。 +// ensureNotificationFeishuOutboxRoute 确保 publisher 侧能把飞书通知事件写入 notification outbox。 +// +// 职责边界: +// 1. 这里只登记 event_type -> notification 服务归属,不注册 handler,也不启动单体旧消费者; +// 2. RegisterEventService 本身幂等,重复调用用于覆盖 API/worker 不同启动路径; +// 3. 
若路由登记失败,直接返回给事务调用方,让 trigger 与 notification 入队一起回滚。 +func ensureNotificationFeishuOutboxRoute() error { + return outboxinfra.RegisterEventService(sharedevents.NotificationFeishuRequestedEventType, outboxinfra.ServiceNotification) +} + func BuildTriggeredPayloadFromModel(row model.ActiveScheduleTrigger) (sharedevents.ActiveScheduleTriggeredPayload, error) { var rawPayload json.RawMessage if row.PayloadJSON != nil && strings.TrimSpace(*row.PayloadJSON) != "" { @@ -144,12 +155,7 @@ func BuildNotificationDedupeKey(userID int, triggerType string, requestedAt time if requestedAt.IsZero() { requestedAt = time.Now() } - windowStart := requestedAt.Truncate(requestedNotificationDedupeWindow) - return fmt.Sprintf("%d:%s:%s", - userID, - strings.TrimSpace(triggerType), - windowStart.Format(time.RFC3339), - ) + return sharedevents.BuildFeishuNotificationDedupeKey(userID, triggerType, requestedAt, sharedevents.DefaultFeishuNotificationDedupeWindow) } func enqueueContractEventInTx( diff --git a/backend/api/notification.go b/backend/api/notification.go index ac78876..d1bc661 100644 --- a/backend/api/notification.go +++ b/backend/api/notification.go @@ -2,12 +2,12 @@ package api import ( "context" - "errors" "net/http" "time" - "github.com/LoveLosita/smartflow/backend/notification" "github.com/LoveLosita/smartflow/backend/respond" + contracts "github.com/LoveLosita/smartflow/backend/shared/contracts/notification" + "github.com/LoveLosita/smartflow/backend/shared/ports" "github.com/gin-gonic/gin" ) @@ -16,15 +16,15 @@ const notificationAPITimeout = 8 * time.Second // NotificationAPI 承载当前用户的外部通知通道配置接口。 // // 职责边界: -// 1. 只负责从 JWT 上下文取得当前 user_id、绑定请求体并调用 notification.ChannelService; +// 1. 只负责从 JWT 上下文取得当前 user_id、绑定请求体并调用 notification zrpc client; // 2. 不直接读写 user_notification_channels,避免 API 层绕过 webhook 校验和脱敏规则; // 3. 
不参与主动调度、notification_records 状态机和 outbox 消费。 type NotificationAPI struct { - channelService *notification.ChannelService + client ports.NotificationCommandClient } -func NewNotificationAPI(channelService *notification.ChannelService) *NotificationAPI { - return &NotificationAPI{channelService: channelService} +func NewNotificationAPI(client ports.NotificationCommandClient) *NotificationAPI { + return &NotificationAPI{client: client} } type saveFeishuWebhookRequest struct { @@ -36,7 +36,7 @@ type saveFeishuWebhookRequest struct { // GetFeishuWebhook 查询当前用户的飞书 Webhook 触发器配置。 func (api *NotificationAPI) GetFeishuWebhook(c *gin.Context) { - if api == nil || api.channelService == nil { + if api == nil || api.client == nil { c.JSON(http.StatusInternalServerError, respond.InternalError(nilServiceError("通知通道 service 未初始化"))) return } @@ -44,9 +44,11 @@ func (api *NotificationAPI) GetFeishuWebhook(c *gin.Context) { ctx, cancel := context.WithTimeout(c.Request.Context(), notificationAPITimeout) defer cancel() - channel, err := api.channelService.GetFeishuWebhook(ctx, c.GetInt("user_id")) + channel, err := api.client.GetFeishuWebhook(ctx, contracts.GetFeishuWebhookRequest{ + UserID: c.GetInt("user_id"), + }) if err != nil { - writeNotificationError(c, err) + respond.DealWithError(c, err) return } c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, channel)) @@ -54,7 +56,7 @@ func (api *NotificationAPI) GetFeishuWebhook(c *gin.Context) { // SaveFeishuWebhook 幂等保存当前用户的飞书 Webhook 触发器配置。 func (api *NotificationAPI) SaveFeishuWebhook(c *gin.Context) { - if api == nil || api.channelService == nil { + if api == nil || api.client == nil { c.JSON(http.StatusInternalServerError, respond.InternalError(nilServiceError("通知通道 service 未初始化"))) return } @@ -72,14 +74,15 @@ func (api *NotificationAPI) SaveFeishuWebhook(c *gin.Context) { ctx, cancel := context.WithTimeout(c.Request.Context(), notificationAPITimeout) defer cancel() - channel, err := api.channelService.SaveFeishuWebhook(ctx, 
c.GetInt("user_id"), notification.SaveFeishuWebhookRequest{ + channel, err := api.client.SaveFeishuWebhook(ctx, contracts.SaveFeishuWebhookRequest{ + UserID: c.GetInt("user_id"), Enabled: enabled, WebhookURL: req.WebhookURL, AuthType: req.AuthType, BearerToken: req.BearerToken, }) if err != nil { - writeNotificationError(c, err) + respond.DealWithError(c, err) return } c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, channel)) @@ -87,7 +90,7 @@ func (api *NotificationAPI) SaveFeishuWebhook(c *gin.Context) { // DeleteFeishuWebhook 删除当前用户的飞书 Webhook 触发器配置。 func (api *NotificationAPI) DeleteFeishuWebhook(c *gin.Context) { - if api == nil || api.channelService == nil { + if api == nil || api.client == nil { c.JSON(http.StatusInternalServerError, respond.InternalError(nilServiceError("通知通道 service 未初始化"))) return } @@ -95,8 +98,10 @@ func (api *NotificationAPI) DeleteFeishuWebhook(c *gin.Context) { ctx, cancel := context.WithTimeout(c.Request.Context(), notificationAPITimeout) defer cancel() - if err := api.channelService.DeleteFeishuWebhook(ctx, c.GetInt("user_id")); err != nil { - writeNotificationError(c, err) + if err := api.client.DeleteFeishuWebhook(ctx, contracts.DeleteFeishuWebhookRequest{ + UserID: c.GetInt("user_id"), + }); err != nil { + respond.DealWithError(c, err) return } c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, gin.H{"deleted": true})) @@ -104,7 +109,7 @@ func (api *NotificationAPI) DeleteFeishuWebhook(c *gin.Context) { // TestFeishuWebhook 发送一条最小业务 JSON 到当前用户配置的飞书 Webhook。 func (api *NotificationAPI) TestFeishuWebhook(c *gin.Context) { - if api == nil || api.channelService == nil { + if api == nil || api.client == nil { c.JSON(http.StatusInternalServerError, respond.InternalError(nilServiceError("通知通道 service 未初始化"))) return } @@ -112,18 +117,12 @@ func (api *NotificationAPI) TestFeishuWebhook(c *gin.Context) { ctx, cancel := context.WithTimeout(c.Request.Context(), notificationAPITimeout) defer cancel() - result, err := 
api.channelService.TestFeishuWebhook(ctx, c.GetInt("user_id")) + result, err := api.client.TestFeishuWebhook(ctx, contracts.TestFeishuWebhookRequest{ + UserID: c.GetInt("user_id"), + }) if err != nil { - writeNotificationError(c, err) + respond.DealWithError(c, err) return } c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, result)) } - -func writeNotificationError(c *gin.Context, err error) { - if errors.Is(err, notification.ErrInvalidChannelConfig) { - c.JSON(http.StatusBadRequest, respond.WrongParamType) - return - } - respond.DealWithError(c, err) -} diff --git a/backend/cmd/notification/main.go b/backend/cmd/notification/main.go new file mode 100644 index 0000000..ef880af --- /dev/null +++ b/backend/cmd/notification/main.go @@ -0,0 +1,77 @@ +package main + +import ( + "context" + "log" + "os" + "os/signal" + "syscall" + + "github.com/LoveLosita/smartflow/backend/bootstrap" + kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka" + outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox" + notificationdao "github.com/LoveLosita/smartflow/backend/services/notification/dao" + notificationrpc "github.com/LoveLosita/smartflow/backend/services/notification/rpc" + notificationsv "github.com/LoveLosita/smartflow/backend/services/notification/sv" + "github.com/spf13/viper" +) + +func main() { + if err := bootstrap.LoadConfig(); err != nil { + log.Fatalf("failed to load config: %v", err) + } + + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + + db, err := notificationdao.OpenDBFromConfig() + if err != nil { + log.Fatalf("failed to connect notification database: %v", err) + } + + channelDAO := notificationdao.NewChannelDAO(db) + recordDAO := notificationdao.NewRecordDAO(db) + svc, err := notificationsv.NewNotificationServiceWithFeishuWebhook(recordDAO, channelDAO, notificationsv.FeishuWebhookProviderOptions{ + FrontendBaseURL: viper.GetString("notification.frontendBaseURL"), + }, 
notificationsv.ServiceOptions{}) + if err != nil { + log.Fatalf("failed to initialize notification service: %v", err) + } + + outboxRepo := outboxinfra.NewRepository(db) + eventBus, err := outboxinfra.NewEventBus(outboxRepo, kafkabus.LoadConfig()) + if err != nil { + log.Fatalf("failed to initialize notification outbox bus: %v", err) + } + if eventBus != nil { + if err := notificationsv.RegisterFeishuRequestedHandler(eventBus, outboxRepo, svc); err != nil { + log.Fatalf("failed to register notification outbox handler: %v", err) + } + eventBus.Start(ctx) + defer eventBus.Close() + log.Println("Notification outbox consumer started") + } else { + log.Println("Notification outbox consumer is disabled") + } + + svc.StartRetryLoop(ctx, viper.GetDuration("notification.retryScanEvery"), viper.GetInt("notification.retryBatchSize")) + log.Println("Notification retry scanner started") + + server, listenOn, err := notificationrpc.NewServer(notificationrpc.ServerOptions{ + ListenOn: viper.GetString("notification.rpc.listenOn"), + Timeout: viper.GetDuration("notification.rpc.timeout"), + Service: svc, + }) + if err != nil { + log.Fatalf("failed to build notification zrpc server: %v", err) + } + defer server.Stop() + + go func() { + log.Printf("notification zrpc service starting on %s", listenOn) + server.Start() + }() + + <-ctx.Done() + log.Println("notification service stopping") +} diff --git a/backend/cmd/start.go b/backend/cmd/start.go index ae98449..cf29084 100644 --- a/backend/cmd/start.go +++ b/backend/cmd/start.go @@ -23,6 +23,7 @@ import ( "github.com/LoveLosita/smartflow/backend/api" "github.com/LoveLosita/smartflow/backend/bootstrap" "github.com/LoveLosita/smartflow/backend/dao" + gatewaynotification "github.com/LoveLosita/smartflow/backend/gateway/notification" gatewayrouter "github.com/LoveLosita/smartflow/backend/gateway/router" gatewayuserauth "github.com/LoveLosita/smartflow/backend/gateway/userauth" kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka" 
@@ -38,7 +39,6 @@ import ( newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream" newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools" "github.com/LoveLosita/smartflow/backend/newAgent/tools/web" - "github.com/LoveLosita/smartflow/backend/notification" "github.com/LoveLosita/smartflow/backend/pkg" "github.com/LoveLosita/smartflow/backend/service" agentsvcsvc "github.com/LoveLosita/smartflow/backend/service/agentsvc" @@ -46,6 +46,7 @@ import ( llmservice "github.com/LoveLosita/smartflow/backend/services/llm" ragservice "github.com/LoveLosita/smartflow/backend/services/rag" ragconfig "github.com/LoveLosita/smartflow/backend/services/rag/config" + "github.com/LoveLosita/smartflow/backend/shared/ports" "github.com/go-redis/redis/v8" "github.com/spf13/viper" "gorm.io/gorm" @@ -69,7 +70,6 @@ type appRuntime struct { memoryModule *memory.Module activeJobScanner *activejob.Scanner activeTriggerWorkflow *activesvc.TriggerWorkflowService - notificationService *notification.NotificationService limiter *pkg.RateLimiter handlers *api.ApiHandlers userAuthClient *gatewayuserauth.Client @@ -112,7 +112,7 @@ func StartAPI() { } // StartWorker 只启动后台异步能力,不注册 Gin 路由。 -// 当前包含 outbox relay / Kafka consumer / memory worker / 主动调度扫描 / 通知重试。 +// 当前包含 outbox relay / Kafka consumer / memory worker / 主动调度扫描。 func StartWorker() { ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) defer stop() @@ -215,6 +215,14 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) { if err != nil { return nil, fmt.Errorf("failed to initialize userauth zrpc client: %w", err) } + notificationClient, err := gatewaynotification.NewClient(gatewaynotification.ClientConfig{ + Endpoints: viper.GetStringSlice("notification.rpc.endpoints"), + Target: viper.GetString("notification.rpc.target"), + Timeout: viper.GetDuration("notification.rpc.timeout"), + }) + if err != nil { + return nil, fmt.Errorf("failed to initialize notification zrpc 
client: %w", err) + } taskSv := service.NewTaskService(taskRepo, cacheRepo, eventBus) taskSv.SetActiveScheduleDAO(manager.ActiveSchedule) courseService := buildCourseService(llmService, courseRepo, scheduleRepo) @@ -268,22 +276,6 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) { return nil, err } agentService.SetActiveScheduleSessionRerunFunc(buildActiveScheduleSessionRerunFunc(manager.ActiveSchedule, activeScheduleGraphRunner, activeSchedulePreviewConfirm, activeScheduleFeedbackLocator)) - // 1. 生产投递先切到用户级飞书 Webhook provider,mock provider 文件继续保留给后续单测和本地隔离验证。 - // 2. provider 与配置测试接口共用同一个实例,保证“测试成功”和“正式投递”走同一套 URL 校验、JSON 拼装和 HTTP 结果分类。 - feishuProvider, err := notification.NewWebhookFeishuProvider(manager.Notification, notification.WebhookFeishuProviderOptions{ - FrontendBaseURL: viper.GetString("notification.frontendBaseURL"), - }) - if err != nil { - return nil, err - } - notificationService, err := notification.NewNotificationService(manager.ActiveSchedule, feishuProvider, notification.ServiceOptions{}) - if err != nil { - return nil, err - } - notificationChannelService, err := notification.NewChannelService(manager.Notification, feishuProvider, notification.ChannelServiceOptions{}) - if err != nil { - return nil, err - } var activeTriggerWorkflow *activesvc.TriggerWorkflowService var activeJobScanner *activejob.Scanner if eventBus != nil { @@ -305,7 +297,7 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) { return nil, err } } - handlers := buildAPIHandlers(taskSv, taskClassService, courseService, scheduleService, agentService, memoryModule, activeScheduleDryRun, activeSchedulePreviewConfirm, activeScheduleTrigger, notificationChannelService) + handlers := buildAPIHandlers(taskSv, taskClassService, courseService, scheduleService, agentService, memoryModule, activeScheduleDryRun, activeSchedulePreviewConfirm, activeScheduleTrigger, notificationClient) runtime := &appRuntime{ db: db, @@ -320,7 +312,6 @@ func buildRuntime(ctx 
context.Context) (*appRuntime, error) { memoryModule: memoryModule, activeJobScanner: activeJobScanner, activeTriggerWorkflow: activeTriggerWorkflow, - notificationService: notificationService, limiter: limiter, handlers: handlers, userAuthClient: userAuthClient, @@ -846,7 +837,7 @@ func buildAPIHandlers( activeScheduleDryRun *activesvc.DryRunService, activeSchedulePreviewConfirm *activesvc.PreviewConfirmService, activeScheduleTrigger *activesvc.TriggerService, - notificationChannelService *notification.ChannelService, + notificationClient ports.NotificationCommandClient, ) *api.ApiHandlers { return &api.ApiHandlers{ TaskHandler: api.NewTaskHandler(taskService), @@ -856,7 +847,7 @@ func buildAPIHandlers( AgentHandler: api.NewAgentHandler(agentService), MemoryHandler: api.NewMemoryHandler(memoryModule), ActiveSchedule: api.NewActiveScheduleAPI(activeScheduleDryRun, activeSchedulePreviewConfirm, activeScheduleTrigger), - Notification: api.NewNotificationAPI(notificationChannelService), + Notification: api.NewNotificationAPI(notificationClient), } } @@ -879,10 +870,6 @@ func (r *appRuntime) startWorkers(ctx context.Context) { r.activeJobScanner.Start(ctx) log.Println("Active schedule due job scanner started") } - if r.notificationService != nil { - r.notificationService.StartRetryLoop(ctx, viper.GetDuration("notification.retryScanEvery"), viper.GetInt("notification.retryBatchSize")) - log.Println("Notification retry scanner started") - } } func (r *appRuntime) registerEventHandlers() error { @@ -895,7 +882,6 @@ func (r *appRuntime) registerEventHandlers() error { r.cacheRepo, r.memoryModule, r.activeTriggerWorkflow, - r.notificationService, r.userAuthClient, ); err != nil { return err diff --git a/backend/config.example.yaml b/backend/config.example.yaml index e6ea27a..cff1436 100644 --- a/backend/config.example.yaml +++ b/backend/config.example.yaml @@ -50,6 +50,11 @@ kafka: # 通知投递配置。 notification: + rpc: + listenOn: "0.0.0.0:9082" + endpoints: + - "127.0.0.1:9082" + 
timeout: 6s frontendBaseURL: "http://localhost:5173" retryScanEvery: 1m retryBatchSize: 50 diff --git a/backend/dao/active_schedule.go b/backend/dao/active_schedule.go index 19115ad..ff85140 100644 --- a/backend/dao/active_schedule.go +++ b/backend/dao/active_schedule.go @@ -13,7 +13,7 @@ import ( // ActiveScheduleDAO 管理主动调度阶段 1 的自有表。 // // 职责边界: -// 1. 只负责 active_schedule_jobs / triggers / previews / notification_records 的基础读写; +// 1. 只负责 active_schedule_jobs / triggers / previews 的基础读写; // 2. 不负责构造候选、调用 LLM、投递 provider 或写正式日程; // 3. 幂等查询只按持久化键读取事实,是否复用结果由上层状态机判断。 type ActiveScheduleDAO struct { @@ -308,88 +308,3 @@ func (d *ActiveScheduleDAO) FindPreviewByApplyIdempotencyKey(ctx context.Context } return &preview, nil } - -func (d *ActiveScheduleDAO) CreateNotificationRecord(ctx context.Context, record *model.NotificationRecord) error { - if err := d.ensureDB(); err != nil { - return err - } - if record == nil { - return errors.New("notification record 不能为空") - } - return d.db.WithContext(ctx).Create(record).Error -} - -func (d *ActiveScheduleDAO) UpdateNotificationRecordFields(ctx context.Context, notificationID int64, updates map[string]any) error { - if err := d.ensureDB(); err != nil { - return err - } - if notificationID <= 0 { - return errors.New("notification record id 不能为空") - } - if len(updates) == 0 { - return nil - } - return d.db.WithContext(ctx). - Model(&model.NotificationRecord{}). - Where("id = ?", notificationID). 
- Updates(updates).Error -} - -func (d *ActiveScheduleDAO) GetNotificationRecordByID(ctx context.Context, notificationID int64) (*model.NotificationRecord, error) { - if err := d.ensureDB(); err != nil { - return nil, err - } - if notificationID <= 0 { - return nil, gorm.ErrRecordNotFound - } - var record model.NotificationRecord - err := d.db.WithContext(ctx).Where("id = ?", notificationID).First(&record).Error - if err != nil { - return nil, err - } - return &record, nil -} - -// FindNotificationRecordByDedupeKey 查询通知去重记录。 -// -// 说明: -// 1. notification 第一版按 channel + dedupe_key 聚合去重; -// 2. 若返回 pending/sending/sent,上层应避免重复投递; -// 3. 若返回 failed,上层可以复用同一条记录进入 provider retry。 -func (d *ActiveScheduleDAO) FindNotificationRecordByDedupeKey(ctx context.Context, channel string, dedupeKey string) (*model.NotificationRecord, error) { - if err := d.ensureDB(); err != nil { - return nil, err - } - if channel == "" || dedupeKey == "" { - return nil, gorm.ErrRecordNotFound - } - var record model.NotificationRecord - err := d.db.WithContext(ctx). - Where("channel = ? AND dedupe_key = ?", channel, dedupeKey). - Order("created_at DESC, id DESC"). - First(&record).Error - if err != nil { - return nil, err - } - return &record, nil -} - -// ListRetryableNotificationRecords 查询到达重试时间的通知记录。 -func (d *ActiveScheduleDAO) ListRetryableNotificationRecords(ctx context.Context, now time.Time, limit int) ([]model.NotificationRecord, error) { - if err := d.ensureDB(); err != nil { - return nil, err - } - if limit <= 0 || now.IsZero() { - return []model.NotificationRecord{}, nil - } - var records []model.NotificationRecord - err := d.db.WithContext(ctx). - Where("status = ? AND next_retry_at IS NOT NULL AND next_retry_at <= ?", model.NotificationRecordStatusFailed, now). - Order("next_retry_at ASC, id ASC"). - Limit(limit). 
- Find(&records).Error - if err != nil { - return nil, err - } - return records, nil -} diff --git a/backend/dao/base.go b/backend/dao/base.go index e031782..fabfb6c 100644 --- a/backend/dao/base.go +++ b/backend/dao/base.go @@ -16,7 +16,6 @@ type RepoManager struct { Agent *AgentDAO ActiveSchedule *ActiveScheduleDAO ActiveScheduleSession *ActiveScheduleSessionDAO - Notification *NotificationChannelDAO } func NewManager(db *gorm.DB) *RepoManager { @@ -29,7 +28,6 @@ func NewManager(db *gorm.DB) *RepoManager { Agent: NewAgentDAO(db), ActiveSchedule: NewActiveScheduleDAO(db), ActiveScheduleSession: NewActiveScheduleSessionDAO(db), - Notification: NewNotificationChannelDAO(db), } } @@ -49,7 +47,6 @@ func (m *RepoManager) WithTx(tx *gorm.DB) *RepoManager { Agent: m.Agent.WithTx(tx), ActiveSchedule: m.ActiveSchedule.WithTx(tx), ActiveScheduleSession: m.ActiveScheduleSession.WithTx(tx), - Notification: m.Notification.WithTx(tx), } } diff --git a/backend/gateway/notification/client.go b/backend/gateway/notification/client.go new file mode 100644 index 0000000..07061b8 --- /dev/null +++ b/backend/gateway/notification/client.go @@ -0,0 +1,181 @@ +package notification + +import ( + "context" + "errors" + "strings" + "time" + + notificationpb "github.com/LoveLosita/smartflow/backend/services/notification/rpc/pb" + contracts "github.com/LoveLosita/smartflow/backend/shared/contracts/notification" + "github.com/zeromicro/go-zero/zrpc" +) + +const ( + defaultEndpoint = "127.0.0.1:9082" + defaultTimeout = 6 * time.Second +) + +type ClientConfig struct { + Endpoints []string + Target string + Timeout time.Duration +} + +// Client 是 gateway 侧 notification zrpc 的最小适配层。 +// +// 职责边界: +// 1. 只负责跨进程 gRPC 调用和响应转译,不碰 DB / provider / outbox 细节; +// 2. 服务端业务错误先通过 gRPC status 传输,再在这里反解回 respond.Response 风格; +// 3. 
上层调用方仍然可以保持 `res, err :=` 的统一用法。 +type Client struct { + rpc notificationpb.NotificationClient +} + +func NewClient(cfg ClientConfig) (*Client, error) { + timeout := cfg.Timeout + if timeout <= 0 { + timeout = defaultTimeout + } + endpoints := normalizeEndpoints(cfg.Endpoints) + target := strings.TrimSpace(cfg.Target) + if len(endpoints) == 0 && target == "" { + endpoints = []string{defaultEndpoint} + } + + zclient, err := zrpc.NewClient(zrpc.RpcClientConf{ + Endpoints: endpoints, + Target: target, + NonBlock: true, + Timeout: int64(timeout / time.Millisecond), + }) + if err != nil { + return nil, err + } + return &Client{rpc: notificationpb.NewNotificationClient(zclient.Conn())}, nil +} + +func (c *Client) GetFeishuWebhook(ctx context.Context, req contracts.GetFeishuWebhookRequest) (*contracts.ChannelResponse, error) { + if err := c.ensureReady(); err != nil { + return nil, err + } + resp, err := c.rpc.GetFeishuWebhook(ctx, ¬ificationpb.GetFeishuWebhookRequest{ + UserId: int64(req.UserID), + }) + if err != nil { + return nil, responseFromRPCError(err) + } + return channelFromResponse(resp) +} + +func (c *Client) SaveFeishuWebhook(ctx context.Context, req contracts.SaveFeishuWebhookRequest) (*contracts.ChannelResponse, error) { + if err := c.ensureReady(); err != nil { + return nil, err + } + resp, err := c.rpc.SaveFeishuWebhook(ctx, ¬ificationpb.SaveFeishuWebhookRequest{ + UserId: int64(req.UserID), + Enabled: req.Enabled, + WebhookUrl: req.WebhookURL, + AuthType: req.AuthType, + BearerToken: req.BearerToken, + }) + if err != nil { + return nil, responseFromRPCError(err) + } + return channelFromResponse(resp) +} + +func (c *Client) DeleteFeishuWebhook(ctx context.Context, req contracts.DeleteFeishuWebhookRequest) error { + if err := c.ensureReady(); err != nil { + return err + } + resp, err := c.rpc.DeleteFeishuWebhook(ctx, ¬ificationpb.DeleteFeishuWebhookRequest{ + UserId: int64(req.UserID), + }) + if err != nil { + return responseFromRPCError(err) + } + if resp 
== nil { + return errors.New("notification zrpc service returned empty delete response") + } + return nil +} + +func (c *Client) TestFeishuWebhook(ctx context.Context, req contracts.TestFeishuWebhookRequest) (*contracts.TestResult, error) { + if err := c.ensureReady(); err != nil { + return nil, err + } + resp, err := c.rpc.TestFeishuWebhook(ctx, ¬ificationpb.TestFeishuWebhookRequest{ + UserId: int64(req.UserID), + }) + if err != nil { + return nil, responseFromRPCError(err) + } + return testResultFromResponse(resp) +} + +func (c *Client) ensureReady() error { + if c == nil || c.rpc == nil { + return errors.New("notification zrpc client is not initialized") + } + return nil +} + +func channelFromResponse(resp *notificationpb.ChannelResponse) (*contracts.ChannelResponse, error) { + if resp == nil { + return nil, errors.New("notification zrpc service returned empty channel response") + } + var lastTestAt *time.Time + if value := timeFromUnixNano(resp.LastTestAtUnixNano); !value.IsZero() { + lastTestAt = &value + } + return &contracts.ChannelResponse{ + Channel: resp.Channel, + Enabled: resp.Enabled, + Configured: resp.Configured, + WebhookURLMask: resp.WebhookUrlMask, + AuthType: resp.AuthType, + HasBearerToken: resp.HasBearerToken, + LastTestStatus: resp.LastTestStatus, + LastTestError: resp.LastTestError, + LastTestAt: lastTestAt, + }, nil +} + +func testResultFromResponse(resp *notificationpb.TestResult) (*contracts.TestResult, error) { + if resp == nil { + return nil, errors.New("notification zrpc service returned empty test response") + } + channel, err := channelFromResponse(resp.Channel) + if err != nil { + return nil, err + } + return &contracts.TestResult{ + Channel: *channel, + Status: resp.Status, + Outcome: resp.Outcome, + Message: resp.Message, + TraceID: resp.TraceId, + SentAt: timeFromUnixNano(resp.SentAtUnixNano), + Skipped: resp.Skipped, + Provider: resp.Provider, + }, nil +} + +func normalizeEndpoints(values []string) []string { + endpoints := 
make([]string, 0, len(values)) + for _, value := range values { + trimmed := strings.TrimSpace(value) + if trimmed != "" { + endpoints = append(endpoints, trimmed) + } + } + return endpoints +} + +func timeFromUnixNano(value int64) time.Time { + if value <= 0 { + return time.Time{} + } + return time.Unix(0, value) +} diff --git a/backend/gateway/notification/errors.go b/backend/gateway/notification/errors.go new file mode 100644 index 0000000..a65f93f --- /dev/null +++ b/backend/gateway/notification/errors.go @@ -0,0 +1,151 @@ +package notification + +import ( + "errors" + "fmt" + "strings" + + "github.com/LoveLosita/smartflow/backend/respond" + "google.golang.org/genproto/googleapis/rpc/errdetails" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// responseFromRPCError 负责把 notification 的 gRPC 错误反解回项目内的 respond.Response。 +// +// 职责边界: +// 1. 只在 gateway 边缘层使用,不下沉到服务实现里; +// 2. 业务错误尽量恢复成 respond.Response,方便 API 层继续复用现有 DealWithError; +// 3. 只要拿不到业务语义,就退化成普通 error,让上层按 500 处理。 +func responseFromRPCError(err error) error { + if err == nil { + return nil + } + + st, ok := status.FromError(err) + if !ok { + return wrapRPCError(err) + } + + if resp, ok := responseFromStatus(st); ok { + return resp + } + + switch st.Code() { + case codes.Internal, codes.Unknown, codes.Unavailable, codes.DeadlineExceeded, codes.DataLoss, codes.Unimplemented: + msg := strings.TrimSpace(st.Message()) + if msg == "" { + msg = "notification zrpc service internal error" + } + return wrapRPCError(errors.New(msg)) + } + + msg := strings.TrimSpace(st.Message()) + if msg == "" { + msg = "notification zrpc service rejected request" + } + return respond.Response{ + Status: grpcCodeToRespondStatus(st.Code()), + Info: msg, + } +} + +func responseFromStatus(st *status.Status) (respond.Response, bool) { + if st == nil { + return respond.Response{}, false + } + + if resp, ok := responseFromStatusDetails(st); ok { + return resp, true + } + if resp, ok := 
responseFromLegacyStatus(st.Code(), st.Message()); ok { + return resp, true + } + return respond.Response{}, false +} + +func responseFromStatusDetails(st *status.Status) (respond.Response, bool) { + for _, detail := range st.Details() { + info, ok := detail.(*errdetails.ErrorInfo) + if !ok { + continue + } + + statusValue := strings.TrimSpace(info.Reason) + if statusValue == "" { + statusValue = grpcCodeToRespondStatus(st.Code()) + } + if statusValue == "" { + return respond.Response{}, false + } + + message := strings.TrimSpace(st.Message()) + if message == "" && info.Metadata != nil { + message = strings.TrimSpace(info.Metadata["info"]) + } + if message == "" { + message = statusValue + } + return respond.Response{Status: statusValue, Info: message}, true + } + return respond.Response{}, false +} + +func responseFromLegacyStatus(code codes.Code, message string) (respond.Response, bool) { + trimmed := strings.TrimSpace(message) + if resp, ok := respondResponseByMessage(trimmed); ok { + return resp, true + } + + switch code { + case codes.Unauthenticated: + if trimmed == "" { + trimmed = "unauthorized" + } + return respond.Response{Status: respond.ErrUnauthorized.Status, Info: trimmed}, true + case codes.InvalidArgument: + if trimmed == "" { + trimmed = "invalid argument" + } + return respond.Response{Status: respond.MissingParam.Status, Info: trimmed}, true + case codes.Internal, codes.Unknown, codes.DataLoss: + if trimmed == "" { + trimmed = "notification service internal error" + } + return respond.InternalError(errors.New(trimmed)), true + } + + return respond.Response{}, false +} + +func respondResponseByMessage(message string) (respond.Response, bool) { + switch strings.TrimSpace(message) { + case respond.MissingParam.Info: + return respond.MissingParam, true + case respond.WrongParamType.Info: + return respond.WrongParamType, true + case respond.ErrUnauthorized.Info: + return respond.ErrUnauthorized, true + } + return respond.Response{}, false +} + +func 
grpcCodeToRespondStatus(code codes.Code) string { + switch code { + case codes.Unauthenticated: + return respond.ErrUnauthorized.Status + case codes.InvalidArgument: + return respond.MissingParam.Status + case codes.Internal, codes.Unknown, codes.DataLoss: + return "500" + default: + return "400" + } +} + +func wrapRPCError(err error) error { + if err == nil { + return nil + } + return fmt.Errorf("调用 notification zrpc 服务失败: %w", err) +} diff --git a/backend/infra/kafka/admin.go b/backend/infra/kafka/admin.go index f995454..e86053d 100644 --- a/backend/infra/kafka/admin.go +++ b/backend/infra/kafka/admin.go @@ -58,6 +58,11 @@ func probeTopic(ctx context.Context, brokers []string, topic string) error { continue } + // 1. segmentio/kafka-go 的 ReadPartitions 不直接接收 context。 + // 2. 这里必须给底层连接设置 I/O deadline,避免 broker 已接受连接但 metadata 响应卡住时, + // 上层 WaitTopicReady 永远阻塞,导致 outbox dispatch / consume 循环无法启动。 + // 3. deadline 命中后本轮探测失败,外层 ticker 会继续重试直到总 timeout 到期。 + _ = conn.SetDeadline(time.Now().Add(2 * time.Second)) partitions, readErr := conn.ReadPartitions(topic) _ = conn.Close() if readErr != nil { diff --git a/backend/infra/outbox/engine.go b/backend/infra/outbox/engine.go index cf13c74..31227fe 100644 --- a/backend/infra/outbox/engine.go +++ b/backend/infra/outbox/engine.go @@ -17,6 +17,8 @@ import ( "gorm.io/gorm" ) +const defaultDispatchTimeout = 10 * time.Second + // MessageHandler 是事件消费处理器。 // // 语义约束: @@ -153,13 +155,16 @@ func (e *Engine) Start(ctx context.Context) { e.scanEvery, e.scanBatch, ) + // 1. dispatch 先启动,保证已到期的 outbox 不会被 topic 探测阻塞在 pending。 + // 2. consume 仍等待 topic 探测,降低启动期消费者空转与 metadata 抖动。 + // 3. 
若探测失败,继续启动消费者;真实错误交给消费循环记录并由运维日志暴露。 + e.StartDispatch(ctx) + if err := kafkabus.WaitTopicReady(ctx, e.brokers, e.route.Topic, 30*time.Second); err != nil { log.Printf("Kafka topic not ready before consume loop start: %v", err) } else { log.Printf("Kafka topic is ready: %s", e.route.Topic) } - - e.StartDispatch(ctx) e.StartConsume(ctx) } @@ -246,25 +251,35 @@ func (e *Engine) startDispatchLoop(ctx context.Context) { ticker := time.NewTicker(e.scanEvery) defer ticker.Stop() + log.Printf("outbox dispatch loop started: service=%s scan=%s batch=%d", e.route.ServiceName, e.scanEvery, e.scanBatch) + e.scanAndDispatchDueMessages(ctx) + for { select { case <-ctx.Done(): return case <-ticker.C: - pendingMessages, err := e.repo.ListDueMessages(ctx, e.route.ServiceName, e.scanBatch) - if err != nil { - log.Printf("扫描 outbox 失败: %v", err) - continue - } - if len(pendingMessages) > 0 { - log.Printf("outbox due messages=%d, service=%s start dispatch", len(pendingMessages), e.route.ServiceName) - } + e.scanAndDispatchDueMessages(ctx) + } + } +} - for _, msg := range pendingMessages { - if err = e.dispatchOne(ctx, msg.ID); err != nil { - log.Printf("重试投递 outbox 消息失败(id=%d): %v", msg.ID, err) - } - } +func (e *Engine) scanAndDispatchDueMessages(ctx context.Context) { + // 1. 每轮只拉取当前服务到期消息,避免独立微服务误扫其它服务的 outbox 表。 + // 2. 单条投递失败只记录并进入 retry,不阻断本轮剩余消息。 + // 3. 
启动时也会执行一次本函数,避免重启后必须等待下一次 ticker 才能推进历史 pending。 + pendingMessages, err := e.repo.ListDueMessages(ctx, e.route.ServiceName, e.scanBatch) + if err != nil { + log.Printf("扫描 outbox 失败: %v", err) + return + } + if len(pendingMessages) > 0 { + log.Printf("outbox due messages=%d, service=%s start dispatch", len(pendingMessages), e.route.ServiceName) + } + + for _, msg := range pendingMessages { + if err = e.dispatchOne(ctx, msg.ID); err != nil { + log.Printf("重试投递 outbox 消息失败(id=%d): %v", msg.ID, err) } } } @@ -315,7 +330,11 @@ func (e *Engine) dispatchOne(ctx context.Context, outboxID int64) error { return err } - if err = e.producer.Enqueue(ctx, outboxMsg.Topic, outboxMsg.MessageKey, raw); err != nil { + // 1. Kafka 写入使用单条超时,避免 broker/metadata 卡住时消息长期停留在 pending。 + // 2. 超时失败后仍走统一 retry 状态机,由下一轮扫描继续补偿。 + dispatchCtx, cancel := context.WithTimeout(ctx, defaultDispatchTimeout) + defer cancel() + if err = e.producer.Enqueue(dispatchCtx, outboxMsg.Topic, outboxMsg.MessageKey, raw); err != nil { _ = e.repo.MarkFailedForRetry(ctx, outboxMsg.ID, "投递 Kafka 失败: "+err.Error()) return err } @@ -426,18 +445,18 @@ func resolveEngineRoute(repo *Repository, cfg kafkabus.Config) ServiceRoute { GroupID: strings.TrimSpace(cfg.GroupID), } if repo != nil { - repoRoute := normalizeServiceRoute(repo.route) + repoRoute := repo.route if route.ServiceName == "" { - route.ServiceName = repoRoute.ServiceName + route.ServiceName = strings.TrimSpace(repoRoute.ServiceName) } - if route.TableName == "" { - route.TableName = repoRoute.TableName + if route.TableName == "" && strings.TrimSpace(repoRoute.TableName) != "" { + route.TableName = strings.TrimSpace(repoRoute.TableName) } - if route.Topic == "" { - route.Topic = repoRoute.Topic + if route.Topic == "" && strings.TrimSpace(repoRoute.Topic) != "" { + route.Topic = strings.TrimSpace(repoRoute.Topic) } - if route.GroupID == "" { - route.GroupID = repoRoute.GroupID + if route.GroupID == "" && strings.TrimSpace(repoRoute.GroupID) != "" { + 
route.GroupID = strings.TrimSpace(repoRoute.GroupID) } } diff --git a/backend/inits/mysql.go b/backend/inits/mysql.go index b3cd5e2..23c6051 100644 --- a/backend/inits/mysql.go +++ b/backend/inits/mysql.go @@ -14,9 +14,9 @@ import ( // autoMigrateCoreModels 只迁移仍留在当前单体进程内的业务表。 // // 职责边界: -// 1. 负责 agent / task / schedule / memory / notification 等尚未独立拆出的表; -// 2. 不负责 users、JWT、黑名单、token 额度等 user/auth 领域表; -// 3. user/auth 表由 cmd/userauth 进程在自己的 DAO 初始化阶段迁移,避免 all 启动时跨服务碰核心用户表。 +// 1. 负责 agent / task / schedule / memory 等尚未独立拆出的表; +// 2. 不负责 users、notification_records、JWT、黑名单、token 额度等已拆服务表; +// 3. user/auth 与 notification 表由各自独立进程在自己的 DAO 初始化阶段迁移,避免 all 启动时跨服务碰核心表。 func autoMigrateCoreModels(db *gorm.DB) error { models := []any{ &model.AgentChat{}, @@ -30,8 +30,6 @@ func autoMigrateCoreModels(db *gorm.DB) error { &model.ActiveScheduleJob{}, &model.ActiveScheduleTrigger{}, &model.ActiveSchedulePreview{}, - &model.NotificationRecord{}, - &model.UserNotificationChannel{}, &model.AgentScheduleState{}, &model.ActiveScheduleSession{}, &model.AgentStateSnapshotRecord{}, diff --git a/backend/model/active_schedule.go b/backend/model/active_schedule.go index ff1dbe9..cfe94e6 100644 --- a/backend/model/active_schedule.go +++ b/backend/model/active_schedule.go @@ -66,21 +66,6 @@ const ( ActiveScheduleApplyStatusExpired = "expired" ) -const ( - // NotificationRecordStatusPending 表示通知记录已落库,等待投递。 - NotificationRecordStatusPending = "pending" - // NotificationRecordStatusSending 表示当前 worker 正在调用 provider。 - NotificationRecordStatusSending = "sending" - // NotificationRecordStatusSent 表示 provider 明确返回成功。 - NotificationRecordStatusSent = "sent" - // NotificationRecordStatusFailed 表示本次投递失败,但仍可重试。 - NotificationRecordStatusFailed = "failed" - // NotificationRecordStatusDead 表示达到重试上限或不可恢复错误。 - NotificationRecordStatusDead = "dead" - // NotificationRecordStatusSkipped 表示命中去重或配置关闭,本次不投递。 - NotificationRecordStatusSkipped = "skipped" -) - const ( // 
ActiveScheduleTriggerTypeImportantUrgentTask 是重要且紧急任务到线触发。 ActiveScheduleTriggerTypeImportantUrgentTask = "important_urgent_task" @@ -221,44 +206,3 @@ type ActiveSchedulePreview struct { } func (ActiveSchedulePreview) TableName() string { return "active_schedule_previews" } - -// NotificationRecord 是通知投递记录表模型。 -// -// 职责边界: -// 1. 负责记录飞书等通知渠道的幂等、状态流转和 provider 返回; -// 2. 不负责决定是否生成调度预览,也不负责 apply 状态; -// 3. 重试时复用同一条记录,避免短时间重复打扰用户。 -type NotificationRecord struct { - ID int64 `gorm:"column:id;primaryKey;autoIncrement"` - - Channel string `gorm:"column:channel;type:varchar(32);not null;uniqueIndex:uk_notification_dedupe,priority:1;comment:通知渠道"` - UserID int `gorm:"column:user_id;not null;index:idx_notification_user_created,priority:1"` - TriggerID string `gorm:"column:trigger_id;type:varchar(64);not null;index:idx_notification_trigger"` - PreviewID string `gorm:"column:preview_id;type:varchar(64);not null;index:idx_notification_preview"` - TriggerType string `gorm:"column:trigger_type;type:varchar(64);not null"` - TargetType string `gorm:"column:target_type;type:varchar(64);not null"` - TargetID int `gorm:"column:target_id;not null"` - DedupeKey string `gorm:"column:dedupe_key;type:varchar(191);not null;uniqueIndex:uk_notification_dedupe,priority:2"` - TargetURL string `gorm:"column:target_url;type:varchar(255);not null;comment:站内预览链接"` - SummaryText string `gorm:"column:summary_text;type:text"` - FallbackText string `gorm:"column:fallback_text;type:text"` - FallbackUsed bool `gorm:"column:fallback_used;not null;default:false"` - Status string `gorm:"column:status;type:varchar(32);not null;default:'pending';index:idx_notification_status_retry,priority:1;comment:pending/sending/sent/failed/dead/skipped"` - AttemptCount int `gorm:"column:attempt_count;not null;default:0"` - MaxAttempts int `gorm:"column:max_attempts;not null;default:5"` - NextRetryAt *time.Time `gorm:"column:next_retry_at;index:idx_notification_status_retry,priority:2"` - LastErrorCode *string 
`gorm:"column:last_error_code;type:varchar(64)"` - LastError *string `gorm:"column:last_error;type:text"` - - ProviderMessageID *string `gorm:"column:provider_message_id;type:varchar(128)"` - ProviderRequestJSON *string `gorm:"column:provider_request_json;type:json"` - ProviderResponseJSON *string `gorm:"column:provider_response_json;type:json"` - SentAt *time.Time `gorm:"column:sent_at"` - TraceID string `gorm:"column:trace_id;type:varchar(64);index:idx_notification_trace_id"` - - CreatedAt time.Time `gorm:"column:created_at;autoCreateTime;index:idx_notification_user_created,priority:2"` - UpdatedAt time.Time `gorm:"column:updated_at;autoUpdateTime"` - DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;index"` -} - -func (NotificationRecord) TableName() string { return "notification_records" } diff --git a/backend/notification/channel_service.go b/backend/notification/channel_service.go deleted file mode 100644 index 4cd72cd..0000000 --- a/backend/notification/channel_service.go +++ /dev/null @@ -1,222 +0,0 @@ -package notification - -import ( - "context" - "errors" - "strings" - "time" - - "github.com/LoveLosita/smartflow/backend/model" - "gorm.io/gorm" -) - -const ( - ChannelTestStatusSuccess = "success" - ChannelTestStatusFailed = "failed" -) - -var ErrInvalidChannelConfig = errors.New("notification channel config invalid") - -type UserNotificationChannelStore interface { - UserNotificationChannelReader - UpsertUserNotificationChannel(ctx context.Context, channel *model.UserNotificationChannel) error - DeleteUserNotificationChannel(ctx context.Context, userID int, channel string) error - UpdateUserNotificationChannelTestResult(ctx context.Context, userID int, channel string, status string, testErr string, testedAt time.Time) error -} - -type SaveFeishuWebhookRequest struct { - Enabled bool - WebhookURL string - AuthType string - BearerToken string -} - -type ChannelResponse struct { - Channel string `json:"channel"` - Enabled bool `json:"enabled"` - Configured 
bool `json:"configured"` - WebhookURLMask string `json:"webhook_url_mask,omitempty"` - AuthType string `json:"auth_type"` - HasBearerToken bool `json:"has_bearer_token"` - LastTestStatus string `json:"last_test_status,omitempty"` - LastTestError string `json:"last_test_error,omitempty"` - LastTestAt *time.Time `json:"last_test_at,omitempty"` -} - -type TestResult struct { - Channel ChannelResponse `json:"channel"` - Status string `json:"status"` - Outcome string `json:"outcome"` - Message string `json:"message,omitempty"` - TraceID string `json:"trace_id,omitempty"` - SentAt time.Time `json:"sent_at"` - Skipped bool `json:"skipped"` - Provider string `json:"provider"` -} - -type ChannelServiceOptions struct { - Now func() time.Time -} - -// ChannelService 管理用户通知通道配置和测试发送。 -// -// 职责边界: -// 1. 负责保存、查询、删除当前用户的飞书 webhook 配置; -// 2. 负责调用同一套 provider 发送测试事件并回写 last_test_*; -// 3. 不参与主动调度 trigger / preview / notification_records 状态机。 -type ChannelService struct { - store UserNotificationChannelStore - provider FeishuProvider - now func() time.Time -} - -func NewChannelService(store UserNotificationChannelStore, provider FeishuProvider, opts ChannelServiceOptions) (*ChannelService, error) { - if store == nil { - return nil, errors.New("notification channel store is nil") - } - if provider == nil { - return nil, errors.New("feishu provider is nil") - } - now := opts.Now - if now == nil { - now = time.Now - } - return &ChannelService{ - store: store, - provider: provider, - now: now, - }, nil -} - -func (s *ChannelService) GetFeishuWebhook(ctx context.Context, userID int) (ChannelResponse, error) { - if userID <= 0 { - return ChannelResponse{}, ErrInvalidChannelConfig - } - row, err := s.store.GetUserNotificationChannel(ctx, userID, model.NotificationChannelFeishuWebhook) - if err != nil { - if errors.Is(err, gorm.ErrRecordNotFound) { - return ChannelResponse{ - Channel: model.NotificationChannelFeishuWebhook, - AuthType: model.NotificationAuthTypeNone, - Configured: false, 
- }, nil - } - return ChannelResponse{}, err - } - return responseFromChannel(row), nil -} - -func (s *ChannelService) SaveFeishuWebhook(ctx context.Context, userID int, req SaveFeishuWebhookRequest) (ChannelResponse, error) { - if userID <= 0 { - return ChannelResponse{}, ErrInvalidChannelConfig - } - webhookURL := strings.TrimSpace(req.WebhookURL) - if err := ValidateFeishuWebhookURL(webhookURL); err != nil { - return ChannelResponse{}, ErrInvalidChannelConfig - } - authType := normalizeAuthType(req.AuthType) - bearerToken := strings.TrimSpace(req.BearerToken) - if authType == model.NotificationAuthTypeBearer && bearerToken == "" { - return ChannelResponse{}, ErrInvalidChannelConfig - } - row := &model.UserNotificationChannel{ - UserID: userID, - Channel: model.NotificationChannelFeishuWebhook, - Enabled: req.Enabled, - WebhookURL: webhookURL, - AuthType: authType, - BearerToken: bearerToken, - } - if err := s.store.UpsertUserNotificationChannel(ctx, row); err != nil { - return ChannelResponse{}, err - } - return s.GetFeishuWebhook(ctx, userID) -} - -func (s *ChannelService) DeleteFeishuWebhook(ctx context.Context, userID int) error { - if userID <= 0 { - return ErrInvalidChannelConfig - } - return s.store.DeleteUserNotificationChannel(ctx, userID, model.NotificationChannelFeishuWebhook) -} - -func (s *ChannelService) TestFeishuWebhook(ctx context.Context, userID int) (TestResult, error) { - if userID <= 0 { - return TestResult{}, ErrInvalidChannelConfig - } - now := s.now() - traceID := "trace_feishu_webhook_test" - sendResult, sendErr := s.provider.Send(ctx, FeishuSendRequest{ - NotificationID: 0, - UserID: userID, - TriggerID: "ast_test_webhook", - PreviewID: "asp_test_webhook", - TriggerType: "manual_test", - TargetType: "notification_channel", - TargetID: 0, - TargetURL: "/assistant/00000000-0000-0000-0000-000000000000", - MessageText: "这是一条 SmartFlow 飞书 Webhook 测试消息。", - TraceID: traceID, - AttemptCount: 1, - }) - if sendErr != nil { - return TestResult{}, 
sendErr - } - - status := ChannelTestStatusFailed - testErr := strings.TrimSpace(sendResult.ErrorMessage) - if sendResult.Outcome == FeishuSendOutcomeSuccess { - status = ChannelTestStatusSuccess - testErr = "" - } - if sendResult.Outcome == FeishuSendOutcomeSkipped && testErr == "" { - testErr = "飞书 webhook 未配置或未启用" - } - if err := s.store.UpdateUserNotificationChannelTestResult(ctx, userID, model.NotificationChannelFeishuWebhook, status, testErr, now); err != nil { - return TestResult{}, err - } - channel, err := s.GetFeishuWebhook(ctx, userID) - if err != nil { - return TestResult{}, err - } - return TestResult{ - Channel: channel, - Status: status, - Outcome: string(sendResult.Outcome), - Message: testErr, - TraceID: traceID, - SentAt: now, - Skipped: sendResult.Outcome == FeishuSendOutcomeSkipped, - Provider: ChannelFeishu, - }, nil -} - -func responseFromChannel(row *model.UserNotificationChannel) ChannelResponse { - if row == nil { - return ChannelResponse{ - Channel: model.NotificationChannelFeishuWebhook, - AuthType: model.NotificationAuthTypeNone, - Configured: false, - } - } - return ChannelResponse{ - Channel: row.Channel, - Enabled: row.Enabled, - Configured: strings.TrimSpace(row.WebhookURL) != "", - WebhookURLMask: MaskWebhookURL(row.WebhookURL), - AuthType: normalizeAuthType(row.AuthType), - HasBearerToken: strings.TrimSpace(row.BearerToken) != "", - LastTestStatus: row.LastTestStatus, - LastTestError: row.LastTestError, - LastTestAt: row.LastTestAt, - } -} - -func normalizeAuthType(authType string) string { - switch strings.ToLower(strings.TrimSpace(authType)) { - case model.NotificationAuthTypeBearer: - return model.NotificationAuthTypeBearer - default: - return model.NotificationAuthTypeNone - } -} diff --git a/backend/notification/dedupe.go b/backend/notification/dedupe.go deleted file mode 100644 index d018fe8..0000000 --- a/backend/notification/dedupe.go +++ /dev/null @@ -1,32 +0,0 @@ -package notification - -import ( - "fmt" - "strings" - 
"time" -) - -const ( - // DefaultFeishuDedupeWindow 是 notification 第一版固定的 30 分钟去重窗口。 - DefaultFeishuDedupeWindow = 30 * time.Minute -) - -// BuildTimeWindowDedupeKey 构造“user_id + trigger_type + time_window”去重键。 -// -// 职责边界: -// 1. 供事件发布方在生成 `notification.feishu.requested` payload 时复用; -// 2. 只负责把 30 分钟窗口归一成稳定 key,不负责落 notification_records; -// 3. unfinished_feedback 若要改用 feedback_id / idempotency_key,可不使用这个 helper。 -func BuildTimeWindowDedupeKey(userID int, triggerType string, requestedAt time.Time, window time.Duration) string { - if window <= 0 { - window = DefaultFeishuDedupeWindow - } - if userID <= 0 || strings.TrimSpace(triggerType) == "" || requestedAt.IsZero() { - return "" - } - - // 1. 先把请求时间归一到固定窗口起点,保证 30 分钟内多次触发得到同一 key。 - // 2. requestedAt 为空或非法时直接返回空字符串,让上游显式感知入参不完整。 - windowStartUnix := requestedAt.Unix() / int64(window.Seconds()) - return fmt.Sprintf("%d:%s:%d", userID, strings.TrimSpace(triggerType), windowStartUnix) -} diff --git a/backend/notification/provider.go b/backend/notification/provider.go deleted file mode 100644 index 95f56c9..0000000 --- a/backend/notification/provider.go +++ /dev/null @@ -1,88 +0,0 @@ -package notification - -import "context" - -const ( - // ChannelFeishu 表示当前通知记录走飞书通道。 - ChannelFeishu = "feishu" -) - -const ( - // FeishuErrorCodeProviderTimeout 表示 provider 超时,属于可重试错误。 - FeishuErrorCodeProviderTimeout = "provider_timeout" - // FeishuErrorCodeProviderRateLimited 表示 provider 限流,属于可重试错误。 - FeishuErrorCodeProviderRateLimited = "provider_rate_limited" - // FeishuErrorCodeProvider5xx 表示 provider 服务端异常,属于可重试错误。 - FeishuErrorCodeProvider5xx = "provider_5xx" - // FeishuErrorCodeNetworkError 表示网络层异常,属于可重试错误。 - FeishuErrorCodeNetworkError = "network_error" - // FeishuErrorCodeRecipientMissing 表示缺少接收方,属于不可恢复错误。 - FeishuErrorCodeRecipientMissing = "recipient_missing" - // FeishuErrorCodeInvalidURL 表示目标链接非法,属于不可恢复错误。 - FeishuErrorCodeInvalidURL = "invalid_url" - // FeishuErrorCodeProviderAuthFailed 表示 provider 认证失败,属于不可恢复错误。 - 
FeishuErrorCodeProviderAuthFailed = "provider_auth_failed" - // FeishuErrorCodePayloadInvalid 表示请求体非法,属于不可恢复错误。 - FeishuErrorCodePayloadInvalid = "payload_invalid" -) - -// FeishuSendOutcome 表示 provider 对一次投递尝试的分类结果。 -// -// 职责边界: -// 1. 只表达 provider 层对“这次投递”是否成功、是否可重试的判断; -// 2. 不直接承载 notification_records 的状态机,状态流转由 NotificationService 决定; -// 3. future webhook/open_id provider 只要返回同一套枚举,即可复用现有重试逻辑。 -type FeishuSendOutcome string - -const ( - FeishuSendOutcomeSuccess FeishuSendOutcome = "success" - FeishuSendOutcomeTemporaryFail FeishuSendOutcome = "temporary_fail" - FeishuSendOutcomePermanentFail FeishuSendOutcome = "permanent_fail" - FeishuSendOutcomeSkipped FeishuSendOutcome = "skipped" -) - -// FeishuSendRequest 是通知服务传给 provider 的稳定输入。 -// -// 职责边界: -// 1. 只描述 provider 真正发消息所需的信息; -// 2. 不暴露 GORM model,避免 provider 依赖数据库细节; -// 3. 同时保留审计字段,方便 mock/webhook provider 记录请求摘要。 -type FeishuSendRequest struct { - NotificationID int64 `json:"notification_id"` - UserID int `json:"user_id"` - TriggerID string `json:"trigger_id"` - PreviewID string `json:"preview_id"` - TriggerType string `json:"trigger_type"` - TargetType string `json:"target_type"` - TargetID int `json:"target_id"` - TargetURL string `json:"target_url"` - MessageText string `json:"message_text"` - FallbackUsed bool `json:"fallback_used"` - TraceID string `json:"trace_id,omitempty"` - AttemptCount int `json:"attempt_count"` -} - -// FeishuSendResult 是 provider 对外返回的投递结果。 -// -// 职责边界: -// 1. outcome 决定 NotificationService 应该进入 sent / failed / dead 中哪一条路径; -// 2. request/response payload 仅用于落库审计,不要求与任意具体 SDK 强绑定; -// 3. 
error_code 需要尽量稳定,便于后续按错误码做告警和排障。 -type FeishuSendResult struct { - Outcome FeishuSendOutcome `json:"outcome"` - ProviderMessageID string `json:"provider_message_id,omitempty"` - ErrorCode string `json:"error_code,omitempty"` - ErrorMessage string `json:"error_message,omitempty"` - RequestPayload any `json:"request_payload,omitempty"` - ResponsePayload any `json:"response_payload,omitempty"` -} - -// FeishuProvider 是飞书投递能力的抽象边界。 -// -// 职责边界: -// 1. 负责把最终文案发给具体 provider; -// 2. 不负责 notification_records 的创建、去重、状态机和重试节奏; -// 3. 后续新增 WebhookFeishuProvider / OpenIDFeishuProvider 时,只需实现这个接口。 -type FeishuProvider interface { - Send(ctx context.Context, req FeishuSendRequest) (FeishuSendResult, error) -} diff --git a/backend/service/events/core_outbox_handlers.go b/backend/service/events/core_outbox_handlers.go index 90647dd..fb4f7c7 100644 --- a/backend/service/events/core_outbox_handlers.go +++ b/backend/service/events/core_outbox_handlers.go @@ -6,7 +6,6 @@ import ( "github.com/LoveLosita/smartflow/backend/dao" outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox" "github.com/LoveLosita/smartflow/backend/memory" - "github.com/LoveLosita/smartflow/backend/notification" sharedevents "github.com/LoveLosita/smartflow/backend/shared/events" "github.com/LoveLosita/smartflow/backend/shared/ports" ) @@ -37,9 +36,10 @@ func RegisterCoreOutboxHandlers( // RegisterAllOutboxHandlers 注册当前阶段所有 outbox handler。 // // 职责边界: -// 1. 只负责把 core / active_scheduler / notification 三类路由一次性接线; +// 1. 只负责把当前单体残留域的 core / active_scheduler 路由一次性接线; // 2. 不负责创建依赖,也不负责启动事件总线; -// 3. 供当前启动流程在“总线启动前”统一完成显式路由注册。 +// 3. notification 已独立到 cmd/notification,自有 outbox consumer 不再由单体注册; +// 4. 
供当前启动流程在“总线启动前”统一完成显式路由注册。 func RegisterAllOutboxHandlers( eventBus OutboxBus, outboxRepo *outboxinfra.Repository, @@ -48,10 +48,9 @@ func RegisterAllOutboxHandlers( cacheRepo *dao.CacheDAO, memoryModule *memory.Module, activeTriggerWorkflow ActiveScheduleTriggeredProcessor, - notificationService *notification.NotificationService, adjuster ports.TokenUsageAdjuster, ) error { - if err := validateAllOutboxHandlerDeps(eventBus, outboxRepo, repoManager, agentRepo, cacheRepo, memoryModule, activeTriggerWorkflow, notificationService); err != nil { + if err := validateAllOutboxHandlerDeps(eventBus, outboxRepo, repoManager, agentRepo, cacheRepo, memoryModule, activeTriggerWorkflow); err != nil { return err } @@ -63,7 +62,6 @@ func RegisterAllOutboxHandlers( cacheRepo, memoryModule, activeTriggerWorkflow, - notificationService, adjuster, )) } @@ -102,7 +100,7 @@ func validateCoreOutboxHandlerDeps( return nil } -// validateAllOutboxHandlerDeps 在核心依赖基础上,额外校验 active_scheduler 和 notification 相关依赖。 +// validateAllOutboxHandlerDeps 在核心依赖基础上,额外校验 active_scheduler 相关依赖。 func validateAllOutboxHandlerDeps( eventBus OutboxBus, outboxRepo *outboxinfra.Repository, @@ -111,7 +109,6 @@ func validateAllOutboxHandlerDeps( cacheRepo *dao.CacheDAO, memoryModule *memory.Module, activeTriggerWorkflow ActiveScheduleTriggeredProcessor, - notificationService *notification.NotificationService, ) error { if err := validateCoreOutboxHandlerDeps(eventBus, outboxRepo, repoManager, agentRepo, cacheRepo, memoryModule); err != nil { return err @@ -119,9 +116,6 @@ func validateAllOutboxHandlerDeps( if activeTriggerWorkflow == nil { return errors.New("active schedule triggered processor is nil") } - if notificationService == nil { - return errors.New("notification service is nil") - } return nil } @@ -190,7 +184,6 @@ func allOutboxHandlerRoutes( cacheRepo *dao.CacheDAO, memoryModule *memory.Module, activeTriggerWorkflow ActiveScheduleTriggeredProcessor, - notificationService 
*notification.NotificationService, adjuster ports.TokenUsageAdjuster, ) []outboxHandlerRoute { routes := coreOutboxHandlerRoutes(eventBus, outboxRepo, repoManager, agentRepo, cacheRepo, memoryModule, adjuster) @@ -202,13 +195,6 @@ func allOutboxHandlerRoutes( return RegisterActiveScheduleTriggeredHandler(eventBus, outboxRepo, activeTriggerWorkflow) }, }, - outboxHandlerRoute{ - EventType: sharedevents.NotificationFeishuRequestedEventType, - Service: outboxHandlerServiceNotification, - Register: func() error { - return RegisterFeishuNotificationHandler(eventBus, outboxRepo, notificationService) - }, - }, ) return routes } diff --git a/backend/service/events/outbox_bus.go b/backend/service/events/outbox_bus.go index 2de4cfe..be62250 100644 --- a/backend/service/events/outbox_bus.go +++ b/backend/service/events/outbox_bus.go @@ -170,7 +170,6 @@ func OutboxServiceNames() []string { string(outboxHandlerServiceTask), string(outboxHandlerServiceMemory), string(outboxHandlerServiceActiveScheduler), - string(outboxHandlerServiceNotification), } } diff --git a/backend/dao/notification_channel.go b/backend/services/notification/dao/channel.go similarity index 69% rename from backend/dao/notification_channel.go rename to backend/services/notification/dao/channel.go index 6b4812d..910f480 100644 --- a/backend/dao/notification_channel.go +++ b/backend/services/notification/dao/channel.go @@ -5,30 +5,30 @@ import ( "errors" "time" - "github.com/LoveLosita/smartflow/backend/model" + notificationmodel "github.com/LoveLosita/smartflow/backend/services/notification/model" "gorm.io/gorm" "gorm.io/gorm/clause" ) -// NotificationChannelDAO 管理用户外部通知通道配置。 +// ChannelDAO 管理用户外部通知通道配置。 // // 职责边界: // 1. 只负责 user_notification_channels 的基础读写; // 2. 不负责 webhook 请求发送、notification_records 状态机或 outbox 消费; -// 3. webhook_url / bearer_token 的脱敏由 API/service 层处理,DAO 保持真实持久化值。 -type NotificationChannelDAO struct { +// 3. 
webhook_url / bearer_token 的脱敏由 service 层处理,DAO 保持真实持久化值。 +type ChannelDAO struct { db *gorm.DB } -func NewNotificationChannelDAO(db *gorm.DB) *NotificationChannelDAO { - return &NotificationChannelDAO{db: db} +func NewChannelDAO(db *gorm.DB) *ChannelDAO { + return &ChannelDAO{db: db} } -func (d *NotificationChannelDAO) WithTx(tx *gorm.DB) *NotificationChannelDAO { - return &NotificationChannelDAO{db: tx} +func (d *ChannelDAO) WithTx(tx *gorm.DB) *ChannelDAO { + return &ChannelDAO{db: tx} } -func (d *NotificationChannelDAO) ensureDB() error { +func (d *ChannelDAO) ensureDB() error { if d == nil || d.db == nil { return errors.New("notification channel dao 未初始化") } @@ -41,7 +41,7 @@ func (d *NotificationChannelDAO) ensureDB() error { // 1. 只覆盖开关、webhook、鉴权配置和 updated_at; // 2. 不清空 last_test_*,避免用户保存配置后丢掉最近一次测试结果; // 3. channel.ID 由数据库自增,调用方不应依赖传入 ID。 -func (d *NotificationChannelDAO) UpsertUserNotificationChannel(ctx context.Context, channel *model.UserNotificationChannel) error { +func (d *ChannelDAO) UpsertUserNotificationChannel(ctx context.Context, channel *notificationmodel.UserNotificationChannel) error { if err := d.ensureDB(); err != nil { return err } @@ -60,7 +60,7 @@ func (d *NotificationChannelDAO) UpsertUserNotificationChannel(ctx context.Conte "updated_at": now, } return d.db.WithContext(ctx). - Model(&model.UserNotificationChannel{}). + Model(&notificationmodel.UserNotificationChannel{}).
Clauses(clause.OnConflict{ Columns: []clause.Column{{Name: "user_id"}, {Name: "channel"}}, DoUpdates: clause.Assignments(map[string]any{ @@ -75,14 +75,14 @@ func (d *NotificationChannelDAO) UpsertUserNotificationChannel(ctx context.Conte } // GetUserNotificationChannel 查询用户指定通知通道配置。 -func (d *NotificationChannelDAO) GetUserNotificationChannel(ctx context.Context, userID int, channel string) (*model.UserNotificationChannel, error) { +func (d *ChannelDAO) GetUserNotificationChannel(ctx context.Context, userID int, channel string) (*notificationmodel.UserNotificationChannel, error) { if err := d.ensureDB(); err != nil { return nil, err } if userID <= 0 || channel == "" { return nil, gorm.ErrRecordNotFound } - var row model.UserNotificationChannel + var row notificationmodel.UserNotificationChannel err := d.db.WithContext(ctx). Where("user_id = ? AND channel = ?", userID, channel). First(&row).Error @@ -95,7 +95,7 @@ func (d *NotificationChannelDAO) GetUserNotificationChannel(ctx context.Context, // DeleteUserNotificationChannel 删除用户指定通知通道配置。 // // 说明:当前表不保留软删除列;删除后再次保存会重新创建配置。 -func (d *NotificationChannelDAO) DeleteUserNotificationChannel(ctx context.Context, userID int, channel string) error { +func (d *ChannelDAO) DeleteUserNotificationChannel(ctx context.Context, userID int, channel string) error { if err := d.ensureDB(); err != nil { return err } @@ -104,11 +104,11 @@ func (d *NotificationChannelDAO) DeleteUserNotificationChannel(ctx context.Conte } return d.db.WithContext(ctx). Where("user_id = ? AND channel = ?", userID, channel). 
- Delete(&model.UserNotificationChannel{}).Error + Delete(&notificationmodel.UserNotificationChannel{}).Error } // UpdateUserNotificationChannelTestResult 回写用户 webhook 测试结果。 -func (d *NotificationChannelDAO) UpdateUserNotificationChannelTestResult(ctx context.Context, userID int, channel string, status string, testErr string, testedAt time.Time) error { +func (d *ChannelDAO) UpdateUserNotificationChannelTestResult(ctx context.Context, userID int, channel string, status string, testErr string, testedAt time.Time) error { if err := d.ensureDB(); err != nil { return err } @@ -121,7 +121,7 @@ func (d *NotificationChannelDAO) UpdateUserNotificationChannelTestResult(ctx con "last_test_at": &testedAt, } return d.db.WithContext(ctx). - Model(&model.UserNotificationChannel{}). + Model(&notificationmodel.UserNotificationChannel{}). Where("user_id = ? AND channel = ?", userID, channel). Updates(updates).Error } diff --git a/backend/services/notification/dao/connect.go b/backend/services/notification/dao/connect.go new file mode 100644 index 0000000..6d91c81 --- /dev/null +++ b/backend/services/notification/dao/connect.go @@ -0,0 +1,60 @@ +package dao + +import ( + "fmt" + + outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox" + coremodel "github.com/LoveLosita/smartflow/backend/model" + notificationmodel "github.com/LoveLosita/smartflow/backend/services/notification/model" + "github.com/spf13/viper" + "gorm.io/driver/mysql" + "gorm.io/gorm" +) + +// OpenDBFromConfig 创建 notification 服务自己的数据库句柄。 +// +// 职责边界: +// 1. 只迁移 notification_records 与 user_notification_channels; +// 2. 不迁移主动调度、agent、userauth 或其它服务表; +// 3. 
返回的 *gorm.DB 供 notification 服务内 DAO 和 outbox consumer 复用。 +func OpenDBFromConfig() (*gorm.DB, error) { + host := viper.GetString("database.host") + port := viper.GetString("database.port") + user := viper.GetString("database.user") + password := viper.GetString("database.password") + dbname := viper.GetString("database.dbname") + + dsn := fmt.Sprintf( + "%s:%s@tcp(%s:%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", + user, password, host, port, dbname, + ) + + db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{}) + if err != nil { + return nil, err + } + if err = db.AutoMigrate(&notificationmodel.NotificationRecord{}, &notificationmodel.UserNotificationChannel{}); err != nil { + return nil, fmt.Errorf("auto migrate notification tables failed: %w", err) + } + if err = autoMigrateNotificationOutboxTable(db); err != nil { + return nil, err + } + return db, nil +} + +// autoMigrateNotificationOutboxTable 只迁移 notification 服务自己的 outbox 物理表。 +// +// 职责边界: +// 1. 只负责 notification.outbox 对应表,不碰单体残留的其他业务表; +// 2. 让独立 notification 服务可以单独启动和消费 outbox,不依赖 backend/inits 的全量迁移; +// 3. 
若后续调整 outbox 表名,只改 service catalog,不在这里硬编码。 +func autoMigrateNotificationOutboxTable(db *gorm.DB) error { + cfg, ok := outboxinfra.ResolveServiceConfig(outboxinfra.ServiceNotification) + if !ok { + return fmt.Errorf("resolve notification outbox config failed") + } + if err := db.Table(cfg.TableName).AutoMigrate(&coremodel.AgentOutboxMessage{}); err != nil { + return fmt.Errorf("auto migrate notification outbox table failed for %s (%s): %w", cfg.Name, cfg.TableName, err) + } + return nil +} diff --git a/backend/services/notification/dao/record.go b/backend/services/notification/dao/record.go new file mode 100644 index 0000000..9d4cd25 --- /dev/null +++ b/backend/services/notification/dao/record.go @@ -0,0 +1,170 @@ +package dao + +import ( + "context" + "errors" + "time" + + notificationmodel "github.com/LoveLosita/smartflow/backend/services/notification/model" + "gorm.io/gorm" +) + +// RecordDAO 管理 notification_records 投递状态机持久化。 +// +// 职责边界: +// 1. 只负责通知记录的创建、去重查询、状态更新和重试扫描; +// 2. 不负责 provider 发送、幂等锁或 outbox consumed 标记; +// 3. 
不读写 active_schedule_* 表,避免 notification 服务反向持有主动调度内部状态。 +type RecordDAO struct { + db *gorm.DB +} + +func NewRecordDAO(db *gorm.DB) *RecordDAO { + return &RecordDAO{db: db} +} + +func (d *RecordDAO) WithTx(tx *gorm.DB) *RecordDAO { + return &RecordDAO{db: tx} +} + +func (d *RecordDAO) ensureDB() error { + if d == nil || d.db == nil { + return errors.New("notification record dao 未初始化") + } + return nil +} + +func (d *RecordDAO) CreateNotificationRecord(ctx context.Context, record *notificationmodel.NotificationRecord) error { + if err := d.ensureDB(); err != nil { + return err + } + if record == nil { + return errors.New("notification record 不能为空") + } + return d.db.WithContext(ctx).Create(record).Error +} + +func (d *RecordDAO) UpdateNotificationRecordFields(ctx context.Context, notificationID int64, updates map[string]any) error { + if err := d.ensureDB(); err != nil { + return err + } + if notificationID <= 0 { + return errors.New("notification record id 不能为空") + } + if len(updates) == 0 { + return nil + } + return d.db.WithContext(ctx). + Model(&notificationmodel.NotificationRecord{}). + Where("id = ?", notificationID). + Updates(updates).Error +} + +func (d *RecordDAO) GetNotificationRecordByID(ctx context.Context, notificationID int64) (*notificationmodel.NotificationRecord, error) { + if err := d.ensureDB(); err != nil { + return nil, err + } + if notificationID <= 0 { + return nil, gorm.ErrRecordNotFound + } + var record notificationmodel.NotificationRecord + err := d.db.WithContext(ctx).Where("id = ?", notificationID).First(&record).Error + if err != nil { + return nil, err + } + return &record, nil +} + +// FindNotificationRecordByDedupeKey 查询通知去重记录。 +// +// 说明: +// 1. notification 第一版按 channel + dedupe_key 聚合去重; +// 2. 若返回 pending/sending/sent,上层应避免重复投递; +// 3. 
若返回 failed,上层可以复用同一条记录进入 provider retry。 +func (d *RecordDAO) FindNotificationRecordByDedupeKey(ctx context.Context, channel string, dedupeKey string) (*notificationmodel.NotificationRecord, error) { + if err := d.ensureDB(); err != nil { + return nil, err + } + if channel == "" || dedupeKey == "" { + return nil, gorm.ErrRecordNotFound + } + var record notificationmodel.NotificationRecord + err := d.db.WithContext(ctx). + Where("channel = ? AND dedupe_key = ?", channel, dedupeKey). + Order("created_at DESC, id DESC"). + First(&record).Error + if err != nil { + return nil, err + } + return &record, nil +} + +// ListRetryableNotificationRecords 查询到达重试时间的通知记录。 +// +// 1. failed 记录按 next_retry_at 进入重试队列; +// 2. sending 记录只有超过租约才会回收,避免仍在执行的 provider 调用被重复放大; +// 3. 这让 retry scanner 同时覆盖显式失败重试和“发送中崩溃恢复”。 +func (d *RecordDAO) ListRetryableNotificationRecords(ctx context.Context, now time.Time, sendingStaleBefore time.Time, limit int) ([]notificationmodel.NotificationRecord, error) { + if err := d.ensureDB(); err != nil { + return nil, err + } + if limit <= 0 || now.IsZero() { + return []notificationmodel.NotificationRecord{}, nil + } + if sendingStaleBefore.IsZero() { + sendingStaleBefore = now.Add(-10 * time.Minute) + } + var records []notificationmodel.NotificationRecord + err := d.db.WithContext(ctx). + Where( + "(status = ? AND next_retry_at IS NOT NULL AND next_retry_at <= ?) OR (status = ? AND updated_at <= ?)", + notificationmodel.RecordStatusFailed, + now, + notificationmodel.RecordStatusSending, + sendingStaleBefore, + ). + Order("next_retry_at ASC, id ASC"). + Limit(limit). + Find(&records).Error + if err != nil { + return nil, err + } + return records, nil +} + +// ClaimRetryableNotificationRecord 抢占一条到期失败通知,避免多实例重复调用 provider。 +// +// 职责边界: +// 1. 只做跨进程 claim,不发送通知、不推进最终投递状态; +// 2. failed 到期记录和 stale sending 记录都可以被回收为 sending; +// 3. 
返回 claimed=false 表示记录已被其它实例抢走或状态已变化,调用方应跳过本次重试。 +func (d *RecordDAO) ClaimRetryableNotificationRecord(ctx context.Context, notificationID int64, now time.Time, sendingStaleBefore time.Time) (bool, error) { + if err := d.ensureDB(); err != nil { + return false, err + } + if notificationID <= 0 || now.IsZero() { + return false, nil + } + if sendingStaleBefore.IsZero() { + sendingStaleBefore = now.Add(-10 * time.Minute) + } + result := d.db.WithContext(ctx). + Model(¬ificationmodel.NotificationRecord{}). + Where( + "id = ? AND ((status = ? AND next_retry_at IS NOT NULL AND next_retry_at <= ?) OR (status = ? AND updated_at <= ?))", + notificationID, + notificationmodel.RecordStatusFailed, + now, + notificationmodel.RecordStatusSending, + sendingStaleBefore, + ). + Updates(map[string]any{ + "status": notificationmodel.RecordStatusSending, + "next_retry_at": nil, + "updated_at": now, + }) + if result.Error != nil { + return false, result.Error + } + return result.RowsAffected == 1, nil +} diff --git a/backend/notification/mock_provider.go b/backend/services/notification/internal/feishu/mock.go similarity index 60% rename from backend/notification/mock_provider.go rename to backend/services/notification/internal/feishu/mock.go index fdeea12..f9caf5d 100644 --- a/backend/notification/mock_provider.go +++ b/backend/services/notification/internal/feishu/mock.go @@ -1,4 +1,4 @@ -package notification +package feishu import ( "context" @@ -7,42 +7,40 @@ import ( "time" ) -// MockFeishuMode 描述 mock provider 下一次返回哪类结果。 -type MockFeishuMode string +// MockMode 描述 mock provider 下一次返回哪类结果。 +type MockMode string const ( - MockFeishuModeSuccess MockFeishuMode = "success" - MockFeishuModeTemporaryFail MockFeishuMode = "temporary_fail" - MockFeishuModePermanentFail MockFeishuMode = "permanent_fail" + MockModeSuccess MockMode = "success" + MockModeTemporaryFail MockMode = "temporary_fail" + MockModePermanentFail MockMode = "permanent_fail" ) -// MockFeishuProvider 是进程内 mock provider。 +// 
MockProvider 是进程内 mock provider。 // // 职责边界: // 1. 只用于本地联调、单元测试和阶段性验收; // 2. 不做真实 HTTP 调用,直接根据预设 mode 返回 success / temporary_fail / permanent_fail; // 3. 保留调用历史,方便测试断言“有没有重复发飞书”。 -type MockFeishuProvider struct { +type MockProvider struct { mu sync.Mutex - defaultMode MockFeishuMode - queuedModes []MockFeishuMode - calls []FeishuSendRequest + defaultMode MockMode + queuedModes []MockMode + calls []SendRequest } -// NewMockFeishuProvider 创建一个进程内 mock provider。 -func NewMockFeishuProvider(defaultMode MockFeishuMode) *MockFeishuProvider { +func NewMockProvider(defaultMode MockMode) *MockProvider { if defaultMode == "" { - defaultMode = MockFeishuModeSuccess + defaultMode = MockModeSuccess } - return &MockFeishuProvider{defaultMode: defaultMode} + return &MockProvider{defaultMode: defaultMode} } -// SetDefaultMode 设置默认返回模式。 -func (p *MockFeishuProvider) SetDefaultMode(mode MockFeishuMode) { +func (p *MockProvider) SetDefaultMode(mode MockMode) { p.mu.Lock() defer p.mu.Unlock() if mode == "" { - mode = MockFeishuModeSuccess + mode = MockModeSuccess } p.defaultMode = mode } @@ -53,7 +51,7 @@ func (p *MockFeishuProvider) SetDefaultMode(mode MockFeishuMode) { // 1. 先进先出消费,便于测试“先失败再成功”的重试路径; // 2. 队列用尽后回退到 defaultMode; // 3. 空模式会被自动忽略,避免测试代码误塞脏数据。 -func (p *MockFeishuProvider) PushModes(modes ...MockFeishuMode) { +func (p *MockProvider) PushModes(modes ...MockMode) { p.mu.Lock() defer p.mu.Unlock() for _, mode := range modes { @@ -64,11 +62,10 @@ func (p *MockFeishuProvider) PushModes(modes ...MockFeishuMode) { } } -// Calls 返回当前 provider 已记录的调用快照。 -func (p *MockFeishuProvider) Calls() []FeishuSendRequest { +func (p *MockProvider) Calls() []SendRequest { p.mu.Lock() defer p.mu.Unlock() - copied := make([]FeishuSendRequest, len(p.calls)) + copied := make([]SendRequest, len(p.calls)) copy(copied, p.calls) return copied } @@ -79,7 +76,7 @@ func (p *MockFeishuProvider) Calls() []FeishuSendRequest { // 1. 先记录本次请求,方便测试校验是否发生重复投递; // 2. 
再按 queuedModes -> defaultMode 的顺序决定 outcome; // 3. 最后返回可落库审计的 request/response 摘要。 -func (p *MockFeishuProvider) Send(_ context.Context, req FeishuSendRequest) (FeishuSendResult, error) { +func (p *MockProvider) Send(_ context.Context, req SendRequest) (SendResult, error) { p.mu.Lock() p.calls = append(p.calls, req) @@ -91,10 +88,10 @@ func (p *MockFeishuProvider) Send(_ context.Context, req FeishuSendRequest) (Fei p.mu.Unlock() switch mode { - case MockFeishuModeTemporaryFail: - return FeishuSendResult{ - Outcome: FeishuSendOutcomeTemporaryFail, - ErrorCode: FeishuErrorCodeProviderTimeout, + case MockModeTemporaryFail: + return SendResult{ + Outcome: SendOutcomeTemporaryFail, + ErrorCode: ErrorCodeProviderTimeout, ErrorMessage: "mock feishu provider temporary failure", RequestPayload: map[string]any{ "notification_id": req.NotificationID, @@ -107,10 +104,10 @@ func (p *MockFeishuProvider) Send(_ context.Context, req FeishuSendRequest) (Fei "reason": "mock temporary failure", }, }, nil - case MockFeishuModePermanentFail: - return FeishuSendResult{ - Outcome: FeishuSendOutcomePermanentFail, - ErrorCode: FeishuErrorCodePayloadInvalid, + case MockModePermanentFail: + return SendResult{ + Outcome: SendOutcomePermanentFail, + ErrorCode: ErrorCodePayloadInvalid, ErrorMessage: "mock feishu provider permanent failure", RequestPayload: map[string]any{ "notification_id": req.NotificationID, @@ -124,8 +121,8 @@ func (p *MockFeishuProvider) Send(_ context.Context, req FeishuSendRequest) (Fei }, }, nil default: - return FeishuSendResult{ - Outcome: FeishuSendOutcomeSuccess, + return SendResult{ + Outcome: SendOutcomeSuccess, ProviderMessageID: fmt.Sprintf("mock_feishu_%d", time.Now().UnixNano()), RequestPayload: map[string]any{ "notification_id": req.NotificationID, @@ -134,7 +131,7 @@ func (p *MockFeishuProvider) Send(_ context.Context, req FeishuSendRequest) (Fei "target_url": req.TargetURL, }, ResponsePayload: map[string]any{ - "mode": string(MockFeishuModeSuccess), + 
"mode": string(MockModeSuccess), "status": "ok", }, }, nil diff --git a/backend/services/notification/internal/feishu/types.go b/backend/services/notification/internal/feishu/types.go new file mode 100644 index 0000000..3f2e407 --- /dev/null +++ b/backend/services/notification/internal/feishu/types.go @@ -0,0 +1,88 @@ +package feishu + +import "context" + +const ( + // Channel 表示当前通知记录走飞书通道。 + Channel = "feishu" +) + +const ( + // ErrorCodeProviderTimeout 表示 provider 超时,属于可重试错误。 + ErrorCodeProviderTimeout = "provider_timeout" + // ErrorCodeProviderRateLimited 表示 provider 限流,属于可重试错误。 + ErrorCodeProviderRateLimited = "provider_rate_limited" + // ErrorCodeProvider5xx 表示 provider 服务端异常,属于可重试错误。 + ErrorCodeProvider5xx = "provider_5xx" + // ErrorCodeNetworkError 表示网络层异常,属于可重试错误。 + ErrorCodeNetworkError = "network_error" + // ErrorCodeRecipientMissing 表示缺少接收方,属于不可恢复错误。 + ErrorCodeRecipientMissing = "recipient_missing" + // ErrorCodeInvalidURL 表示目标链接非法,属于不可恢复错误。 + ErrorCodeInvalidURL = "invalid_url" + // ErrorCodeProviderAuthFailed 表示 provider 认证失败,属于不可恢复错误。 + ErrorCodeProviderAuthFailed = "provider_auth_failed" + // ErrorCodePayloadInvalid 表示请求体非法,属于不可恢复错误。 + ErrorCodePayloadInvalid = "payload_invalid" +) + +// SendOutcome 表示 provider 对一次投递尝试的分类结果。 +// +// 职责边界: +// 1. 只表达 provider 层对“这次投递”是否成功、是否可重试的判断; +// 2. 不直接承载 notification_records 的状态机,状态流转由 service 决定; +// 3. 后续新增 Webhook / OpenID provider 时,只需返回同一套枚举。 +type SendOutcome string + +const ( + SendOutcomeSuccess SendOutcome = "success" + SendOutcomeTemporaryFail SendOutcome = "temporary_fail" + SendOutcomePermanentFail SendOutcome = "permanent_fail" + SendOutcomeSkipped SendOutcome = "skipped" +) + +// SendRequest 是通知服务传给 provider 的稳定输入。 +// +// 职责边界: +// 1. 只描述 provider 真正发消息所需的信息; +// 2. 不暴露 GORM model,避免 provider 依赖数据库细节; +// 3. 
同时保留审计字段,方便 mock/webhook provider 记录请求摘要。 +type SendRequest struct { + NotificationID int64 `json:"notification_id"` + UserID int `json:"user_id"` + TriggerID string `json:"trigger_id"` + PreviewID string `json:"preview_id"` + TriggerType string `json:"trigger_type"` + TargetType string `json:"target_type"` + TargetID int `json:"target_id"` + TargetURL string `json:"target_url"` + MessageText string `json:"message_text"` + FallbackUsed bool `json:"fallback_used"` + TraceID string `json:"trace_id,omitempty"` + AttemptCount int `json:"attempt_count"` +} + +// SendResult 是 provider 对外返回的投递结果。 +// +// 职责边界: +// 1. outcome 决定 service 应该进入 sent / failed / dead 中哪一条路径; +// 2. request/response payload 仅用于落库审计,不要求与任意具体 SDK 强绑定; +// 3. error_code 需要尽量稳定,便于后续按错误码做告警和排障。 +type SendResult struct { + Outcome SendOutcome `json:"outcome"` + ProviderMessageID string `json:"provider_message_id,omitempty"` + ErrorCode string `json:"error_code,omitempty"` + ErrorMessage string `json:"error_message,omitempty"` + RequestPayload any `json:"request_payload,omitempty"` + ResponsePayload any `json:"response_payload,omitempty"` +} + +// Provider 是飞书投递能力的抽象边界。 +// +// 职责边界: +// 1. 负责把最终文案发给具体 provider; +// 2. 不负责 notification_records 的创建、去重、状态机和重试节奏; +// 3. 
调用方只根据 SendResult.Outcome 推进自己的状态机。 +type Provider interface { + Send(ctx context.Context, req SendRequest) (SendResult, error) +} diff --git a/backend/notification/webhook_provider.go b/backend/services/notification/internal/feishu/webhook.go similarity index 64% rename from backend/notification/webhook_provider.go rename to backend/services/notification/internal/feishu/webhook.go index be30f7a..16d8cf5 100644 --- a/backend/notification/webhook_provider.go +++ b/backend/services/notification/internal/feishu/webhook.go @@ -1,4 +1,4 @@ -package notification +package feishu import ( "bytes" @@ -12,7 +12,7 @@ import ( "strings" "time" - "github.com/LoveLosita/smartflow/backend/model" + notificationmodel "github.com/LoveLosita/smartflow/backend/services/notification/model" "gorm.io/gorm" ) @@ -26,59 +26,59 @@ const ( maxWebhookResponseBodyLen = 64 * 1024 ) -// UserNotificationChannelReader 描述 webhook provider 读取用户通知配置所需的最小能力。 +// ChannelReader 描述 webhook provider 读取用户通知配置所需的最小能力。 // // 职责边界: // 1. 只读取 user_id + channel 对应的配置; // 2. 不负责保存配置和测试结果; -// 3. 生产环境由 NotificationChannelDAO 实现,测试可替换为内存 fake。 -type UserNotificationChannelReader interface { - GetUserNotificationChannel(ctx context.Context, userID int, channel string) (*model.UserNotificationChannel, error) +// 3. 生产环境由 notification/dao.ChannelDAO 实现,测试可替换为内存 fake。 +type ChannelReader interface { + GetUserNotificationChannel(ctx context.Context, userID int, channel string) (*notificationmodel.UserNotificationChannel, error) } -type WebhookFeishuProviderOptions struct { +type WebhookProviderOptions struct { HTTPClient *http.Client FrontendBaseURL string Timeout time.Duration Now func() time.Time } -// WebhookFeishuProvider 把 SmartFlow 通知事件发送到用户配置的飞书 Webhook 触发器。 +// WebhookProvider 把 SmartFlow 通知事件发送到用户配置的飞书 Webhook 触发器。 // // 职责边界: // 1. 只负责读取用户 webhook 配置、拼装极简业务 JSON 并执行 HTTP POST; // 2. 不负责 notification_records 的创建、重试节奏和幂等; // 3. 
不实现飞书群自定义机器人 msg_type 协议,私聊/群发由飞书流程自行编排。 -type WebhookFeishuProvider struct { - store UserNotificationChannelReader +type WebhookProvider struct { + store ChannelReader client *http.Client frontendBaseURL string now func() time.Time } -type FeishuWebhookPayload struct { - Event string `json:"event"` - Version string `json:"version"` - NotificationID int64 `json:"notification_id"` - UserID int `json:"user_id"` - PreviewID string `json:"preview_id"` - TriggerID string `json:"trigger_id"` - TriggerType string `json:"trigger_type"` - TargetType string `json:"target_type"` - TargetID int `json:"target_id"` - Message FeishuWebhookMessage `json:"message"` - TraceID string `json:"trace_id,omitempty"` - SentAt string `json:"sent_at"` +type WebhookPayload struct { + Event string `json:"event"` + Version string `json:"version"` + NotificationID int64 `json:"notification_id"` + UserID int `json:"user_id"` + PreviewID string `json:"preview_id"` + TriggerID string `json:"trigger_id"` + TriggerType string `json:"trigger_type"` + TargetType string `json:"target_type"` + TargetID int `json:"target_id"` + Message WebhookMessage `json:"message"` + TraceID string `json:"trace_id,omitempty"` + SentAt string `json:"sent_at"` } -type FeishuWebhookMessage struct { +type WebhookMessage struct { Title string `json:"title"` Summary string `json:"summary"` ActionText string `json:"action_text"` ActionURL string `json:"action_url"` } -func NewWebhookFeishuProvider(store UserNotificationChannelReader, opts WebhookFeishuProviderOptions) (*WebhookFeishuProvider, error) { +func NewWebhookProvider(store ChannelReader, opts WebhookProviderOptions) (*WebhookProvider, error) { if store == nil { return nil, errors.New("user notification channel store is nil") } @@ -94,7 +94,7 @@ func NewWebhookFeishuProvider(store UserNotificationChannelReader, opts WebhookF if now == nil { now = time.Now } - return &WebhookFeishuProvider{ + return &WebhookProvider{ store: store, client: client, frontendBaseURL: 
normalizeFrontendBaseURL(opts.FrontendBaseURL), @@ -102,13 +102,13 @@ func NewWebhookFeishuProvider(store UserNotificationChannelReader, opts WebhookF }, nil } -// BuildFeishuWebhookPayload 生成飞书 Webhook 触发器消费的极简业务 JSON。 +// BuildWebhookPayload 生成飞书 Webhook 触发器消费的极简业务 JSON。 // // 说明: // 1. 该结构不包含飞书群机器人 msg_type 字段; // 2. message 四个字段是飞书流程拼私聊消息的稳定输入; // 3. 其它字段用于用户流程分支、SmartFlow 排障和审计。 -func BuildFeishuWebhookPayload(req FeishuSendRequest, frontendBaseURL string, sentAt time.Time) FeishuWebhookPayload { +func BuildWebhookPayload(req SendRequest, frontendBaseURL string, sentAt time.Time) WebhookPayload { if sentAt.IsZero() { sentAt = time.Now() } @@ -116,7 +116,7 @@ func BuildFeishuWebhookPayload(req FeishuSendRequest, frontendBaseURL string, se if summary == "" { summary = "我为你生成了一份日程调整建议,请回到系统确认是否应用。" } - return FeishuWebhookPayload{ + return WebhookPayload{ Event: webhookPayloadEvent, Version: webhookPayloadVersion, NotificationID: req.NotificationID, @@ -126,7 +126,7 @@ func BuildFeishuWebhookPayload(req FeishuSendRequest, frontendBaseURL string, se TriggerType: strings.TrimSpace(req.TriggerType), TargetType: strings.TrimSpace(req.TargetType), TargetID: req.TargetID, - Message: FeishuWebhookMessage{ + Message: WebhookMessage{ Title: webhookMessageTitle, Summary: summary, ActionText: webhookMessageActionText, @@ -138,24 +138,24 @@ func BuildFeishuWebhookPayload(req FeishuSendRequest, frontendBaseURL string, se } // Send 向用户配置的飞书 Webhook 触发器投递一次 SmartFlow 通知事件。 -func (p *WebhookFeishuProvider) Send(ctx context.Context, req FeishuSendRequest) (FeishuSendResult, error) { +func (p *WebhookProvider) Send(ctx context.Context, req SendRequest) (SendResult, error) { if p == nil || p.store == nil || p.client == nil { - return FeishuSendResult{}, errors.New("webhook feishu provider 未初始化") + return SendResult{}, errors.New("webhook feishu provider 未初始化") } - config, err := p.store.GetUserNotificationChannel(ctx, req.UserID, model.NotificationChannelFeishuWebhook) + config, 
err := p.store.GetUserNotificationChannel(ctx, req.UserID, notificationmodel.ChannelFeishuWebhook) if err != nil { if errors.Is(err, gorm.ErrRecordNotFound) { return skippedResult(req, "用户未配置飞书 Webhook 触发器"), nil } - return FeishuSendResult{}, err + return SendResult{}, err } if config == nil || !config.Enabled || strings.TrimSpace(config.WebhookURL) == "" { return skippedResult(req, "用户未启用飞书 Webhook 触发器"), nil } - if err = ValidateFeishuWebhookURL(config.WebhookURL); err != nil { - return FeishuSendResult{ - Outcome: FeishuSendOutcomePermanentFail, - ErrorCode: FeishuErrorCodeInvalidURL, + if err = ValidateWebhookURL(config.WebhookURL); err != nil { + return SendResult{ + Outcome: SendOutcomePermanentFail, + ErrorCode: ErrorCodeInvalidURL, ErrorMessage: err.Error(), RequestPayload: map[string]any{ "notification_id": req.NotificationID, @@ -165,18 +165,18 @@ func (p *WebhookFeishuProvider) Send(ctx context.Context, req FeishuSendRequest) }, nil } - payload := BuildFeishuWebhookPayload(req, p.frontendBaseURL, p.now()) + payload := BuildWebhookPayload(req, p.frontendBaseURL, p.now()) raw, err := json.Marshal(payload) if err != nil { - return FeishuSendResult{}, err + return SendResult{}, err } httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, strings.TrimSpace(config.WebhookURL), bytes.NewReader(raw)) if err != nil { - return permanentWebhookResult(req, payload, nil, FeishuErrorCodeInvalidURL, err.Error()), nil + return permanentWebhookResult(req, payload, nil, ErrorCodeInvalidURL, err.Error()), nil } httpReq.Header.Set("Content-Type", "application/json; charset=utf-8") - if strings.EqualFold(strings.TrimSpace(config.AuthType), model.NotificationAuthTypeBearer) && strings.TrimSpace(config.BearerToken) != "" { + if strings.EqualFold(strings.TrimSpace(config.AuthType), notificationmodel.AuthTypeBearer) && strings.TrimSpace(config.BearerToken) != "" { httpReq.Header.Set("Authorization", "Bearer "+strings.TrimSpace(config.BearerToken)) } @@ -189,12 +189,12 
@@ func (p *WebhookFeishuProvider) Send(ctx context.Context, req FeishuSendRequest) body, readErr := io.ReadAll(io.LimitReader(resp.Body, maxWebhookResponseBodyLen)) responsePayload := buildWebhookResponsePayload(resp.StatusCode, body, readErr) if readErr != nil { - return temporaryWebhookResult(req, payload, responsePayload, FeishuErrorCodeNetworkError, readErr.Error()), nil + return temporaryWebhookResult(req, payload, responsePayload, ErrorCodeNetworkError, readErr.Error()), nil } return classifyWebhookHTTPResult(req, payload, responsePayload, resp.StatusCode, body), nil } -func classifyWebhookHTTPResult(req FeishuSendRequest, payload FeishuWebhookPayload, responsePayload map[string]any, statusCode int, body []byte) FeishuSendResult { +func classifyWebhookHTTPResult(req SendRequest, payload WebhookPayload, responsePayload map[string]any, statusCode int, body []byte) SendResult { if statusCode >= 200 && statusCode < 300 { if len(strings.TrimSpace(string(body))) > 0 { var parsed struct { @@ -202,11 +202,11 @@ func classifyWebhookHTTPResult(req FeishuSendRequest, payload FeishuWebhookPaylo Msg string `json:"msg"` } if err := json.Unmarshal(body, &parsed); err == nil && parsed.Code != nil && *parsed.Code != 0 { - return permanentWebhookResult(req, payload, responsePayload, FeishuErrorCodePayloadInvalid, firstNonEmpty(parsed.Msg, fmt.Sprintf("飞书 webhook 返回 code=%d", *parsed.Code))) + return permanentWebhookResult(req, payload, responsePayload, ErrorCodePayloadInvalid, firstNonEmpty(parsed.Msg, fmt.Sprintf("飞书 webhook 返回 code=%d", *parsed.Code))) } } - return FeishuSendResult{ - Outcome: FeishuSendOutcomeSuccess, + return SendResult{ + Outcome: SendOutcomeSuccess, ProviderMessageID: fmt.Sprintf("feishu_webhook_%d_%d", req.NotificationID, time.Now().UnixNano()), RequestPayload: payload, ResponsePayload: responsePayload, @@ -214,20 +214,20 @@ func classifyWebhookHTTPResult(req FeishuSendRequest, payload FeishuWebhookPaylo } switch { case statusCode == 
http.StatusTooManyRequests: - return temporaryWebhookResult(req, payload, responsePayload, FeishuErrorCodeProviderRateLimited, fmt.Sprintf("飞书 webhook HTTP %d", statusCode)) + return temporaryWebhookResult(req, payload, responsePayload, ErrorCodeProviderRateLimited, fmt.Sprintf("飞书 webhook HTTP %d", statusCode)) case statusCode >= 500: - return temporaryWebhookResult(req, payload, responsePayload, FeishuErrorCodeProvider5xx, fmt.Sprintf("飞书 webhook HTTP %d", statusCode)) + return temporaryWebhookResult(req, payload, responsePayload, ErrorCodeProvider5xx, fmt.Sprintf("飞书 webhook HTTP %d", statusCode)) case statusCode == http.StatusUnauthorized || statusCode == http.StatusForbidden: - return permanentWebhookResult(req, payload, responsePayload, FeishuErrorCodeProviderAuthFailed, fmt.Sprintf("飞书 webhook 鉴权失败 HTTP %d", statusCode)) + return permanentWebhookResult(req, payload, responsePayload, ErrorCodeProviderAuthFailed, fmt.Sprintf("飞书 webhook 鉴权失败 HTTP %d", statusCode)) default: - return permanentWebhookResult(req, payload, responsePayload, FeishuErrorCodePayloadInvalid, fmt.Sprintf("飞书 webhook HTTP %d", statusCode)) + return permanentWebhookResult(req, payload, responsePayload, ErrorCodePayloadInvalid, fmt.Sprintf("飞书 webhook HTTP %d", statusCode)) } } -func skippedResult(req FeishuSendRequest, reason string) FeishuSendResult { - return FeishuSendResult{ - Outcome: FeishuSendOutcomeSkipped, - ErrorCode: FeishuErrorCodeRecipientMissing, +func skippedResult(req SendRequest, reason string) SendResult { + return SendResult{ + Outcome: SendOutcomeSkipped, + ErrorCode: ErrorCodeRecipientMissing, ErrorMessage: reason, RequestPayload: map[string]any{ "notification_id": req.NotificationID, @@ -241,9 +241,9 @@ func skippedResult(req FeishuSendRequest, reason string) FeishuSendResult { } } -func temporaryWebhookResult(req FeishuSendRequest, payload FeishuWebhookPayload, responsePayload any, code string, message string) FeishuSendResult { - return FeishuSendResult{ - Outcome: 
FeishuSendOutcomeTemporaryFail, +func temporaryWebhookResult(_ SendRequest, payload WebhookPayload, responsePayload any, code string, message string) SendResult { + return SendResult{ + Outcome: SendOutcomeTemporaryFail, ErrorCode: code, ErrorMessage: message, RequestPayload: payload, @@ -251,9 +251,9 @@ func temporaryWebhookResult(req FeishuSendRequest, payload FeishuWebhookPayload, } } -func permanentWebhookResult(req FeishuSendRequest, payload FeishuWebhookPayload, responsePayload any, code string, message string) FeishuSendResult { - return FeishuSendResult{ - Outcome: FeishuSendOutcomePermanentFail, +func permanentWebhookResult(_ SendRequest, payload WebhookPayload, responsePayload any, code string, message string) SendResult { + return SendResult{ + Outcome: SendOutcomePermanentFail, ErrorCode: code, ErrorMessage: message, RequestPayload: payload, @@ -276,9 +276,9 @@ func buildWebhookResponsePayload(statusCode int, body []byte, readErr error) map func classifyNetworkError(err error) string { if errors.Is(err, context.DeadlineExceeded) { - return FeishuErrorCodeProviderTimeout + return ErrorCodeProviderTimeout } - return FeishuErrorCodeNetworkError + return ErrorCodeNetworkError } func normalizeFrontendBaseURL(value string) string { @@ -298,8 +298,8 @@ func buildActionURL(frontendBaseURL string, targetURL string) string { return base + "/" + strings.TrimLeft(targetURL, "/") } -// ValidateFeishuWebhookURL 校验第一版允许保存的飞书 Webhook 触发器地址。 -func ValidateFeishuWebhookURL(rawURL string) error { +// ValidateWebhookURL 校验第一版允许保存的飞书 Webhook 触发器地址。 +func ValidateWebhookURL(rawURL string) error { parsed, err := url.Parse(strings.TrimSpace(rawURL)) if err != nil { return err diff --git a/backend/model/notification_channel.go b/backend/services/notification/model/channel.go similarity index 73% rename from backend/model/notification_channel.go rename to backend/services/notification/model/channel.go index d794648..5a8caa7 100644 --- a/backend/model/notification_channel.go +++ 
b/backend/services/notification/model/channel.go @@ -1,27 +1,25 @@ package model -import ( - "time" +import "time" + +const ( + // ChannelFeishuWebhook 表示用户配置的是飞书 Webhook 触发器。 + ChannelFeishuWebhook = "feishu_webhook" ) const ( - // NotificationChannelFeishuWebhook 表示用户配置的是飞书 Webhook 触发器。 - NotificationChannelFeishuWebhook = "feishu_webhook" -) - -const ( - // NotificationAuthTypeNone 表示 webhook 不需要额外鉴权头。 - NotificationAuthTypeNone = "none" - // NotificationAuthTypeBearer 表示 webhook 需要 Authorization: Bearer token。 - NotificationAuthTypeBearer = "bearer" + // AuthTypeNone 表示 webhook 不需要额外鉴权头。 + AuthTypeNone = "none" + // AuthTypeBearer 表示 webhook 需要 Authorization: Bearer token。 + AuthTypeBearer = "bearer" ) // UserNotificationChannel 保存单个用户的外部通知通道配置。 // // 职责边界: // 1. 只记录 user_id 到具体通知 provider 配置的映射; -// 2. 不记录 notification_records 投递状态,投递状态仍属于 NotificationRecord; -// 3. 当前 webhook_url / bearer_token 暂以明文字段承载,接口和日志必须脱敏;后续接入统一密钥加密能力后再替换存储实现。 +// 2. 不记录 notification_records 投递状态,投递状态属于 NotificationRecord; +// 3. 当前 webhook_url / bearer_token 暂以明文字段承载,接口和日志必须脱敏。 type UserNotificationChannel struct { ID int64 `gorm:"column:id;primaryKey;autoIncrement" json:"id"` diff --git a/backend/services/notification/model/record.go b/backend/services/notification/model/record.go new file mode 100644 index 0000000..85a426e --- /dev/null +++ b/backend/services/notification/model/record.go @@ -0,0 +1,59 @@ +package model + +import "time" + +const ( + // RecordStatusPending 表示通知记录已落库,等待投递。 + RecordStatusPending = "pending" + // RecordStatusSending 表示当前 worker 正在调用 provider。 + RecordStatusSending = "sending" + // RecordStatusSent 表示 provider 明确返回成功。 + RecordStatusSent = "sent" + // RecordStatusFailed 表示本次投递失败,但仍可重试。 + RecordStatusFailed = "failed" + // RecordStatusDead 表示达到重试上限或不可恢复错误。 + RecordStatusDead = "dead" + // RecordStatusSkipped 表示命中去重或配置关闭,本次不投递。 + RecordStatusSkipped = "skipped" +) + +// NotificationRecord 是通知投递记录表模型。 +// +// 职责边界: +// 1. 
记录一次通知请求的幂等键、投递状态、provider 请求和响应审计; +// 2. 不保存用户 webhook 配置,配置由 UserNotificationChannel 维护; +// 3. 不承担主动调度 preview 或正式日程状态,二者只通过 trigger_id/preview_id 关联排障。 +type NotificationRecord struct { + ID int64 `gorm:"column:id;primaryKey;autoIncrement"` + + Channel string `gorm:"column:channel;type:varchar(32);not null;uniqueIndex:uk_notification_dedupe,priority:1;comment:通知渠道"` + UserID int `gorm:"column:user_id;not null;index:idx_notification_user_created,priority:1"` + TriggerID string `gorm:"column:trigger_id;type:varchar(64);not null;index:idx_notification_trigger"` + PreviewID string `gorm:"column:preview_id;type:varchar(64);not null;index:idx_notification_preview"` + TriggerType string `gorm:"column:trigger_type;type:varchar(64);not null"` + TargetType string `gorm:"column:target_type;type:varchar(64);not null"` + TargetID int `gorm:"column:target_id;not null"` + DedupeKey string `gorm:"column:dedupe_key;type:varchar(191);not null;uniqueIndex:uk_notification_dedupe,priority:2"` + TargetURL string `gorm:"column:target_url;type:varchar(255);not null;comment:站内预览链接"` + SummaryText string `gorm:"column:summary_text;type:text"` + FallbackText string `gorm:"column:fallback_text;type:text"` + FallbackUsed bool `gorm:"column:fallback_used;not null;default:false"` + + Status string `gorm:"column:status;type:varchar(32);not null;default:'pending';index:idx_notification_status_retry,priority:1;comment:pending/sending/sent/failed/dead/skipped"` + AttemptCount int `gorm:"column:attempt_count;not null;default:0"` + MaxAttempts int `gorm:"column:max_attempts;not null;default:5"` + NextRetryAt *time.Time `gorm:"column:next_retry_at;index:idx_notification_status_retry,priority:2"` + LastErrorCode *string `gorm:"column:last_error_code;type:varchar(64)"` + LastError *string `gorm:"column:last_error;type:text"` + + ProviderMessageID *string `gorm:"column:provider_message_id;type:varchar(128)"` + ProviderRequestJSON *string `gorm:"column:provider_request_json;type:json"` + 
ProviderResponseJSON *string `gorm:"column:provider_response_json;type:json"` + SentAt *time.Time `gorm:"column:sent_at"` + TraceID string `gorm:"column:trace_id;type:varchar(128)"` + + CreatedAt time.Time `gorm:"column:created_at;autoCreateTime;index:idx_notification_user_created,priority:2"` + UpdatedAt time.Time `gorm:"column:updated_at;autoUpdateTime"` +} + +func (NotificationRecord) TableName() string { return "notification_records" } diff --git a/backend/services/notification/rpc/errors.go b/backend/services/notification/rpc/errors.go new file mode 100644 index 0000000..c341358 --- /dev/null +++ b/backend/services/notification/rpc/errors.go @@ -0,0 +1,76 @@ +package rpc + +import ( + "errors" + "log" + "strings" + + "github.com/LoveLosita/smartflow/backend/respond" + "google.golang.org/genproto/googleapis/rpc/errdetails" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +const notificationErrorDomain = "smartflow.notification" + +// grpcErrorFromServiceError 负责把 notification 内部错误收口成 gRPC status。 +// +// 职责边界: +// 1. 只负责把本服务内部的 respond.Response / 普通 error 转成 gRPC 可传输错误; +// 2. 不负责决定 HTTP 语义,也不负责写回前端响应体; +// 3. 上层 handler 只要直接 return 这个结果,就能让 client 侧按 `res, err :=` 的方式接收。 +func grpcErrorFromServiceError(err error) error { + if err == nil { + return nil + } + + var resp respond.Response + if errors.As(err, &resp) { + return grpcErrorFromResponse(resp) + } + log.Printf("notification rpc internal error: %v", err) + return status.Error(codes.Internal, "notification service internal error") +} + +// grpcErrorFromResponse 负责把项目内业务响应映射成 gRPC status。 +// +// 职责边界: +// 1. 只处理 notification 这组响应码到 gRPC code 的映射; +// 2. 业务码和业务文案通过 ErrorInfo 附带,方便 gateway 再反解回 respond.Response; +// 3. 
失败时退化为普通 gRPC status,不阻断请求链路。 +func grpcErrorFromResponse(resp respond.Response) error { + code := grpcCodeFromRespondStatus(resp.Status) + message := strings.TrimSpace(resp.Info) + if message == "" { + message = strings.TrimSpace(resp.Status) + } + + st := status.New(code, message) + detail := &errdetails.ErrorInfo{ + Domain: notificationErrorDomain, + Reason: resp.Status, + Metadata: map[string]string{ + "info": resp.Info, + }, + } + withDetails, err := st.WithDetails(detail) + if err != nil { + return st.Err() + } + return withDetails.Err() +} + +func grpcCodeFromRespondStatus(statusValue string) codes.Code { + switch strings.TrimSpace(statusValue) { + case respond.MissingToken.Status, respond.InvalidToken.Status, respond.InvalidClaims.Status, + respond.ErrUnauthorized.Status, respond.WrongTokenType.Status, respond.UserLoggedOut.Status: + return codes.Unauthenticated + case respond.MissingParam.Status, respond.WrongParamType.Status, respond.ParamTooLong.Status: + return codes.InvalidArgument + } + + if strings.HasPrefix(strings.TrimSpace(statusValue), "5") { + return codes.Internal + } + return codes.InvalidArgument +} diff --git a/backend/services/notification/rpc/handler.go b/backend/services/notification/rpc/handler.go new file mode 100644 index 0000000..0498830 --- /dev/null +++ b/backend/services/notification/rpc/handler.go @@ -0,0 +1,133 @@ +package rpc + +import ( + "context" + "errors" + "time" + + "github.com/LoveLosita/smartflow/backend/respond" + "github.com/LoveLosita/smartflow/backend/services/notification/rpc/pb" + notificationsv "github.com/LoveLosita/smartflow/backend/services/notification/sv" + contracts "github.com/LoveLosita/smartflow/backend/shared/contracts/notification" +) + +type Handler struct { + pb.UnimplementedNotificationServer + svc *notificationsv.Service +} + +func NewHandler(svc *notificationsv.Service) *Handler { + return &Handler{svc: svc} +} + +// GetFeishuWebhook 负责把配置查询请求从 gRPC 协议转成内部服务调用。 +// +// 职责边界: +// 1. 
只做 transport -> service 的参数搬运,不碰 DAO/provider/outbox 细节; +// 2. 业务错误统一转成 gRPC status,让 client 侧继续使用 `res, err :=`; +// 3. 成功时只回传业务数据,不在 payload 里塞 status/info。 +func (h *Handler) GetFeishuWebhook(ctx context.Context, req *pb.GetFeishuWebhookRequest) (*pb.ChannelResponse, error) { + if h == nil || h.svc == nil { + return nil, grpcErrorFromServiceError(errors.New("notification service dependency not initialized")) + } + if req == nil { + return nil, grpcErrorFromServiceError(respond.MissingParam) + } + + resp, err := h.svc.GetFeishuWebhook(ctx, int(req.UserId)) + if err != nil { + return nil, grpcErrorFromServiceError(err) + } + return channelToPB(resp), nil +} + +func (h *Handler) SaveFeishuWebhook(ctx context.Context, req *pb.SaveFeishuWebhookRequest) (*pb.ChannelResponse, error) { + if h == nil || h.svc == nil { + return nil, grpcErrorFromServiceError(errors.New("notification service dependency not initialized")) + } + if req == nil { + return nil, grpcErrorFromServiceError(respond.MissingParam) + } + + resp, err := h.svc.SaveFeishuWebhook(ctx, int(req.UserId), contracts.SaveFeishuWebhookRequest{ + UserID: int(req.UserId), + Enabled: req.Enabled, + WebhookURL: req.WebhookUrl, + AuthType: req.AuthType, + BearerToken: req.BearerToken, + }) + if err != nil { + return nil, grpcErrorFromServiceError(err) + } + return channelToPB(resp), nil +} + +func (h *Handler) DeleteFeishuWebhook(ctx context.Context, req *pb.DeleteFeishuWebhookRequest) (*pb.StatusResponse, error) { + if h == nil || h.svc == nil { + return nil, grpcErrorFromServiceError(errors.New("notification service dependency not initialized")) + } + if req == nil { + return nil, grpcErrorFromServiceError(respond.MissingParam) + } + + if err := h.svc.DeleteFeishuWebhook(ctx, int(req.UserId)); err != nil { + return nil, grpcErrorFromServiceError(err) + } + return &pb.StatusResponse{}, nil +} + +func (h *Handler) TestFeishuWebhook(ctx context.Context, req *pb.TestFeishuWebhookRequest) (*pb.TestResult, error) { + if 
h == nil || h.svc == nil { + return nil, grpcErrorFromServiceError(errors.New("notification service dependency not initialized")) + } + if req == nil { + return nil, grpcErrorFromServiceError(respond.MissingParam) + } + + resp, err := h.svc.TestFeishuWebhook(ctx, int(req.UserId)) + if err != nil { + return nil, grpcErrorFromServiceError(err) + } + return testResultToPB(resp), nil +} + +func channelToPB(resp contracts.ChannelResponse) *pb.ChannelResponse { + return &pb.ChannelResponse{ + Channel: resp.Channel, + Enabled: resp.Enabled, + Configured: resp.Configured, + WebhookUrlMask: resp.WebhookURLMask, + AuthType: resp.AuthType, + HasBearerToken: resp.HasBearerToken, + LastTestStatus: resp.LastTestStatus, + LastTestError: resp.LastTestError, + LastTestAtUnixNano: timePtrToUnixNano(resp.LastTestAt), + } +} + +func testResultToPB(resp contracts.TestResult) *pb.TestResult { + return &pb.TestResult{ + Channel: channelToPB(resp.Channel), + Status: resp.Status, + Outcome: resp.Outcome, + Message: resp.Message, + TraceId: resp.TraceID, + SentAtUnixNano: timeToUnixNano(resp.SentAt), + Skipped: resp.Skipped, + Provider: resp.Provider, + } +} + +func timePtrToUnixNano(value *time.Time) int64 { + if value == nil || value.IsZero() { + return 0 + } + return value.UnixNano() +} + +func timeToUnixNano(value time.Time) int64 { + if value.IsZero() { + return 0 + } + return value.UnixNano() +} diff --git a/backend/services/notification/rpc/notification.proto b/backend/services/notification/rpc/notification.proto new file mode 100644 index 0000000..c39375b --- /dev/null +++ b/backend/services/notification/rpc/notification.proto @@ -0,0 +1,58 @@ +syntax = "proto3"; + +package smartflow.notification; + +option go_package = "github.com/LoveLosita/smartflow/backend/services/notification/rpc/pb"; + +service Notification { + rpc GetFeishuWebhook(GetFeishuWebhookRequest) returns (ChannelResponse); + rpc SaveFeishuWebhook(SaveFeishuWebhookRequest) returns (ChannelResponse); + rpc 
DeleteFeishuWebhook(DeleteFeishuWebhookRequest) returns (StatusResponse); + rpc TestFeishuWebhook(TestFeishuWebhookRequest) returns (TestResult); +} + +message GetFeishuWebhookRequest { + int64 user_id = 1; +} + +message SaveFeishuWebhookRequest { + int64 user_id = 1; + bool enabled = 2; + string webhook_url = 3; + string auth_type = 4; + string bearer_token = 5; +} + +message DeleteFeishuWebhookRequest { + int64 user_id = 1; +} + +message TestFeishuWebhookRequest { + int64 user_id = 1; +} + +message StatusResponse { +} + +message ChannelResponse { + string channel = 1; + bool enabled = 2; + bool configured = 3; + string webhook_url_mask = 4; + string auth_type = 5; + bool has_bearer_token = 6; + string last_test_status = 7; + string last_test_error = 8; + int64 last_test_at_unix_nano = 9; +} + +message TestResult { + ChannelResponse channel = 1; + string status = 2; + string outcome = 3; + string message = 4; + string trace_id = 5; + int64 sent_at_unix_nano = 6; + bool skipped = 7; + string provider = 8; +} diff --git a/backend/services/notification/rpc/pb/notification.pb.go b/backend/services/notification/rpc/pb/notification.pb.go new file mode 100644 index 0000000..ace4c88 --- /dev/null +++ b/backend/services/notification/rpc/pb/notification.pb.go @@ -0,0 +1,102 @@ +package pb + +import proto "github.com/golang/protobuf/proto" + +var _ = proto.Marshal + +const _ = proto.ProtoPackageIsVersion3 + +type GetFeishuWebhookRequest struct { + UserId int64 `protobuf:"varint,1,opt,name=user_id,json=userId,proto3" json:"user_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetFeishuWebhookRequest) Reset() { *m = GetFeishuWebhookRequest{} } +func (m *GetFeishuWebhookRequest) String() string { return proto.CompactTextString(m) } +func (*GetFeishuWebhookRequest) ProtoMessage() {} + +type SaveFeishuWebhookRequest struct { + UserId int64 
`protobuf:"varint,1,opt,name=user_id,json=userId,proto3" json:"user_id,omitempty"` + Enabled bool `protobuf:"varint,2,opt,name=enabled,proto3" json:"enabled,omitempty"` + WebhookUrl string `protobuf:"bytes,3,opt,name=webhook_url,json=webhookUrl,proto3" json:"webhook_url,omitempty"` + AuthType string `protobuf:"bytes,4,opt,name=auth_type,json=authType,proto3" json:"auth_type,omitempty"` + BearerToken string `protobuf:"bytes,5,opt,name=bearer_token,json=bearerToken,proto3" json:"bearer_token,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SaveFeishuWebhookRequest) Reset() { *m = SaveFeishuWebhookRequest{} } +func (m *SaveFeishuWebhookRequest) String() string { return proto.CompactTextString(m) } +func (*SaveFeishuWebhookRequest) ProtoMessage() {} + +type DeleteFeishuWebhookRequest struct { + UserId int64 `protobuf:"varint,1,opt,name=user_id,json=userId,proto3" json:"user_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *DeleteFeishuWebhookRequest) Reset() { *m = DeleteFeishuWebhookRequest{} } +func (m *DeleteFeishuWebhookRequest) String() string { return proto.CompactTextString(m) } +func (*DeleteFeishuWebhookRequest) ProtoMessage() {} + +type TestFeishuWebhookRequest struct { + UserId int64 `protobuf:"varint,1,opt,name=user_id,json=userId,proto3" json:"user_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *TestFeishuWebhookRequest) Reset() { *m = TestFeishuWebhookRequest{} } +func (m *TestFeishuWebhookRequest) String() string { return proto.CompactTextString(m) } +func (*TestFeishuWebhookRequest) ProtoMessage() {} + +type StatusResponse struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StatusResponse) 
Reset() { *m = StatusResponse{} } +func (m *StatusResponse) String() string { return proto.CompactTextString(m) } +func (*StatusResponse) ProtoMessage() {} + +type ChannelResponse struct { + Channel string `protobuf:"bytes,1,opt,name=channel,proto3" json:"channel,omitempty"` + Enabled bool `protobuf:"varint,2,opt,name=enabled,proto3" json:"enabled,omitempty"` + Configured bool `protobuf:"varint,3,opt,name=configured,proto3" json:"configured,omitempty"` + WebhookUrlMask string `protobuf:"bytes,4,opt,name=webhook_url_mask,json=webhookUrlMask,proto3" json:"webhook_url_mask,omitempty"` + AuthType string `protobuf:"bytes,5,opt,name=auth_type,json=authType,proto3" json:"auth_type,omitempty"` + HasBearerToken bool `protobuf:"varint,6,opt,name=has_bearer_token,json=hasBearerToken,proto3" json:"has_bearer_token,omitempty"` + LastTestStatus string `protobuf:"bytes,7,opt,name=last_test_status,json=lastTestStatus,proto3" json:"last_test_status,omitempty"` + LastTestError string `protobuf:"bytes,8,opt,name=last_test_error,json=lastTestError,proto3" json:"last_test_error,omitempty"` + LastTestAtUnixNano int64 `protobuf:"varint,9,opt,name=last_test_at_unix_nano,json=lastTestAtUnixNano,proto3" json:"last_test_at_unix_nano,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ChannelResponse) Reset() { *m = ChannelResponse{} } +func (m *ChannelResponse) String() string { return proto.CompactTextString(m) } +func (*ChannelResponse) ProtoMessage() {} + +type TestResult struct { + Channel *ChannelResponse `protobuf:"bytes,1,opt,name=channel,proto3" json:"channel,omitempty"` + Status string `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` + Outcome string `protobuf:"bytes,3,opt,name=outcome,proto3" json:"outcome,omitempty"` + Message string `protobuf:"bytes,4,opt,name=message,proto3" json:"message,omitempty"` + TraceId string `protobuf:"bytes,5,opt,name=trace_id,json=traceId,proto3" 
json:"trace_id,omitempty"` + SentAtUnixNano int64 `protobuf:"varint,6,opt,name=sent_at_unix_nano,json=sentAtUnixNano,proto3" json:"sent_at_unix_nano,omitempty"` + Skipped bool `protobuf:"varint,7,opt,name=skipped,proto3" json:"skipped,omitempty"` + Provider string `protobuf:"bytes,8,opt,name=provider,proto3" json:"provider,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *TestResult) Reset() { *m = TestResult{} } +func (m *TestResult) String() string { return proto.CompactTextString(m) } +func (*TestResult) ProtoMessage() {} diff --git a/backend/services/notification/rpc/pb/notification_grpc.pb.go b/backend/services/notification/rpc/pb/notification_grpc.pb.go new file mode 100644 index 0000000..4a45a41 --- /dev/null +++ b/backend/services/notification/rpc/pb/notification_grpc.pb.go @@ -0,0 +1,193 @@ +package pb + +import ( + context "context" + + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +const ( + Notification_GetFeishuWebhook_FullMethodName = "/smartflow.notification.Notification/GetFeishuWebhook" + Notification_SaveFeishuWebhook_FullMethodName = "/smartflow.notification.Notification/SaveFeishuWebhook" + Notification_DeleteFeishuWebhook_FullMethodName = "/smartflow.notification.Notification/DeleteFeishuWebhook" + Notification_TestFeishuWebhook_FullMethodName = "/smartflow.notification.Notification/TestFeishuWebhook" +) + +type NotificationClient interface { + GetFeishuWebhook(ctx context.Context, in *GetFeishuWebhookRequest, opts ...grpc.CallOption) (*ChannelResponse, error) + SaveFeishuWebhook(ctx context.Context, in *SaveFeishuWebhookRequest, opts ...grpc.CallOption) (*ChannelResponse, error) + DeleteFeishuWebhook(ctx context.Context, in *DeleteFeishuWebhookRequest, opts ...grpc.CallOption) (*StatusResponse, error) + TestFeishuWebhook(ctx context.Context, in *TestFeishuWebhookRequest, opts 
...grpc.CallOption) (*TestResult, error) +} + +type notificationClient struct { + cc grpc.ClientConnInterface +} + +func NewNotificationClient(cc grpc.ClientConnInterface) NotificationClient { + return &notificationClient{cc} +} + +func (c *notificationClient) GetFeishuWebhook(ctx context.Context, in *GetFeishuWebhookRequest, opts ...grpc.CallOption) (*ChannelResponse, error) { + out := new(ChannelResponse) + err := c.cc.Invoke(ctx, Notification_GetFeishuWebhook_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *notificationClient) SaveFeishuWebhook(ctx context.Context, in *SaveFeishuWebhookRequest, opts ...grpc.CallOption) (*ChannelResponse, error) { + out := new(ChannelResponse) + err := c.cc.Invoke(ctx, Notification_SaveFeishuWebhook_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *notificationClient) DeleteFeishuWebhook(ctx context.Context, in *DeleteFeishuWebhookRequest, opts ...grpc.CallOption) (*StatusResponse, error) { + out := new(StatusResponse) + err := c.cc.Invoke(ctx, Notification_DeleteFeishuWebhook_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *notificationClient) TestFeishuWebhook(ctx context.Context, in *TestFeishuWebhookRequest, opts ...grpc.CallOption) (*TestResult, error) { + out := new(TestResult) + err := c.cc.Invoke(ctx, Notification_TestFeishuWebhook_FullMethodName, in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +type NotificationServer interface { + GetFeishuWebhook(context.Context, *GetFeishuWebhookRequest) (*ChannelResponse, error) + SaveFeishuWebhook(context.Context, *SaveFeishuWebhookRequest) (*ChannelResponse, error) + DeleteFeishuWebhook(context.Context, *DeleteFeishuWebhookRequest) (*StatusResponse, error) + TestFeishuWebhook(context.Context, *TestFeishuWebhookRequest) (*TestResult, error) +} + +type UnimplementedNotificationServer struct{} + +func (UnimplementedNotificationServer) GetFeishuWebhook(context.Context, *GetFeishuWebhookRequest) (*ChannelResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetFeishuWebhook not implemented") +} + +func (UnimplementedNotificationServer) SaveFeishuWebhook(context.Context, *SaveFeishuWebhookRequest) (*ChannelResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method SaveFeishuWebhook not implemented") +} + +func (UnimplementedNotificationServer) DeleteFeishuWebhook(context.Context, *DeleteFeishuWebhookRequest) (*StatusResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method DeleteFeishuWebhook not implemented") +} + +func (UnimplementedNotificationServer) TestFeishuWebhook(context.Context, *TestFeishuWebhookRequest) (*TestResult, error) { + return nil, status.Errorf(codes.Unimplemented, "method TestFeishuWebhook not implemented") +} + +func RegisterNotificationServer(s grpc.ServiceRegistrar, srv NotificationServer) { + s.RegisterService(&Notification_ServiceDesc, srv) +} + +func _Notification_GetFeishuWebhook_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetFeishuWebhookRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(NotificationServer).GetFeishuWebhook(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: 
Notification_GetFeishuWebhook_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(NotificationServer).GetFeishuWebhook(ctx, req.(*GetFeishuWebhookRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Notification_SaveFeishuWebhook_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(SaveFeishuWebhookRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(NotificationServer).SaveFeishuWebhook(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Notification_SaveFeishuWebhook_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(NotificationServer).SaveFeishuWebhook(ctx, req.(*SaveFeishuWebhookRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Notification_DeleteFeishuWebhook_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(DeleteFeishuWebhookRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(NotificationServer).DeleteFeishuWebhook(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Notification_DeleteFeishuWebhook_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(NotificationServer).DeleteFeishuWebhook(ctx, req.(*DeleteFeishuWebhookRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Notification_TestFeishuWebhook_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(TestFeishuWebhookRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return 
srv.(NotificationServer).TestFeishuWebhook(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Notification_TestFeishuWebhook_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(NotificationServer).TestFeishuWebhook(ctx, req.(*TestFeishuWebhookRequest)) + } + return interceptor(ctx, in, info, handler) +} + +var Notification_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "smartflow.notification.Notification", + HandlerType: (*NotificationServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "GetFeishuWebhook", + Handler: _Notification_GetFeishuWebhook_Handler, + }, + { + MethodName: "SaveFeishuWebhook", + Handler: _Notification_SaveFeishuWebhook_Handler, + }, + { + MethodName: "DeleteFeishuWebhook", + Handler: _Notification_DeleteFeishuWebhook_Handler, + }, + { + MethodName: "TestFeishuWebhook", + Handler: _Notification_TestFeishuWebhook_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "services/notification/rpc/notification.proto", +} diff --git a/backend/services/notification/rpc/server.go b/backend/services/notification/rpc/server.go new file mode 100644 index 0000000..f272d52 --- /dev/null +++ b/backend/services/notification/rpc/server.go @@ -0,0 +1,54 @@ +package rpc + +import ( + "errors" + "strings" + "time" + + "github.com/LoveLosita/smartflow/backend/services/notification/rpc/pb" + notificationsv "github.com/LoveLosita/smartflow/backend/services/notification/sv" + "github.com/zeromicro/go-zero/core/service" + "github.com/zeromicro/go-zero/zrpc" + "google.golang.org/grpc" +) + +const ( + defaultListenOn = "0.0.0.0:9082" + defaultTimeout = 6 * time.Second +) + +type ServerOptions struct { + ListenOn string + Timeout time.Duration + Service *notificationsv.Service +} + +func NewServer(opts ServerOptions) (*zrpc.RpcServer, string, error) { + if opts.Service == nil { + return nil, "", errors.New("notification service dependency not initialized") + } + + 
listenOn := strings.TrimSpace(opts.ListenOn) + if listenOn == "" { + listenOn = defaultListenOn + } + timeout := opts.Timeout + if timeout <= 0 { + timeout = defaultTimeout + } + + server, err := zrpc.NewServer(zrpc.RpcServerConf{ + ServiceConf: service.ServiceConf{ + Name: "notification.rpc", + Mode: service.DevMode, + }, + ListenOn: listenOn, + Timeout: int64(timeout / time.Millisecond), + }, func(grpcServer *grpc.Server) { + pb.RegisterNotificationServer(grpcServer, NewHandler(opts.Service)) + }) + if err != nil { + return nil, "", err + } + return server, listenOn, nil +} diff --git a/backend/services/notification/sv/channel.go b/backend/services/notification/sv/channel.go new file mode 100644 index 0000000..85d3e03 --- /dev/null +++ b/backend/services/notification/sv/channel.go @@ -0,0 +1,158 @@ +package sv + +import ( + "context" + "errors" + "strings" + + "github.com/LoveLosita/smartflow/backend/respond" + notificationfeishu "github.com/LoveLosita/smartflow/backend/services/notification/internal/feishu" + notificationmodel "github.com/LoveLosita/smartflow/backend/services/notification/model" + contracts "github.com/LoveLosita/smartflow/backend/shared/contracts/notification" + "gorm.io/gorm" +) + +const ( + channelTestStatusSuccess = "success" + channelTestStatusFailed = "failed" +) + +// GetFeishuWebhook 查询当前用户的飞书 Webhook 触发器配置。 +func (s *Service) GetFeishuWebhook(ctx context.Context, userID int) (contracts.ChannelResponse, error) { + if userID <= 0 { + return contracts.ChannelResponse{}, respond.ErrUnauthorized + } + row, err := s.channelStore.GetUserNotificationChannel(ctx, userID, notificationmodel.ChannelFeishuWebhook) + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return contracts.ChannelResponse{ + Channel: notificationmodel.ChannelFeishuWebhook, + AuthType: notificationmodel.AuthTypeNone, + Configured: false, + }, nil + } + return contracts.ChannelResponse{}, err + } + return responseFromChannel(row), nil +} + +// SaveFeishuWebhook 
幂等保存当前用户的飞书 Webhook 触发器配置。 +func (s *Service) SaveFeishuWebhook(ctx context.Context, userID int, req contracts.SaveFeishuWebhookRequest) (contracts.ChannelResponse, error) { + if userID <= 0 { + return contracts.ChannelResponse{}, respond.ErrUnauthorized + } + webhookURL := strings.TrimSpace(req.WebhookURL) + if webhookURL == "" { + return contracts.ChannelResponse{}, respond.MissingParam + } + if err := notificationfeishu.ValidateWebhookURL(webhookURL); err != nil { + return contracts.ChannelResponse{}, respond.WrongParamType + } + authType := normalizeAuthType(req.AuthType) + bearerToken := strings.TrimSpace(req.BearerToken) + if authType == notificationmodel.AuthTypeBearer && bearerToken == "" { + return contracts.ChannelResponse{}, respond.MissingParam + } + row := &notificationmodel.UserNotificationChannel{ + UserID: userID, + Channel: notificationmodel.ChannelFeishuWebhook, + Enabled: req.Enabled, + WebhookURL: webhookURL, + AuthType: authType, + BearerToken: bearerToken, + } + if err := s.channelStore.UpsertUserNotificationChannel(ctx, row); err != nil { + return contracts.ChannelResponse{}, err + } + return s.GetFeishuWebhook(ctx, userID) +} + +// DeleteFeishuWebhook 删除当前用户的飞书 Webhook 触发器配置。 +func (s *Service) DeleteFeishuWebhook(ctx context.Context, userID int) error { + if userID <= 0 { + return respond.ErrUnauthorized + } + return s.channelStore.DeleteUserNotificationChannel(ctx, userID, notificationmodel.ChannelFeishuWebhook) +} + +// TestFeishuWebhook 发送一条最小业务 JSON 到当前用户配置的飞书 Webhook。 +func (s *Service) TestFeishuWebhook(ctx context.Context, userID int) (contracts.TestResult, error) { + if userID <= 0 { + return contracts.TestResult{}, respond.ErrUnauthorized + } + now := s.options.Now() + traceID := "trace_feishu_webhook_test" + sendResult, sendErr := s.provider.Send(ctx, notificationfeishu.SendRequest{ + NotificationID: 0, + UserID: userID, + TriggerID: "ast_test_webhook", + PreviewID: "asp_test_webhook", + TriggerType: "manual_test", + TargetType: 
"notification_channel", + TargetID: 0, + TargetURL: "/assistant/00000000-0000-0000-0000-000000000000", + MessageText: "这是一条 SmartFlow 飞书 Webhook 测试消息。", + TraceID: traceID, + AttemptCount: 1, + }) + if sendErr != nil { + return contracts.TestResult{}, sendErr + } + + status := channelTestStatusFailed + testErr := strings.TrimSpace(sendResult.ErrorMessage) + if sendResult.Outcome == notificationfeishu.SendOutcomeSuccess { + status = channelTestStatusSuccess + testErr = "" + } + if sendResult.Outcome == notificationfeishu.SendOutcomeSkipped && testErr == "" { + testErr = "飞书 webhook 未配置或未启用" + } + if err := s.channelStore.UpdateUserNotificationChannelTestResult(ctx, userID, notificationmodel.ChannelFeishuWebhook, status, testErr, now); err != nil { + return contracts.TestResult{}, err + } + channel, err := s.GetFeishuWebhook(ctx, userID) + if err != nil { + return contracts.TestResult{}, err + } + return contracts.TestResult{ + Channel: channel, + Status: status, + Outcome: string(sendResult.Outcome), + Message: testErr, + TraceID: traceID, + SentAt: now, + Skipped: sendResult.Outcome == notificationfeishu.SendOutcomeSkipped, + Provider: notificationfeishu.Channel, + }, nil +} + +func responseFromChannel(row *notificationmodel.UserNotificationChannel) contracts.ChannelResponse { + if row == nil { + return contracts.ChannelResponse{ + Channel: notificationmodel.ChannelFeishuWebhook, + AuthType: notificationmodel.AuthTypeNone, + Configured: false, + } + } + return contracts.ChannelResponse{ + Channel: row.Channel, + Enabled: row.Enabled, + Configured: strings.TrimSpace(row.WebhookURL) != "", + WebhookURLMask: notificationfeishu.MaskWebhookURL(row.WebhookURL), + AuthType: normalizeAuthType(row.AuthType), + HasBearerToken: strings.TrimSpace(row.BearerToken) != "", + LastTestStatus: row.LastTestStatus, + LastTestError: row.LastTestError, + LastTestAt: row.LastTestAt, + } +} + +func normalizeAuthType(authType string) string { + switch 
strings.ToLower(strings.TrimSpace(authType)) { + case notificationmodel.AuthTypeBearer: + return notificationmodel.AuthTypeBearer + default: + return notificationmodel.AuthTypeNone + } +} diff --git a/backend/services/notification/sv/factory.go b/backend/services/notification/sv/factory.go new file mode 100644 index 0000000..a789d16 --- /dev/null +++ b/backend/services/notification/sv/factory.go @@ -0,0 +1,29 @@ +package sv + +import notificationfeishu "github.com/LoveLosita/smartflow/backend/services/notification/internal/feishu" + +// FeishuWebhookProviderOptions 定义生产默认飞书 Webhook provider 的启动参数。 +// +// 职责边界: +// 1. 只承载 provider 初始化需要的外部配置,不暴露 internal/feishu 的具体实现; +// 2. 不负责 notification 状态机参数,重试次数和扫描批量仍由 ServiceOptions 管理; +// 3. 后续若新增 OpenID 等 provider,应新增对应构造器,避免把多 provider 分支堆进 cmd 入口。 +type FeishuWebhookProviderOptions struct { + FrontendBaseURL string +} + +// NewNotificationServiceWithFeishuWebhook 创建生产默认的飞书 Webhook notification 服务。 +// +// 职责边界: +// 1. 在 sv 层完成 internal/feishu provider 装配,cmd 入口不直接依赖 internal 包; +// 2. 只组合 notification 领域内部依赖,不连接数据库、不读取配置; +// 3. 
provider 构造失败时直接返回 error,避免启动出半初始化服务。 +func NewNotificationServiceWithFeishuWebhook(recordStore RecordStore, channelStore ChannelStore, providerOpts FeishuWebhookProviderOptions, serviceOpts ServiceOptions) (*Service, error) { + provider, err := notificationfeishu.NewWebhookProvider(channelStore, notificationfeishu.WebhookProviderOptions{ + FrontendBaseURL: providerOpts.FrontendBaseURL, + }) + if err != nil { + return nil, err + } + return NewNotificationService(recordStore, channelStore, provider, serviceOpts) +} diff --git a/backend/service/events/notification_feishu.go b/backend/services/notification/sv/outbox.go similarity index 54% rename from backend/service/events/notification_feishu.go rename to backend/services/notification/sv/outbox.go index 41ea53c..85d2a3f 100644 --- a/backend/service/events/notification_feishu.go +++ b/backend/services/notification/sv/outbox.go @@ -1,4 +1,4 @@ -package events +package sv import ( "context" @@ -9,21 +9,23 @@ import ( kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka" outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox" - "github.com/LoveLosita/smartflow/backend/notification" sharedevents "github.com/LoveLosita/smartflow/backend/shared/events" ) -// RegisterFeishuNotificationHandler 注册 `notification.feishu.requested` 消费 handler。 +// OutboxBus 是 notification 服务注册消费 handler 需要的最小总线接口。 +// +// 职责边界:只要求具备 handler 注册能力,启动、关闭和发布由进程入口自己编排。 +type OutboxBus interface { + RegisterEventHandler(eventType string, handler outboxinfra.MessageHandler) error +} + +// RegisterFeishuRequestedHandler 注册 `notification.feishu.requested` 消费 handler。 // // 职责边界: // 1. 只负责事件解析、协议校验、调用 NotificationService 和推进 outbox consumed; -// 2. 不承担 notification_records 状态机细节,状态流转全部下沉到 notification 模块; +// 2. 不承担 notification_records 状态机细节,状态流转全部下沉到 notification 服务; // 3. 
不在 handler 内部创建 provider/service,避免事件消费与 retry loop 使用两套不同配置。 -func RegisterFeishuNotificationHandler( - bus OutboxBus, - outboxRepo *outboxinfra.Repository, - svc *notification.NotificationService, -) error { +func RegisterFeishuRequestedHandler(bus OutboxBus, outboxRepo *outboxinfra.Repository, svc *Service) error { if bus == nil { return errors.New("event bus is nil") } @@ -33,27 +35,37 @@ func RegisterFeishuNotificationHandler( if svc == nil { return errors.New("notification service is nil") } - eventOutboxRepo, err := scopedOutboxRepoForEvent(outboxRepo, sharedevents.NotificationFeishuRequestedEventType) - if err != nil { + if err := outboxinfra.RegisterEventService(sharedevents.NotificationFeishuRequestedEventType, outboxinfra.ServiceNotification); err != nil { return err } + route, ok := outboxinfra.ResolveEventRoute(sharedevents.NotificationFeishuRequestedEventType) + if !ok { + return errors.New("notification.feishu.requested route is missing") + } + eventOutboxRepo := outboxRepo.WithRoute(route) handler := func(ctx context.Context, envelope kafkabus.Envelope) error { // 1. 先校验 event_version,避免未来协议破坏性升级后旧 handler 误吃新消息。 // 2. 
当前阶段只接受 v1;版本不匹配属于不可恢复协议错误,直接标记 dead。 eventVersion := strings.TrimSpace(envelope.EventVersion) if eventVersion != "" && eventVersion != sharedevents.NotificationFeishuRequestedEventVersion { - _ = eventOutboxRepo.MarkDead(ctx, envelope.OutboxID, "notification.feishu.requested event_version 不匹配: "+eventVersion) + if err := eventOutboxRepo.MarkDead(ctx, envelope.OutboxID, "notification.feishu.requested event_version 不匹配: "+eventVersion); err != nil { + return err + } return nil } var payload sharedevents.FeishuNotificationRequestedPayload if unmarshalErr := json.Unmarshal(envelope.Payload, &payload); unmarshalErr != nil { - _ = eventOutboxRepo.MarkDead(ctx, envelope.OutboxID, "解析 notification.feishu.requested 载荷失败: "+unmarshalErr.Error()) + if err := eventOutboxRepo.MarkDead(ctx, envelope.OutboxID, "解析 notification.feishu.requested 载荷失败: "+unmarshalErr.Error()); err != nil { + return err + } return nil } if validateErr := payload.Validate(); validateErr != nil { - _ = eventOutboxRepo.MarkDead(ctx, envelope.OutboxID, "notification.feishu.requested 载荷非法: "+validateErr.Error()) + if err := eventOutboxRepo.MarkDead(ctx, envelope.OutboxID, "notification.feishu.requested 载荷非法: "+validateErr.Error()); err != nil { + return err + } return nil } @@ -80,30 +92,3 @@ func RegisterFeishuNotificationHandler( return bus.RegisterEventHandler(sharedevents.NotificationFeishuRequestedEventType, handler) } - -// PublishFeishuNotificationRequested 发布 `notification.feishu.requested` 事件。 -// -// 职责边界: -// 1. 只负责把 shared/events payload 投递到 outbox; -// 2. 不等待 provider 结果,也不提前创建 notification_records; -// 3. 
供主动调度 preview 阶段后续切入通知时直接复用。 -func PublishFeishuNotificationRequested( - ctx context.Context, - publisher outboxinfra.EventPublisher, - payload sharedevents.FeishuNotificationRequestedPayload, -) error { - if publisher == nil { - return errors.New("event publisher is nil") - } - if err := payload.Validate(); err != nil { - return err - } - - return publisher.Publish(ctx, outboxinfra.PublishRequest{ - EventType: sharedevents.NotificationFeishuRequestedEventType, - EventVersion: sharedevents.NotificationFeishuRequestedEventVersion, - MessageKey: payload.MessageKey(), - AggregateID: payload.AggregateID(), - Payload: payload, - }) -} diff --git a/backend/notification/service.go b/backend/services/notification/sv/service.go similarity index 63% rename from backend/notification/service.go rename to backend/services/notification/sv/service.go index dbea7eb..f9a053f 100644 --- a/backend/notification/service.go +++ b/backend/services/notification/sv/service.go @@ -1,4 +1,4 @@ -package notification +package sv import ( "context" @@ -8,9 +8,9 @@ import ( "sync" "time" + notificationfeishu "github.com/LoveLosita/smartflow/backend/services/notification/internal/feishu" + notificationmodel "github.com/LoveLosita/smartflow/backend/services/notification/model" sharedevents "github.com/LoveLosita/smartflow/backend/shared/events" - - "github.com/LoveLosita/smartflow/backend/model" "gorm.io/gorm" ) @@ -18,31 +18,60 @@ const ( defaultMaxAttempts = 5 defaultRetryBaseDelay = 5 * time.Minute defaultRetryMaxDelay = 30 * time.Minute + defaultSendingLease = 10 * time.Minute defaultSummaryMaxRunes = 180 defaultRetryScanBatch = 100 + sendingLeaseExpiredCode = "sending_lease_expired" defaultFallbackTemplate = "我为你生成了一份日程调整建议,请回到系统确认是否应用。" ) -// NotificationRecordStore 抽象出 notification 模块真正依赖的持久化能力。 +// RecordStore 抽象出 notification_records 真正依赖的持久化能力。 // // 职责边界: // 1. 只描述 notification_records 读写所需的最小接口; -// 2. 允许生产环境直接复用 ActiveScheduleDAO,也允许测试时替换成内存 fake; +// 2. 
允许生产环境直接复用 notification DAO,也允许测试时替换成内存 fake; // 3. 不把 provider、事件总线和业务状态机耦合进存储接口。 -type NotificationRecordStore interface { - CreateNotificationRecord(ctx context.Context, record *model.NotificationRecord) error +type RecordStore interface { + CreateNotificationRecord(ctx context.Context, record *notificationmodel.NotificationRecord) error UpdateNotificationRecordFields(ctx context.Context, notificationID int64, updates map[string]any) error - GetNotificationRecordByID(ctx context.Context, notificationID int64) (*model.NotificationRecord, error) - FindNotificationRecordByDedupeKey(ctx context.Context, channel string, dedupeKey string) (*model.NotificationRecord, error) - ListRetryableNotificationRecords(ctx context.Context, now time.Time, limit int) ([]model.NotificationRecord, error) + GetNotificationRecordByID(ctx context.Context, notificationID int64) (*notificationmodel.NotificationRecord, error) + FindNotificationRecordByDedupeKey(ctx context.Context, channel string, dedupeKey string) (*notificationmodel.NotificationRecord, error) + ListRetryableNotificationRecords(ctx context.Context, now time.Time, sendingStaleBefore time.Time, limit int) ([]notificationmodel.NotificationRecord, error) + ClaimRetryableNotificationRecord(ctx context.Context, notificationID int64, now time.Time, sendingStaleBefore time.Time) (bool, error) } +// ChannelStore 抽象出用户通知通道配置所需的最小持久化能力。 +type ChannelStore interface { + GetUserNotificationChannel(ctx context.Context, userID int, channel string) (*notificationmodel.UserNotificationChannel, error) + UpsertUserNotificationChannel(ctx context.Context, channel *notificationmodel.UserNotificationChannel) error + DeleteUserNotificationChannel(ctx context.Context, userID int, channel string) error + UpdateUserNotificationChannelTestResult(ctx context.Context, userID int, channel string, status string, testErr string, testedAt time.Time) error +} + +// Service 负责 notification_records 状态机、通道配置和 provider 调用编排。 +// +// 职责边界: +// 1. 
负责飞书 webhook 通道配置、测试、消息投递、重试和 outbox 消费; +// 2. 不负责 active_schedule 的 dry-run / preview / trigger 状态机; +// 3. 不负责 gateway 的响应适配、路由聚合和 JWT 鉴权。 +type Service struct { + recordStore RecordStore + channelStore ChannelStore + provider notificationfeishu.Provider + options ServiceOptions + locks *keyedLocker +} + +// NotificationService 是阶段四对外暴露的语义化别名。 +type NotificationService = Service + // ServiceOptions 定义通知服务的可调参数。 type ServiceOptions struct { Now func() time.Time MaxAttempts int RetryBaseDelay time.Duration RetryMaxDelay time.Duration + SendingLease time.Duration SummaryMaxRunes int RetryScanBatch int } @@ -70,41 +99,24 @@ type RetryResult struct { Errors int } -// Service 负责 notification_records 状态机与 provider 调用编排。 -// -// 职责边界: -// 1. 消费 `notification.feishu.requested` payload,做去重、落库、状态流转与 provider 调用; -// 2. 只写 notification_records,不写 preview / trigger / 正式 schedule; -// 3. provider 可重试失败由本服务自己管理,outbox 只保证“通知请求被接收一次”。 -type Service struct { - store NotificationRecordStore - provider FeishuProvider - options ServiceOptions - locks *keyedLocker -} - -// NotificationService 是阶段四对外暴露的语义化别名。 -// -// 说明: -// 1. 当前包里已有 runner 等代码引用 `Service`; -// 2. 任务描述里又直接使用 “NotificationService” 这个业务名词; -// 3. 
这里保留别名,既不打断已有代码,也让后续调用方可以按业务语义引用。 -type NotificationService = Service - // NewNotificationService 创建通知服务。 -func NewNotificationService(store NotificationRecordStore, provider FeishuProvider, opts ServiceOptions) (*Service, error) { - if store == nil { +func NewNotificationService(recordStore RecordStore, channelStore ChannelStore, provider notificationfeishu.Provider, opts ServiceOptions) (*Service, error) { + if recordStore == nil { return nil, errors.New("notification record store is nil") } + if channelStore == nil { + return nil, errors.New("notification channel store is nil") + } if provider == nil { return nil, errors.New("feishu provider is nil") } opts = normalizeServiceOptions(opts) return &Service{ - store: store, - provider: provider, - options: opts, - locks: newKeyedLocker(), + recordStore: recordStore, + channelStore: channelStore, + provider: provider, + options: opts, + locks: newKeyedLocker(), }, nil } @@ -119,7 +131,7 @@ func (s *Service) HandleFeishuRequested(ctx context.Context, payload sharedevent return HandleResult{}, err } - lockKey := buildNotificationLockKey(ChannelFeishu, payload.DedupeKey) + lockKey := buildNotificationLockKey(notificationfeishu.Channel, payload.DedupeKey) unlock := s.locks.Lock(lockKey) defer unlock() @@ -150,7 +162,7 @@ func (s *Service) RetryFeishuNotifications(ctx context.Context, now time.Time, l limit = s.options.RetryScanBatch } - records, err := s.store.ListRetryableNotificationRecords(ctx, now, limit) + records, err := s.recordStore.ListRetryableNotificationRecords(ctx, now, s.sendingStaleBefore(now), limit) if err != nil { return RetryResult{}, err } @@ -159,7 +171,7 @@ func (s *Service) RetryFeishuNotifications(ctx context.Context, now time.Time, l var firstErr error for _, record := range records { - if record.Channel != ChannelFeishu { + if record.Channel != notificationfeishu.Channel { result.Skipped++ continue } @@ -177,15 +189,15 @@ func (s *Service) RetryFeishuNotifications(ctx context.Context, now 
time.Time, l result.Retried++ } switch handleResult.Status { - case model.NotificationRecordStatusSent: + case notificationmodel.RecordStatusSent: if handleResult.Delivered { result.Sent++ } else { result.Skipped++ } - case model.NotificationRecordStatusFailed: + case notificationmodel.RecordStatusFailed: result.Failed++ - case model.NotificationRecordStatusDead: + case notificationmodel.RecordStatusDead: result.Dead++ default: result.Skipped++ @@ -204,7 +216,7 @@ func (s *Service) RetryDue(ctx context.Context, now time.Time, limit int) (int, } func (s *Service) retryOneRecord(ctx context.Context, notificationID int64) (HandleResult, error) { - record, err := s.store.GetNotificationRecordByID(ctx, notificationID) + record, err := s.recordStore.GetNotificationRecordByID(ctx, notificationID) if err != nil { return HandleResult{}, err } @@ -213,19 +225,37 @@ func (s *Service) retryOneRecord(ctx context.Context, notificationID int64) (Han unlock := s.locks.Lock(lockKey) defer unlock() - current, err := s.store.GetNotificationRecordByID(ctx, notificationID) + // 1. retry scanner 可能在滚动发布或多实例场景下并行运行,进程内锁只能保护当前进程。 + // 2. 这里先用条件 UPDATE 把 failed 且到期的记录 claim 成 sending;只有抢到 claim 的实例才能调用 provider。 + // 3. 
未抢到说明记录已被其它实例处理或状态已变化,直接回读当前状态用于统计,不再重复发送。 + now := s.options.Now() + claimed, err := s.recordStore.ClaimRetryableNotificationRecord(ctx, notificationID, now, s.sendingStaleBefore(now)) if err != nil { return HandleResult{}, err } - return s.deliverRecord(ctx, current) + + current, err := s.recordStore.GetNotificationRecordByID(ctx, notificationID) + if err != nil { + return HandleResult{}, err + } + if !claimed { + return HandleResult{ + RecordID: current.ID, + Status: current.Status, + FallbackUsed: current.FallbackUsed, + AttemptCount: current.AttemptCount, + NextRetryAt: current.NextRetryAt, + }, nil + } + return s.sendRecordNow(ctx, current) } -func (s *Service) findOrCreateRecordForPayload(ctx context.Context, payload sharedevents.FeishuNotificationRequestedPayload) (*model.NotificationRecord, bool, error) { +func (s *Service) findOrCreateRecordForPayload(ctx context.Context, payload sharedevents.FeishuNotificationRequestedPayload) (*notificationmodel.NotificationRecord, bool, error) { // 1. 若 payload 已携带 notification_id,先尝试命中现有记录,便于后续扩展“指定 record 重放”场景。 // 2. 
若 id 未命中或字段不一致,再退回到 channel + dedupe_key 这一版稳定幂等口径。 if payload.NotificationID > 0 { - record, err := s.store.GetNotificationRecordByID(ctx, payload.NotificationID) - if err == nil && record != nil && record.Channel == ChannelFeishu && record.DedupeKey == strings.TrimSpace(payload.DedupeKey) { + record, err := s.recordStore.GetNotificationRecordByID(ctx, payload.NotificationID) + if err == nil && record != nil && record.Channel == notificationfeishu.Channel && record.DedupeKey == strings.TrimSpace(payload.DedupeKey) { return record, true, nil } if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) { @@ -233,7 +263,7 @@ func (s *Service) findOrCreateRecordForPayload(ctx context.Context, payload shar } } - record, err := s.store.FindNotificationRecordByDedupeKey(ctx, ChannelFeishu, strings.TrimSpace(payload.DedupeKey)) + record, err := s.recordStore.FindNotificationRecordByDedupeKey(ctx, notificationfeishu.Channel, strings.TrimSpace(payload.DedupeKey)) if err == nil { return record, true, nil } @@ -242,8 +272,8 @@ func (s *Service) findOrCreateRecordForPayload(ctx context.Context, payload shar } summaryText, fallbackText, fallbackUsed := s.normalizeMessageTemplate(payload.SummaryText, payload.FallbackText) - record = &model.NotificationRecord{ - Channel: ChannelFeishu, + record = &notificationmodel.NotificationRecord{ + Channel: notificationfeishu.Channel, UserID: payload.UserID, TriggerID: strings.TrimSpace(payload.TriggerID), PreviewID: strings.TrimSpace(payload.PreviewID), @@ -255,15 +285,15 @@ func (s *Service) findOrCreateRecordForPayload(ctx context.Context, payload shar SummaryText: summaryText, FallbackText: fallbackText, FallbackUsed: fallbackUsed, - Status: model.NotificationRecordStatusPending, + Status: notificationmodel.RecordStatusPending, MaxAttempts: s.options.MaxAttempts, TraceID: strings.TrimSpace(payload.TraceID), } - if err = s.store.CreateNotificationRecord(ctx, record); err != nil { + if err = s.recordStore.CreateNotificationRecord(ctx, record);
err != nil { // 1. 并发场景下若唯一索引已被别的协程抢先创建,这里回查 dedupe 记录即可; // 2. 若回查仍失败,说明不是幂等竞争而是真正落库异常,应交给上层重试。 - existing, findErr := s.store.FindNotificationRecordByDedupeKey(ctx, ChannelFeishu, record.DedupeKey) + existing, findErr := s.recordStore.FindNotificationRecordByDedupeKey(ctx, notificationfeishu.Channel, record.DedupeKey) if findErr == nil { return existing, true, nil } @@ -272,16 +302,22 @@ func (s *Service) findOrCreateRecordForPayload(ctx context.Context, payload shar return record, false, nil } -func (s *Service) deliverRecord(ctx context.Context, record *model.NotificationRecord) (HandleResult, error) { +func (s *Service) deliverRecord(ctx context.Context, record *notificationmodel.NotificationRecord) (HandleResult, error) { if record == nil { return HandleResult{}, errors.New("notification record is nil") } switch record.Status { - case model.NotificationRecordStatusSending, - model.NotificationRecordStatusSent, - model.NotificationRecordStatusDead, - model.NotificationRecordStatusSkipped: + case notificationmodel.RecordStatusSending: + if !s.isSendingLeaseExpired(record) { + return HandleResult{}, errors.New("notification record 正在发送中,等待租约过期后再重试") + } + if err := s.claimStaleSendingRecord(ctx, record); err != nil { + return HandleResult{}, err + } + case notificationmodel.RecordStatusSent, + notificationmodel.RecordStatusDead, + notificationmodel.RecordStatusSkipped: return HandleResult{ RecordID: record.ID, Status: record.Status, @@ -289,7 +325,7 @@ func (s *Service) deliverRecord(ctx context.Context, record *model.NotificationR AttemptCount: record.AttemptCount, NextRetryAt: record.NextRetryAt, }, nil - case model.NotificationRecordStatusPending, model.NotificationRecordStatusFailed: + case notificationmodel.RecordStatusPending, notificationmodel.RecordStatusFailed: // 继续向下走真正投递流程。 default: // 1. 
未识别状态先保守短路,避免把未知脏数据继续推进到 provider。 @@ -303,6 +339,10 @@ func (s *Service) deliverRecord(ctx context.Context, record *model.NotificationR }, nil } + return s.sendRecordNow(ctx, record) +} + +func (s *Service) sendRecordNow(ctx context.Context, record *notificationmodel.NotificationRecord) (HandleResult, error) { requestPayload := s.buildSendRequest(record) requestJSON, err := marshalJSONPointer(requestPayload) if err != nil { @@ -311,7 +351,7 @@ func (s *Service) deliverRecord(ctx context.Context, record *model.NotificationR nextAttemptCount := record.AttemptCount + 1 updates := map[string]any{ - "status": model.NotificationRecordStatusSending, + "status": notificationmodel.RecordStatusSending, "attempt_count": nextAttemptCount, "next_retry_at": nil, "last_error_code": nil, @@ -322,27 +362,27 @@ func (s *Service) deliverRecord(ctx context.Context, record *model.NotificationR updates["max_attempts"] = s.options.MaxAttempts record.MaxAttempts = s.options.MaxAttempts } - if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + if err = s.recordStore.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { return HandleResult{}, err } - record.Status = model.NotificationRecordStatusSending + record.Status = notificationmodel.RecordStatusSending record.AttemptCount = nextAttemptCount record.NextRetryAt = nil record.ProviderRequestJSON = requestJSON sendResult, sendErr := s.provider.Send(ctx, requestPayload) if sendErr != nil && sendResult.Outcome == "" { - sendResult = FeishuSendResult{ - Outcome: FeishuSendOutcomeTemporaryFail, - ErrorCode: FeishuErrorCodeNetworkError, + sendResult = notificationfeishu.SendResult{ + Outcome: notificationfeishu.SendOutcomeTemporaryFail, + ErrorCode: notificationfeishu.ErrorCodeNetworkError, ErrorMessage: sendErr.Error(), } } if sendResult.Outcome == "" { - sendResult.Outcome = FeishuSendOutcomeTemporaryFail + sendResult.Outcome = notificationfeishu.SendOutcomeTemporaryFail if 
sendResult.ErrorCode == "" { - sendResult.ErrorCode = FeishuErrorCodeNetworkError + sendResult.ErrorCode = notificationfeishu.ErrorCodeNetworkError } if sendResult.ErrorMessage == "" && sendErr != nil { sendResult.ErrorMessage = sendErr.Error() @@ -352,7 +392,47 @@ func (s *Service) deliverRecord(ctx context.Context, record *model.NotificationR return s.applySendResult(ctx, record, sendResult) } -func (s *Service) applySendResult(ctx context.Context, record *model.NotificationRecord, sendResult FeishuSendResult) (HandleResult, error) { +func (s *Service) claimStaleSendingRecord(ctx context.Context, record *notificationmodel.NotificationRecord) error { + now := s.options.Now() + // 1. sending 只在超过租约后回收,避免多实例把仍在执行的 provider 调用重复发送。 + // 2. claim 使用条件 UPDATE,抢不到说明状态已被其它实例推进,本次交给 outbox/retry 下轮重试。 + // 3. 抢到后复用 sendRecordNow 重新进入统一投递状态机,不额外分叉 provider 调用路径。 + claimed, err := s.recordStore.ClaimRetryableNotificationRecord(ctx, record.ID, now, s.sendingStaleBefore(now)) + if err != nil { + return err + } + if !claimed { + return errors.New("notification record sending 租约已被其它实例处理") + } + record.Status = notificationmodel.RecordStatusFailed + record.NextRetryAt = &now + record.LastErrorCode = stringPtrOrNil(sendingLeaseExpiredCode) + record.LastError = stringPtrOrNil("上一次发送停留在 sending,租约过期后自动恢复重试") + return nil +} + +func (s *Service) isSendingLeaseExpired(record *notificationmodel.NotificationRecord) bool { + if record == nil || record.Status != notificationmodel.RecordStatusSending { + return false + } + if record.UpdatedAt.IsZero() { + return true + } + return !record.UpdatedAt.After(s.sendingStaleBefore(s.options.Now())) +} + +func (s *Service) sendingStaleBefore(now time.Time) time.Time { + if now.IsZero() { + now = time.Now() + } + lease := s.options.SendingLease + if lease <= 0 { + lease = defaultSendingLease + } + return now.Add(-lease) +} + +func (s *Service) applySendResult(ctx context.Context, record *notificationmodel.NotificationRecord, sendResult 
notificationfeishu.SendResult) (HandleResult, error) { now := s.options.Now() responseJSON, err := marshalJSONPointer(sendResult.ResponsePayload) if err != nil { @@ -371,10 +451,10 @@ func (s *Service) applySendResult(ctx context.Context, record *model.Notificatio providerMessageID := stringPtrOrNil(sendResult.ProviderMessageID) switch sendResult.Outcome { - case FeishuSendOutcomeSuccess: + case notificationfeishu.SendOutcomeSuccess: sentAt := now updates := map[string]any{ - "status": model.NotificationRecordStatusSent, + "status": notificationmodel.RecordStatusSent, "provider_message_id": providerMessageID, "provider_request_json": requestJSON, "provider_response_json": responseJSON, @@ -383,19 +463,19 @@ func (s *Service) applySendResult(ctx context.Context, record *model.Notificatio "next_retry_at": nil, "sent_at": &sentAt, } - if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + if err = s.recordStore.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { return HandleResult{}, err } return HandleResult{ RecordID: record.ID, - Status: model.NotificationRecordStatusSent, + Status: notificationmodel.RecordStatusSent, Delivered: true, FallbackUsed: record.FallbackUsed, AttemptCount: record.AttemptCount, }, nil - case FeishuSendOutcomeSkipped: + case notificationfeishu.SendOutcomeSkipped: updates := map[string]any{ - "status": model.NotificationRecordStatusSkipped, + "status": notificationmodel.RecordStatusSkipped, "provider_message_id": providerMessageID, "provider_request_json": requestJSON, "provider_response_json": responseJSON, @@ -403,20 +483,20 @@ func (s *Service) applySendResult(ctx context.Context, record *model.Notificatio "last_error": errorMessage, "next_retry_at": nil, } - if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + if err = s.recordStore.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { return HandleResult{}, err } return HandleResult{ 
RecordID: record.ID, - Status: model.NotificationRecordStatusSkipped, + Status: notificationmodel.RecordStatusSkipped, Delivered: true, FallbackUsed: record.FallbackUsed, AttemptCount: record.AttemptCount, ProviderError: strings.TrimSpace(sendResult.ErrorCode), }, nil - case FeishuSendOutcomePermanentFail: + case notificationfeishu.SendOutcomePermanentFail: updates := map[string]any{ - "status": model.NotificationRecordStatusDead, + "status": notificationmodel.RecordStatusDead, "provider_message_id": providerMessageID, "provider_request_json": requestJSON, "provider_response_json": responseJSON, @@ -424,12 +504,12 @@ func (s *Service) applySendResult(ctx context.Context, record *model.Notificatio "last_error": errorMessage, "next_retry_at": nil, } - if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + if err = s.recordStore.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { return HandleResult{}, err } return HandleResult{ RecordID: record.ID, - Status: model.NotificationRecordStatusDead, + Status: notificationmodel.RecordStatusDead, Delivered: true, FallbackUsed: record.FallbackUsed, AttemptCount: record.AttemptCount, @@ -438,7 +518,7 @@ func (s *Service) applySendResult(ctx context.Context, record *model.Notificatio default: if record.AttemptCount >= s.effectiveMaxAttempts(record) { updates := map[string]any{ - "status": model.NotificationRecordStatusDead, + "status": notificationmodel.RecordStatusDead, "provider_message_id": providerMessageID, "provider_request_json": requestJSON, "provider_response_json": responseJSON, @@ -446,12 +526,12 @@ func (s *Service) applySendResult(ctx context.Context, record *model.Notificatio "last_error": errorMessage, "next_retry_at": nil, } - if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + if err = s.recordStore.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { return HandleResult{}, err } return HandleResult{ 
RecordID: record.ID, - Status: model.NotificationRecordStatusDead, + Status: notificationmodel.RecordStatusDead, Delivered: true, FallbackUsed: record.FallbackUsed, AttemptCount: record.AttemptCount, @@ -461,7 +541,7 @@ func (s *Service) applySendResult(ctx context.Context, record *model.Notificatio nextRetryAt := s.calcNextRetryAt(now, record.AttemptCount) updates := map[string]any{ - "status": model.NotificationRecordStatusFailed, + "status": notificationmodel.RecordStatusFailed, "provider_message_id": providerMessageID, "provider_request_json": requestJSON, "provider_response_json": responseJSON, @@ -469,12 +549,12 @@ func (s *Service) applySendResult(ctx context.Context, record *model.Notificatio "last_error": errorMessage, "next_retry_at": &nextRetryAt, } - if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + if err = s.recordStore.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { return HandleResult{}, err } return HandleResult{ RecordID: record.ID, - Status: model.NotificationRecordStatusFailed, + Status: notificationmodel.RecordStatusFailed, Delivered: true, FallbackUsed: record.FallbackUsed, AttemptCount: record.AttemptCount, @@ -484,7 +564,7 @@ func (s *Service) applySendResult(ctx context.Context, record *model.Notificatio } } -func (s *Service) buildSendRequest(record *model.NotificationRecord) FeishuSendRequest { +func (s *Service) buildSendRequest(record *notificationmodel.NotificationRecord) notificationfeishu.SendRequest { messageText := strings.TrimSpace(record.SummaryText) if record.FallbackUsed || messageText == "" { messageText = strings.TrimSpace(record.FallbackText) @@ -496,7 +576,7 @@ func (s *Service) buildSendRequest(record *model.NotificationRecord) FeishuSendR messageText = strings.TrimSpace(messageText) + "\n" + strings.TrimSpace(record.TargetURL) } - return FeishuSendRequest{ + return notificationfeishu.SendRequest{ NotificationID: record.ID, UserID: record.UserID, TriggerID: 
record.TriggerID, @@ -552,7 +632,7 @@ func (s *Service) calcNextRetryAt(now time.Time, attemptCount int) time.Time { return now.Add(delay) } -func (s *Service) effectiveMaxAttempts(record *model.NotificationRecord) int { +func (s *Service) effectiveMaxAttempts(record *notificationmodel.NotificationRecord) int { if record != nil && record.MaxAttempts > 0 { return record.MaxAttempts } @@ -575,6 +655,9 @@ func normalizeServiceOptions(opts ServiceOptions) ServiceOptions { if opts.RetryMaxDelay < opts.RetryBaseDelay { opts.RetryMaxDelay = opts.RetryBaseDelay } + if opts.SendingLease <= 0 { + opts.SendingLease = defaultSendingLease + } if opts.SummaryMaxRunes <= 0 { opts.SummaryMaxRunes = defaultSummaryMaxRunes } diff --git a/backend/notification/runner.go b/backend/services/notification/sv/worker.go similarity index 82% rename from backend/notification/runner.go rename to backend/services/notification/sv/worker.go index 4636209..05cd017 100644 --- a/backend/notification/runner.go +++ b/backend/services/notification/sv/worker.go @@ -1,4 +1,4 @@ -package notification +package sv import ( "context" @@ -9,10 +9,10 @@ import ( // StartRetryLoop 启动 notification_records 重试扫描器。 // // 说明: -// 1. 只在 worker/all 模式启动,api 模式不启动; +// 1. 只在 worker/all 或独立 notification 进程启动;API / RPC 入口不主动扫重试; // 2. provider 失败后的重试由本循环负责,避免通用 outbox 被外部服务慢失败拖住; // 3. 每轮失败只写日志,下一轮继续扫描。 -func (s *NotificationService) StartRetryLoop(ctx context.Context, every time.Duration, limit int) { +func (s *Service) StartRetryLoop(ctx context.Context, every time.Duration, limit int) { if s == nil { return } diff --git a/backend/shared/contracts/notification/types.go b/backend/shared/contracts/notification/types.go new file mode 100644 index 0000000..77a6dd9 --- /dev/null +++ b/backend/shared/contracts/notification/types.go @@ -0,0 +1,62 @@ +package notification + +import "time" + +// SaveFeishuWebhookRequest 是 gateway 写入飞书 Webhook 通道配置的跨进程契约。 +// +// 职责边界: +// 1. 只承载用户提交的配置字段,不做 URL、鉴权类型或 token 的业务校验; +// 2. 
user_id 由 gateway 从 JWT 上下文取得,不能信任前端传入; +// 3. bearer_token 只在服务内持久化和投递时使用,响应契约只返回 has_bearer_token。 +type SaveFeishuWebhookRequest struct { + UserID int `json:"user_id"` + Enabled bool `json:"enabled"` + WebhookURL string `json:"webhook_url"` + AuthType string `json:"auth_type"` + BearerToken string `json:"bearer_token"` +} + +// GetFeishuWebhookRequest 是查询飞书 Webhook 通道配置的跨进程契约。 +type GetFeishuWebhookRequest struct { + UserID int `json:"user_id"` +} + +// DeleteFeishuWebhookRequest 是删除飞书 Webhook 通道配置的跨进程契约。 +type DeleteFeishuWebhookRequest struct { + UserID int `json:"user_id"` +} + +// TestFeishuWebhookRequest 是触发飞书 Webhook 测试消息的跨进程契约。 +type TestFeishuWebhookRequest struct { + UserID int `json:"user_id"` +} + +// ChannelResponse 是通知通道配置返回给前端的脱敏视图。 +// +// 职责边界: +// 1. 不返回 webhook_url 原文和 bearer_token 原文; +// 2. 只返回用户界面需要展示的开关、脱敏地址、鉴权类型和最近测试结果; +// 3. 不暴露 notification_records 的投递状态,二者属于不同读模型。 +type ChannelResponse struct { + Channel string `json:"channel"` + Enabled bool `json:"enabled"` + Configured bool `json:"configured"` + WebhookURLMask string `json:"webhook_url_mask,omitempty"` + AuthType string `json:"auth_type"` + HasBearerToken bool `json:"has_bearer_token"` + LastTestStatus string `json:"last_test_status,omitempty"` + LastTestError string `json:"last_test_error,omitempty"` + LastTestAt *time.Time `json:"last_test_at,omitempty"` +} + +// TestResult 描述一次飞书 Webhook 测试投递结果。 +type TestResult struct { + Channel ChannelResponse `json:"channel"` + Status string `json:"status"` + Outcome string `json:"outcome"` + Message string `json:"message,omitempty"` + TraceID string `json:"trace_id,omitempty"` + SentAt time.Time `json:"sent_at"` + Skipped bool `json:"skipped"` + Provider string `json:"provider"` +} diff --git a/backend/shared/events/notification.go b/backend/shared/events/notification.go index 66752e3..2405e0b 100644 --- a/backend/shared/events/notification.go +++ b/backend/shared/events/notification.go @@ -10,6 +10,8 @@ import ( const ( 
NotificationFeishuRequestedEventType = "notification.feishu.requested" NotificationFeishuRequestedEventVersion = "1" + // DefaultFeishuNotificationDedupeWindow 是 notification 第一版固定的 30 分钟去重窗口。 + DefaultFeishuNotificationDedupeWindow = 30 * time.Minute ) // FeishuNotificationRequestedPayload 是飞书通知请求事件载荷。 @@ -80,3 +82,20 @@ func (p FeishuNotificationRequestedPayload) MessageKey() string { func (p FeishuNotificationRequestedPayload) AggregateID() string { return strings.TrimSpace(p.PreviewID) } + +// BuildFeishuNotificationDedupeKey 构造“user_id + trigger_type + time_window”去重键。 +// +// 职责边界: +// 1. 供事件发布方在生成 `notification.feishu.requested` payload 时复用; +// 2. 只负责把固定窗口归一成稳定 key,不负责落 notification_records; +// 3. requestedAt 为空或非法时直接返回空字符串,让上游显式感知入参不完整。 +func BuildFeishuNotificationDedupeKey(userID int, triggerType string, requestedAt time.Time, window time.Duration) string { + if window <= 0 { + window = DefaultFeishuNotificationDedupeWindow + } + if userID <= 0 || strings.TrimSpace(triggerType) == "" || requestedAt.IsZero() { + return "" + } + windowStart := requestedAt.Truncate(window) + return strconv.Itoa(userID) + ":" + strings.TrimSpace(triggerType) + ":" + windowStart.Format(time.RFC3339) +} diff --git a/backend/shared/ports/notification.go b/backend/shared/ports/notification.go new file mode 100644 index 0000000..a06d868 --- /dev/null +++ b/backend/shared/ports/notification.go @@ -0,0 +1,20 @@ +package ports + +import ( + "context" + + contracts "github.com/LoveLosita/smartflow/backend/shared/contracts/notification" +) + +// NotificationCommandClient 是 gateway 调用 notification 服务的通道配置能力集合。 +// +// 职责边界: +// 1. 只描述 HTTP 入口需要的配置查询、保存、删除和测试能力; +// 2. 不暴露 notification_records、provider、outbox consumer 或 retry loop 细节; +// 3. 
具体通信协议由 gateway adapter 决定,API 层保持 res, err 的统一调用语义。 +type NotificationCommandClient interface { + GetFeishuWebhook(ctx context.Context, req contracts.GetFeishuWebhookRequest) (*contracts.ChannelResponse, error) + SaveFeishuWebhook(ctx context.Context, req contracts.SaveFeishuWebhookRequest) (*contracts.ChannelResponse, error) + DeleteFeishuWebhook(ctx context.Context, req contracts.DeleteFeishuWebhookRequest) error + TestFeishuWebhook(ctx context.Context, req contracts.TestFeishuWebhookRequest) (*contracts.TestResult, error) +} diff --git a/docs/backend/微服务四步迁移与第二阶段并行开发计划.md b/docs/backend/微服务四步迁移与第二阶段并行开发计划.md index a6200fb..74d72f4 100644 --- a/docs/backend/微服务四步迁移与第二阶段并行开发计划.md +++ b/docs/backend/微服务四步迁移与第二阶段并行开发计划.md @@ -10,7 +10,7 @@ 2. 阶段 1 已完成:当前基线已经切成服务级 outbox 表、服务级 Kafka topic、服务级 consumer group;仍在单体进程内装配多个服务级 worker,后续拆微服务时再物理迁出。 3. 阶段 1.5 / 1.6 已完成:`backend/services/llm` 和 `backend/services/rag` 已经是当前 canonical 入口,`backend/infra/llm` 和 `backend/infra/rag` 的 `.go` 旧实现已删除。 4. 阶段 2 已完成:`user/auth` 已经从 Gin 单体抽成 `cmd/userauth` + `services/userauth` 的 go-zero zrpc 服务边界,gateway 只保留 user HTTP 入口、鉴权、额度门禁和轻量转发。 -5. 下一轮从阶段 3 开始,默认目标是拆 `notification`;不要再把 outbox、llm-service、rag-service 或 user/auth 当成未完成待办。 +5. 阶段 3 `notification` 服务化已完成实现、code review 修复和真实 smoke;不要再把 outbox、llm-service、rag-service 或 user/auth 当成未完成待办。 本计划遵守两个硬原则: @@ -97,6 +97,8 @@ gozero 服务负责领域能力: > 当前状态:`llm-service` / `rag-service` 这两个边界已经先做成 `backend/services/*` 的服务内模块,调用仍由 `backend/cmd/start.go` 在同一进程内装配,不是 gozero 独立进程。 > > 当前状态:`user/auth` 已经完成 go-zero zrpc 独立进程拆分,是阶段 2 样板。服务端在 `backend/services/userauth`,进程入口在 `backend/cmd/userauth`,gateway client 在 `backend/gateway/userauth`。 +> +> 当前状态:`notification` 已经完成阶段 3 拆分。服务端在 `backend/services/notification`,进程入口在 `backend/cmd/notification`,gateway client 在 `backend/gateway/notification`,服务级 outbox consumer 和 retry loop 已随服务入口迁出。 ### 3.3 事件层 @@ -127,6 +129,7 @@ gozero 服务负责领域能力: 5. 
`infra` 也不应该是一个大公共篮子:像 `kafka`、`outbox` 这类跨服务底座可以放到 `shared/infra`;`llm-service`、`rag-service` 这类模型与检索能力要单独成基础设施服务,不要塞进 `shared`;`prompt`、`tooling` 这类强业务依赖的适配器则应跟着具体服务走。 6. 换句话说,`shared` 是“跨进程契约层 + 少量跨服务底座”,不是“公共业务层”。 7. 阶段 2 已经新增 `backend/shared/contracts/userauth` 和 `backend/shared/ports`,只承载跨层契约和端口接口;user/auth 的 JWT、DAO、额度治理、黑名单实现不进入 `shared`。 +8. 阶段 3 已经新增 `backend/shared/contracts/notification`,只承载 notification 跨层 DTO;通知通道 DAO、投递状态机、provider、重试策略和 outbox handler 都留在 `backend/services/notification`。 --- @@ -141,7 +144,7 @@ gozero 服务负责领域能力: | 1.5 | 先抽 llm-service(已完成) | 已完成,`backend/services/llm` 作为当前 canonical 入口 | `go test ./...` + course / active-scheduler / memory 模型调用 smoke | | 1.6 | 再抽 rag-service(已完成) | 已完成,`backend/services/rag` 作为当前 canonical 入口 | `go test ./...` + memory retrieve / rerank smoke | | 2 | 先拆 user/auth(已完成) | 已完成,阶段 2 样板 commit 点:userauth zrpc、gateway userapi、JWT/黑名单/额度治理、启动与迁移边界已收口 | 已完成注册/登录/刷新/并发 refresh/登出/鉴权/token quota smoke | -| 3 | 再拆 notification(下一阶段) | notification 服务能独立消费和重试后 commit | notification E2E smoke + worker-only smoke | +| 3 | 再拆 notification(已完成) | 已完成,`cmd/notification` + `services/notification` zrpc / outbox consumer / retry loop 已收口,旧单体实现已删除;是否 commit 等用户明确要求 | 已完成 notification E2E smoke + worker-only smoke | | 4 | 再拆 active-scheduler | 预览生成和确认链路通过 gozero 服务跑通后 commit | dry-run / preview / confirm smoke | | 5 | 再拆 schedule / task / course / task-class | 每个领域完成一次切流就 commit 一次 | schedule/task/course/task-class 回归 + 全链路 smoke | | 6 | 再拆 agent / memory | agent 编排服务、memory 支撑服务和后台 worker 独立后 commit | agent chat / SSE / memory extract / memory retrieve smoke | @@ -373,6 +376,14 @@ flowchart LR 3. 重试扫描 smoke。 4. 停掉 notification 服务后,主动调度预览仍然可用的回归测试。 +本轮收口状态(2026-05-04): + +1. `cmd/notification` 已承载 notification zrpc 启动、DB 迁移、服务级 outbox consumer 和重试扫描。 +2. `backend/services/notification` 已收进 DAO、model、sv、rpc、飞书 provider 和 outbox handler;gateway 通过 `backend/gateway/notification` zrpc client 调用。 +3. 
主动调度侧只写入 `notification.feishu.requested`,publisher 侧只注册事件归属到 `notification`,不再启动单体 notification consumer。 +4. 旧 `backend/notification`、旧 DAO/model 和旧 `service/events/notification_feishu.go` 已删除;review 发现的 sending 租约恢复和 RPC timeout 边界已修复。 +5. 真实 smoke 已通过:`notification_outbox_messages.id=3` 已从 `pending` 推进到 `consumed`,`smartflow.notification.outbox` 已出现 `outbox_id=3`,对应 `notification_records` 生成并按未启用通道进入 `skipped`。 + --- ### 4.8 阶段 4:再拆 active-scheduler @@ -498,20 +509,19 @@ flowchart LR 当前建议按这个顺序推进: -注:阶段 1.5 / 1.6 / 2 已完成,当前实际推进从阶段 3 `notification` 开始。 +注:阶段 1.5 / 1.6 / 2 / 3 已完成;`notification` 已完成实现、code review 修复和真实 smoke,不再作为下一轮待办。 1. 以阶段 1 的服务级 outbox 为当前基线,不再回头做共享 outbox 方案。 2. 保持 `backend/services/llm` 和 `backend/services/rag` 为 canonical 入口,不再把它们写成待办。 3. 保持 `backend/services/userauth` + `cmd/userauth` 为阶段 2 样板,不再回头恢复 Gin 单体 user/auth。 -4. 下一步切 notification。 -5. 再切 active-scheduler。 -6. 然后切 schedule / task / course / task-class。 -7. 再切 agent / memory,把聊天编排和记忆链路独立出去。 -8. 最后把 Gin 收口成纯 Gateway。 +4. 下一步进入阶段 4,优先切 `active-scheduler`。 +5. 然后切 schedule / task / course / task-class。 +6. 再切 agent / memory,把聊天编排和记忆链路独立出去。 +7. 
最后把 Gin 收口成纯 Gateway。 一句话总结: -> outbox 的服务级基础设施、llm-service、rag-service 和 user/auth 样板服务都已经完成;下一轮从 notification 开始,把通知投递和重试切成独立服务;然后让 active-scheduler、schedule、task、course、task-class 按稳定边界逐步独立;再把 agent / memory 独立出来,完成聊天编排和记忆链路的服务化;最后把 Gin 收口成真正的 Gateway。 +> outbox 的服务级基础设施、llm-service、rag-service、user/auth 样板服务和 notification 阶段 3 都已经完成;下一步让 active-scheduler、schedule、task、course、task-class 按稳定边界逐步独立;再把 agent / memory 独立出来,完成聊天编排和记忆链路的服务化;最后把 Gin 收口成真正的 Gateway。 --- @@ -582,17 +592,14 @@ SmartFlow-Agent/ │ │ │ ├── batch/ │ │ │ └── item/ │ │ ├── notification/ -│ │ │ ├── start.go -│ │ │ ├── handler.go │ │ │ ├── sv/ │ │ │ ├── dao/ │ │ │ ├── model/ -│ │ │ └── internal/ -│ │ │ ├── provider/ -│ │ │ ├── runner/ -│ │ │ ├── dedupe/ -│ │ │ ├── channel/ -│ │ │ └── retry/ +│ │ │ ├── internal/ +│ │ │ │ └── feishu/ +│ │ │ └── rpc/ +│ │ │ ├── pb/ +│ │ │ └── notification.proto │ │ ├── active-scheduler/ │ │ │ ├── start.go │ │ │ ├── handler.go @@ -698,19 +705,19 @@ SmartFlow-Agent/ > 2. `backend/gateway/userapi/*` 是 user HTTP 入口,`backend/gateway/userauth/*` 是 userauth zrpc client,二者都属于 gateway 边缘层。 > 3. `backend/service/*.go` 这批现有业务逻辑,后面要分别迁到各自服务根目录下的 `sv/`。 > 4. `backend/service/agentsvc/*` 和 `backend/newAgent/*`,后面要收束到 `backend/services/agent/sv/` + `internal/{prompt,graph,stream,tool,session,router}`。 -> 5. `backend/notification/*`,下一阶段要收束到 `backend/services/notification/`,其中 `runner/provider/dedupe/channel_service` 归入 `sv/` 或 `internal/notification/`。 +> 5. `backend/services/notification/*` 已经是阶段 3 终态样板;`backend/cmd/notification` 是独立进程入口,`backend/gateway/notification` 是 gateway 侧 zrpc client,`backend/shared/contracts/notification` 只放跨层契约;旧 `backend/notification/*`、旧 DAO/model 和旧 `service/events/notification_feishu.go` 不再作为活跃实现。 > 6. `backend/active_scheduler/*`,后面要收束到 `backend/services/active-scheduler/`,其中 `graph/selection/feedbacklocate/apply/job` 归入 `internal/`。 > 7. 
`backend/memory/*`,后面要收束到 `backend/services/memory/`;当前 `memory/service/*` 只是迁移过渡态,终态还是按 `sv/` 或 `internal/` 拆开。 > > 说明 4:`shared` 先保留 `events` 和少量跨服务底座型 `infra`。以后如果真的出现跨服务 DTO / 枚举 / 常量,再新增 `contracts` 一类目录,但不要把 `dao`、`model`、`sv`、`handler` 这类服务私有层塞进去。 -> 说明 5:`notification` 和 `active-scheduler` 的服务内部建议继续收束成你熟悉的“服务内单体壳”风格,不要让一级目录一直长成一排小框架;复杂算法和编排细节可以继续拆文件,但尽量下沉到 `sv/` 或 `internal/` 下面。 +> 说明 5:`notification` 已经按 `userauth` 同款最小手搓 zrpc 样板收口:`rpc/server.go`、`rpc/handler.go`、`rpc/errors.go` + `rpc/pb`,不是 goctl 自动脚手架;`active-scheduler` 后续也按服务内单体壳继续收束,不要让一级目录长期长成一排小框架。 > > 说明 6:`llm-service` 和 `rag-service` 是独立基础设施服务,不放进 `shared`;`rag-service` 依赖 `llm-service` 做 embedding / rerank,不反向依赖业务服务。 > > 说明 7:目录树里如果暂时写成 `backend/services/llm/` 和 `backend/services/rag/`,那只是目录名写法;后文所有职责判断都以 `llm-service` / `rag-service` 这两个逻辑服务名为准。 > -> 说明 8:阶段 2 已经采用 `backend/services/userauth/` 作为实际目录名,不再使用 `user-auth`。gateway 侧 zrpc client 放在 `backend/gateway/userauth/`,进程入口放在 `backend/cmd/userauth/`;不要把 rpc client 放进 `cmd`。 +> 说明 8:阶段 2 已经采用 `backend/services/userauth/` 作为实际目录名,不再使用 `user-auth`。阶段 3 已经采用 `backend/services/notification/` 作为实际目录名。gateway 侧 zrpc client 放在 `backend/gateway/{userauth,notification}/`,进程入口放在 `backend/cmd/{userauth,notification}/`;不要把 rpc client 放进 `cmd`。 ### 6.3 哪些可以不用变 @@ -723,7 +730,7 @@ SmartFlow-Agent/ 1. `backend/cmd/start.go` 这种“大装配入口”后面要逐步拆成 gateway 启动和各服务启动。 2. `api` 这一层会收缩成纯 Gateway 职责,不再承载核心领域逻辑。 -3. 当前仓库里的 `backend/service` 目录和相关遗留入口,要按 `user/auth`、`course`、`task-class`、`notification`、`active-scheduler`、`schedule`、`task`、`agent`、`memory` 拆出去;其中 `notification` 和 `active-scheduler` 最终都要收束成更像 seckill 的服务内单体壳,不要长期维持一串顶层小包。 +3. 当前仓库里的 `backend/service` 目录和相关遗留入口,要继续按 `course`、`task-class`、`active-scheduler`、`schedule`、`task`、`agent`、`memory` 拆出去;`user/auth` 和 `notification` 已完成独立服务边界,后续不要回迁到单体。 4. 当前单体里的共享启动方式 `api / worker / all`,后面会拆成“gateway 进程 + 服务进程 + worker 进程”的组合。 5. 任何依赖 `users` 表直读、核心表直写的网关路径,都要迁到对应服务里。 6. 
不再把服务私有的 `dao` / `model` / `sv` / `handler` 误放进 `shared`,避免它变成新的单体公共层。 @@ -847,7 +854,8 @@ graph TD 4. 图里的 outbox 是“每个服务自己的 outbox 表 + 专属 relay worker”的抽象,不代表所有服务共用一张表。 5. 当前阶段 1 已完成 `agent`、`task`、`memory`、`active-scheduler`、`notification` 的服务级 outbox 表、topic 和 consumer group;尚未物理拆出的服务后续沿用同一模式补齐。 6. 当前阶段 2 已完成 `user/auth` 物理拆分;gateway 到 userauth 的调用已经通过 zrpc client,不再通过本地 DAO/service。 -7. Kafka 是共享运输层,不是共享业务 topic;新流量不应再默认进入单一共享 topic。 +7. 当前阶段 3 已完成 `notification` 物理拆分;gateway 到 notification 的调用已经通过 zrpc client,notification outbox consumer、relay 和 retry loop 已迁入 `cmd/notification` 启动边界。 +8. Kafka 是共享运输层,不是共享业务 topic;新流量不应再默认进入单一共享 topic。 ### 6.9 切对话交接卡 @@ -857,11 +865,11 @@ graph TD 2. 已冻结的终态是 `Gin Gateway + gozero 服务群 + 服务级 outbox + Kafka 共享运输层`。 3. 阶段 1 已完成,当前 outbox 基线是服务级表、服务级 topic、服务级 consumer group;尚未物理拆出的服务,其 worker 仍在单体内装配,后续随对应服务迁出(notification 的 outbox consumer、relay 和 retry loop 已随阶段 3 迁入 `cmd/notification`)。 4. 阶段 2 已完成,`user/auth` 已经是样板服务,不要再把它当成下一轮待办。 -5. 下一轮默认从阶段 3 `notification` 开始;`llm-service`、`rag-service` 也已完成,不要重新当成待办。 -6. `notification` 和 `active-scheduler` 后续要回到更像 seckill 的服务内单体壳。 +5. 阶段 3 `notification` 已完成实现、code review 修复和真实 smoke;`llm-service`、`rag-service` 也已完成,不要重新当成待办。 +6. 下一轮默认从阶段 4 `active-scheduler` 开始;它后续要回到更像 seckill 的服务内单体壳。 7. `shared` 只保留跨进程契约和少量跨服务底座,不承载业务逻辑、DAO、模型或状态机。 8. 如果后续要改目录,必须先回答“这个文件属于哪一个典型用例”,回答不清楚就先别动结构。 -9. 当前文档已经可以作为切对话基线;后续代理默认按本文件推进。现阶段的迁移基线入口是 `backend/cmd/api`、`backend/cmd/worker`、`backend/cmd/all`,它们只是当前仓库的启动壳,不是终态。`backend/cmd/userauth` 是阶段 2 的独立服务入口。终态仍然是“一个服务一个独立 `main.go`”,只在出现新的契约风险、边界变化或业务语义变化时再重新讨论架构。 +9. 当前文档已经可以作为切对话基线;后续代理默认按本文件推进。现阶段的迁移基线入口是 `backend/cmd/api`、`backend/cmd/worker`、`backend/cmd/all`,它们只是当前仓库的启动壳,不是终态。`backend/cmd/userauth` 是阶段 2 的独立服务入口,`backend/cmd/notification` 是阶段 3 的独立服务入口。终态仍然是“一个服务一个独立 `main.go`”,只在出现新的契约风险、边界变化或业务语义变化时再重新讨论架构。 ### 6.10 启动方式与进程模型 @@ -984,7 +992,7 @@ graph TD 这段用于避免后续代理重复踩阶段 2 已经纠偏过的问题。 -1. 
阶段 3 `notification` 已完成;后续起步默认是阶段 4 `active-scheduler`,不是 outbox、llm-service、rag-service、user/auth 或 notification。 2. 主代理负责 leader:先读必要文档和代码,拆任务,关键阻塞任务自己做;子代理只能承担并行、明确、非阻塞的侧翼任务。 3. 如果确实有会影响切分方向的不确定点,先总结成拍板点问用户;文档已经写清楚的内容不要重复问。 4. 查库一律用 `docker exec`。MySQL / Redis 都按这个规则走;不直接用本机客户端绕过容器。 @@ -999,7 +1007,7 @@ graph TD --- -### 6.14 阶段 0 历史基线与阶段 1/2 当前基线快照 +### 6.14 阶段 0 历史基线与阶段 1/2/3 当前基线快照 阶段 0 历史基线: @@ -1033,6 +1041,17 @@ graph TD 5. `cmd/all` 不再迁 `users`,`cmd/userauth` 自己迁 `users` 和 `user_token_usage_adjustments`。 6. 完整本地 smoke 需要同时启动 `cmd/all` 和 `cmd/userauth`。 +阶段 3 当前基线: + +1. `backend/cmd/notification/main.go` 是 notification 独立进程入口,负责 DB 迁移、zrpc server、notification outbox consumer 和 retry loop 的统一生命周期。 +2. `backend/services/notification` 拥有 notification 核心业务、DAO、模型、飞书 provider、幂等、投递记录状态机、重试扫描和 outbox handler。 +3. `backend/gateway/notification` 是 gateway 侧 zrpc client;gateway 只保留 notification HTTP 入口、鉴权和轻量组合逻辑,不再直连 notification DAO/service。 +4. `backend/shared/contracts/notification` 和 `backend/shared/ports` 只承载跨层契约和端口接口,不承载服务私有业务实现。 +5. notification 内部是 `userauth` 同款最小手搓 zrpc 框架,不使用 goctl 自动脚手架;`rpc` 只保留 `NewServer` 供 `cmd/notification` 管理 signal、outbox consumer、retry loop 和 server 生命周期。 +6. 旧 `backend/notification/*`、旧 `backend/dao/notification_channel.go`、旧 `backend/model/notification_channel.go` 和旧 `backend/service/events/notification_feishu.go` 已删除;若 `backend/notification` 目录壳仍存在,它不参与编译,也不作为活跃实现。 +7. notification outbox consumer 已迁入独立服务边界并处理 `notification.feishu.requested`,覆盖 payload/version 校验、dead/retry/consumed 状态推进和毒消息回退。 +8. 已完成真实 smoke:`notification_outbox_messages` 可从 `pending` 推进到 `consumed`,Kafka `smartflow.notification.outbox` 可看到对应 outbox 消息,`notification_records` 可生成幂等记录并按通道状态进入预期状态。 + --- ## 7. 风险与回退