diff --git a/backend/active_scheduler/apply/types.go b/backend/active_scheduler/apply/types.go index 0f43670..2bf29e4 100644 --- a/backend/active_scheduler/apply/types.go +++ b/backend/active_scheduler/apply/types.go @@ -93,6 +93,33 @@ func newApplyError(code ErrorCode, message string, err error) error { return &ApplyError{Code: code, Message: message, Err: err} } +// NewApplyError 构造 confirm/apply 链路可分类业务错误。 +// +// 职责边界: +// 1. 供 service/API 层把预览归属、幂等冲突、adapter 业务拒绝转换为统一错误语义; +// 2. 不负责写 preview 状态,也不决定 HTTP 状态码; +// 3. cause 仅用于保留底层错误,展示给前端的文案应放在 message。 +func NewApplyError(code ErrorCode, message string, cause error) error { + return newApplyError(code, message, cause) +} + +// AsApplyError 尝试把 error 还原为 ApplyError。 +// +// 职责边界: +// 1. 只做 errors.As 类型判断,方便 API 层统一映射业务错误; +// 2. 不把未知错误强行归类,避免数据库或系统故障被误判为 4xx; +// 3. 返回 bool=false 时,调用方应按普通系统错误处理。 +func AsApplyError(err error) (*ApplyError, bool) { + if err == nil { + return nil, false + } + var applyErr *ApplyError + if errors.As(err, &applyErr) { + return applyErr, true + } + return nil, false +} + func errorCodeOf(err error) ErrorCode { if err == nil { return "" diff --git a/backend/active_scheduler/job/scanner.go b/backend/active_scheduler/job/scanner.go new file mode 100644 index 0000000..c4d0c93 --- /dev/null +++ b/backend/active_scheduler/job/scanner.go @@ -0,0 +1,269 @@ +package job + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log" + "time" + + "github.com/LoveLosita/smartflow/backend/active_scheduler/ports" + activesvc "github.com/LoveLosita/smartflow/backend/active_scheduler/service" + "github.com/LoveLosita/smartflow/backend/active_scheduler/trigger" + "github.com/LoveLosita/smartflow/backend/dao" + "github.com/LoveLosita/smartflow/backend/model" +) + +const ( + defaultScanLimit = 50 +) + +// Scanner 扫描到期 active_schedule_jobs 并生成正式 trigger。 +// +// 职责边界: +// 1. 只负责 due job -> trigger,不执行 dry-run、不写 preview、不发 notification; +// 2. 扫描时必须重读 task 与 schedule 真值,避免过期 job 误触发; +// 3. 
对已完成、已排入日程或不再符合条件的 job,只更新 job 状态,不物理删除。 +type Scanner struct { + activeDAO *dao.ActiveScheduleDAO + taskReader ports.TaskReader + scheduleReader ports.ScheduleReader + triggerService *activesvc.TriggerService + clock func() time.Time + limit int + scanEvery time.Duration +} + +type ScannerOptions struct { + Limit int + ScanEvery time.Duration + Clock func() time.Time +} + +type ScanResult struct { + Scanned int + Triggered int + Skipped int + Failed int +} + +func NewScanner(activeDAO *dao.ActiveScheduleDAO, readers ports.Readers, triggerService *activesvc.TriggerService, options ScannerOptions) (*Scanner, error) { + if activeDAO == nil { + return nil, errors.New("active schedule dao 不能为空") + } + if readers.TaskReader == nil { + return nil, errors.New("TaskReader 不能为空") + } + if readers.ScheduleReader == nil { + return nil, errors.New("ScheduleReader 不能为空") + } + if triggerService == nil { + return nil, errors.New("trigger service 不能为空") + } + limit := options.Limit + if limit <= 0 { + limit = defaultScanLimit + } + scanEvery := options.ScanEvery + if scanEvery <= 0 { + scanEvery = time.Minute + } + clock := options.Clock + if clock == nil { + clock = time.Now + } + return &Scanner{ + activeDAO: activeDAO, + taskReader: readers.TaskReader, + scheduleReader: readers.ScheduleReader, + triggerService: triggerService, + clock: clock, + limit: limit, + scanEvery: scanEvery, + }, nil +} + +// Start 启动 due job 周期扫描。 +// +// 说明: +// 1. worker/all 模式调用;api 模式不启动,避免 API 进程承担后台职责; +// 2. 每轮扫描失败只记录日志,下一轮继续; +// 3. 
ctx 取消后 goroutine 自然退出。 +func (s *Scanner) Start(ctx context.Context) { + if s == nil { + return + } + go func() { + ticker := time.NewTicker(s.scanEvery) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + result, err := s.ScanDue(ctx, s.now()) + if err != nil { + log.Printf("主动调度 due job 扫描失败: err=%v", err) + continue + } + if result.Scanned > 0 { + log.Printf("主动调度 due job 扫描完成: scanned=%d triggered=%d skipped=%d failed=%d", result.Scanned, result.Triggered, result.Skipped, result.Failed) + } + } + } + }() +} + +// ScanDue 扫描并处理一批到期 job。 +func (s *Scanner) ScanDue(ctx context.Context, now time.Time) (ScanResult, error) { + if s == nil || s.activeDAO == nil { + return ScanResult{}, errors.New("scanner 未初始化") + } + jobs, err := s.activeDAO.ListDueJobs(ctx, now, s.limit) + if err != nil { + return ScanResult{}, err + } + result := ScanResult{Scanned: len(jobs)} + for _, item := range jobs { + handled, handleErr := s.processJob(ctx, item, now) + switch { + case handleErr != nil: + result.Failed++ + log.Printf("主动调度 due job 处理失败: job_id=%s err=%v", item.ID, handleErr) + case handled == model.ActiveScheduleJobStatusTriggered: + result.Triggered++ + default: + result.Skipped++ + } + } + return result, nil +} + +func (s *Scanner) processJob(ctx context.Context, item model.ActiveScheduleJob, now time.Time) (string, error) { + task, found, err := s.taskReader.GetTaskForActiveSchedule(ctx, ports.TaskRequest{ + UserID: item.UserID, + TaskID: item.TaskID, + Now: now, + }) + if err != nil { + _ = s.markJobFailed(ctx, item.ID, "task_read_failed", err, now) + return "", err + } + if !found { + return model.ActiveScheduleJobStatusSkipped, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusSkipped, "task_not_found", now) + } + if task.IsCompleted { + return model.ActiveScheduleJobStatusCanceled, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusCanceled, "task_completed", now) + } + if task.UrgencyThresholdAt == nil { + 
// 1. 到期扫描必须重读 task 真值。 + // 2. 若上游已经移除了 urgency_threshold_at,说明这条 due job 已经不再具备触发前提。 + // 3. 这里直接收敛为 canceled,避免继续错误地产生 trigger。 + return model.ActiveScheduleJobStatusCanceled, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusCanceled, "task_not_schedulable", now) + } + if task.UrgencyThresholdAt != nil && task.UrgencyThresholdAt.After(now) { + return model.ActiveScheduleJobStatusPending, s.activeDAO.UpdateJobFields(ctx, item.ID, map[string]any{ + "trigger_at": *task.UrgencyThresholdAt, + "last_error_code": "threshold_moved_future", + "last_scanned_at": &now, + }) + } + if task.Priority != 1 && task.Priority != 2 { + return model.ActiveScheduleJobStatusSkipped, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusSkipped, "task_not_important", now) + } + alreadyScheduled, err := s.isTaskAlreadyScheduled(ctx, item.UserID, item.TaskID, now) + if err != nil { + _ = s.markJobFailed(ctx, item.ID, "schedule_read_failed", err, now) + return "", err + } + if alreadyScheduled { + return model.ActiveScheduleJobStatusSkipped, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusSkipped, "task_already_scheduled", now) + } + + payload := struct { + JobID string `json:"job_id"` + UrgencyThresholdAt time.Time `json:"urgency_threshold_at"` + }{ + JobID: item.ID, + UrgencyThresholdAt: item.TriggerAt, + } + rawPayload, _ := json.Marshal(payload) + jobID := item.ID + resp, err := s.triggerService.CreateAndPublish(ctx, activesvc.TriggerRequest{ + UserID: item.UserID, + TriggerType: trigger.TriggerTypeImportantUrgentTask, + Source: trigger.SourceWorkerDueJob, + TargetType: trigger.TargetTypeTaskPool, + TargetID: item.TaskID, + DedupeKey: item.DedupeKey, + RequestedAt: now, + Payload: rawPayload, + JobID: &jobID, + TraceID: firstNonEmpty(item.TraceID, fmt.Sprintf("trace_active_job_%s", item.ID)), + }) + if err != nil { + _ = s.markJobFailed(ctx, item.ID, "trigger_publish_failed", err, now) + return "", err + } + return model.ActiveScheduleJobStatusTriggered, 
s.activeDAO.UpdateJobFields(ctx, item.ID, map[string]any{ + "status": model.ActiveScheduleJobStatusTriggered, + "last_trigger_id": &resp.TriggerID, + "last_error_code": nil, + "last_error": nil, + "last_scanned_at": &now, + }) +} + +func (s *Scanner) isTaskAlreadyScheduled(ctx context.Context, userID int, taskID int, now time.Time) (bool, error) { + facts, err := s.scheduleReader.GetScheduleFactsByWindow(ctx, ports.ScheduleWindowRequest{ + UserID: userID, + TargetType: string(trigger.TargetTypeTaskPool), + TargetID: taskID, + WindowStart: now, + WindowEnd: now.Add(24 * time.Hour), + Now: now, + }) + if err != nil { + return false, err + } + return facts.TargetAlreadyScheduled, nil +} + +func (s *Scanner) markJobSkipped(ctx context.Context, jobID string, status string, code string, now time.Time) error { + return s.activeDAO.UpdateJobFields(ctx, jobID, map[string]any{ + "status": status, + "last_error_code": code, + "last_error": nil, + "last_scanned_at": &now, + }) +} + +func (s *Scanner) markJobFailed(ctx context.Context, jobID string, code string, err error, now time.Time) error { + message := "" + if err != nil { + message = err.Error() + } + return s.activeDAO.UpdateJobFields(ctx, jobID, map[string]any{ + "status": model.ActiveScheduleJobStatusFailed, + "last_error_code": code, + "last_error": &message, + "last_scanned_at": &now, + }) +} + +func (s *Scanner) now() time.Time { + if s == nil || s.clock == nil { + return time.Now() + } + return s.clock() +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if value != "" { + return value + } + } + return "" +} diff --git a/backend/active_scheduler/service/preview_confirm.go b/backend/active_scheduler/service/preview_confirm.go index 8abf064..c0a636b 100644 --- a/backend/active_scheduler/service/preview_confirm.go +++ b/backend/active_scheduler/service/preview_confirm.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "errors" - "fmt" "time" activeapply 
"github.com/LoveLosita/smartflow/backend/active_scheduler/apply" @@ -12,6 +11,7 @@ import ( activepreview "github.com/LoveLosita/smartflow/backend/active_scheduler/preview" "github.com/LoveLosita/smartflow/backend/dao" "github.com/LoveLosita/smartflow/backend/model" + "gorm.io/gorm" ) // PreviewConfirmService 编排第三阶段的预览生成、查询和确认应用。 @@ -88,16 +88,19 @@ func (s *PreviewConfirmService) ConfirmPreview(ctx context.Context, req activeap } previewRow, err := s.activeDAO.GetPreviewByID(ctx, req.PreviewID) if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return nil, activeapply.NewApplyError(activeapply.ErrorCodeTargetNotFound, "预览不存在或已被删除", err) + } return nil, err } if previewRow.UserID != req.UserID { - return nil, fmt.Errorf("preview 不属于当前用户") + return nil, activeapply.NewApplyError(activeapply.ErrorCodeForbidden, "预览不属于当前用户", nil) } if previewRow.ApplyStatus == model.ActiveScheduleApplyStatusApplied { if previewRow.ApplyIdempotencyKey == req.IdempotencyKey { return alreadyAppliedResult(*previewRow), nil } - return nil, fmt.Errorf("preview 已应用,不能使用新的幂等键重复确认") + return nil, activeapply.NewApplyError(activeapply.ErrorCodeAlreadyApplied, "预览已经应用,不能使用新的幂等键重复确认", nil) } applyReq, err := activeapply.ConvertConfirmToApplyRequest(*previewRow, req, now) @@ -106,7 +109,7 @@ func (s *PreviewConfirmService) ConfirmPreview(ctx context.Context, req activeap return nil, err } if len(applyReq.Commands) == 0 { - return nil, fmt.Errorf("当前候选没有可正式应用的日程变更") + return s.markNoopApplied(ctx, *applyReq) } if err = s.markApplying(ctx, *applyReq); err != nil { return nil, err @@ -115,8 +118,9 @@ func (s *PreviewConfirmService) ConfirmPreview(ctx context.Context, req activeap adapterReq := toAdapterRequest(*applyReq) adapterResult, err := s.applyAdapter.ApplyActiveScheduleChanges(ctx, adapterReq) if err != nil { - _ = s.markApplyFailed(ctx, previewRow.ID, applyReq.ApplyID, err) - return nil, err + classifiedErr := classifyAdapterApplyError(err) + _ = s.markApplyFailed(ctx, 
previewRow.ID, applyReq.ApplyID, classifiedErr) + return nil, classifiedErr } result := activeapply.ApplyActiveScheduleResult{ @@ -155,13 +159,48 @@ func (s *PreviewConfirmService) markApplying(ctx context.Context, req activeappl }) } +// markNoopApplied 处理 notify_only / ask_user / close 这类“确认成功但不写正式日程”的候选。 +// +// 职责边界: +// 1. 只把 preview 标记为已处理,并保留幂等字段,便于同 key 重试直接命中历史结果; +// 2. 不调用 apply adapter,因为这些 change 在转换阶段已经被归类为 skipped_changes; +// 3. 失败时直接返回数据库错误,调用方应按系统错误处理,避免前端误以为确认成功。 +func (s *PreviewConfirmService) markNoopApplied(ctx context.Context, req activeapply.ApplyActiveScheduleRequest) (*activeapply.ConfirmResult, error) { + result := activeapply.ApplyActiveScheduleResult{ + ApplyID: req.ApplyID, + ApplyStatus: activeapply.ApplyStatusApplied, + AppliedChanges: []activeapply.ApplyChange{}, + SkippedChanges: req.SkippedChanges, + RequestHash: req.RequestHash, + NormalizedChangeHash: req.NormalizedChangesHash, + } + if err := s.markApplied(ctx, req, result); err != nil { + return nil, err + } + return &activeapply.ConfirmResult{ + PreviewID: req.PreviewID, + ApplyID: req.ApplyID, + ApplyStatus: activeapply.ApplyStatusApplied, + CandidateID: req.CandidateID, + RequestHash: req.RequestHash, + RequestBodyHash: req.RequestBodyHash, + ApplyRequest: &req, + ApplyResult: &result, + SkippedChanges: req.SkippedChanges, + }, nil +} + func (s *PreviewConfirmService) markApplied(ctx context.Context, req activeapply.ApplyActiveScheduleRequest, result activeapply.ApplyActiveScheduleResult) error { now := s.now() appliedChangesJSON := mustJSON(result.AppliedChanges) appliedEventIDsJSON := mustJSON(result.AppliedEventIDs) return s.activeDAO.UpdatePreviewFields(ctx, req.PreviewID, map[string]any{ "status": model.ActiveSchedulePreviewStatusApplied, + "apply_id": req.ApplyID, "apply_status": model.ActiveScheduleApplyStatusApplied, + "apply_candidate_id": req.CandidateID, + "apply_idempotency_key": req.IdempotencyKey, + "apply_request_hash": req.RequestHash, 
"applied_changes_json": &appliedChangesJSON, "applied_event_ids_json": &appliedEventIDsJSON, "apply_error": nil, @@ -177,8 +216,19 @@ func (s *PreviewConfirmService) markApplyFailed(ctx context.Context, previewID s if err != nil { message = err.Error() } + status := model.ActiveScheduleApplyStatusFailed + if applyErr, ok := activeapply.AsApplyError(err); ok { + switch applyErr.Code { + case activeapply.ErrorCodeExpired: + status = model.ActiveScheduleApplyStatusExpired + case activeapply.ErrorCodeDBError: + status = model.ActiveScheduleApplyStatusFailed + default: + status = model.ActiveScheduleApplyStatusRejected + } + } updates := map[string]any{ - "apply_status": model.ActiveScheduleApplyStatusFailed, + "apply_status": status, "apply_error": &message, } if applyID != "" { @@ -187,6 +237,40 @@ func (s *PreviewConfirmService) markApplyFailed(ctx context.Context, previewID s return s.activeDAO.UpdatePreviewFields(ctx, previewID, updates) } +// classifyAdapterApplyError 把正式写库 adapter 的错误转换为 confirm 层统一错误码。 +// +// 职责边界: +// 1. 只处理 applyadapter 已声明的业务错误码,保持 API 层只理解 active_scheduler/apply 包; +// 2. 未知错误统一归为 db_error,避免把真实系统故障错误映射为用户可修正的 4xx; +// 3. 
原始错误作为 cause 保留,日志和 apply_error 仍能追到 adapter 返回的完整信息。 +func classifyAdapterApplyError(err error) error { + if err == nil { + return nil + } + var adapterErr *applyadapter.ApplyError + if !errors.As(err, &adapterErr) { + return activeapply.NewApplyError(activeapply.ErrorCodeDBError, "主动调度正式写库失败", err) + } + switch adapterErr.Code { + case applyadapter.ErrorCodeInvalidRequest: + return activeapply.NewApplyError(activeapply.ErrorCodeInvalidRequest, adapterErr.Message, err) + case applyadapter.ErrorCodeUnsupportedChangeType: + return activeapply.NewApplyError(activeapply.ErrorCodeUnsupportedChangeType, adapterErr.Message, err) + case applyadapter.ErrorCodeTargetNotFound: + return activeapply.NewApplyError(activeapply.ErrorCodeTargetNotFound, adapterErr.Message, err) + case applyadapter.ErrorCodeTargetCompleted: + return activeapply.NewApplyError(activeapply.ErrorCodeTargetCompleted, adapterErr.Message, err) + case applyadapter.ErrorCodeTargetAlreadyScheduled: + return activeapply.NewApplyError(activeapply.ErrorCodeTargetAlreadySchedule, adapterErr.Message, err) + case applyadapter.ErrorCodeSlotConflict: + return activeapply.NewApplyError(activeapply.ErrorCodeSlotConflict, adapterErr.Message, err) + case applyadapter.ErrorCodeInvalidEditedChanges: + return activeapply.NewApplyError(activeapply.ErrorCodeInvalidEditedChanges, adapterErr.Message, err) + default: + return activeapply.NewApplyError(activeapply.ErrorCodeDBError, adapterErr.Message, err) + } +} + func (s *PreviewConfirmService) now() time.Time { if s == nil || s.clock == nil { return time.Now() diff --git a/backend/active_scheduler/service/trigger.go b/backend/active_scheduler/service/trigger.go new file mode 100644 index 0000000..42f0c74 --- /dev/null +++ b/backend/active_scheduler/service/trigger.go @@ -0,0 +1,270 @@ +package service + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + "github.com/LoveLosita/smartflow/backend/active_scheduler/trigger" + 
"github.com/LoveLosita/smartflow/backend/dao" + outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox" + "github.com/LoveLosita/smartflow/backend/model" + sharedevents "github.com/LoveLosita/smartflow/backend/shared/events" + "github.com/google/uuid" + "gorm.io/gorm" +) + +const triggerDedupeWindow = 30 * time.Minute + +// TriggerRequest 是正式主动调度触发入口的请求 DTO。 +// +// 职责边界: +// 1. 负责承载 API trigger、worker due job、用户反馈归一后的触发事实; +// 2. 不承载 dry-run 结果、preview 快照或 notification provider 参数; +// 3. Payload 只保存触发来源补充信息,不能塞任意业务写库参数。 +type TriggerRequest struct { + UserID int + TriggerType trigger.TriggerType + Source trigger.Source + TargetType trigger.TargetType + TargetID int + FeedbackID string + IdempotencyKey string + DedupeKey string + MockNow *time.Time + IsMockTime bool + RequestedAt time.Time + Payload json.RawMessage + JobID *string + TraceID string +} + +// TriggerResponse 是正式触发写入后的结果。 +type TriggerResponse struct { + TriggerID string `json:"trigger_id"` + Status string `json:"status"` + PreviewID *string `json:"preview_id,omitempty"` + DedupeHit bool `json:"dedupe_hit"` + TraceID string `json:"trace_id,omitempty"` +} + +// TriggerService 负责写入正式 trigger 并发布 active_schedule.triggered 事件。 +// +// 职责边界: +// 1. 只负责触发信号持久化、去重和事件发布; +// 2. 不执行 dry-run、不写 preview、不发飞书; +// 3. outbox 未启用时返回明确错误,避免调用方误以为正式链路已启动。 +type TriggerService struct { + activeDAO *dao.ActiveScheduleDAO + publisher outboxinfra.EventPublisher + clock func() time.Time +} + +func NewTriggerService(activeDAO *dao.ActiveScheduleDAO, publisher outboxinfra.EventPublisher) (*TriggerService, error) { + if activeDAO == nil { + return nil, errors.New("active schedule dao 不能为空") + } + return &TriggerService{ + activeDAO: activeDAO, + publisher: publisher, + clock: time.Now, + }, nil +} + +func (s *TriggerService) SetClock(clock func() time.Time) { + if s != nil && clock != nil { + s.clock = clock + } +} + +// CreateAndPublish 创建正式 trigger 并发布 outbox 事件。 +// +// 步骤化说明: +// 1. 
先按主动调度 trigger DTO 做入口校验,确保 mock_now 不会从 worker 入口混入; +// 2. 再用 idempotency_key / dedupe_key 查询已有 trigger,命中则直接返回旧状态; +// 3. 新 trigger 先落库,再发布 outbox;发布失败会把 trigger 标记 failed,便于排障; +// 4. 返回 nil error 只表示事件已入 outbox,不表示 worker 已经生成 preview。 +func (s *TriggerService) CreateAndPublish(ctx context.Context, req TriggerRequest) (*TriggerResponse, error) { + if s == nil || s.activeDAO == nil { + return nil, errors.New("trigger service 未初始化") + } + if s.publisher == nil { + return nil, errors.New("outbox event bus 未启用,无法执行正式主动调度 trigger") + } + + now := s.now() + if req.RequestedAt.IsZero() { + req.RequestedAt = now + } + if req.IsMockTime && req.MockNow == nil { + return nil, errors.New("is_mock_time=true 时 mock_now 不能为空") + } + trig := trigger.ActiveScheduleTrigger{ + UserID: req.UserID, + TriggerType: req.TriggerType, + Source: req.Source, + TargetType: req.TargetType, + TargetID: req.TargetID, + FeedbackID: req.FeedbackID, + IdempotencyKey: req.IdempotencyKey, + MockNow: req.MockNow, + IsMockTime: req.IsMockTime, + RequestedAt: req.RequestedAt, + TraceID: firstNonEmpty(req.TraceID, fmt.Sprintf("trace_active_trigger_%d", now.UnixNano())), + } + if err := trig.Validate(); err != nil { + return nil, err + } + if trig.Source == trigger.SourceAPIDryRun { + return nil, errors.New("api_dry_run 不允许创建正式 trigger") + } + + dedupeKey := strings.TrimSpace(req.DedupeKey) + if dedupeKey == "" { + dedupeKey = BuildTriggerDedupeKey(req.UserID, req.TriggerType, req.TargetType, req.TargetID, req.FeedbackID, req.IdempotencyKey, trig.EffectiveNow(req.RequestedAt)) + } + if existing, ok, err := s.findExistingTrigger(ctx, req.UserID, string(req.TriggerType), req.IdempotencyKey, dedupeKey); err != nil { + return nil, err + } else if ok { + return triggerResponseFromModel(existing, true), nil + } + + payloadJSON := string(req.Payload) + if strings.TrimSpace(payloadJSON) == "" { + payloadJSON = "{}" + } + triggerID := "ast_" + uuid.NewString() + row := &model.ActiveScheduleTrigger{ + ID: 
triggerID, + UserID: req.UserID, + TriggerType: string(req.TriggerType), + Source: string(req.Source), + TargetType: string(req.TargetType), + TargetID: req.TargetID, + FeedbackID: strings.TrimSpace(req.FeedbackID), + JobID: req.JobID, + IdempotencyKey: strings.TrimSpace(req.IdempotencyKey), + DedupeKey: dedupeKey, + Status: model.ActiveScheduleTriggerStatusPending, + MockNow: req.MockNow, + IsMockTime: req.IsMockTime, + RequestedAt: req.RequestedAt, + PayloadJSON: &payloadJSON, + TraceID: trig.TraceID, + } + if err := s.activeDAO.CreateTrigger(ctx, row); err != nil { + return nil, err + } + + eventPayload := sharedevents.ActiveScheduleTriggeredPayload{ + TriggerID: row.ID, + UserID: row.UserID, + TriggerType: row.TriggerType, + Source: row.Source, + TargetType: row.TargetType, + TargetID: row.TargetID, + FeedbackID: row.FeedbackID, + IdempotencyKey: row.IdempotencyKey, + DedupeKey: row.DedupeKey, + MockNow: row.MockNow, + IsMockTime: row.IsMockTime, + RequestedAt: row.RequestedAt, + Payload: json.RawMessage(payloadJSON), + TraceID: row.TraceID, + } + if err := eventPayload.Validate(); err != nil { + _ = s.markTriggerFailed(ctx, row.ID, "payload_invalid", err) + return nil, err + } + if err := s.publisher.Publish(ctx, outboxinfra.PublishRequest{ + EventType: sharedevents.ActiveScheduleTriggeredEventType, + EventVersion: sharedevents.ActiveScheduleTriggeredEventVersion, + MessageKey: eventPayload.MessageKey(), + AggregateID: eventPayload.AggregateID(), + Payload: eventPayload, + }); err != nil { + _ = s.markTriggerFailed(ctx, row.ID, "outbox_publish_failed", err) + return nil, err + } + + return triggerResponseFromModel(row, false), nil +} + +func (s *TriggerService) findExistingTrigger(ctx context.Context, userID int, triggerType string, idempotencyKey string, dedupeKey string) (*model.ActiveScheduleTrigger, bool, error) { + if strings.TrimSpace(idempotencyKey) != "" { + existing, err := s.activeDAO.FindTriggerByIdempotencyKey(ctx, userID, triggerType, 
idempotencyKey) + if err == nil { + return existing, true, nil + } + if !errors.Is(err, gorm.ErrRecordNotFound) { + return nil, false, err + } + } + statuses := []string{ + model.ActiveScheduleTriggerStatusPending, + model.ActiveScheduleTriggerStatusProcessing, + model.ActiveScheduleTriggerStatusPreviewGenerated, + } + existing, err := s.activeDAO.FindTriggerByDedupeKey(ctx, dedupeKey, statuses) + if err == nil { + return existing, true, nil + } + if !errors.Is(err, gorm.ErrRecordNotFound) { + return nil, false, err + } + return nil, false, nil +} + +func (s *TriggerService) markTriggerFailed(ctx context.Context, triggerID string, code string, err error) error { + message := "" + if err != nil { + message = err.Error() + } + now := s.now() + return s.activeDAO.UpdateTriggerFields(ctx, triggerID, map[string]any{ + "status": model.ActiveScheduleTriggerStatusFailed, + "last_error_code": code, + "last_error": &message, + "completed_at": &now, + }) +} + +func (s *TriggerService) now() time.Time { + if s == nil || s.clock == nil { + return time.Now() + } + return s.clock() +} + +// BuildTriggerDedupeKey 生成正式触发去重键。 +// +// 说明: +// 1. important_urgent_task 按 30 分钟窗口聚合,避免同一任务反复生成预览; +// 2. unfinished_feedback 优先使用 feedback_id/idempotency_key,不做固定时间窗强去重; +// 3. 
参数非法时仍返回可读字符串,调用方会在 trigger.Validate 阶段拒绝非法输入。 +func BuildTriggerDedupeKey(userID int, triggerType trigger.TriggerType, targetType trigger.TargetType, targetID int, feedbackID string, idempotencyKey string, at time.Time) string { + if triggerType == trigger.TriggerTypeUnfinishedFeedback { + return fmt.Sprintf("%d:%s:%s", userID, triggerType, firstNonEmpty(feedbackID, idempotencyKey, fmt.Sprintf("%s:%d", targetType, targetID))) + } + if at.IsZero() { + at = time.Now() + } + windowStart := at.Truncate(triggerDedupeWindow) + return fmt.Sprintf("%d:%s:%s:%d:%s", userID, triggerType, targetType, targetID, windowStart.Format(time.RFC3339)) +} + +func triggerResponseFromModel(row *model.ActiveScheduleTrigger, dedupeHit bool) *TriggerResponse { + if row == nil { + return &TriggerResponse{DedupeHit: dedupeHit} + } + return &TriggerResponse{ + TriggerID: row.ID, + Status: row.Status, + PreviewID: row.PreviewID, + DedupeHit: dedupeHit, + TraceID: row.TraceID, + } +} diff --git a/backend/active_scheduler/service/trigger_outbox.go b/backend/active_scheduler/service/trigger_outbox.go new file mode 100644 index 0000000..6b68204 --- /dev/null +++ b/backend/active_scheduler/service/trigger_outbox.go @@ -0,0 +1,219 @@ +package service + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka" + outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox" + "github.com/LoveLosita/smartflow/backend/model" + sharedevents "github.com/LoveLosita/smartflow/backend/shared/events" +) + +const requestedNotificationDedupeWindow = 30 * time.Minute + +// EnqueueActiveScheduleTriggeredInTx 在事务内写入 active_schedule.triggered outbox 消息。 +// +// 职责边界: +// 1. 只负责把已经校验好的事件契约写入 outbox; +// 2. 不负责创建 trigger 记录,trigger 真值应由调用方先落库; +// 3. 
失败时返回 error,让上层决定是否整体回滚与重试。 +func EnqueueActiveScheduleTriggeredInTx( + ctx context.Context, + outboxRepo *outboxinfra.Repository, + kafkaCfg kafkabus.Config, + payload sharedevents.ActiveScheduleTriggeredPayload, +) error { + return enqueueContractEventInTx( + ctx, + outboxRepo, + kafkaCfg, + sharedevents.ActiveScheduleTriggeredEventType, + sharedevents.ActiveScheduleTriggeredEventVersion, + payload.MessageKey(), + payload.AggregateID(), + payload.AggregateID(), + payload, + payload.Validate, + ) +} + +// EnqueueNotificationFeishuRequestedInTx 在事务内写入 notification.feishu.requested outbox 消息。 +// +// 职责边界: +// 1. 只做事件契约序列化和 outbox 入队; +// 2. 不负责 notification_records 幂等与 provider 调用; +// 3. 失败时直接返回,让 trigger -> preview -> notification 保持同事务回滚。 +func EnqueueNotificationFeishuRequestedInTx( + ctx context.Context, + outboxRepo *outboxinfra.Repository, + kafkaCfg kafkabus.Config, + payload sharedevents.FeishuNotificationRequestedPayload, +) error { + return enqueueContractEventInTx( + ctx, + outboxRepo, + kafkaCfg, + sharedevents.NotificationFeishuRequestedEventType, + sharedevents.NotificationFeishuRequestedEventVersion, + payload.MessageKey(), + payload.AggregateID(), + payload.AggregateID(), + payload, + payload.Validate, + ) +} + +// BuildTriggeredPayloadFromModel 把持久化 trigger 还原成事件载荷。 +// +// 职责边界: +// 1. 只做 model -> contract DTO 映射; +// 2. 不校验 trigger 是否应该被处理,业务真值判断由 scanner / worker 完成; +// 3. 
若 payload_json 不是合法 JSON,返回 error,让调用方回滚本次触发。 +func BuildTriggeredPayloadFromModel(row model.ActiveScheduleTrigger) (sharedevents.ActiveScheduleTriggeredPayload, error) { + var rawPayload json.RawMessage + if row.PayloadJSON != nil && strings.TrimSpace(*row.PayloadJSON) != "" { + rawPayload = json.RawMessage(strings.TrimSpace(*row.PayloadJSON)) + if !json.Valid(rawPayload) { + return sharedevents.ActiveScheduleTriggeredPayload{}, errors.New("trigger payload_json 不是合法 JSON") + } + } + + payload := sharedevents.ActiveScheduleTriggeredPayload{ + TriggerID: row.ID, + UserID: row.UserID, + TriggerType: row.TriggerType, + Source: row.Source, + TargetType: row.TargetType, + TargetID: row.TargetID, + FeedbackID: row.FeedbackID, + IdempotencyKey: row.IdempotencyKey, + DedupeKey: row.DedupeKey, + MockNow: row.MockNow, + IsMockTime: row.IsMockTime, + RequestedAt: row.RequestedAt, + Payload: rawPayload, + TraceID: row.TraceID, + } + if err := payload.Validate(); err != nil { + return sharedevents.ActiveScheduleTriggeredPayload{}, err + } + return payload, nil +} + +// BuildFeishuRequestedPayload 生成通知事件载荷。 +// +// 职责边界: +// 1. 只做 trigger/preview 快照到通知契约的拼装; +// 2. 不判断是否真的要发通知,上层应先根据 decision.ShouldNotify 决定是否调用; +// 3. 
fallback 文案只做兜底,不替代后续 notification handler 的 provider 级策略。 +func BuildFeishuRequestedPayload( + triggerRow model.ActiveScheduleTrigger, + previewID string, + notificationSummary string, + requestedAt time.Time, +) sharedevents.FeishuNotificationRequestedPayload { + summary := strings.TrimSpace(notificationSummary) + return sharedevents.FeishuNotificationRequestedPayload{ + UserID: triggerRow.UserID, + TriggerID: triggerRow.ID, + PreviewID: strings.TrimSpace(previewID), + TriggerType: triggerRow.TriggerType, + TargetType: triggerRow.TargetType, + TargetID: triggerRow.TargetID, + DedupeKey: BuildNotificationDedupeKey(triggerRow.UserID, triggerRow.TriggerType, triggerRow.RequestedAt), + TargetURL: fmt.Sprintf("/schedule-adjust/%s", strings.TrimSpace(previewID)), + SummaryText: summary, + FallbackText: buildNotificationFallbackText(summary, strings.TrimSpace(previewID)), + TraceID: triggerRow.TraceID, + RequestedAt: requestedAt, + } +} + +// BuildNotificationDedupeKey 生成通知 30 分钟窗口去重键。 +// +// 说明: +// 1. 第一版按 user_id + trigger_type + time_window 聚合; +// 2. 当 requested_at 缺失时回退到当前时间,避免空值直接写出脏 dedupe_key; +// 3. 
不拼 preview_id,保证同一窗口内多次重试只会落到同一组通知记录。 +func BuildNotificationDedupeKey(userID int, triggerType string, requestedAt time.Time) string { + if requestedAt.IsZero() { + requestedAt = time.Now() + } + windowStart := requestedAt.Truncate(requestedNotificationDedupeWindow) + return fmt.Sprintf("%d:%s:%s", + userID, + strings.TrimSpace(triggerType), + windowStart.Format(time.RFC3339), + ) +} + +func enqueueContractEventInTx( + ctx context.Context, + outboxRepo *outboxinfra.Repository, + kafkaCfg kafkabus.Config, + eventType string, + eventVersion string, + messageKey string, + aggregateID string, + eventID string, + payload any, + validate func() error, +) error { + if outboxRepo == nil { + return errors.New("outbox repository 不能为空") + } + if validate == nil { + return errors.New("事件校验函数不能为空") + } + if err := validate(); err != nil { + return err + } + + payloadJSON, err := json.Marshal(payload) + if err != nil { + return err + } + + cfg := normalizeKafkaConfig(kafkaCfg) + wrapped := outboxinfra.OutboxEventPayload{ + EventID: strings.TrimSpace(eventID), + EventType: eventType, + EventVersion: strings.TrimSpace(eventVersion), + AggregateID: strings.TrimSpace(aggregateID), + Payload: payloadJSON, + } + _, err = outboxRepo.CreateMessage(ctx, eventType, cfg.Topic, strings.TrimSpace(messageKey), wrapped, cfg.MaxRetry) + return err +} + +func normalizeKafkaConfig(cfg kafkabus.Config) kafkabus.Config { + if strings.TrimSpace(cfg.Topic) == "" { + cfg.Topic = kafkabus.DefaultTopic + } + if cfg.MaxRetry <= 0 { + cfg.MaxRetry = 20 + } + return cfg +} + +func buildNotificationFallbackText(summary string, previewID string) string { + link := fmt.Sprintf("/schedule-adjust/%s", previewID) + if summary == "" { + return "你有一条新的日程调整建议,请查看:" + link + } + return summary + ",请查看:" + link +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if strings.TrimSpace(value) != "" { + return strings.TrimSpace(value) + } + } + return "" +} diff --git 
package service

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"time"

	activepreview "github.com/LoveLosita/smartflow/backend/active_scheduler/preview"
	"github.com/LoveLosita/smartflow/backend/active_scheduler/trigger"
	"github.com/LoveLosita/smartflow/backend/dao"
	kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka"
	outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
	"github.com/LoveLosita/smartflow/backend/model"
	sharedevents "github.com/LoveLosita/smartflow/backend/shared/events"
	"github.com/google/uuid"
	"gorm.io/gorm"
	"gorm.io/gorm/clause"
)

const (
	triggerErrorCodePayloadMismatch = "payload_mismatch"
	triggerErrorCodeWorkerFailed    = "worker_failed"
)

// TriggerWorkflowService orchestrates the stage-4 background pipeline:
// trigger -> dry-run -> preview -> notification.
//
// Responsibility boundaries:
//  1. It only advances the background state machine of active-schedule
//     triggers; it does not start the outbox worker itself.
//  2. Dry-run and preview reuse the existing services; no second candidate
//     generation path is implemented here.
//  3. Notifications are published as "requested" events only; no real
//     Feishu provider is called directly.
type TriggerWorkflowService struct {
	activeDAO *dao.ActiveScheduleDAO
	dryRun    *DryRunService
	outbox    *outboxinfra.Repository
	kafkaCfg  kafkabus.Config
	clock     func() time.Time
}

// NewTriggerWorkflowService validates the mandatory dependencies and wires a
// workflow service with the real clock. kafkaCfg is stored as-is and kept
// for notification enqueueing.
func NewTriggerWorkflowService(
	activeDAO *dao.ActiveScheduleDAO,
	dryRun *DryRunService,
	outboxRepo *outboxinfra.Repository,
	kafkaCfg kafkabus.Config,
) (*TriggerWorkflowService, error) {
	if activeDAO == nil {
		return nil, errors.New("active schedule dao 不能为空")
	}
	if dryRun == nil {
		return nil, errors.New("dry-run service 不能为空")
	}
	if outboxRepo == nil {
		return nil, errors.New("outbox repository 不能为空")
	}
	return &TriggerWorkflowService{
		activeDAO: activeDAO,
		dryRun:    dryRun,
		outbox:    outboxRepo,
		kafkaCfg:  kafkaCfg,
		clock:     time.Now,
	}, nil
}

// SetClock overrides the time source; intended for tests. Nil receivers and
// nil clocks are ignored.
func (s *TriggerWorkflowService) SetClock(clock func() time.Time) {
	if s != nil && clock != nil {
		s.clock = clock
	}
}
// ProcessTriggeredInTx advances the main trigger chain inside the outbox
// consumer transaction.
//
// Steps:
//  1. lock the trigger row first, so a given trigger can only be advanced by
//     one transaction under concurrent workers;
//  2. flip the status to processing so message consumption is visible when
//     debugging;
//  3. reuse the dry-run + preview services to generate the preview; an
//     already-existing preview is reused instead of written again;
//  4. on preview success, write the trigger status back and enqueue the
//     notification.requested outbox message in the same transaction;
//  5. any step failure returns an error; the outer handler records the
//     failed state and lets outbox retry take over.
func (s *TriggerWorkflowService) ProcessTriggeredInTx(
	ctx context.Context,
	tx *gorm.DB,
	payload sharedevents.ActiveScheduleTriggeredPayload,
) error {
	if s == nil || s.activeDAO == nil || s.dryRun == nil || s.outbox == nil {
		return errors.New("trigger workflow service 未初始化")
	}
	if tx == nil {
		return errors.New("gorm tx 不能为空")
	}
	if err := payload.Validate(); err != nil {
		return err
	}

	now := s.now()
	triggerRow, err := s.lockTrigger(ctx, tx, payload.TriggerID)
	if err != nil {
		return err
	}

	txDAO := s.activeDAO.WithTx(tx)
	if completed, err := s.tryFinishByTerminalStatus(ctx, txDAO, *triggerRow); err != nil || completed {
		return err
	}
	if handled, err := s.tryRejectMismatchedPayload(ctx, txDAO, *triggerRow, payload, now); err != nil || handled {
		return err
	}

	if err := txDAO.UpdateTriggerFields(ctx, triggerRow.ID, map[string]any{
		"status":          model.ActiveScheduleTriggerStatusProcessing,
		"processed_at":    &now,
		"last_error_code": nil,
		"last_error":      nil,
	}); err != nil {
		return err
	}

	existingPreview, err := txDAO.GetPreviewByTriggerID(ctx, triggerRow.ID)
	switch {
	case err == nil:
		return s.finishWithExistingPreview(ctx, txDAO, *triggerRow, *existingPreview, now)
	case errors.Is(err, gorm.ErrRecordNotFound):
		// Fall through and create a fresh preview.
	default:
		return err
	}

	domainTrigger := buildDomainTriggerFromModel(*triggerRow, payload)
	dryRunResult, err := s.dryRun.DryRun(ctx, domainTrigger)
	if err != nil {
		return err
	}
	if len(dryRunResult.Candidates) == 0 {
		return s.markClosedWithoutPreview(ctx, txDAO, triggerRow.ID, now)
	}
	if !dryRunResult.Observation.Decision.ShouldNotify && !dryRunResult.Observation.Decision.ShouldWritePreview {
		return s.markClosedWithoutPreview(ctx, txDAO, triggerRow.ID, now)
	}

	previewService, err := activepreview.NewService(txDAO)
	if err != nil {
		return err
	}
	previewResp, err := previewService.CreatePreview(ctx, activepreview.CreatePreviewRequest{
		ActiveContext: dryRunResult.Context,
		Observation:   dryRunResult.Observation,
		Candidates:    dryRunResult.Candidates,
		TriggerID:     triggerRow.ID,
		GeneratedAt:   now,
	})
	if err != nil {
		return err
	}

	previewID := previewResp.Detail.PreviewID
	if err = txDAO.UpdateTriggerFields(ctx, triggerRow.ID, map[string]any{
		"status":          model.ActiveScheduleTriggerStatusPreviewGenerated,
		"preview_id":      &previewID,
		"completed_at":    &now,
		"last_error_code": nil,
		"last_error":      nil,
	}); err != nil {
		return err
	}

	if !dryRunResult.Observation.Decision.ShouldNotify {
		return nil
	}

	notificationPayload := BuildFeishuRequestedPayload(
		*triggerRow,
		previewID,
		previewResp.Detail.Notification,
		now,
	)
	return EnqueueNotificationFeishuRequestedInTx(ctx, s.outbox.WithTx(tx), s.kafkaCfg, notificationPayload)
}

// MarkTriggerFailedBestEffort records the failed state outside the consuming
// transaction, so operators can see why an outbox retry is pending.
//
// Responsibility boundaries:
//  1. best-effort only: the update error is discarded so the caller's
//     original error is never masked;
//  2. no error classification yet — everything is recorded as worker_failed;
//  3. real retries remain the responsibility of the outbox state machine.
func (s *TriggerWorkflowService) MarkTriggerFailedBestEffort(ctx context.Context, triggerID string, err error) {
	if s == nil || s.activeDAO == nil || strings.TrimSpace(triggerID) == "" {
		return
	}
	message := ""
	if err != nil {
		message = err.Error()
	}
	// Deliberately ignore the write error: this is diagnostic bookkeeping.
	_ = s.activeDAO.UpdateTriggerFields(ctx, triggerID, map[string]any{
		"status":          model.ActiveScheduleTriggerStatusFailed,
		"last_error_code": triggerErrorCodeWorkerFailed,
		"last_error":      &message,
	})
}

// lockTrigger loads the trigger row FOR UPDATE so only one worker may
// advance a given trigger under concurrent consumers.
func (s *TriggerWorkflowService) lockTrigger(ctx context.Context, tx *gorm.DB, triggerID string) (*model.ActiveScheduleTrigger, error) {
	var row model.ActiveScheduleTrigger
	err := tx.WithContext(ctx).
		Clauses(clause.Locking{Strength: "UPDATE"}).
		Where("id = ?", triggerID).
		First(&row).Error
	if err != nil {
		return nil, err
	}
	return &row, nil
}

// tryFinishByTerminalStatus short-circuits triggers that already reached a
// terminal state. ctx and txDAO are currently unused but kept for signature
// symmetry with the sibling helpers.
func (s *TriggerWorkflowService) tryFinishByTerminalStatus(
	ctx context.Context,
	txDAO *dao.ActiveScheduleDAO,
	row model.ActiveScheduleTrigger,
) (bool, error) {
	switch row.Status {
	case model.ActiveScheduleTriggerStatusPreviewGenerated,
		model.ActiveScheduleTriggerStatusClosed,
		model.ActiveScheduleTriggerStatusSkipped,
		model.ActiveScheduleTriggerStatusRejected:
		return true, nil
	case model.ActiveScheduleTriggerStatusPending,
		model.ActiveScheduleTriggerStatusProcessing,
		model.ActiveScheduleTriggerStatusFailed:
		return false, nil
	default:
		// 1. An unknown status does not abort the batch; processing continues.
		// 2. This tolerates dirty historical rows left over from migrations,
		//    so one bad row cannot block a whole consumption batch.
		// 3. The actual status is overwritten to processing right afterwards
		//    by the following UpdateTriggerFields call.
		return false, nil
	}
}

// tryRejectMismatchedPayload rejects the trigger when the event payload
// disagrees with the persisted row on an identity field; it returns
// handled=true once the rejection has been recorded.
func (s *TriggerWorkflowService) tryRejectMismatchedPayload(
	ctx context.Context,
	txDAO *dao.ActiveScheduleDAO,
	row model.ActiveScheduleTrigger,
	payload sharedevents.ActiveScheduleTriggeredPayload,
	now time.Time,
) (bool, error) {
	mismatchReason := buildPayloadMismatchReason(row, payload)
	if mismatchReason == "" {
		return false, nil
	}
	if err := txDAO.UpdateTriggerFields(ctx, row.ID, map[string]any{
		"status":          model.ActiveScheduleTriggerStatusRejected,
		"last_error_code": triggerErrorCodePayloadMismatch,
		"last_error":      &mismatchReason,
		"completed_at":    &now,
	}); err != nil {
		return false, err
	}
	return true, nil
}

// finishWithExistingPreview marks the trigger preview_generated against a
// preview persisted by an earlier attempt, avoiding a duplicate write.
func (s *TriggerWorkflowService) finishWithExistingPreview(
	ctx context.Context,
	txDAO *dao.ActiveScheduleDAO,
	triggerRow model.ActiveScheduleTrigger,
	previewRow model.ActiveSchedulePreview,
	now time.Time,
) error {
	previewID := previewRow.ID
	return txDAO.UpdateTriggerFields(ctx, triggerRow.ID, map[string]any{
		"status":          model.ActiveScheduleTriggerStatusPreviewGenerated,
		"preview_id":      &previewID,
		"completed_at":    &now,
		"last_error_code": nil,
		"last_error":      nil,
	})
}

// markClosedWithoutPreview closes a trigger whose dry-run produced nothing
// worth previewing or notifying about.
func (s *TriggerWorkflowService) markClosedWithoutPreview(
	ctx context.Context,
	txDAO *dao.ActiveScheduleDAO,
	triggerID string,
	now time.Time,
) error {
	return txDAO.UpdateTriggerFields(ctx, triggerID, map[string]any{
		"status":          model.ActiveScheduleTriggerStatusClosed,
		"completed_at":    &now,
		"last_error_code": nil,
		"last_error":      nil,
	})
}

// now returns the injected clock's time, falling back to time.Now for
// zero-value receivers.
func (s *TriggerWorkflowService) now() time.Time {
	if s == nil || s.clock == nil {
		return time.Now()
	}
	return s.clock()
}

// buildDomainTriggerFromModel merges the persisted trigger row with the
// event payload into the domain trigger consumed by dry-run. Row values
// win; the payload only fills gaps (mock time, trace id).
func buildDomainTriggerFromModel(
	row model.ActiveScheduleTrigger,
	payload sharedevents.ActiveScheduleTriggeredPayload,
) trigger.ActiveScheduleTrigger {
	mockNow := row.MockNow
	if mockNow == nil && payload.MockNow != nil {
		mockNow = payload.MockNow
	}
	traceID := strings.TrimSpace(row.TraceID)
	if traceID == "" {
		traceID = strings.TrimSpace(payload.TraceID)
	}
	if traceID == "" {
		// Last-resort trace id so downstream logging never sees an empty one.
		traceID = "trace_active_trigger_" + uuid.NewString()
	}
	return trigger.ActiveScheduleTrigger{
		TriggerID:      row.ID,
		UserID:         row.UserID,
		TriggerType:    trigger.TriggerType(row.TriggerType),
		Source:         trigger.Source(row.Source),
		TargetType:     trigger.TargetType(row.TargetType),
		TargetID:       row.TargetID,
		FeedbackID:     row.FeedbackID,
		IdempotencyKey: row.IdempotencyKey,
		MockNow:        mockNow,
		IsMockTime:     row.IsMockTime || payload.IsMockTime,
		RequestedAt:    row.RequestedAt,
		TraceID:        traceID,
	}
}

// buildPayloadMismatchReason returns a human-readable reason when the event
// payload disagrees with the persisted row on an identity field, or "" when
// they match. Only the first mismatch found is reported.
func buildPayloadMismatchReason(row model.ActiveScheduleTrigger, payload sharedevents.ActiveScheduleTriggeredPayload) string {
	switch {
	case row.UserID != payload.UserID:
		return fmt.Sprintf("trigger 事件 user_id 不一致: row=%d payload=%d", row.UserID, payload.UserID)
	case row.TriggerType != payload.TriggerType:
		return fmt.Sprintf("trigger 事件 trigger_type 不一致: row=%s payload=%s", row.TriggerType, payload.TriggerType)
	case row.TargetType != payload.TargetType:
		return fmt.Sprintf("trigger 事件 target_type 不一致: row=%s payload=%s", row.TargetType, payload.TargetType)
	case row.TargetID != payload.TargetID:
		return fmt.Sprintf("trigger 事件 target_id 不一致: row=%d payload=%d", row.TargetID, payload.TargetID)
	default:
		return ""
	}
}
!= payload.TargetID: + return fmt.Sprintf("trigger 事件 target_id 不一致: row=%d payload=%d", row.TargetID, payload.TargetID) + default: + return "" + } +} diff --git a/backend/api/active_schedule.go b/backend/api/active_schedule.go index 73e244a..47f7573 100644 --- a/backend/api/active_schedule.go +++ b/backend/api/active_schedule.go @@ -2,6 +2,8 @@ package api import ( "context" + "encoding/json" + "errors" "fmt" "net/http" "time" @@ -12,6 +14,7 @@ import ( "github.com/LoveLosita/smartflow/backend/active_scheduler/trigger" "github.com/LoveLosita/smartflow/backend/respond" "github.com/gin-gonic/gin" + "gorm.io/gorm" ) // ActiveScheduleAPI 承载主动调度开发期和验收期 API。 @@ -23,12 +26,14 @@ import ( type ActiveScheduleAPI struct { dryRunService *activesvc.DryRunService previewConfirmService *activesvc.PreviewConfirmService + triggerService *activesvc.TriggerService } -func NewActiveScheduleAPI(dryRunService *activesvc.DryRunService, previewConfirmService *activesvc.PreviewConfirmService) *ActiveScheduleAPI { +func NewActiveScheduleAPI(dryRunService *activesvc.DryRunService, previewConfirmService *activesvc.PreviewConfirmService, triggerService *activesvc.TriggerService) *ActiveScheduleAPI { return &ActiveScheduleAPI{ dryRunService: dryRunService, previewConfirmService: previewConfirmService, + triggerService: triggerService, } } @@ -39,6 +44,7 @@ type ActiveScheduleDryRunRequest struct { FeedbackID string `json:"feedback_id"` IdempotencyKey string `json:"idempotency_key"` MockNow *time.Time `json:"mock_now"` + Payload any `json:"payload"` } // DryRun 同步执行主动调度诊断,不写 preview、不发通知、不修改正式日程。 @@ -81,6 +87,51 @@ func (api *ActiveScheduleAPI) DryRun(c *gin.Context) { c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, result)) } +// Trigger 写入正式主动调度 trigger 并发布 active_schedule.triggered。 +func (api *ActiveScheduleAPI) Trigger(c *gin.Context) { + if api == nil || api.triggerService == nil { + c.JSON(http.StatusInternalServerError, respond.InternalError(nilServiceError("主动调度 trigger service 
未初始化"))) + return + } + + var req ActiveScheduleDryRunRequest + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, respond.WrongParamType) + return + } + rawPayload, err := json.Marshal(req.Payload) + if err != nil { + c.JSON(http.StatusBadRequest, respond.WrongParamType) + return + } + if string(rawPayload) == "null" { + rawPayload = []byte("{}") + } + + now := time.Now() + ctx, cancel := context.WithTimeout(c.Request.Context(), 3*time.Second) + defer cancel() + result, err := api.triggerService.CreateAndPublish(ctx, activesvc.TriggerRequest{ + UserID: c.GetInt("user_id"), + TriggerType: trigger.TriggerType(req.TriggerType), + Source: trigger.SourceAPITrigger, + TargetType: trigger.TargetType(req.TargetType), + TargetID: req.TargetID, + FeedbackID: req.FeedbackID, + IdempotencyKey: req.IdempotencyKey, + MockNow: req.MockNow, + IsMockTime: req.MockNow != nil, + RequestedAt: now, + Payload: rawPayload, + TraceID: fmt.Sprintf("trace_api_trigger_%d_%d", c.GetInt("user_id"), now.UnixNano()), + }) + if err != nil { + respond.DealWithError(c, err) + return + } + c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, result)) +} + // CreatePreview 先同步 dry-run,再把 top1 候选固化为待确认预览。 func (api *ActiveScheduleAPI) CreatePreview(c *gin.Context) { if api == nil || api.dryRunService == nil || api.previewConfirmService == nil { @@ -168,12 +219,85 @@ func (api *ActiveScheduleAPI) ConfirmPreview(c *gin.Context) { defer cancel() result, err := api.previewConfirmService.ConfirmPreview(ctx, req) if err != nil { - respond.DealWithError(c, err) + writeActiveScheduleConfirmError(c, err) return } c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, result)) } +// writeActiveScheduleConfirmError 将 confirm/apply 的可预期业务拒绝映射为 4xx。 +// +// 职责边界: +// 1. 只处理主动调度 confirm/apply 链路已经分类的 ApplyError; +// 2. 不吞掉数据库、超时、panic recover 等系统错误,未知错误继续交给通用 respond 走 500; +// 3. 
// writeActiveScheduleConfirmError maps expected confirm/apply business
// rejections onto 4xx responses.
//
// Responsibility boundaries:
//  1. only ApplyErrors already classified by the confirm/apply chain are
//     handled here;
//  2. database, timeout and other system errors are not swallowed — unknown
//     errors fall through to the generic responder (500 path);
//  3. error_code / error_message are kept in the body so the frontend can
//     give precise interactions for expired / conflict / forbidden cases.
func writeActiveScheduleConfirmError(c *gin.Context, err error) {
	if applyErr, ok := activeapply.AsApplyError(err); ok {
		status := activeScheduleApplyHTTPStatus(applyErr.Code)
		message := applyErr.Message
		if message == "" {
			// Fall back to the wrapped error text when no display message is set.
			message = applyErr.Error()
		}
		applyStatus := activeapply.ApplyStatusRejected
		if applyErr.Code == activeapply.ErrorCodeExpired {
			applyStatus = activeapply.ApplyStatusExpired
		}
		if applyErr.Code == activeapply.ErrorCodeDBError {
			applyStatus = activeapply.ApplyStatusFailed
		}
		c.JSON(status, respond.RespWithData(respond.Response{
			Status: fmt.Sprintf("%d", status),
			Info:   message,
		}, activeapply.ConfirmResult{
			ApplyStatus:  applyStatus,
			ErrorCode:    applyErr.Code,
			ErrorMessage: message,
		}))
		return
	}
	if errors.Is(err, gorm.ErrRecordNotFound) {
		// A missing preview row is an expected 404, not a system failure.
		c.JSON(http.StatusNotFound, respond.RespWithData(respond.Response{
			Status: fmt.Sprintf("%d", http.StatusNotFound),
			Info:   "预览不存在或已被删除",
		}, activeapply.ConfirmResult{
			ApplyStatus:  activeapply.ApplyStatusRejected,
			ErrorCode:    activeapply.ErrorCodeTargetNotFound,
			ErrorMessage: "预览不存在或已被删除",
		}))
		return
	}
	respond.DealWithError(c, err)
}
过期、幂等冲突、节次冲突、目标状态变化统一返回 409,提示前端刷新预览或重新生成。 +func activeScheduleApplyHTTPStatus(code activeapply.ErrorCode) int { + switch code { + case activeapply.ErrorCodeInvalidRequest, + activeapply.ErrorCodeInvalidEditedChanges, + activeapply.ErrorCodeUnsupportedChangeType: + return http.StatusBadRequest + case activeapply.ErrorCodeForbidden: + return http.StatusForbidden + case activeapply.ErrorCodeTargetNotFound: + return http.StatusNotFound + case activeapply.ErrorCodeExpired, + activeapply.ErrorCodeIdempotencyConflict, + activeapply.ErrorCodeBaseVersionChanged, + activeapply.ErrorCodeTargetCompleted, + activeapply.ErrorCodeTargetAlreadySchedule, + activeapply.ErrorCodeSlotConflict, + activeapply.ErrorCodeAlreadyApplied: + return http.StatusConflict + default: + return http.StatusInternalServerError + } +} + type nilServiceError string func (e nilServiceError) Error() string { diff --git a/backend/api/container.go b/backend/api/container.go index f119c77..1154a4f 100644 --- a/backend/api/container.go +++ b/backend/api/container.go @@ -9,4 +9,5 @@ type ApiHandlers struct { AgentHandler *AgentHandler MemoryHandler *MemoryHandler ActiveSchedule *ActiveScheduleAPI + Notification *NotificationAPI } diff --git a/backend/api/notification.go b/backend/api/notification.go new file mode 100644 index 0000000..ac78876 --- /dev/null +++ b/backend/api/notification.go @@ -0,0 +1,129 @@ +package api + +import ( + "context" + "errors" + "net/http" + "time" + + "github.com/LoveLosita/smartflow/backend/notification" + "github.com/LoveLosita/smartflow/backend/respond" + "github.com/gin-gonic/gin" +) + +const notificationAPITimeout = 8 * time.Second + +// NotificationAPI 承载当前用户的外部通知通道配置接口。 +// +// 职责边界: +// 1. 只负责从 JWT 上下文取得当前 user_id、绑定请求体并调用 notification.ChannelService; +// 2. 不直接读写 user_notification_channels,避免 API 层绕过 webhook 校验和脱敏规则; +// 3. 
// NotificationAPI exposes the current user's external notification channel
// configuration endpoints.
//
// Responsibility boundaries:
//  1. it only resolves user_id from the JWT context, binds request bodies
//     and delegates to notification.ChannelService;
//  2. it never reads/writes user_notification_channels directly, so webhook
//     validation and masking rules cannot be bypassed at the API layer;
//  3. it takes no part in active scheduling, the notification_records state
//     machine or outbox consumption.
type NotificationAPI struct {
	channelService *notification.ChannelService
}

// NewNotificationAPI wires the channel service into the handler set.
func NewNotificationAPI(channelService *notification.ChannelService) *NotificationAPI {
	return &NotificationAPI{channelService: channelService}
}

// saveFeishuWebhookRequest is the body for SaveFeishuWebhook.
// Enabled is a pointer so an omitted field can default to true.
type saveFeishuWebhookRequest struct {
	Enabled     *bool  `json:"enabled"`
	WebhookURL  string `json:"webhook_url" binding:"required"`
	AuthType    string `json:"auth_type"`
	BearerToken string `json:"bearer_token"`
}

// GetFeishuWebhook returns the current user's Feishu webhook trigger config.
func (api *NotificationAPI) GetFeishuWebhook(c *gin.Context) {
	if api == nil || api.channelService == nil {
		c.JSON(http.StatusInternalServerError, respond.InternalError(nilServiceError("通知通道 service 未初始化")))
		return
	}

	ctx, cancel := context.WithTimeout(c.Request.Context(), notificationAPITimeout)
	defer cancel()

	channel, err := api.channelService.GetFeishuWebhook(ctx, c.GetInt("user_id"))
	if err != nil {
		writeNotificationError(c, err)
		return
	}
	c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, channel))
}

// SaveFeishuWebhook idempotently saves the current user's Feishu webhook
// trigger config.
func (api *NotificationAPI) SaveFeishuWebhook(c *gin.Context) {
	if api == nil || api.channelService == nil {
		c.JSON(http.StatusInternalServerError, respond.InternalError(nilServiceError("通知通道 service 未初始化")))
		return
	}

	var req saveFeishuWebhookRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, respond.WrongParamType)
		return
	}
	// An omitted "enabled" field means the channel is switched on.
	enabled := true
	if req.Enabled != nil {
		enabled = *req.Enabled
	}

	ctx, cancel := context.WithTimeout(c.Request.Context(), notificationAPITimeout)
	defer cancel()

	channel, err := api.channelService.SaveFeishuWebhook(ctx, c.GetInt("user_id"), notification.SaveFeishuWebhookRequest{
		Enabled:     enabled,
		WebhookURL:  req.WebhookURL,
		AuthType:    req.AuthType,
		BearerToken: req.BearerToken,
	})
	if err != nil {
		writeNotificationError(c, err)
		return
	}
	c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, channel))
}

// DeleteFeishuWebhook removes the current user's Feishu webhook trigger
// config.
func (api *NotificationAPI) DeleteFeishuWebhook(c *gin.Context) {
	if api == nil || api.channelService == nil {
		c.JSON(http.StatusInternalServerError, respond.InternalError(nilServiceError("通知通道 service 未初始化")))
		return
	}

	ctx, cancel := context.WithTimeout(c.Request.Context(), notificationAPITimeout)
	defer cancel()

	if err := api.channelService.DeleteFeishuWebhook(ctx, c.GetInt("user_id")); err != nil {
		writeNotificationError(c, err)
		return
	}
	c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, gin.H{"deleted": true}))
}

// TestFeishuWebhook sends a minimal business JSON to the current user's
// configured Feishu webhook and reports the outcome.
func (api *NotificationAPI) TestFeishuWebhook(c *gin.Context) {
	if api == nil || api.channelService == nil {
		c.JSON(http.StatusInternalServerError, respond.InternalError(nilServiceError("通知通道 service 未初始化")))
		return
	}

	ctx, cancel := context.WithTimeout(c.Request.Context(), notificationAPITimeout)
	defer cancel()

	result, err := api.channelService.TestFeishuWebhook(ctx, c.GetInt("user_id"))
	if err != nil {
		writeNotificationError(c, err)
		return
	}
	c.JSON(http.StatusOK, respond.RespWithData(respond.Ok, result))
}

// writeNotificationError maps invalid-config errors to 400 and delegates
// everything else to the generic responder.
func writeNotificationError(c *gin.Context, err error) {
	if errors.Is(err, notification.ErrInvalidChannelConfig) {
		c.JSON(http.StatusBadRequest, respond.WrongParamType)
		return
	}
	respond.DealWithError(c, err)
}
"github.com/LoveLosita/smartflow/backend/active_scheduler/service" "github.com/LoveLosita/smartflow/backend/api" @@ -31,6 +32,7 @@ import ( newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model" newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools" "github.com/LoveLosita/smartflow/backend/newAgent/tools/web" + "github.com/LoveLosita/smartflow/backend/notification" "github.com/LoveLosita/smartflow/backend/pkg" "github.com/LoveLosita/smartflow/backend/routers" "github.com/LoveLosita/smartflow/backend/service" @@ -47,18 +49,21 @@ import ( // 2. 不承载业务逻辑,业务仍然由 service / newAgent / memory 等领域模块负责; // 3. 不决定进程角色,api / worker / all 由 StartAPI、StartWorker、StartAll 选择启动哪些生命周期。 type appRuntime struct { - db *gorm.DB - redisClient *redis.Client - cacheRepo *dao.CacheDAO - userRepo *dao.UserDAO - agentRepo *dao.AgentDAO - agentCache *dao.AgentCache - manager *dao.RepoManager - outboxRepo *outboxinfra.Repository - eventBus *outboxinfra.EventBus - memoryModule *memory.Module - limiter *pkg.RateLimiter - handlers *api.ApiHandlers + db *gorm.DB + redisClient *redis.Client + cacheRepo *dao.CacheDAO + userRepo *dao.UserDAO + agentRepo *dao.AgentDAO + agentCache *dao.AgentCache + manager *dao.RepoManager + outboxRepo *outboxinfra.Repository + eventBus *outboxinfra.EventBus + memoryModule *memory.Module + activeJobScanner *activejob.Scanner + activeTriggerWorkflow *activesvc.TriggerWorkflowService + notificationService *notification.NotificationService + limiter *pkg.RateLimiter + handlers *api.ApiHandlers } // loadConfig 加载应用配置。 @@ -220,7 +225,12 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) { memoryCfg, ) - activeScheduleDryRun, err := buildActiveScheduleDryRunService(db) + activeReaders := activeadapters.NewGormReaders(db) + activeScheduleDryRun, err := activesvc.NewDryRunService(activeadapters.ReadersFromGorm(activeReaders)) + if err != nil { + return nil, err + } + activeScheduleTrigger, err := 
activesvc.NewTriggerService(manager.ActiveSchedule, eventBus) if err != nil { return nil, err } @@ -228,21 +238,55 @@ func buildRuntime(ctx context.Context) (*appRuntime, error) { if err != nil { return nil, err } - handlers := buildAPIHandlers(userService, taskSv, taskClassService, courseService, scheduleService, agentService, memoryModule, activeScheduleDryRun, activeSchedulePreviewConfirm) + // 1. 生产投递先切到用户级飞书 Webhook provider,mock provider 文件继续保留给后续单测和本地隔离验证。 + // 2. provider 与配置测试接口共用同一个实例,保证“测试成功”和“正式投递”走同一套 URL 校验、JSON 拼装和 HTTP 结果分类。 + feishuProvider, err := notification.NewWebhookFeishuProvider(manager.Notification, notification.WebhookFeishuProviderOptions{ + FrontendBaseURL: viper.GetString("notification.frontendBaseURL"), + }) + if err != nil { + return nil, err + } + notificationService, err := notification.NewNotificationService(manager.ActiveSchedule, feishuProvider, notification.ServiceOptions{}) + if err != nil { + return nil, err + } + notificationChannelService, err := notification.NewChannelService(manager.Notification, feishuProvider, notification.ChannelServiceOptions{}) + if err != nil { + return nil, err + } + var activeTriggerWorkflow *activesvc.TriggerWorkflowService + var activeJobScanner *activejob.Scanner + if eventBus != nil { + activeTriggerWorkflow, err = activesvc.NewTriggerWorkflowService(manager.ActiveSchedule, activeScheduleDryRun, outboxRepo, kafkabus.LoadConfig()) + if err != nil { + return nil, err + } + activeJobScanner, err = activejob.NewScanner(manager.ActiveSchedule, activeadapters.ReadersFromGorm(activeReaders), activeScheduleTrigger, activejob.ScannerOptions{ + ScanEvery: viper.GetDuration("activeScheduler.jobScanEvery"), + Limit: viper.GetInt("activeScheduler.jobScanLimit"), + }) + if err != nil { + return nil, err + } + } + handlers := buildAPIHandlers(userService, taskSv, taskClassService, courseService, scheduleService, agentService, memoryModule, activeScheduleDryRun, activeSchedulePreviewConfirm, 
activeScheduleTrigger, notificationChannelService) return &appRuntime{ - db: db, - redisClient: rdb, - cacheRepo: cacheRepo, - userRepo: userRepo, - agentRepo: agentRepo, - agentCache: agentCacheRepo, - manager: manager, - outboxRepo: outboxRepo, - eventBus: eventBus, - memoryModule: memoryModule, - limiter: limiter, - handlers: handlers, + db: db, + redisClient: rdb, + cacheRepo: cacheRepo, + userRepo: userRepo, + agentRepo: agentRepo, + agentCache: agentCacheRepo, + manager: manager, + outboxRepo: outboxRepo, + eventBus: eventBus, + memoryModule: memoryModule, + activeJobScanner: activeJobScanner, + activeTriggerWorkflow: activeTriggerWorkflow, + notificationService: notificationService, + limiter: limiter, + handlers: handlers, }, nil } @@ -505,6 +549,8 @@ func buildAPIHandlers( memoryModule *memory.Module, activeScheduleDryRun *activesvc.DryRunService, activeSchedulePreviewConfirm *activesvc.PreviewConfirmService, + activeScheduleTrigger *activesvc.TriggerService, + notificationChannelService *notification.ChannelService, ) *api.ApiHandlers { return &api.ApiHandlers{ UserHandler: api.NewUserHandler(userService), @@ -514,7 +560,8 @@ func buildAPIHandlers( ScheduleHandler: api.NewScheduleAPI(scheduleService), AgentHandler: api.NewAgentHandler(agentService), MemoryHandler: api.NewMemoryHandler(memoryModule), - ActiveSchedule: api.NewActiveScheduleAPI(activeScheduleDryRun, activeSchedulePreviewConfirm), + ActiveSchedule: api.NewActiveScheduleAPI(activeScheduleDryRun, activeSchedulePreviewConfirm, activeScheduleTrigger), + Notification: api.NewNotificationAPI(notificationChannelService), } } @@ -536,11 +583,30 @@ func (r *appRuntime) startWorkers(ctx context.Context) { if r.memoryModule != nil { r.memoryModule.StartWorker(ctx) } + if r.activeJobScanner != nil { + r.activeJobScanner.Start(ctx) + log.Println("Active schedule due job scanner started") + } + if r.notificationService != nil { + r.notificationService.StartRetryLoop(ctx, 
viper.GetDuration("notification.retryScanEvery"), viper.GetInt("notification.retryBatchSize")) + log.Println("Notification retry scanner started") + } } func (r *appRuntime) registerEventHandlers() error { // 调用目的:worker/all 启动时复用同一套核心事件注册顺序,避免未来新增入口后复制多份 handler 接线。 - return eventsvc.RegisterCoreOutboxHandlers(r.eventBus, r.outboxRepo, r.manager, r.agentRepo, r.cacheRepo, r.memoryModule) + if err := eventsvc.RegisterCoreOutboxHandlers(r.eventBus, r.outboxRepo, r.manager, r.agentRepo, r.cacheRepo, r.memoryModule); err != nil { + return err + } + if err := eventsvc.RegisterActiveScheduleTriggeredHandler(r.eventBus, r.outboxRepo, r.activeTriggerWorkflow); err != nil { + return fmt.Errorf("注册主动调度触发 handler 失败: %w", err) + } + // 调用目的:飞书通知事件消费与 notification retry loop 复用同一个服务实例, + // 保证后续接入真实 provider 时只需要替换启动期注入配置。 + if err := eventsvc.RegisterFeishuNotificationHandler(r.eventBus, r.outboxRepo, r.notificationService); err != nil { + return fmt.Errorf("注册飞书通知 handler 失败: %w", err) + } + return nil } func (r *appRuntime) startHTTP() { diff --git a/backend/config.example.yaml b/backend/config.example.yaml index cc11c26..9af75fa 100644 --- a/backend/config.example.yaml +++ b/backend/config.example.yaml @@ -40,6 +40,12 @@ kafka: retryBatchSize: 100 maxRetry: 20 +# 通知投递配置。 +notification: + frontendBaseURL: "http://localhost:5173" + retryScanEvery: 1m + retryBatchSize: 50 + # 时间与学期边界配置。 time: zone: "Asia/Shanghai" diff --git a/backend/dao/base.go b/backend/dao/base.go index 798b088..ce0140e 100644 --- a/backend/dao/base.go +++ b/backend/dao/base.go @@ -16,6 +16,7 @@ type RepoManager struct { User *UserDAO Agent *AgentDAO ActiveSchedule *ActiveScheduleDAO + Notification *NotificationChannelDAO } func NewManager(db *gorm.DB) *RepoManager { @@ -28,6 +29,7 @@ func NewManager(db *gorm.DB) *RepoManager { User: NewUserDAO(db), Agent: NewAgentDAO(db), ActiveSchedule: NewActiveScheduleDAO(db), + Notification: NewNotificationChannelDAO(db), } } @@ -47,6 +49,7 @@ func (m *RepoManager) 
package dao

import (
	"context"
	"errors"
	"time"

	"github.com/LoveLosita/smartflow/backend/model"
	"gorm.io/gorm"
	"gorm.io/gorm/clause"
)

// NotificationChannelDAO manages per-user external notification channel rows.
//
// Responsibility boundaries:
//  1. only basic reads/writes of user_notification_channels;
//  2. no webhook sending, no notification_records state machine, no outbox
//     consumption;
//  3. masking of webhook_url / bearer_token happens in the API/service
//     layers — the DAO always persists the real values.
type NotificationChannelDAO struct {
	db *gorm.DB
}

// NewNotificationChannelDAO builds a DAO bound to the given connection.
func NewNotificationChannelDAO(db *gorm.DB) *NotificationChannelDAO {
	return &NotificationChannelDAO{db: db}
}

// WithTx returns a copy of the DAO bound to the given transaction.
func (d *NotificationChannelDAO) WithTx(tx *gorm.DB) *NotificationChannelDAO {
	return &NotificationChannelDAO{db: tx}
}

// ensureDB guards against use of a nil or zero-value DAO.
func (d *NotificationChannelDAO) ensureDB() error {
	if d == nil || d.db == nil {
		return errors.New("notification channel dao 未初始化")
	}
	return nil
}

// UpsertUserNotificationChannel idempotently saves a user's channel config
// keyed on (user_id, channel).
//
// Notes:
//  1. on conflict only the switch, webhook, auth config and updated_at are
//     overwritten;
//  2. last_test_* columns are left untouched, so saving a config does not
//     discard the most recent test result;
//  3. channel.ID is database-assigned; callers must not rely on a passed-in
//     ID.
func (d *NotificationChannelDAO) UpsertUserNotificationChannel(ctx context.Context, channel *model.UserNotificationChannel) error {
	if err := d.ensureDB(); err != nil {
		return err
	}
	if channel == nil || channel.UserID <= 0 || channel.Channel == "" {
		return errors.New("notification channel 必须包含 user_id 和 channel")
	}
	now := time.Now()
	// Map-based insert bypasses GORM hooks, so timestamps are set manually.
	values := map[string]any{
		"user_id":      channel.UserID,
		"channel":      channel.Channel,
		"enabled":      channel.Enabled,
		"webhook_url":  channel.WebhookURL,
		"auth_type":    channel.AuthType,
		"bearer_token": channel.BearerToken,
		"created_at":   now,
		"updated_at":   now,
	}
	return d.db.WithContext(ctx).
		Model(&model.UserNotificationChannel{}).
		Clauses(clause.OnConflict{
			Columns: []clause.Column{{Name: "user_id"}, {Name: "channel"}},
			DoUpdates: clause.Assignments(map[string]any{
				"enabled":      channel.Enabled,
				"webhook_url":  channel.WebhookURL,
				"auth_type":    channel.AuthType,
				"bearer_token": channel.BearerToken,
				"updated_at":   now,
			}),
		}).
		Create(values).Error
}

// GetUserNotificationChannel loads one user's config for the given channel.
// Invalid identifiers are treated as "not found" rather than a distinct
// error.
func (d *NotificationChannelDAO) GetUserNotificationChannel(ctx context.Context, userID int, channel string) (*model.UserNotificationChannel, error) {
	if err := d.ensureDB(); err != nil {
		return nil, err
	}
	if userID <= 0 || channel == "" {
		return nil, gorm.ErrRecordNotFound
	}
	var row model.UserNotificationChannel
	err := d.db.WithContext(ctx).
		Where("user_id = ? AND channel = ?", userID, channel).
		First(&row).Error
	if err != nil {
		return nil, err
	}
	return &row, nil
}

// DeleteUserNotificationChannel removes one user's channel config.
//
// Notes: the table keeps no soft-delete column — saving again after a delete
// recreates the config. Invalid identifiers are a silent no-op.
func (d *NotificationChannelDAO) DeleteUserNotificationChannel(ctx context.Context, userID int, channel string) error {
	if err := d.ensureDB(); err != nil {
		return err
	}
	if userID <= 0 || channel == "" {
		return nil
	}
	return d.db.WithContext(ctx).
		Where("user_id = ? AND channel = ?", userID, channel).
		Delete(&model.UserNotificationChannel{}).Error
}

// UpdateUserNotificationChannelTestResult writes back the outcome of a
// webhook test (status, error text, timestamp) for the given channel.
func (d *NotificationChannelDAO) UpdateUserNotificationChannelTestResult(ctx context.Context, userID int, channel string, status string, testErr string, testedAt time.Time) error {
	if err := d.ensureDB(); err != nil {
		return err
	}
	if userID <= 0 || channel == "" {
		return errors.New("user_id 和 channel 不能为空")
	}
	updates := map[string]any{
		"last_test_status": status,
		"last_test_error":  testErr,
		"last_test_at":     &testedAt,
	}
	return d.db.WithContext(ctx).
		Model(&model.UserNotificationChannel{}).
		Where("user_id = ? AND channel = ?", userID, channel).
		Updates(updates).Error
}
当前 webhook_url / bearer_token 暂以明文字段承载,接口和日志必须脱敏;后续接入统一密钥加密能力后再替换存储实现。 +type UserNotificationChannel struct { + ID int64 `gorm:"column:id;primaryKey;autoIncrement" json:"id"` + + UserID int `gorm:"column:user_id;not null;uniqueIndex:uk_user_notification_channel,priority:1;index:idx_user_notification_channel_user"` + Channel string `gorm:"column:channel;type:varchar(32);not null;uniqueIndex:uk_user_notification_channel,priority:2"` + Enabled bool `gorm:"column:enabled;not null;default:true"` + WebhookURL string `gorm:"column:webhook_url;type:text;not null"` + AuthType string `gorm:"column:auth_type;type:varchar(32);not null;default:'none'"` + BearerToken string `gorm:"column:bearer_token;type:text;not null"` + + LastTestStatus string `gorm:"column:last_test_status;type:varchar(32)"` + LastTestError string `gorm:"column:last_test_error;type:text"` + LastTestAt *time.Time `gorm:"column:last_test_at"` + + CreatedAt time.Time `gorm:"column:created_at;autoCreateTime"` + UpdatedAt time.Time `gorm:"column:updated_at;autoUpdateTime"` +} + +func (UserNotificationChannel) TableName() string { return "user_notification_channels" } diff --git a/backend/notification/channel_service.go b/backend/notification/channel_service.go new file mode 100644 index 0000000..8227704 --- /dev/null +++ b/backend/notification/channel_service.go @@ -0,0 +1,222 @@ +package notification + +import ( + "context" + "errors" + "strings" + "time" + + "github.com/LoveLosita/smartflow/backend/model" + "gorm.io/gorm" +) + +const ( + ChannelTestStatusSuccess = "success" + ChannelTestStatusFailed = "failed" +) + +var ErrInvalidChannelConfig = errors.New("notification channel config invalid") + +type UserNotificationChannelStore interface { + UserNotificationChannelReader + UpsertUserNotificationChannel(ctx context.Context, channel *model.UserNotificationChannel) error + DeleteUserNotificationChannel(ctx context.Context, userID int, channel string) error + UpdateUserNotificationChannelTestResult(ctx 
context.Context, userID int, channel string, status string, testErr string, testedAt time.Time) error +} + +type SaveFeishuWebhookRequest struct { + Enabled bool + WebhookURL string + AuthType string + BearerToken string +} + +type ChannelResponse struct { + Channel string `json:"channel"` + Enabled bool `json:"enabled"` + Configured bool `json:"configured"` + WebhookURLMask string `json:"webhook_url_mask,omitempty"` + AuthType string `json:"auth_type"` + HasBearerToken bool `json:"has_bearer_token"` + LastTestStatus string `json:"last_test_status,omitempty"` + LastTestError string `json:"last_test_error,omitempty"` + LastTestAt *time.Time `json:"last_test_at,omitempty"` +} + +type TestResult struct { + Channel ChannelResponse `json:"channel"` + Status string `json:"status"` + Outcome string `json:"outcome"` + Message string `json:"message,omitempty"` + TraceID string `json:"trace_id,omitempty"` + SentAt time.Time `json:"sent_at"` + Skipped bool `json:"skipped"` + Provider string `json:"provider"` +} + +type ChannelServiceOptions struct { + Now func() time.Time +} + +// ChannelService 管理用户通知通道配置和测试发送。 +// +// 职责边界: +// 1. 负责保存、查询、删除当前用户的飞书 webhook 配置; +// 2. 负责调用同一套 provider 发送测试事件并回写 last_test_*; +// 3. 
不参与主动调度 trigger / preview / notification_records 状态机。 +type ChannelService struct { + store UserNotificationChannelStore + provider FeishuProvider + now func() time.Time +} + +func NewChannelService(store UserNotificationChannelStore, provider FeishuProvider, opts ChannelServiceOptions) (*ChannelService, error) { + if store == nil { + return nil, errors.New("notification channel store is nil") + } + if provider == nil { + return nil, errors.New("feishu provider is nil") + } + now := opts.Now + if now == nil { + now = time.Now + } + return &ChannelService{ + store: store, + provider: provider, + now: now, + }, nil +} + +func (s *ChannelService) GetFeishuWebhook(ctx context.Context, userID int) (ChannelResponse, error) { + if userID <= 0 { + return ChannelResponse{}, ErrInvalidChannelConfig + } + row, err := s.store.GetUserNotificationChannel(ctx, userID, model.NotificationChannelFeishuWebhook) + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return ChannelResponse{ + Channel: model.NotificationChannelFeishuWebhook, + AuthType: model.NotificationAuthTypeNone, + Configured: false, + }, nil + } + return ChannelResponse{}, err + } + return responseFromChannel(row), nil +} + +func (s *ChannelService) SaveFeishuWebhook(ctx context.Context, userID int, req SaveFeishuWebhookRequest) (ChannelResponse, error) { + if userID <= 0 { + return ChannelResponse{}, ErrInvalidChannelConfig + } + webhookURL := strings.TrimSpace(req.WebhookURL) + if err := ValidateFeishuWebhookURL(webhookURL); err != nil { + return ChannelResponse{}, ErrInvalidChannelConfig + } + authType := normalizeAuthType(req.AuthType) + bearerToken := strings.TrimSpace(req.BearerToken) + if authType == model.NotificationAuthTypeBearer && bearerToken == "" { + return ChannelResponse{}, ErrInvalidChannelConfig + } + row := &model.UserNotificationChannel{ + UserID: userID, + Channel: model.NotificationChannelFeishuWebhook, + Enabled: req.Enabled, + WebhookURL: webhookURL, + AuthType: authType, + 
BearerToken: bearerToken, + } + if err := s.store.UpsertUserNotificationChannel(ctx, row); err != nil { + return ChannelResponse{}, err + } + return s.GetFeishuWebhook(ctx, userID) +} + +func (s *ChannelService) DeleteFeishuWebhook(ctx context.Context, userID int) error { + if userID <= 0 { + return ErrInvalidChannelConfig + } + return s.store.DeleteUserNotificationChannel(ctx, userID, model.NotificationChannelFeishuWebhook) +} + +func (s *ChannelService) TestFeishuWebhook(ctx context.Context, userID int) (TestResult, error) { + if userID <= 0 { + return TestResult{}, ErrInvalidChannelConfig + } + now := s.now() + traceID := "trace_feishu_webhook_test" + sendResult, sendErr := s.provider.Send(ctx, FeishuSendRequest{ + NotificationID: 0, + UserID: userID, + TriggerID: "ast_test_webhook", + PreviewID: "asp_test_webhook", + TriggerType: "manual_test", + TargetType: "notification_channel", + TargetID: 0, + TargetURL: "/schedule-adjust/asp_test_webhook", + MessageText: "这是一条 SmartFlow 飞书 Webhook 测试消息。", + TraceID: traceID, + AttemptCount: 1, + }) + if sendErr != nil { + return TestResult{}, sendErr + } + + status := ChannelTestStatusFailed + testErr := strings.TrimSpace(sendResult.ErrorMessage) + if sendResult.Outcome == FeishuSendOutcomeSuccess { + status = ChannelTestStatusSuccess + testErr = "" + } + if sendResult.Outcome == FeishuSendOutcomeSkipped && testErr == "" { + testErr = "飞书 webhook 未配置或未启用" + } + if err := s.store.UpdateUserNotificationChannelTestResult(ctx, userID, model.NotificationChannelFeishuWebhook, status, testErr, now); err != nil { + return TestResult{}, err + } + channel, err := s.GetFeishuWebhook(ctx, userID) + if err != nil { + return TestResult{}, err + } + return TestResult{ + Channel: channel, + Status: status, + Outcome: string(sendResult.Outcome), + Message: testErr, + TraceID: traceID, + SentAt: now, + Skipped: sendResult.Outcome == FeishuSendOutcomeSkipped, + Provider: ChannelFeishu, + }, nil +} + +func responseFromChannel(row 
*model.UserNotificationChannel) ChannelResponse { + if row == nil { + return ChannelResponse{ + Channel: model.NotificationChannelFeishuWebhook, + AuthType: model.NotificationAuthTypeNone, + Configured: false, + } + } + return ChannelResponse{ + Channel: row.Channel, + Enabled: row.Enabled, + Configured: strings.TrimSpace(row.WebhookURL) != "", + WebhookURLMask: MaskWebhookURL(row.WebhookURL), + AuthType: normalizeAuthType(row.AuthType), + HasBearerToken: strings.TrimSpace(row.BearerToken) != "", + LastTestStatus: row.LastTestStatus, + LastTestError: row.LastTestError, + LastTestAt: row.LastTestAt, + } +} + +func normalizeAuthType(authType string) string { + switch strings.ToLower(strings.TrimSpace(authType)) { + case model.NotificationAuthTypeBearer: + return model.NotificationAuthTypeBearer + default: + return model.NotificationAuthTypeNone + } +} diff --git a/backend/notification/dedupe.go b/backend/notification/dedupe.go new file mode 100644 index 0000000..d018fe8 --- /dev/null +++ b/backend/notification/dedupe.go @@ -0,0 +1,32 @@ +package notification + +import ( + "fmt" + "strings" + "time" +) + +const ( + // DefaultFeishuDedupeWindow 是 notification 第一版固定的 30 分钟去重窗口。 + DefaultFeishuDedupeWindow = 30 * time.Minute +) + +// BuildTimeWindowDedupeKey 构造“user_id + trigger_type + time_window”去重键。 +// +// 职责边界: +// 1. 供事件发布方在生成 `notification.feishu.requested` payload 时复用; +// 2. 只负责把 30 分钟窗口归一成稳定 key,不负责落 notification_records; +// 3. unfinished_feedback 若要改用 feedback_id / idempotency_key,可不使用这个 helper。 +func BuildTimeWindowDedupeKey(userID int, triggerType string, requestedAt time.Time, window time.Duration) string { + if window <= 0 { + window = DefaultFeishuDedupeWindow + } + if userID <= 0 || strings.TrimSpace(triggerType) == "" || requestedAt.IsZero() { + return "" + } + + // 1. 先把请求时间归一到固定窗口起点,保证 30 分钟内多次触发得到同一 key。 + // 2. 
requestedAt 为空或非法时直接返回空字符串,让上游显式感知入参不完整。 + windowStartUnix := requestedAt.Unix() / int64(window.Seconds()) + return fmt.Sprintf("%d:%s:%d", userID, strings.TrimSpace(triggerType), windowStartUnix) +} diff --git a/backend/notification/mock_provider.go b/backend/notification/mock_provider.go new file mode 100644 index 0000000..fdeea12 --- /dev/null +++ b/backend/notification/mock_provider.go @@ -0,0 +1,142 @@ +package notification + +import ( + "context" + "fmt" + "sync" + "time" +) + +// MockFeishuMode 描述 mock provider 下一次返回哪类结果。 +type MockFeishuMode string + +const ( + MockFeishuModeSuccess MockFeishuMode = "success" + MockFeishuModeTemporaryFail MockFeishuMode = "temporary_fail" + MockFeishuModePermanentFail MockFeishuMode = "permanent_fail" +) + +// MockFeishuProvider 是进程内 mock provider。 +// +// 职责边界: +// 1. 只用于本地联调、单元测试和阶段性验收; +// 2. 不做真实 HTTP 调用,直接根据预设 mode 返回 success / temporary_fail / permanent_fail; +// 3. 保留调用历史,方便测试断言“有没有重复发飞书”。 +type MockFeishuProvider struct { + mu sync.Mutex + defaultMode MockFeishuMode + queuedModes []MockFeishuMode + calls []FeishuSendRequest +} + +// NewMockFeishuProvider 创建一个进程内 mock provider。 +func NewMockFeishuProvider(defaultMode MockFeishuMode) *MockFeishuProvider { + if defaultMode == "" { + defaultMode = MockFeishuModeSuccess + } + return &MockFeishuProvider{defaultMode: defaultMode} +} + +// SetDefaultMode 设置默认返回模式。 +func (p *MockFeishuProvider) SetDefaultMode(mode MockFeishuMode) { + p.mu.Lock() + defer p.mu.Unlock() + if mode == "" { + mode = MockFeishuModeSuccess + } + p.defaultMode = mode +} + +// PushModes 追加一组“一次性模式”。 +// +// 说明: +// 1. 先进先出消费,便于测试“先失败再成功”的重试路径; +// 2. 队列用尽后回退到 defaultMode; +// 3. 
空模式会被自动忽略,避免测试代码误塞脏数据。 +func (p *MockFeishuProvider) PushModes(modes ...MockFeishuMode) { + p.mu.Lock() + defer p.mu.Unlock() + for _, mode := range modes { + if mode == "" { + continue + } + p.queuedModes = append(p.queuedModes, mode) + } +} + +// Calls 返回当前 provider 已记录的调用快照。 +func (p *MockFeishuProvider) Calls() []FeishuSendRequest { + p.mu.Lock() + defer p.mu.Unlock() + copied := make([]FeishuSendRequest, len(p.calls)) + copy(copied, p.calls) + return copied +} + +// Send 按预设模式返回模拟结果。 +// +// 步骤说明: +// 1. 先记录本次请求,方便测试校验是否发生重复投递; +// 2. 再按 queuedModes -> defaultMode 的顺序决定 outcome; +// 3. 最后返回可落库审计的 request/response 摘要。 +func (p *MockFeishuProvider) Send(_ context.Context, req FeishuSendRequest) (FeishuSendResult, error) { + p.mu.Lock() + p.calls = append(p.calls, req) + + mode := p.defaultMode + if len(p.queuedModes) > 0 { + mode = p.queuedModes[0] + p.queuedModes = p.queuedModes[1:] + } + p.mu.Unlock() + + switch mode { + case MockFeishuModeTemporaryFail: + return FeishuSendResult{ + Outcome: FeishuSendOutcomeTemporaryFail, + ErrorCode: FeishuErrorCodeProviderTimeout, + ErrorMessage: "mock feishu provider temporary failure", + RequestPayload: map[string]any{ + "notification_id": req.NotificationID, + "user_id": req.UserID, + "preview_id": req.PreviewID, + "target_url": req.TargetURL, + }, + ResponsePayload: map[string]any{ + "mode": string(mode), + "reason": "mock temporary failure", + }, + }, nil + case MockFeishuModePermanentFail: + return FeishuSendResult{ + Outcome: FeishuSendOutcomePermanentFail, + ErrorCode: FeishuErrorCodePayloadInvalid, + ErrorMessage: "mock feishu provider permanent failure", + RequestPayload: map[string]any{ + "notification_id": req.NotificationID, + "user_id": req.UserID, + "preview_id": req.PreviewID, + "target_url": req.TargetURL, + }, + ResponsePayload: map[string]any{ + "mode": string(mode), + "reason": "mock permanent failure", + }, + }, nil + default: + return FeishuSendResult{ + Outcome: FeishuSendOutcomeSuccess, + 
ProviderMessageID: fmt.Sprintf("mock_feishu_%d", time.Now().UnixNano()), + RequestPayload: map[string]any{ + "notification_id": req.NotificationID, + "user_id": req.UserID, + "preview_id": req.PreviewID, + "target_url": req.TargetURL, + }, + ResponsePayload: map[string]any{ + "mode": string(MockFeishuModeSuccess), + "status": "ok", + }, + }, nil + } +} diff --git a/backend/notification/provider.go b/backend/notification/provider.go new file mode 100644 index 0000000..95f56c9 --- /dev/null +++ b/backend/notification/provider.go @@ -0,0 +1,88 @@ +package notification + +import "context" + +const ( + // ChannelFeishu 表示当前通知记录走飞书通道。 + ChannelFeishu = "feishu" +) + +const ( + // FeishuErrorCodeProviderTimeout 表示 provider 超时,属于可重试错误。 + FeishuErrorCodeProviderTimeout = "provider_timeout" + // FeishuErrorCodeProviderRateLimited 表示 provider 限流,属于可重试错误。 + FeishuErrorCodeProviderRateLimited = "provider_rate_limited" + // FeishuErrorCodeProvider5xx 表示 provider 服务端异常,属于可重试错误。 + FeishuErrorCodeProvider5xx = "provider_5xx" + // FeishuErrorCodeNetworkError 表示网络层异常,属于可重试错误。 + FeishuErrorCodeNetworkError = "network_error" + // FeishuErrorCodeRecipientMissing 表示缺少接收方,属于不可恢复错误。 + FeishuErrorCodeRecipientMissing = "recipient_missing" + // FeishuErrorCodeInvalidURL 表示目标链接非法,属于不可恢复错误。 + FeishuErrorCodeInvalidURL = "invalid_url" + // FeishuErrorCodeProviderAuthFailed 表示 provider 认证失败,属于不可恢复错误。 + FeishuErrorCodeProviderAuthFailed = "provider_auth_failed" + // FeishuErrorCodePayloadInvalid 表示请求体非法,属于不可恢复错误。 + FeishuErrorCodePayloadInvalid = "payload_invalid" +) + +// FeishuSendOutcome 表示 provider 对一次投递尝试的分类结果。 +// +// 职责边界: +// 1. 只表达 provider 层对“这次投递”是否成功、是否可重试的判断; +// 2. 不直接承载 notification_records 的状态机,状态流转由 NotificationService 决定; +// 3. 
// FeishuSendRequest is the stable input the notification service hands to a
// provider.
//
// Responsibilities:
//  1. Describes only what a provider needs to actually deliver the message;
//  2. Does not expose GORM models, keeping providers free of database
//     details;
//  3. Also carries audit fields so mock/webhook providers can record a
//     request summary.
type FeishuSendRequest struct {
	NotificationID int64  `json:"notification_id"`
	UserID         int    `json:"user_id"`
	TriggerID      string `json:"trigger_id"`
	PreviewID      string `json:"preview_id"`
	TriggerType    string `json:"trigger_type"`
	TargetType     string `json:"target_type"`
	TargetID       int    `json:"target_id"`
	TargetURL      string `json:"target_url"`
	MessageText    string `json:"message_text"`
	FallbackUsed   bool   `json:"fallback_used"`
	TraceID        string `json:"trace_id,omitempty"`
	AttemptCount   int    `json:"attempt_count"`
}

// FeishuSendResult is the delivery result a provider returns.
//
// Responsibilities:
//  1. Outcome decides which of the sent / failed / dead paths
//     NotificationService should take;
//  2. Request/response payloads exist only for persisted auditing and are not
//     tied to any specific SDK;
//  3. error_code should stay as stable as possible so alerting and debugging
//     can key off it later.
type FeishuSendResult struct {
	Outcome           FeishuSendOutcome `json:"outcome"`
	ProviderMessageID string            `json:"provider_message_id,omitempty"`
	ErrorCode         string            `json:"error_code,omitempty"`
	ErrorMessage      string            `json:"error_message,omitempty"`
	RequestPayload    any               `json:"request_payload,omitempty"`
	ResponsePayload   any               `json:"response_payload,omitempty"`
}
后续新增 WebhookFeishuProvider / OpenIDFeishuProvider 时,只需实现这个接口。 +type FeishuProvider interface { + Send(ctx context.Context, req FeishuSendRequest) (FeishuSendResult, error) +} diff --git a/backend/notification/runner.go b/backend/notification/runner.go new file mode 100644 index 0000000..4636209 --- /dev/null +++ b/backend/notification/runner.go @@ -0,0 +1,44 @@ +package notification + +import ( + "context" + "log" + "time" +) + +// StartRetryLoop 启动 notification_records 重试扫描器。 +// +// 说明: +// 1. 只在 worker/all 模式启动,api 模式不启动; +// 2. provider 失败后的重试由本循环负责,避免通用 outbox 被外部服务慢失败拖住; +// 3. 每轮失败只写日志,下一轮继续扫描。 +func (s *NotificationService) StartRetryLoop(ctx context.Context, every time.Duration, limit int) { + if s == nil { + return + } + if every <= 0 { + every = time.Minute + } + if limit <= 0 { + limit = 50 + } + go func() { + ticker := time.NewTicker(every) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + result, err := s.RetryFeishuNotifications(ctx, time.Now(), limit) + if err != nil { + log.Printf("飞书通知重试扫描失败: err=%v", err) + continue + } + if result.Scanned > 0 { + log.Printf("飞书通知重试扫描完成: scanned=%d sent=%d failed=%d dead=%d skipped=%d", result.Scanned, result.Sent, result.Failed, result.Dead, result.Skipped) + } + } + } + }() +} diff --git a/backend/notification/service.go b/backend/notification/service.go new file mode 100644 index 0000000..dbea7eb --- /dev/null +++ b/backend/notification/service.go @@ -0,0 +1,664 @@ +package notification + +import ( + "context" + "encoding/json" + "errors" + "strings" + "sync" + "time" + + sharedevents "github.com/LoveLosita/smartflow/backend/shared/events" + + "github.com/LoveLosita/smartflow/backend/model" + "gorm.io/gorm" +) + +const ( + defaultMaxAttempts = 5 + defaultRetryBaseDelay = 5 * time.Minute + defaultRetryMaxDelay = 30 * time.Minute + defaultSummaryMaxRunes = 180 + defaultRetryScanBatch = 100 + defaultFallbackTemplate = "我为你生成了一份日程调整建议,请回到系统确认是否应用。" +) + +// 
// NotificationRecordStore abstracts the persistence the notification module
// actually depends on.
//
// Responsibilities:
//  1. Describes only the minimal interface needed to read and write
//     notification_records;
//  2. Lets production reuse ActiveScheduleDAO directly while tests substitute
//     an in-memory fake;
//  3. Keeps providers, the event bus, and the business state machine out of
//     the storage interface.
type NotificationRecordStore interface {
	CreateNotificationRecord(ctx context.Context, record *model.NotificationRecord) error
	UpdateNotificationRecordFields(ctx context.Context, notificationID int64, updates map[string]any) error
	GetNotificationRecordByID(ctx context.Context, notificationID int64) (*model.NotificationRecord, error)
	FindNotificationRecordByDedupeKey(ctx context.Context, channel string, dedupeKey string) (*model.NotificationRecord, error)
	ListRetryableNotificationRecords(ctx context.Context, now time.Time, limit int) ([]model.NotificationRecord, error)
}
这里保留别名,既不打断已有代码,也让后续调用方可以按业务语义引用。 +type NotificationService = Service + +// NewNotificationService 创建通知服务。 +func NewNotificationService(store NotificationRecordStore, provider FeishuProvider, opts ServiceOptions) (*Service, error) { + if store == nil { + return nil, errors.New("notification record store is nil") + } + if provider == nil { + return nil, errors.New("feishu provider is nil") + } + opts = normalizeServiceOptions(opts) + return &Service{ + store: store, + provider: provider, + options: opts, + locks: newKeyedLocker(), + }, nil +} + +// HandleFeishuRequested 处理一条 `notification.feishu.requested` 事件。 +// +// 步骤说明: +// 1. 先校验 shared/events payload,避免脏数据进入状态机; +// 2. 再按 `channel + dedupe_key` 串行化处理,保证进程内不会并发重复发同一条飞书; +// 3. 若已有 pending/failed,则复用同一条 record 继续投递;sending/sent/dead/skipped 则直接短路。 +func (s *Service) HandleFeishuRequested(ctx context.Context, payload sharedevents.FeishuNotificationRequestedPayload) (HandleResult, error) { + if err := payload.Validate(); err != nil { + return HandleResult{}, err + } + + lockKey := buildNotificationLockKey(ChannelFeishu, payload.DedupeKey) + unlock := s.locks.Lock(lockKey) + defer unlock() + + record, reused, err := s.findOrCreateRecordForPayload(ctx, payload) + if err != nil { + return HandleResult{}, err + } + + result, err := s.deliverRecord(ctx, record) + if err != nil { + return HandleResult{}, err + } + result.Reused = reused + return result, nil +} + +// RetryFeishuNotifications 扫描并重试到点的 failed 记录。 +// +// 步骤说明: +// 1. 先按 DAO 提供的 retry 查询口径拉取 `status=failed && next_retry_at<=now`; +// 2. 再逐条加进程内锁并复用同一条 record 重试,避免 scanner 和事件 handler 打架; +// 3. 
单条失败不会中断整批扫描,但会在返回值中累计 Errors,并把首个错误回传给调用方。 +func (s *Service) RetryFeishuNotifications(ctx context.Context, now time.Time, limit int) (RetryResult, error) { + if now.IsZero() { + now = s.options.Now() + } + if limit <= 0 { + limit = s.options.RetryScanBatch + } + + records, err := s.store.ListRetryableNotificationRecords(ctx, now, limit) + if err != nil { + return RetryResult{}, err + } + + result := RetryResult{Scanned: len(records)} + var firstErr error + + for _, record := range records { + if record.Channel != ChannelFeishu { + result.Skipped++ + continue + } + + handleResult, retryErr := s.retryOneRecord(ctx, record.ID) + if retryErr != nil { + result.Errors++ + if firstErr == nil { + firstErr = retryErr + } + continue + } + + if handleResult.Delivered { + result.Retried++ + } + switch handleResult.Status { + case model.NotificationRecordStatusSent: + if handleResult.Delivered { + result.Sent++ + } else { + result.Skipped++ + } + case model.NotificationRecordStatusFailed: + result.Failed++ + case model.NotificationRecordStatusDead: + result.Dead++ + default: + result.Skipped++ + } + } + + return result, firstErr +} + +func (s *Service) RetryDue(ctx context.Context, now time.Time, limit int) (int, error) { + result, err := s.RetryFeishuNotifications(ctx, now, limit) + if err != nil { + return result.Retried, err + } + return result.Retried, nil +} + +func (s *Service) retryOneRecord(ctx context.Context, notificationID int64) (HandleResult, error) { + record, err := s.store.GetNotificationRecordByID(ctx, notificationID) + if err != nil { + return HandleResult{}, err + } + + lockKey := buildNotificationLockKey(record.Channel, record.DedupeKey) + unlock := s.locks.Lock(lockKey) + defer unlock() + + current, err := s.store.GetNotificationRecordByID(ctx, notificationID) + if err != nil { + return HandleResult{}, err + } + return s.deliverRecord(ctx, current) +} + +func (s *Service) findOrCreateRecordForPayload(ctx context.Context, payload 
sharedevents.FeishuNotificationRequestedPayload) (*model.NotificationRecord, bool, error) { + // 1. 若 payload 已携带 notification_id,先尝试命中现有记录,便于后续扩展“指定 record 重放”场景。 + // 2. 若 id 未命中或字段不一致,再退回到 channel + dedupe_key 这一版稳定幂等口径。 + if payload.NotificationID > 0 { + record, err := s.store.GetNotificationRecordByID(ctx, payload.NotificationID) + if err == nil && record != nil && record.Channel == ChannelFeishu && record.DedupeKey == strings.TrimSpace(payload.DedupeKey) { + return record, true, nil + } + if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) { + return nil, false, err + } + } + + record, err := s.store.FindNotificationRecordByDedupeKey(ctx, ChannelFeishu, strings.TrimSpace(payload.DedupeKey)) + if err == nil { + return record, true, nil + } + if !errors.Is(err, gorm.ErrRecordNotFound) { + return nil, false, err + } + + summaryText, fallbackText, fallbackUsed := s.normalizeMessageTemplate(payload.SummaryText, payload.FallbackText) + record = &model.NotificationRecord{ + Channel: ChannelFeishu, + UserID: payload.UserID, + TriggerID: strings.TrimSpace(payload.TriggerID), + PreviewID: strings.TrimSpace(payload.PreviewID), + TriggerType: strings.TrimSpace(payload.TriggerType), + TargetType: strings.TrimSpace(payload.TargetType), + TargetID: payload.TargetID, + DedupeKey: strings.TrimSpace(payload.DedupeKey), + TargetURL: strings.TrimSpace(payload.TargetURL), + SummaryText: summaryText, + FallbackText: fallbackText, + FallbackUsed: fallbackUsed, + Status: model.NotificationRecordStatusPending, + MaxAttempts: s.options.MaxAttempts, + TraceID: strings.TrimSpace(payload.TraceID), + } + + if err = s.store.CreateNotificationRecord(ctx, record); err != nil { + // 1. 并发场景下若唯一索引已被别的协程抢先创建,这里回查 dedupe 记录即可; + // 2. 
若回查仍失败,说明不是幂等竞争而是真正落库异常,应交给上层重试。 + existing, findErr := s.store.FindNotificationRecordByDedupeKey(ctx, ChannelFeishu, record.DedupeKey) + if findErr == nil { + return existing, true, nil + } + return nil, false, err + } + return record, false, nil +} + +func (s *Service) deliverRecord(ctx context.Context, record *model.NotificationRecord) (HandleResult, error) { + if record == nil { + return HandleResult{}, errors.New("notification record is nil") + } + + switch record.Status { + case model.NotificationRecordStatusSending, + model.NotificationRecordStatusSent, + model.NotificationRecordStatusDead, + model.NotificationRecordStatusSkipped: + return HandleResult{ + RecordID: record.ID, + Status: record.Status, + FallbackUsed: record.FallbackUsed, + AttemptCount: record.AttemptCount, + NextRetryAt: record.NextRetryAt, + }, nil + case model.NotificationRecordStatusPending, model.NotificationRecordStatusFailed: + // 继续向下走真正投递流程。 + default: + // 1. 未识别状态先保守短路,避免把未知脏数据继续推进到 provider。 + // 2. 后续若新增新状态,应显式扩展这里的状态机分支。 + return HandleResult{ + RecordID: record.ID, + Status: record.Status, + FallbackUsed: record.FallbackUsed, + AttemptCount: record.AttemptCount, + NextRetryAt: record.NextRetryAt, + }, nil + } + + requestPayload := s.buildSendRequest(record) + requestJSON, err := marshalJSONPointer(requestPayload) + if err != nil { + return HandleResult{}, err + } + + nextAttemptCount := record.AttemptCount + 1 + updates := map[string]any{ + "status": model.NotificationRecordStatusSending, + "attempt_count": nextAttemptCount, + "next_retry_at": nil, + "last_error_code": nil, + "last_error": nil, + "provider_request_json": requestJSON, + } + if record.MaxAttempts <= 0 { + updates["max_attempts"] = s.options.MaxAttempts + record.MaxAttempts = s.options.MaxAttempts + } + if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + return HandleResult{}, err + } + + record.Status = model.NotificationRecordStatusSending + record.AttemptCount = 
nextAttemptCount + record.NextRetryAt = nil + record.ProviderRequestJSON = requestJSON + + sendResult, sendErr := s.provider.Send(ctx, requestPayload) + if sendErr != nil && sendResult.Outcome == "" { + sendResult = FeishuSendResult{ + Outcome: FeishuSendOutcomeTemporaryFail, + ErrorCode: FeishuErrorCodeNetworkError, + ErrorMessage: sendErr.Error(), + } + } + if sendResult.Outcome == "" { + sendResult.Outcome = FeishuSendOutcomeTemporaryFail + if sendResult.ErrorCode == "" { + sendResult.ErrorCode = FeishuErrorCodeNetworkError + } + if sendResult.ErrorMessage == "" && sendErr != nil { + sendResult.ErrorMessage = sendErr.Error() + } + } + + return s.applySendResult(ctx, record, sendResult) +} + +func (s *Service) applySendResult(ctx context.Context, record *model.NotificationRecord, sendResult FeishuSendResult) (HandleResult, error) { + now := s.options.Now() + responseJSON, err := marshalJSONPointer(sendResult.ResponsePayload) + if err != nil { + return HandleResult{}, err + } + requestJSON, err := marshalJSONPointer(sendResult.RequestPayload) + if err != nil { + return HandleResult{}, err + } + if requestJSON == nil { + requestJSON = record.ProviderRequestJSON + } + + errorCode := stringPtrOrNil(sendResult.ErrorCode) + errorMessage := stringPtrOrNil(truncateText(sendResult.ErrorMessage, 2000)) + providerMessageID := stringPtrOrNil(sendResult.ProviderMessageID) + + switch sendResult.Outcome { + case FeishuSendOutcomeSuccess: + sentAt := now + updates := map[string]any{ + "status": model.NotificationRecordStatusSent, + "provider_message_id": providerMessageID, + "provider_request_json": requestJSON, + "provider_response_json": responseJSON, + "last_error_code": nil, + "last_error": nil, + "next_retry_at": nil, + "sent_at": &sentAt, + } + if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + return HandleResult{}, err + } + return HandleResult{ + RecordID: record.ID, + Status: model.NotificationRecordStatusSent, + Delivered: true, + 
FallbackUsed: record.FallbackUsed, + AttemptCount: record.AttemptCount, + }, nil + case FeishuSendOutcomeSkipped: + updates := map[string]any{ + "status": model.NotificationRecordStatusSkipped, + "provider_message_id": providerMessageID, + "provider_request_json": requestJSON, + "provider_response_json": responseJSON, + "last_error_code": errorCode, + "last_error": errorMessage, + "next_retry_at": nil, + } + if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + return HandleResult{}, err + } + return HandleResult{ + RecordID: record.ID, + Status: model.NotificationRecordStatusSkipped, + Delivered: true, + FallbackUsed: record.FallbackUsed, + AttemptCount: record.AttemptCount, + ProviderError: strings.TrimSpace(sendResult.ErrorCode), + }, nil + case FeishuSendOutcomePermanentFail: + updates := map[string]any{ + "status": model.NotificationRecordStatusDead, + "provider_message_id": providerMessageID, + "provider_request_json": requestJSON, + "provider_response_json": responseJSON, + "last_error_code": errorCode, + "last_error": errorMessage, + "next_retry_at": nil, + } + if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + return HandleResult{}, err + } + return HandleResult{ + RecordID: record.ID, + Status: model.NotificationRecordStatusDead, + Delivered: true, + FallbackUsed: record.FallbackUsed, + AttemptCount: record.AttemptCount, + ProviderError: strings.TrimSpace(sendResult.ErrorCode), + }, nil + default: + if record.AttemptCount >= s.effectiveMaxAttempts(record) { + updates := map[string]any{ + "status": model.NotificationRecordStatusDead, + "provider_message_id": providerMessageID, + "provider_request_json": requestJSON, + "provider_response_json": responseJSON, + "last_error_code": errorCode, + "last_error": errorMessage, + "next_retry_at": nil, + } + if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + return HandleResult{}, err + } + return 
HandleResult{ + RecordID: record.ID, + Status: model.NotificationRecordStatusDead, + Delivered: true, + FallbackUsed: record.FallbackUsed, + AttemptCount: record.AttemptCount, + ProviderError: strings.TrimSpace(sendResult.ErrorCode), + }, nil + } + + nextRetryAt := s.calcNextRetryAt(now, record.AttemptCount) + updates := map[string]any{ + "status": model.NotificationRecordStatusFailed, + "provider_message_id": providerMessageID, + "provider_request_json": requestJSON, + "provider_response_json": responseJSON, + "last_error_code": errorCode, + "last_error": errorMessage, + "next_retry_at": &nextRetryAt, + } + if err = s.store.UpdateNotificationRecordFields(ctx, record.ID, updates); err != nil { + return HandleResult{}, err + } + return HandleResult{ + RecordID: record.ID, + Status: model.NotificationRecordStatusFailed, + Delivered: true, + FallbackUsed: record.FallbackUsed, + AttemptCount: record.AttemptCount, + NextRetryAt: &nextRetryAt, + ProviderError: strings.TrimSpace(sendResult.ErrorCode), + }, nil + } +} + +func (s *Service) buildSendRequest(record *model.NotificationRecord) FeishuSendRequest { + messageText := strings.TrimSpace(record.SummaryText) + if record.FallbackUsed || messageText == "" { + messageText = strings.TrimSpace(record.FallbackText) + } + if messageText == "" { + messageText = defaultFallbackTemplate + } + if !strings.Contains(messageText, strings.TrimSpace(record.TargetURL)) { + messageText = strings.TrimSpace(messageText) + "\n" + strings.TrimSpace(record.TargetURL) + } + + return FeishuSendRequest{ + NotificationID: record.ID, + UserID: record.UserID, + TriggerID: record.TriggerID, + PreviewID: record.PreviewID, + TriggerType: record.TriggerType, + TargetType: record.TargetType, + TargetID: record.TargetID, + TargetURL: record.TargetURL, + MessageText: strings.TrimSpace(messageText), + FallbackUsed: record.FallbackUsed, + TraceID: record.TraceID, + AttemptCount: record.AttemptCount + 1, + } +} + +func (s *Service) 
normalizeMessageTemplate(summaryText, fallbackText string) (string, string, bool) { + normalizedFallback := strings.TrimSpace(fallbackText) + if normalizedFallback == "" { + normalizedFallback = defaultFallbackTemplate + } + + normalizedSummary := strings.TrimSpace(summaryText) + if normalizedSummary == "" { + return "", normalizedFallback, true + } + if containsExternalLink(normalizedSummary) { + return "", normalizedFallback, true + } + + runes := []rune(normalizedSummary) + if len(runes) > s.options.SummaryMaxRunes { + normalizedSummary = string(runes[:s.options.SummaryMaxRunes]) + } + return strings.TrimSpace(normalizedSummary), normalizedFallback, false +} + +func (s *Service) calcNextRetryAt(now time.Time, attemptCount int) time.Time { + if attemptCount <= 0 { + attemptCount = 1 + } + + delay := s.options.RetryBaseDelay + for idx := 1; idx < attemptCount; idx++ { + delay *= 2 + if delay >= s.options.RetryMaxDelay { + delay = s.options.RetryMaxDelay + break + } + } + if delay > s.options.RetryMaxDelay { + delay = s.options.RetryMaxDelay + } + return now.Add(delay) +} + +func (s *Service) effectiveMaxAttempts(record *model.NotificationRecord) int { + if record != nil && record.MaxAttempts > 0 { + return record.MaxAttempts + } + return s.options.MaxAttempts +} + +func normalizeServiceOptions(opts ServiceOptions) ServiceOptions { + if opts.Now == nil { + opts.Now = time.Now + } + if opts.MaxAttempts <= 0 { + opts.MaxAttempts = defaultMaxAttempts + } + if opts.RetryBaseDelay <= 0 { + opts.RetryBaseDelay = defaultRetryBaseDelay + } + if opts.RetryMaxDelay <= 0 { + opts.RetryMaxDelay = defaultRetryMaxDelay + } + if opts.RetryMaxDelay < opts.RetryBaseDelay { + opts.RetryMaxDelay = opts.RetryBaseDelay + } + if opts.SummaryMaxRunes <= 0 { + opts.SummaryMaxRunes = defaultSummaryMaxRunes + } + if opts.RetryScanBatch <= 0 { + opts.RetryScanBatch = defaultRetryScanBatch + } + return opts +} + +func buildNotificationLockKey(channel, dedupeKey string) string { + return 
strings.TrimSpace(channel) + "|" + strings.TrimSpace(dedupeKey) +} + +func marshalJSONPointer(value any) (*string, error) { + if value == nil { + return nil, nil + } + raw, err := json.Marshal(value) + if err != nil { + return nil, err + } + text := string(raw) + return &text, nil +} + +func stringPtrOrNil(value string) *string { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return nil + } + return &trimmed +} + +func truncateText(value string, limit int) string { + if limit <= 0 { + return "" + } + runes := []rune(strings.TrimSpace(value)) + if len(runes) <= limit { + return string(runes) + } + return string(runes[:limit]) +} + +func containsExternalLink(text string) bool { + lowered := strings.ToLower(strings.TrimSpace(text)) + return strings.Contains(lowered, "://") || strings.Contains(lowered, "www.") +} + +type keyedLocker struct { + mu sync.Mutex + locks map[string]*keyedLockEntry +} + +type keyedLockEntry struct { + mu sync.Mutex + refs int +} + +func newKeyedLocker() *keyedLocker { + return &keyedLocker{ + locks: make(map[string]*keyedLockEntry), + } +} + +func (l *keyedLocker) Lock(key string) func() { + l.mu.Lock() + entry := l.locks[key] + if entry == nil { + entry = &keyedLockEntry{} + l.locks[key] = entry + } + entry.refs++ + l.mu.Unlock() + + entry.mu.Lock() + + return func() { + entry.mu.Unlock() + l.mu.Lock() + entry.refs-- + if entry.refs == 0 { + delete(l.locks, key) + } + l.mu.Unlock() + } +} diff --git a/backend/notification/webhook_provider.go b/backend/notification/webhook_provider.go new file mode 100644 index 0000000..be30f7a --- /dev/null +++ b/backend/notification/webhook_provider.go @@ -0,0 +1,361 @@ +package notification + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "github.com/LoveLosita/smartflow/backend/model" + "gorm.io/gorm" +) + +const ( + defaultWebhookTimeout = 5 * time.Second + defaultFrontendBaseURL = "https://smartflow.example.com" 
+ webhookPayloadEvent = "smartflow.schedule_adjustment_ready" + webhookPayloadVersion = "1" + webhookMessageTitle = "SmartFlow 日程调整建议" + webhookMessageActionText = "查看并确认调整" + maxWebhookResponseBodyLen = 64 * 1024 +) + +// UserNotificationChannelReader 描述 webhook provider 读取用户通知配置所需的最小能力。 +// +// 职责边界: +// 1. 只读取 user_id + channel 对应的配置; +// 2. 不负责保存配置和测试结果; +// 3. 生产环境由 NotificationChannelDAO 实现,测试可替换为内存 fake。 +type UserNotificationChannelReader interface { + GetUserNotificationChannel(ctx context.Context, userID int, channel string) (*model.UserNotificationChannel, error) +} + +type WebhookFeishuProviderOptions struct { + HTTPClient *http.Client + FrontendBaseURL string + Timeout time.Duration + Now func() time.Time +} + +// WebhookFeishuProvider 把 SmartFlow 通知事件发送到用户配置的飞书 Webhook 触发器。 +// +// 职责边界: +// 1. 只负责读取用户 webhook 配置、拼装极简业务 JSON 并执行 HTTP POST; +// 2. 不负责 notification_records 的创建、重试节奏和幂等; +// 3. 不实现飞书群自定义机器人 msg_type 协议,私聊/群发由飞书流程自行编排。 +type WebhookFeishuProvider struct { + store UserNotificationChannelReader + client *http.Client + frontendBaseURL string + now func() time.Time +} + +type FeishuWebhookPayload struct { + Event string `json:"event"` + Version string `json:"version"` + NotificationID int64 `json:"notification_id"` + UserID int `json:"user_id"` + PreviewID string `json:"preview_id"` + TriggerID string `json:"trigger_id"` + TriggerType string `json:"trigger_type"` + TargetType string `json:"target_type"` + TargetID int `json:"target_id"` + Message FeishuWebhookMessage `json:"message"` + TraceID string `json:"trace_id,omitempty"` + SentAt string `json:"sent_at"` +} + +type FeishuWebhookMessage struct { + Title string `json:"title"` + Summary string `json:"summary"` + ActionText string `json:"action_text"` + ActionURL string `json:"action_url"` +} + +func NewWebhookFeishuProvider(store UserNotificationChannelReader, opts WebhookFeishuProviderOptions) (*WebhookFeishuProvider, error) { + if store == nil { + return nil, errors.New("user notification 
channel store is nil") + } + timeout := opts.Timeout + if timeout <= 0 { + timeout = defaultWebhookTimeout + } + client := opts.HTTPClient + if client == nil { + client = &http.Client{Timeout: timeout} + } + now := opts.Now + if now == nil { + now = time.Now + } + return &WebhookFeishuProvider{ + store: store, + client: client, + frontendBaseURL: normalizeFrontendBaseURL(opts.FrontendBaseURL), + now: now, + }, nil +} + +// BuildFeishuWebhookPayload 生成飞书 Webhook 触发器消费的极简业务 JSON。 +// +// 说明: +// 1. 该结构不包含飞书群机器人 msg_type 字段; +// 2. message 四个字段是飞书流程拼私聊消息的稳定输入; +// 3. 其它字段用于用户流程分支、SmartFlow 排障和审计。 +func BuildFeishuWebhookPayload(req FeishuSendRequest, frontendBaseURL string, sentAt time.Time) FeishuWebhookPayload { + if sentAt.IsZero() { + sentAt = time.Now() + } + summary := strings.TrimSpace(req.MessageText) + if summary == "" { + summary = "我为你生成了一份日程调整建议,请回到系统确认是否应用。" + } + return FeishuWebhookPayload{ + Event: webhookPayloadEvent, + Version: webhookPayloadVersion, + NotificationID: req.NotificationID, + UserID: req.UserID, + PreviewID: strings.TrimSpace(req.PreviewID), + TriggerID: strings.TrimSpace(req.TriggerID), + TriggerType: strings.TrimSpace(req.TriggerType), + TargetType: strings.TrimSpace(req.TargetType), + TargetID: req.TargetID, + Message: FeishuWebhookMessage{ + Title: webhookMessageTitle, + Summary: summary, + ActionText: webhookMessageActionText, + ActionURL: buildActionURL(frontendBaseURL, req.TargetURL), + }, + TraceID: strings.TrimSpace(req.TraceID), + SentAt: sentAt.Format(time.RFC3339), + } +} + +// Send 向用户配置的飞书 Webhook 触发器投递一次 SmartFlow 通知事件。 +func (p *WebhookFeishuProvider) Send(ctx context.Context, req FeishuSendRequest) (FeishuSendResult, error) { + if p == nil || p.store == nil || p.client == nil { + return FeishuSendResult{}, errors.New("webhook feishu provider 未初始化") + } + config, err := p.store.GetUserNotificationChannel(ctx, req.UserID, model.NotificationChannelFeishuWebhook) + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) 
{ + return skippedResult(req, "用户未配置飞书 Webhook 触发器"), nil + } + return FeishuSendResult{}, err + } + if config == nil || !config.Enabled || strings.TrimSpace(config.WebhookURL) == "" { + return skippedResult(req, "用户未启用飞书 Webhook 触发器"), nil + } + if err = ValidateFeishuWebhookURL(config.WebhookURL); err != nil { + return FeishuSendResult{ + Outcome: FeishuSendOutcomePermanentFail, + ErrorCode: FeishuErrorCodeInvalidURL, + ErrorMessage: err.Error(), + RequestPayload: map[string]any{ + "notification_id": req.NotificationID, + "user_id": req.UserID, + "webhook": MaskWebhookURL(config.WebhookURL), + }, + }, nil + } + + payload := BuildFeishuWebhookPayload(req, p.frontendBaseURL, p.now()) + raw, err := json.Marshal(payload) + if err != nil { + return FeishuSendResult{}, err + } + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, strings.TrimSpace(config.WebhookURL), bytes.NewReader(raw)) + if err != nil { + return permanentWebhookResult(req, payload, nil, FeishuErrorCodeInvalidURL, err.Error()), nil + } + httpReq.Header.Set("Content-Type", "application/json; charset=utf-8") + if strings.EqualFold(strings.TrimSpace(config.AuthType), model.NotificationAuthTypeBearer) && strings.TrimSpace(config.BearerToken) != "" { + httpReq.Header.Set("Authorization", "Bearer "+strings.TrimSpace(config.BearerToken)) + } + + resp, err := p.client.Do(httpReq) + if err != nil { + return temporaryWebhookResult(req, payload, nil, classifyNetworkError(err), err.Error()), nil + } + defer resp.Body.Close() + + body, readErr := io.ReadAll(io.LimitReader(resp.Body, maxWebhookResponseBodyLen)) + responsePayload := buildWebhookResponsePayload(resp.StatusCode, body, readErr) + if readErr != nil { + return temporaryWebhookResult(req, payload, responsePayload, FeishuErrorCodeNetworkError, readErr.Error()), nil + } + return classifyWebhookHTTPResult(req, payload, responsePayload, resp.StatusCode, body), nil +} + +func classifyWebhookHTTPResult(req FeishuSendRequest, payload 
FeishuWebhookPayload, responsePayload map[string]any, statusCode int, body []byte) FeishuSendResult { + if statusCode >= 200 && statusCode < 300 { + if len(strings.TrimSpace(string(body))) > 0 { + var parsed struct { + Code *int `json:"code"` + Msg string `json:"msg"` + } + if err := json.Unmarshal(body, &parsed); err == nil && parsed.Code != nil && *parsed.Code != 0 { + return permanentWebhookResult(req, payload, responsePayload, FeishuErrorCodePayloadInvalid, firstNonEmpty(parsed.Msg, fmt.Sprintf("飞书 webhook 返回 code=%d", *parsed.Code))) + } + } + return FeishuSendResult{ + Outcome: FeishuSendOutcomeSuccess, + ProviderMessageID: fmt.Sprintf("feishu_webhook_%d_%d", req.NotificationID, time.Now().UnixNano()), + RequestPayload: payload, + ResponsePayload: responsePayload, + } + } + switch { + case statusCode == http.StatusTooManyRequests: + return temporaryWebhookResult(req, payload, responsePayload, FeishuErrorCodeProviderRateLimited, fmt.Sprintf("飞书 webhook HTTP %d", statusCode)) + case statusCode >= 500: + return temporaryWebhookResult(req, payload, responsePayload, FeishuErrorCodeProvider5xx, fmt.Sprintf("飞书 webhook HTTP %d", statusCode)) + case statusCode == http.StatusUnauthorized || statusCode == http.StatusForbidden: + return permanentWebhookResult(req, payload, responsePayload, FeishuErrorCodeProviderAuthFailed, fmt.Sprintf("飞书 webhook 鉴权失败 HTTP %d", statusCode)) + default: + return permanentWebhookResult(req, payload, responsePayload, FeishuErrorCodePayloadInvalid, fmt.Sprintf("飞书 webhook HTTP %d", statusCode)) + } +} + +func skippedResult(req FeishuSendRequest, reason string) FeishuSendResult { + return FeishuSendResult{ + Outcome: FeishuSendOutcomeSkipped, + ErrorCode: FeishuErrorCodeRecipientMissing, + ErrorMessage: reason, + RequestPayload: map[string]any{ + "notification_id": req.NotificationID, + "user_id": req.UserID, + "preview_id": req.PreviewID, + }, + ResponsePayload: map[string]any{ + "skipped": true, + "reason": reason, + }, + } +} + +func 
temporaryWebhookResult(req FeishuSendRequest, payload FeishuWebhookPayload, responsePayload any, code string, message string) FeishuSendResult { + return FeishuSendResult{ + Outcome: FeishuSendOutcomeTemporaryFail, + ErrorCode: code, + ErrorMessage: message, + RequestPayload: payload, + ResponsePayload: responsePayload, + } +} + +func permanentWebhookResult(req FeishuSendRequest, payload FeishuWebhookPayload, responsePayload any, code string, message string) FeishuSendResult { + return FeishuSendResult{ + Outcome: FeishuSendOutcomePermanentFail, + ErrorCode: code, + ErrorMessage: message, + RequestPayload: payload, + ResponsePayload: responsePayload, + } +} + +func buildWebhookResponsePayload(statusCode int, body []byte, readErr error) map[string]any { + payload := map[string]any{ + "status_code": statusCode, + } + if len(body) > 0 { + payload["body"] = string(body) + } + if readErr != nil { + payload["read_error"] = readErr.Error() + } + return payload +} + +func classifyNetworkError(err error) string { + if errors.Is(err, context.DeadlineExceeded) { + return FeishuErrorCodeProviderTimeout + } + return FeishuErrorCodeNetworkError +} + +func normalizeFrontendBaseURL(value string) string { + trimmed := strings.TrimRight(strings.TrimSpace(value), "/") + if trimmed == "" { + return defaultFrontendBaseURL + } + return trimmed +} + +func buildActionURL(frontendBaseURL string, targetURL string) string { + targetURL = strings.TrimSpace(targetURL) + if strings.HasPrefix(targetURL, "https://") || strings.HasPrefix(targetURL, "http://") { + return targetURL + } + base := normalizeFrontendBaseURL(frontendBaseURL) + return base + "/" + strings.TrimLeft(targetURL, "/") +} + +// ValidateFeishuWebhookURL 校验第一版允许保存的飞书 Webhook 触发器地址。 +func ValidateFeishuWebhookURL(rawURL string) error { + parsed, err := url.Parse(strings.TrimSpace(rawURL)) + if err != nil { + return err + } + if parsed.Scheme != "https" { + return errors.New("飞书 webhook 必须使用 https") + } + host := 
strings.ToLower(parsed.Hostname()) + if host != "www.feishu.cn" && host != "feishu.cn" { + return errors.New("飞书 webhook 域名必须是 feishu.cn") + } + if !strings.HasPrefix(parsed.EscapedPath(), "/flow/api/trigger-webhook/") { + return errors.New("飞书 webhook 路径必须是 /flow/api/trigger-webhook/{key}") + } + return nil +} + +// MaskWebhookURL 对 webhook URL 做脱敏,避免接口和日志泄露完整密钥。 +func MaskWebhookURL(rawURL string) string { + trimmed := strings.TrimSpace(rawURL) + parsed, err := url.Parse(trimmed) + if err != nil || parsed.Host == "" { + return maskMiddle(trimmed) + } + parts := strings.Split(strings.Trim(parsed.Path, "/"), "/") + if len(parts) == 0 { + return parsed.Scheme + "://" + parsed.Host + } + last := parts[len(parts)-1] + parts[len(parts)-1] = maskMiddle(last) + parsed.Path = "/" + strings.Join(parts, "/") + parsed.RawQuery = "" + parsed.Fragment = "" + return parsed.String() +} + +func MaskSecret(value string) string { + return maskMiddle(strings.TrimSpace(value)) +} + +func maskMiddle(value string) string { + if value == "" { + return "" + } + runes := []rune(value) + if len(runes) <= 8 { + return "****" + } + return string(runes[:4]) + "..." 
+ string(runes[len(runes)-4:]) +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if trimmed := strings.TrimSpace(value); trimmed != "" { + return trimmed + } + } + return "" +} diff --git a/backend/routers/routers.go b/backend/routers/routers.go index 23c0320..81e5b70 100644 --- a/backend/routers/routers.go +++ b/backend/routers/routers.go @@ -116,10 +116,19 @@ func RegisterRouters(handlers *api.ApiHandlers, cache *dao.CacheDAO, userRepo *d { activeScheduleGroup.Use(middleware.JWTTokenAuth(cache), middleware.RateLimitMiddleware(limiter, 20, 1)) activeScheduleGroup.POST("/dry-run", handlers.ActiveSchedule.DryRun) + activeScheduleGroup.POST("/trigger", handlers.ActiveSchedule.Trigger) activeScheduleGroup.POST("/preview", handlers.ActiveSchedule.CreatePreview) activeScheduleGroup.GET("/preview/:preview_id", handlers.ActiveSchedule.GetPreview) activeScheduleGroup.POST("/preview/:preview_id/confirm", handlers.ActiveSchedule.ConfirmPreview) } + notificationGroup := apiGroup.Group("/notification") + { + notificationGroup.Use(middleware.JWTTokenAuth(cache), middleware.RateLimitMiddleware(limiter, 20, 1)) + notificationGroup.GET("/channels/feishu", handlers.Notification.GetFeishuWebhook) + notificationGroup.PUT("/channels/feishu", handlers.Notification.SaveFeishuWebhook) + notificationGroup.DELETE("/channels/feishu", handlers.Notification.DeleteFeishuWebhook) + notificationGroup.POST("/channels/feishu/test", handlers.Notification.TestFeishuWebhook) + } } // 初始化Gin引擎 log.Println("Routes setup completed") diff --git a/backend/service/events/active_schedule_triggered.go b/backend/service/events/active_schedule_triggered.go new file mode 100644 index 0000000..defea80 --- /dev/null +++ b/backend/service/events/active_schedule_triggered.go @@ -0,0 +1,81 @@ +package events + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + + kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka" + outboxinfra 
"github.com/LoveLosita/smartflow/backend/infra/outbox" + sharedevents "github.com/LoveLosita/smartflow/backend/shared/events" + "gorm.io/gorm" +) + +// ActiveScheduleTriggeredProcessor 描述 active_schedule.triggered worker 真正执行业务所需的最小能力。 +// +// 职责边界: +// 1. ProcessTriggeredInTx 负责事务内的 trigger -> preview -> notification 编排; +// 2. MarkTriggerFailedBestEffort 负责事务外的失败回写,避免 outbox retry 前完全没有业务态可查; +// 3. 接口本身不限定具体实现,便于迁移期由 active_scheduler 模块独立演进。 +type ActiveScheduleTriggeredProcessor interface { + ProcessTriggeredInTx(ctx context.Context, tx *gorm.DB, payload sharedevents.ActiveScheduleTriggeredPayload) error + MarkTriggerFailedBestEffort(ctx context.Context, triggerID string, err error) +} + +// RegisterActiveScheduleTriggeredHandler 注册 active_schedule.triggered outbox handler。 +// +// 步骤化说明: +// 1. 先做 envelope -> contract DTO 解析与版本校验,明显坏消息直接标记 dead; +// 2. 再通过 ConsumeAndMarkConsumed 把“业务落库 + consumed 推进”收敛在同一事务里; +// 3. 若事务返回 error,则 best-effort 回写 trigger failed,并把错误交给 outbox 做 retry; +// 4. 
这里不直接 import active_scheduler 的具体实现,避免 service/events 和业务编排层互相反向耦合。 +func RegisterActiveScheduleTriggeredHandler( + bus *outboxinfra.EventBus, + outboxRepo *outboxinfra.Repository, + processor ActiveScheduleTriggeredProcessor, +) error { + if bus == nil { + return errors.New("event bus is nil") + } + if outboxRepo == nil { + return errors.New("outbox repository is nil") + } + if processor == nil { + return errors.New("active schedule triggered processor is nil") + } + + handler := func(ctx context.Context, envelope kafkabus.Envelope) error { + if !isAllowedTriggeredEventVersion(envelope.EventVersion) { + _ = outboxRepo.MarkDead(ctx, envelope.OutboxID, fmt.Sprintf("active_schedule.triggered 版本不受支持: %s", envelope.EventVersion)) + return nil + } + + var payload sharedevents.ActiveScheduleTriggeredPayload + if unmarshalErr := json.Unmarshal(envelope.Payload, &payload); unmarshalErr != nil { + _ = outboxRepo.MarkDead(ctx, envelope.OutboxID, "解析 active_schedule.triggered 载荷失败: "+unmarshalErr.Error()) + return nil + } + if validateErr := payload.Validate(); validateErr != nil { + _ = outboxRepo.MarkDead(ctx, envelope.OutboxID, "active_schedule.triggered 载荷非法: "+validateErr.Error()) + return nil + } + + err := outboxRepo.ConsumeAndMarkConsumed(ctx, envelope.OutboxID, func(tx *gorm.DB) error { + return processor.ProcessTriggeredInTx(ctx, tx, payload) + }) + if err != nil { + processor.MarkTriggerFailedBestEffort(ctx, payload.TriggerID, err) + return err + } + return nil + } + + return bus.RegisterEventHandler(sharedevents.ActiveScheduleTriggeredEventType, handler) +} + +func isAllowedTriggeredEventVersion(version string) bool { + version = strings.TrimSpace(version) + return version == "" || version == sharedevents.ActiveScheduleTriggeredEventVersion +} diff --git a/backend/service/events/notification_feishu.go b/backend/service/events/notification_feishu.go new file mode 100644 index 0000000..9e65504 --- /dev/null +++ b/backend/service/events/notification_feishu.go @@ -0,0 
+1,105 @@ +package events + +import ( + "context" + "encoding/json" + "errors" + "log" + "strings" + + kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka" + outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox" + "github.com/LoveLosita/smartflow/backend/notification" + sharedevents "github.com/LoveLosita/smartflow/backend/shared/events" +) + +// RegisterFeishuNotificationHandler 注册 `notification.feishu.requested` 消费 handler。 +// +// 职责边界: +// 1. 只负责事件解析、协议校验、调用 NotificationService 和推进 outbox consumed; +// 2. 不承担 notification_records 状态机细节,状态流转全部下沉到 notification 模块; +// 3. 不在 handler 内部创建 provider/service,避免事件消费与 retry loop 使用两套不同配置。 +func RegisterFeishuNotificationHandler( + bus *outboxinfra.EventBus, + outboxRepo *outboxinfra.Repository, + svc *notification.NotificationService, +) error { + if bus == nil { + return errors.New("event bus is nil") + } + if outboxRepo == nil { + return errors.New("outbox repository is nil") + } + if svc == nil { + return errors.New("notification service is nil") + } + + handler := func(ctx context.Context, envelope kafkabus.Envelope) error { + // 1. 先校验 event_version,避免未来协议破坏性升级后旧 handler 误吃新消息。 + // 2. 
当前阶段只接受 v1;版本不匹配属于不可恢复协议错误,直接标记 dead。 + eventVersion := strings.TrimSpace(envelope.EventVersion) + if eventVersion != "" && eventVersion != sharedevents.NotificationFeishuRequestedEventVersion { + _ = outboxRepo.MarkDead(ctx, envelope.OutboxID, "notification.feishu.requested event_version 不匹配: "+eventVersion) + return nil + } + + var payload sharedevents.FeishuNotificationRequestedPayload + if unmarshalErr := json.Unmarshal(envelope.Payload, &payload); unmarshalErr != nil { + _ = outboxRepo.MarkDead(ctx, envelope.OutboxID, "解析 notification.feishu.requested 载荷失败: "+unmarshalErr.Error()) + return nil + } + if validateErr := payload.Validate(); validateErr != nil { + _ = outboxRepo.MarkDead(ctx, envelope.OutboxID, "notification.feishu.requested 载荷非法: "+validateErr.Error()) + return nil + } + + result, handleErr := svc.HandleFeishuRequested(ctx, payload) + if handleErr != nil { + return handleErr + } + + if consumeErr := outboxRepo.ConsumeAndMarkConsumed(ctx, envelope.OutboxID, nil); consumeErr != nil { + return consumeErr + } + + log.Printf( + "notification.feishu.requested 消费完成: outbox_id=%d notification_id=%d status=%s delivered=%t reused=%t attempt_count=%d", + envelope.OutboxID, + result.RecordID, + result.Status, + result.Delivered, + result.Reused, + result.AttemptCount, + ) + return nil + } + + return bus.RegisterEventHandler(sharedevents.NotificationFeishuRequestedEventType, handler) +} + +// PublishFeishuNotificationRequested 发布 `notification.feishu.requested` 事件。 +// +// 职责边界: +// 1. 只负责把 shared/events payload 投递到 outbox; +// 2. 不等待 provider 结果,也不提前创建 notification_records; +// 3. 
供主动调度 preview 阶段后续切入通知时直接复用。 +func PublishFeishuNotificationRequested( + ctx context.Context, + publisher outboxinfra.EventPublisher, + payload sharedevents.FeishuNotificationRequestedPayload, +) error { + if publisher == nil { + return errors.New("event publisher is nil") + } + if err := payload.Validate(); err != nil { + return err + } + + return publisher.Publish(ctx, outboxinfra.PublishRequest{ + EventType: sharedevents.NotificationFeishuRequestedEventType, + EventVersion: sharedevents.NotificationFeishuRequestedEventVersion, + MessageKey: payload.MessageKey(), + AggregateID: payload.AggregateID(), + Payload: payload, + }) +} diff --git a/docs/backend/第二阶段主动调度MVP实现方案.md b/docs/backend/第二阶段主动调度MVP实现方案.md index 41cf2f1..6747fa7 100644 --- a/docs/backend/第二阶段主动调度MVP实现方案.md +++ b/docs/backend/第二阶段主动调度MVP实现方案.md @@ -2,7 +2,7 @@ ## 0. Handoff 说明 -本文档已收口为第二阶段主动调度 MVP 的最终实施版。截至 2026-04-30,后端第一至第三阶段已实现并通过本地 API + DB 验收;接手者请优先阅读本节、第 10 章装配边界和第 14 章验证 checklist,再从第四阶段继续推进。 +本文档已收口为第二阶段主动调度 MVP 的最终实施版。截至 2026-04-30,后端第一至第四阶段主体代码已实现并通过本地 `go test ./...`;真实飞书 webhook 配置接口和 `important_urgent_task` 主动触发端到端链路已通过本地后端验收。接手者请优先阅读本节、第 10 章装配边界和第 14 章验证 checklist,再从第五阶段剩余验收继续推进。 当前核心共识: @@ -34,6 +34,7 @@ 24. `compress_with_next_dynamic_task` 第一轮实现先关闭,不生成该候选;保留 schema 和文档口径,待新增补做块主链路稳定后再打开。 25. 飞书第一版使用 mock / webhook 跑通主动触达闭环,不阻塞在用户 open_id 绑定体系上。 26. notification 去重窗口第一版固定为 30 分钟。 +27. 真实飞书第一版走“用户级 Webhook 触发器”而不是群自定义机器人协议:后端按 `user_id` 查用户配置的 webhook URL,POST 极简业务 JSON;私聊、群聊、分支和后续动作由用户在飞书流程里自行编排。 ### 0.1 多阶段推进计划 @@ -57,14 +58,15 @@ 3. task_pool 正式落库写 `schedule_events(type=task, task_source_type=task_pool, rel_id=tasks.id)`。 4. 补做块新增 event,不移动原已排任务。 -第四阶段:worker 与 notification。(待实施) +第四阶段:worker 与 notification。(主体代码已完成,真实 webhook 配置接口已验收) 1. 接入 `active_schedule.triggered` worker handler 和 due job scanner。 2. 接入 `notification.feishu.requested` handler。 -3. 先使用 mock provider,再接测试 webhook。 +3. 先使用 mock provider,再接用户级飞书 Webhook 触发器 provider。 4. 
`notification_records` 支持幂等、状态流转和 provider retry。 +5. 新增用户通知配置入口:保存 / 查询 / 删除 / 测试当前用户的飞书 webhook。 -第五阶段:端到端验收与收口。(待实施) +第五阶段:端到端验收与收口。(部分验收中) 1. 跑通 `api / worker / all` 三种启动模式。 2. 按第 14 章 checklist 验证 dry-run、trigger、preview、notification、confirm apply、失败注入。 @@ -125,6 +127,19 @@ - 已实现 preview 写入、详情查询、`apply_id + idempotency_key`、候选转换、同步 apply adapter。 - `add_task_pool_to_schedule` 已能正式写入 `schedule_events(type=task, task_source_type=task_pool, rel_id=tasks.id)` 和对应 `schedules`。 - `create_makeup` 转换与 adapter 已预留并实现基本写入路径,但尚需在第四 / 第五阶段结合正式 unfinished feedback worker 场景补端到端验收。 +4. 第四阶段:worker 与 notification 主体代码。 + - 已接入 `active_schedule.triggered` worker handler、due job scanner、`notification.feishu.requested` handler 和 notification retry loop。 + - 已新增 `backend/notification` provider / service 分层,mock provider 保留,真实投递切到用户级飞书 Webhook 触发器 provider。 + - 已新增 `user_notification_channels` model / DAO,并接入 AutoMigrate 与 `RepoManager`。 + - 已开放当前用户飞书 webhook 配置接口: + ```text + GET /api/v1/notification/channels/feishu + PUT /api/v1/notification/channels/feishu + DELETE /api/v1/notification/channels/feishu + POST /api/v1/notification/channels/feishu/test + ``` + - `cmd/start.go` 已把正式 notification service 注入为 `WebhookFeishuProvider`;测试配置接口与正式投递复用同一个 provider 实例。 + - 用户未配置或禁用 webhook 时,通知记录进入 `skipped`,不阻塞主动调度 preview 链路。 本轮实测结果: @@ -144,23 +159,38 @@ 5. 测试命令: - 已在 `backend` 目录执行 `go test ./...` 并通过。 - 已按项目规则清理根目录 `.gocache`。 +6. 
第四阶段本轮自动化结果: + - 临时新增 `backend/notification/webhook_provider_test.go` 验证 payload 拼装、飞书 webhook URL 校验与脱敏规则;测试通过后已按项目规则删除临时 `*_test.go`。 + - 已再次执行 `go test ./...` 并通过;`GOCACHE` 明确指向项目根目录 `.gocache`,命令结束后已清理。 + - 后端按最新代码启动后,已注册本地测试账号 `codex_webhook_0430_183147`(user_id=6)。 + - 已调用 `PUT /api/v1/notification/channels/feishu` 保存用户飞书 webhook;接口返回 `configured=true`、`enabled=true`、脱敏回显为 `https://www.feishu.cn/flow/api/trigger-webhook/e889...6624`。 + - 已调用 `POST /api/v1/notification/channels/feishu/test`;接口返回 `status=success`、`outcome=success`,`last_test_status=success`,`last_test_at=2026-04-30T18:31:47.885+08:00`。 +7. 第五阶段 `important_urgent_task` 端到端验收结果: + - 测试账号:`codex_e2e_0430_185311 / 123456`,当前本地环境 user_id 为 7。 + - 已保存同一个飞书 webhook 配置,创建测试任务 `task_id=82`,同步 dry-run 返回 `decision=select_candidate` 且候选数为 1。 + - 已调用 `POST /api/v1/active-schedule/trigger` 写入正式 trigger:`trigger_id=ast_39a7f87a-d037-4361-82e5-03f58e4733a3`,`trace_id=trace_api_trigger_7_1777546391942562200`。 + - worker 已生成 preview:`preview_id=asp_e6701977-aeed-4bef-9964-29d26014f73d`,`active_schedule_triggers.status=preview_generated`,`active_schedule_previews.status=ready`。 + - outbox 两段均消费成功:`active_schedule.triggered` 对应 outbox id 2986 为 `consumed`;`notification.feishu.requested` 对应 outbox id 2987 为 `consumed`。 + - notification 投递成功:`notification_records.id=2`,`status=sent`,`attempt_count=1`,`provider_message_id=feishu_webhook_2_1777546395537770600`。 + - `provider_request_json.event=smartflow.schedule_adjustment_ready`,`message.title=SmartFlow 日程调整建议`,`message.action_url=https://smartflow.example.com/schedule-adjust/asp_e6701977-aeed-4bef-9964-29d26014f73d`。 + - 飞书 webhook 响应:HTTP 200,响应体 `{"code":0,"data":{},"msg":"success"}`。 +8. 
第五阶段补充自动验收结果: + - skipped 场景:测试账号 `codex_skip_idem_0430_185759`(user_id=8)未配置 webhook,正式 trigger `ast_da60cd1c-1909-4855-ad5d-53125b19fb76` 生成 preview `asp_9e5c9c46-3460-4065-a2b8-1d531cf0c8aa`;`notification_records.id=3` 进入 `skipped`,`last_error_code=recipient_missing`,两段 outbox 均为 `consumed`。 + - trigger 幂等:同一账号、同一 task、同一 `idempotency_key` 重复调用 `POST /api/v1/active-schedule/trigger`,第二次返回同一个 trigger_id,`dedupe_hit=true`。 + - confirm apply 成功与幂等:对 preview `asp_e6701977-aeed-4bef-9964-29d26014f73d` 确认 candidate `add_task_pool_to_schedule:82:9:4:3`,生成 `apply_id=asap_039719fda4f2ae75f1d3d1fe`、`schedule_events.id=2488`、`schedules.id=5177`;同一幂等键重复确认返回同一个 apply_id 和 event_id,DB 中该 preview 只落 1 条正式事件。 + - `unfinished_feedback` 端到端:基于 `schedule_events.id=2488` 触发 `unfinished_feedback`,trigger `ast_25aced9e-554a-4021-9075-7166cf268480` 生成补做块 preview `asp_555e4cb9-b3c4-4e5e-8830-bd271c99e346`;`notification_records.id=4` 为 `sent`,飞书 webhook HTTP 200,响应体 `{"code":0,"data":{},"msg":"success"}`。 + - failed 场景:测试账号 `codex_fail_0430_190101`(user_id=10)配置 `https://www.feishu.cn:81/...` 不可达端口,trigger `ast_cd8b2de9-d836-4470-ad6a-c02c32142274` 生成 preview `asp_e5db98b2-b6bc-4683-8664-ae3d7eb76c25`;`notification_records.id=6` 进入 `failed`,`last_error_code=provider_timeout`,并写入 `next_retry_at`。 + - retry loop 恢复:将 `notification_records.id=6` 对应用户 webhook 改回真实地址并把 `next_retry_at` 调到当前时间,后台 retry loop 自动重试后该记录变为 `sent`,最终 `attempt_count=3`,HTTP 200。 + - dead 场景:测试账号 `codex_dead_runtime_0430_190150`(user_id=11)通过 DB 注入非法 `http://` webhook URL,trigger `ast_fc162833-7223-4aba-89c9-194ecdfbcf40` 生成 preview `asp_731f7cb2-c5dd-4629-83cd-627bec901e30`;`notification_records.id=7` 进入 `dead`,`last_error_code=invalid_url`,`next_retry_at=NULL`。 + - api-only 启动边界:仅启动 API 后,健康检查通过;测试账号 `codex_api_mode_0430_190708`(user_id=12)创建任务 `task_id=87` 并调用正式 trigger,得到 `trigger_id=ast_b48c955f-dcb3-4e87-a296-fd98583e4807`、`status=pending`。等待 6 秒后 DB 确认 `active_schedule_triggers.status=pending`、preview 数为 
0、notification 数为 0、outbox id 3008 为 `active_schedule.triggered / pending`,证明 API 模式只写入 outbox,不启动 worker 消费。 + - worker-only 启动边界:仅启动 worker 后,HTTP 健康检查超时,符合“不注册 API 路由”预期;worker 消费 api-only 留下的 outbox id 3008,`active_schedule_triggers.status=preview_generated`,生成 preview `asp_badb4be4-cf2c-4f9b-9719-cbe92f50abed`,`notification_records.id=8` 因该用户未配置 webhook 进入 `skipped`,`notification.feishu.requested` outbox id 3009 为 `consumed`。 下一阶段入口: -1. 第四阶段从 worker 与 notification 开始,不需要重做 dry-run / preview / confirm 主链路。 -2. 重点实现: - - `active_schedule.triggered` worker handler。 - - due job scanner:扫描到期 `active_schedule_jobs`,生成正式 trigger。 - - `notification_records` 状态机与 repo。 - - `notification.feishu.requested` handler。 - - 飞书 mock / webhook provider,通知链接固定为 `/schedule-adjust/{preview_id}`。 - - LLM summary 优先,固定模板 fallback。 - - 通知去重窗口固定 30 分钟,按 `user_id + trigger_type + time_window` 聚合。 -3. 第五阶段再做完整端到端收口: - - `api / worker / all` 三种启动方式。 - - `important_urgent_task` 与 `unfinished_feedback` 两条主触发。 - - notification 成功 / 失败 / 重试。 - - confirm apply 成功、冲突失败、过期拒绝、重复提交幂等。 +1. 下一步继续第五阶段剩余验收,不需要重做 dry-run / preview / confirm 主链路,也不需要重做第四阶段 provider / handler 主体代码。 +2. 第五阶段剩余重点: + - confirm apply 冲突失败、过期拒绝。 + - 更完整的边界清理:测试数据隔离策略、失败注入脚本化、前端真实地址替换 `smartflow.example.com`。 4. 工作区注意: - 另一个前端对话可能在改前端;后端阶段不要碰 `frontend` 相关改动。 - 当前允许单个 Go 文件 700 行以内;超过 700 再评估拆分。 @@ -4321,6 +4351,99 @@ updated_at 4. 若同一 `dedupe_key` 已存在 `pending / sending / sent` 记录,应避免重复创建新通知;如果上一条是 `failed`,可按重试策略推进,而不是新建多条相同飞书。 5. 
记录表只负责通知投递状态,不负责 apply 状态;apply 状态仍属于 `active_schedule_previews`。 +### 13.20.1 飞书 Webhook 触发器与多用户配置 + +本轮真实飞书接入不使用群自定义机器人 `msg_type=text/post` 协议,而是使用飞书 Webhook 触发器。后端只负责把“SmartFlow 生成了一条日程调整建议”这个业务事实 POST 给用户配置的 webhook;飞书侧如何私聊、群发、分支、追加查询或调用其它流程,全部由用户在飞书工作流中编排。 + +用户级配置表建议: + +```text +user_notification_channels +- id +- user_id +- channel # feishu_webhook +- enabled +- webhook_url # 用户复制的飞书 Webhook 触发器 URL +- auth_type # none / bearer +- bearer_token # 可选;飞书触发器启用 Bearer Token 时使用 +- last_test_status # success / failed +- last_test_error +- last_test_at +- created_at +- updated_at +``` + +管理接口建议: + +```text +GET /api/v1/notification/channels/feishu +PUT /api/v1/notification/channels/feishu +DELETE /api/v1/notification/channels/feishu +POST /api/v1/notification/channels/feishu/test +``` + +接口语义: + +1. `PUT` 保存当前用户的 webhook 配置;`webhook_url` 必须是 HTTPS URL,域名第一版限制为 `www.feishu.cn` 或 `feishu.cn`。 +2. `GET` 返回当前用户配置状态,`webhook_url / bearer_token` 只允许脱敏回显。 +3. `DELETE` 关闭并软删除当前用户飞书通知配置。 +4. `test` 使用同一套 provider 发送测试 JSON,并把 `last_test_status / last_test_error / last_test_at` 写回配置表。 +5. 未配置或未启用时,真实通知不报错阻断主链路,`notification_records.status` 记为 `skipped`,表示当前用户没有启用飞书触达。 + +发送给飞书 Webhook 触发器的业务 JSON 固定从简: + +```json +{ + "event": "smartflow.schedule_adjustment_ready", + "version": "1", + "notification_id": 123, + "user_id": 5, + "preview_id": "asp_xxx", + "trigger_id": "ast_xxx", + "trigger_type": "important_urgent_task", + "target_type": "task_pool", + "target_id": 81, + "message": { + "title": "SmartFlow 日程调整建议", + "summary": "把重要且紧急任务放入滚动 24 小时内的空闲节次。", + "action_text": "查看并确认调整", + "action_url": "https://smartflow.example.com/schedule-adjust/asp_xxx" + }, + "trace_id": "trace_xxx", + "sent_at": "2026-04-30T17:34:52+08:00" +} +``` + +拼装规则: + +1. `message.title` 固定为 `SmartFlow 日程调整建议`。 +2. `message.summary` 优先使用 preview 的 `notification_summary`,为空时使用 notification fallback 文案。 +3. `message.action_text` 固定为 `查看并确认调整`。 +4. 
`message.action_url` 使用 `frontend_base_url + target_url`;若 `target_url` 已经是完整 HTTP(S) URL,则直接使用。 +5. 其它字段只做飞书流程编排、排障和审计,不要求用户流程全部使用。 + +飞书侧推荐消息模板: + +```text +{{message.title}} + +{{message.summary}} {{message.action_text}} +{{message.action_url}} +``` + +真实 provider 状态映射: + +1. HTTP 2xx 且响应体 `code=0` 或响应体为空:视为成功,`notification_records.status=sent`。 +2. 网络错误、超时、HTTP 429、HTTP 5xx:视为临时失败,进入 `failed` 并按现有 retry loop 重试。 +3. 未配置、未启用:视为 `skipped`,不重试。 +4. URL 非法、HTTP 401 / 403 或飞书明确返回鉴权失败:视为不可恢复失败,进入 `dead`,不再自动重试。 + +安全约束: + +1. webhook URL 本身等同密钥,接口和日志必须脱敏,禁止完整回显。 +2. bearer token 同样禁止完整回显;后续若引入统一密钥加密能力,再把明文存储替换为加密存储。 +3. 测试接口可以暴露成功 / 失败分类,但不能把完整 webhook 或 token 打到响应和日志里。 + ### 13.21 为什么事件契约要提前独立 事件契约可以理解为异步消息世界里的 IDL。Thrift / gRPC 描述同步 RPC 的请求、响应和字段语义;事件契约描述某个业务事实或异步动作的事件名、版本、payload、幂等键和消费语义。 @@ -4459,6 +4582,7 @@ active_schedule_jobs active_schedule_triggers active_schedule_previews notification_records +user_notification_channels outbox / event bus 消费状态 schedule_events schedules @@ -4530,6 +4654,8 @@ tasks | 飞书 provider 返回临时失败 | `notification_records.status=failed`,递增 `attempt_count`,写 `last_error / next_retry_at` | | 重试到达上限或不可恢复错误 | `notification_records.status=dead`,不再自动重试 | | 同一 `user_id + trigger_type + time_window` 内重复通知 | 命中 `dedupe_key`,不重复创建多条待发送通知 | +| 用户未配置或禁用飞书 webhook | `notification_records.status=skipped`,不重试,不影响 preview 查询 | +| 调用飞书 webhook 测试接口 | 写入 / 更新 `user_notification_channels.last_test_status / last_test_at`,飞书流程收到极简 JSON | ### 14.8 confirm apply checklist