Files
smartmate/backend/active_scheduler/job/scanner.go
Losita 0a014f7472 Version: 0.9.60.dev.260430
后端:
1.接入主动调度 worker 与飞书通知链路
- 新增 due job scanner 与 active_schedule.triggered workflow
- 接入 notification.feishu.requested handler、飞书 webhook provider 和用户通知配置接口
- 支持 notification_records 去重、重试、skipped/dead 状态流转
- 完成 api / worker / all 启动模式装配与主动调度验收记录
2.后续要做的就是补全从异常发生到给用户推送消息之间的逻辑缺口
2026-04-30 23:45:27 +08:00

270 lines
8.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package job
import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"time"
"github.com/LoveLosita/smartflow/backend/active_scheduler/ports"
activesvc "github.com/LoveLosita/smartflow/backend/active_scheduler/service"
"github.com/LoveLosita/smartflow/backend/active_scheduler/trigger"
"github.com/LoveLosita/smartflow/backend/dao"
"github.com/LoveLosita/smartflow/backend/model"
)
const (
// defaultScanLimit is the batch size NewScanner falls back to when
// ScannerOptions.Limit is zero or negative.
defaultScanLimit = 50
)
// Scanner scans due active_schedule_jobs and turns them into formal triggers.
//
// Responsibility boundaries:
// 1. It only handles due job -> trigger; it does not run dry-runs, write previews, or send notifications.
// 2. Every scan must re-read the current task and schedule state so that a stale job is not triggered by mistake.
// 3. Jobs that are completed, already scheduled, or no longer eligible only get their job status updated; they are never physically deleted.
type Scanner struct {
// activeDAO lists due jobs and persists job field updates.
activeDAO *dao.ActiveScheduleDAO
// taskReader re-reads the task behind a job before it is triggered.
taskReader ports.TaskReader
// scheduleReader supplies the schedule facts used to detect already-scheduled tasks.
scheduleReader ports.ScheduleReader
// triggerService creates and publishes the trigger for a qualifying job.
triggerService *activesvc.TriggerService
// clock is the time source used by Start; now() falls back to time.Now when it is nil.
clock func() time.Time
// limit is the batch size passed to ListDueJobs on each scan.
limit int
// scanEvery is the ticker interval used by Start.
scanEvery time.Duration
}
// ScannerOptions carries the optional tuning knobs accepted by NewScanner.
// Every field may be left at its zero value; NewScanner then applies a default.
type ScannerOptions struct {
// Limit is the per-scan batch size; values <= 0 are replaced by defaultScanLimit.
Limit int
// ScanEvery is the interval between scans; values <= 0 are replaced by one minute.
ScanEvery time.Duration
// Clock is the time source; nil is replaced by time.Now.
Clock func() time.Time
}
// ScanResult summarises one ScanDue round.
type ScanResult struct {
// Scanned is the number of due jobs returned by the DAO for this round.
Scanned int
// Triggered counts jobs that ended in the triggered status without an error.
Triggered int
// Skipped counts jobs that were processed without an error but ended in any
// status other than triggered.
Skipped int
// Failed counts jobs whose processing returned an error.
Failed int
}
// NewScanner builds a Scanner from its dependencies and options.
//
// The DAO, both readers and the trigger service are mandatory; a nil value for
// any of them yields an error. Non-positive Limit and ScanEvery and a nil
// Clock in options are replaced by defaultScanLimit, one minute and time.Now
// respectively.
func NewScanner(activeDAO *dao.ActiveScheduleDAO, readers ports.Readers, triggerService *activesvc.TriggerService, options ScannerOptions) (*Scanner, error) {
	// Validate dependencies in a fixed order so the first missing one is reported.
	switch {
	case activeDAO == nil:
		return nil, errors.New("active schedule dao 不能为空")
	case readers.TaskReader == nil:
		return nil, errors.New("TaskReader 不能为空")
	case readers.ScheduleReader == nil:
		return nil, errors.New("ScheduleReader 不能为空")
	case triggerService == nil:
		return nil, errors.New("trigger service 不能为空")
	}

	scanner := &Scanner{
		activeDAO:      activeDAO,
		taskReader:     readers.TaskReader,
		scheduleReader: readers.ScheduleReader,
		triggerService: triggerService,
		clock:          options.Clock,
		limit:          options.Limit,
		scanEvery:      options.ScanEvery,
	}

	// Fill in defaults for anything the caller left unset or invalid.
	if scanner.limit <= 0 {
		scanner.limit = defaultScanLimit
	}
	if scanner.scanEvery <= 0 {
		scanner.scanEvery = time.Minute
	}
	if scanner.clock == nil {
		scanner.clock = time.Now
	}
	return scanner, nil
}
// Start launches the periodic due-job scan in a background goroutine.
//
// Notes:
//  1. It is meant to be called in worker/all mode; api mode does not start it,
//     so the API process carries no background duties.
//  2. A failed scan round is only logged; the next tick scans again.
//  3. The goroutine exits once ctx is cancelled.
//
// A nil receiver is a no-op. A non-positive scan interval (possible on a
// Scanner that was not built by NewScanner) falls back to one minute, because
// time.NewTicker panics on such values.
func (s *Scanner) Start(ctx context.Context) {
	if s == nil {
		return
	}
	interval := s.scanEvery
	if interval <= 0 {
		interval = time.Minute
	}
	go func() {
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				result, err := s.ScanDue(ctx, s.now())
				if err != nil {
					log.Printf("主动调度 due job 扫描失败: err=%v", err)
					continue
				}
				// Stay quiet on empty rounds to avoid one log line per tick.
				if result.Scanned > 0 {
					log.Printf("主动调度 due job 扫描完成: scanned=%d triggered=%d skipped=%d failed=%d", result.Scanned, result.Triggered, result.Skipped, result.Failed)
				}
			}
		}
	}()
}
// ScanDue lists one batch of jobs that are due as of now and processes them
// one by one.
//
// A failure on a single job is logged and counted in ScanResult.Failed; it
// does not abort the batch. An error is returned when the scanner is not
// initialised, when listing the due jobs fails, or when ctx is cancelled
// before the batch is finished. In the cancellation case the counters
// gathered so far are returned together with ctx.Err().
func (s *Scanner) ScanDue(ctx context.Context, now time.Time) (ScanResult, error) {
	if s == nil || s.activeDAO == nil {
		return ScanResult{}, errors.New("scanner 未初始化")
	}
	jobs, err := s.activeDAO.ListDueJobs(ctx, now, s.limit)
	if err != nil {
		return ScanResult{}, err
	}
	result := ScanResult{Scanned: len(jobs)}
	for _, item := range jobs {
		// Stop as soon as the context is cancelled: running the remaining jobs
		// with a dead context would only produce spurious per-job failures.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return result, ctxErr
		}
		handled, handleErr := s.processJob(ctx, item, now)
		switch {
		case handleErr != nil:
			result.Failed++
			log.Printf("主动调度 due job 处理失败: job_id=%s err=%v", item.ID, handleErr)
		case handled == model.ActiveScheduleJobStatusTriggered:
			result.Triggered++
		default:
			// Every other error-free outcome is reported as skipped.
			result.Skipped++
		}
	}
	return result, nil
}
// processJob re-validates one due job against the current task and schedule
// state and, if the job still qualifies, publishes a trigger for it.
//
// The returned string is the status the job was moved to (skipped, canceled,
// pending or triggered), paired with any error from persisting that status.
// When reading the task, reading the schedule, encoding the payload or
// publishing the trigger fails, the job is marked failed and the status is "".
// The returned error then wraps the original cause; if marking the job failed
// also errors, that second error is appended to the message.
func (s *Scanner) processJob(ctx context.Context, item model.ActiveScheduleJob, now time.Time) (string, error) {
	// fail records the failure on the job and returns the original cause. An
	// error from the bookkeeping write is attached instead of being dropped.
	fail := func(code string, cause error) (string, error) {
		if markErr := s.markJobFailed(ctx, item.ID, code, cause, now); markErr != nil {
			return "", fmt.Errorf("%w (marking job as failed also errored: %v)", cause, markErr)
		}
		return "", cause
	}

	task, found, err := s.taskReader.GetTaskForActiveSchedule(ctx, ports.TaskRequest{
		UserID: item.UserID,
		TaskID: item.TaskID,
		Now:    now,
	})
	if err != nil {
		return fail("task_read_failed", err)
	}
	if !found {
		return model.ActiveScheduleJobStatusSkipped, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusSkipped, "task_not_found", now)
	}
	if task.IsCompleted {
		return model.ActiveScheduleJobStatusCanceled, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusCanceled, "task_completed", now)
	}
	if task.UrgencyThresholdAt == nil {
		// 1. A due scan must re-read the task's current state.
		// 2. If upstream has removed urgency_threshold_at, this due job no
		//    longer meets the precondition for triggering.
		// 3. Converge straight to canceled so no erroneous trigger is produced.
		return model.ActiveScheduleJobStatusCanceled, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusCanceled, "task_not_schedulable", now)
	}
	// UrgencyThresholdAt is known to be non-nil from here on.
	if task.UrgencyThresholdAt.After(now) {
		// The threshold moved into the future: keep the job and re-arm it at
		// the new time instead of triggering now.
		return model.ActiveScheduleJobStatusPending, s.activeDAO.UpdateJobFields(ctx, item.ID, map[string]any{
			"trigger_at":      *task.UrgencyThresholdAt,
			"last_error_code": "threshold_moved_future",
			"last_scanned_at": &now,
		})
	}
	// Only priorities 1 and 2 qualify; any other priority is skipped as not important.
	if task.Priority != 1 && task.Priority != 2 {
		return model.ActiveScheduleJobStatusSkipped, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusSkipped, "task_not_important", now)
	}

	alreadyScheduled, err := s.isTaskAlreadyScheduled(ctx, item.UserID, item.TaskID, now)
	if err != nil {
		return fail("schedule_read_failed", err)
	}
	if alreadyScheduled {
		return model.ActiveScheduleJobStatusSkipped, s.markJobSkipped(ctx, item.ID, model.ActiveScheduleJobStatusSkipped, "task_already_scheduled", now)
	}

	// NOTE(review): the payload carries the job's stored TriggerAt rather than
	// the freshly read task.UrgencyThresholdAt — confirm this is intended.
	payload := struct {
		JobID              string    `json:"job_id"`
		UrgencyThresholdAt time.Time `json:"urgency_threshold_at"`
	}{
		JobID:              item.ID,
		UrgencyThresholdAt: item.TriggerAt,
	}
	// json.Marshal can fail for a time.Time it cannot encode (for example a
	// year outside 0..9999); never publish a trigger with an empty payload.
	rawPayload, err := json.Marshal(payload)
	if err != nil {
		return fail("payload_marshal_failed", err)
	}

	jobID := item.ID
	resp, err := s.triggerService.CreateAndPublish(ctx, activesvc.TriggerRequest{
		UserID:      item.UserID,
		TriggerType: trigger.TriggerTypeImportantUrgentTask,
		Source:      trigger.SourceWorkerDueJob,
		TargetType:  trigger.TargetTypeTaskPool,
		TargetID:    item.TaskID,
		DedupeKey:   item.DedupeKey,
		RequestedAt: now,
		Payload:     rawPayload,
		JobID:       &jobID,
		TraceID:     firstNonEmpty(item.TraceID, fmt.Sprintf("trace_active_job_%s", item.ID)),
	})
	if err != nil {
		return fail("trigger_publish_failed", err)
	}

	// Record the successful trigger and clear any previous error information.
	return model.ActiveScheduleJobStatusTriggered, s.activeDAO.UpdateJobFields(ctx, item.ID, map[string]any{
		"status":          model.ActiveScheduleJobStatusTriggered,
		"last_trigger_id": &resp.TriggerID,
		"last_error_code": nil,
		"last_error":      nil,
		"last_scanned_at": &now,
	})
}
// isTaskAlreadyScheduled queries the schedule reader for the window from now
// to now+24h and returns the TargetAlreadyScheduled fact it reports for the
// given task-pool target. A reader error is returned as (false, err).
func (s *Scanner) isTaskAlreadyScheduled(ctx context.Context, userID int, taskID int, now time.Time) (bool, error) {
	window := ports.ScheduleWindowRequest{
		UserID:      userID,
		TargetType:  string(trigger.TargetTypeTaskPool),
		TargetID:    taskID,
		WindowStart: now,
		WindowEnd:   now.Add(24 * time.Hour),
		Now:         now,
	}
	facts, readErr := s.scheduleReader.GetScheduleFactsByWindow(ctx, window)
	if readErr != nil {
		return false, readErr
	}
	return facts.TargetAlreadyScheduled, nil
}
// markJobSkipped moves the job to the given status, stores code as the
// last_error_code, clears last_error, and stamps last_scanned_at with now.
func (s *Scanner) markJobSkipped(ctx context.Context, jobID string, status string, code string, now time.Time) error {
	scannedAt := &now
	fields := map[string]any{
		"status":          status,
		"last_error_code": code,
		"last_error":      nil,
		"last_scanned_at": scannedAt,
	}
	return s.activeDAO.UpdateJobFields(ctx, jobID, fields)
}
// markJobFailed moves the job to the failed status, storing code and the text
// of err (an empty string when err is nil) and stamping last_scanned_at with
// now. The returned error is the one from the DAO update.
func (s *Scanner) markJobFailed(ctx context.Context, jobID string, code string, err error, now time.Time) error {
	var message string
	if err != nil {
		message = err.Error()
	}
	scannedAt := &now
	fields := map[string]any{
		"status":          model.ActiveScheduleJobStatusFailed,
		"last_error_code": code,
		"last_error":      &message,
		"last_scanned_at": scannedAt,
	}
	return s.activeDAO.UpdateJobFields(ctx, jobID, fields)
}
// now returns the current time from the configured clock, falling back to
// time.Now when the receiver or its clock is nil.
func (s *Scanner) now() time.Time {
	if s != nil && s.clock != nil {
		return s.clock()
	}
	return time.Now()
}
// firstNonEmpty returns the first argument that is not the empty string, or
// "" when there is none (including when called with no arguments).
func firstNonEmpty(values ...string) string {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; len(candidate) > 0 {
			return candidate
		}
	}
	return ""
}