后端: 1. 阶段 3 notification 服务边界落地,新增 `cmd/notification`、`services/notification`、`gateway/notification`、`shared/contracts/notification` 和 notification port,按 userauth 同款最小手搓 zrpc 样板收口 2. notification outbox consumer、relay 和 retry loop 迁入独立服务入口,处理 `notification.feishu.requested`,gateway 改为通过 zrpc client 调用 notification 3. 清退旧单体 notification DAO/model/service/provider/runner 和 `service/events/notification_feishu.go`,旧实现不再作为活跃编译路径 4. 修复 outbox 路由归属、dispatch 启动扫描、Kafka topic 探测/投递超时、sending 租约恢复、毒消息 MarkDead 错误回传和 RPC timeout 边界 5. 同步调整 active-scheduler 触发通知事件、核心 outbox handler、MySQL 迁移边界和 notification 配置 文档: 1. 更新微服务迁移计划,将阶段 3 notification 标记为已完成,并明确下一阶段从 active-scheduler 开始
84 lines
2.3 KiB
Go
package kafka
|
||
|
||
import (
|
||
"context"
|
||
"errors"
|
||
"fmt"
|
||
"time"
|
||
|
||
segmentkafka "github.com/segmentio/kafka-go"
|
||
)
|
||
|
||
// WaitTopicReady 在指定超时时间内等待 Kafka topic 可用。
|
||
// 背景:初次部署时 broker 可能已启动,但 topic/partition 还没就绪。
|
||
// 这里启动前先探测,可减少“应用已启动但实际无法消费”的静默窗口。
|
||
func WaitTopicReady(parent context.Context, brokers []string, topic string, timeout time.Duration) error {
|
||
if len(brokers) == 0 {
|
||
return errors.New("kafka brokers is empty")
|
||
}
|
||
if topic == "" {
|
||
return errors.New("kafka topic is empty")
|
||
}
|
||
if timeout <= 0 {
|
||
timeout = 30 * time.Second
|
||
}
|
||
|
||
ctx, cancel := context.WithTimeout(parent, timeout)
|
||
defer cancel()
|
||
|
||
ticker := time.NewTicker(1 * time.Second)
|
||
defer ticker.Stop()
|
||
|
||
var lastErr error
|
||
for {
|
||
if err := probeTopic(ctx, brokers, topic); err == nil {
|
||
return nil
|
||
} else {
|
||
lastErr = err
|
||
}
|
||
|
||
select {
|
||
case <-ctx.Done():
|
||
if lastErr != nil {
|
||
return fmt.Errorf("wait topic ready timeout, topic=%s: %w", topic, lastErr)
|
||
}
|
||
return fmt.Errorf("wait topic ready timeout, topic=%s", topic)
|
||
case <-ticker.C:
|
||
}
|
||
}
|
||
}
|
||
|
||
// probeTopic 轮询所有 broker,只要任一 broker 能读到 topic 分区信息即视为就绪。
|
||
func probeTopic(ctx context.Context, brokers []string, topic string) error {
|
||
var lastErr error
|
||
for _, broker := range brokers {
|
||
conn, err := segmentkafka.DialContext(ctx, "tcp", broker)
|
||
if err != nil {
|
||
lastErr = err
|
||
continue
|
||
}
|
||
|
||
// 1. segmentio/kafka-go 的 ReadPartitions 不直接接收 context。
|
||
// 2. 这里必须给底层连接设置 I/O deadline,避免 broker 已接受连接但 metadata 响应卡住时,
|
||
// 上层 WaitTopicReady 永远阻塞,导致 outbox dispatch / consume 循环无法启动。
|
||
// 3. deadline 命中后本轮探测失败,外层 ticker 会继续重试直到总 timeout 到期。
|
||
_ = conn.SetDeadline(time.Now().Add(2 * time.Second))
|
||
partitions, readErr := conn.ReadPartitions(topic)
|
||
_ = conn.Close()
|
||
if readErr != nil {
|
||
lastErr = readErr
|
||
continue
|
||
}
|
||
if len(partitions) == 0 {
|
||
lastErr = fmt.Errorf("topic %s has no partitions yet", topic)
|
||
continue
|
||
}
|
||
return nil
|
||
}
|
||
|
||
if lastErr != nil {
|
||
return lastErr
|
||
}
|
||
return errors.New("unable to probe topic readiness")
|
||
}
|