Files
smartmate/backend/infra/rag/core/pipeline.go
Losita bf1f1defa5 Version: 0.9.14.dev.260410
后端:
  1. LLM 客户端从 newAgent/llm 提升为 infra/llm 基础设施层
     - 删除 backend/newAgent/llm/(ark.go / ark_adapter.go / client.go / json.go)
     - 等价迁移至 backend/infra/llm/,所有 newAgent node 与 service 统一改引用 infrallm
     - 消除 newAgent 对模型客户端的私有依赖,为 memory / websearch 等多模块复用铺路
  2. RAG 基础设施完成可运行态接入(factory / runtime / observer / service 四层成型)
     - 新建 backend/infra/rag/factory.go / runtime.go / observe.go / observer.go /
  service.go:工厂创建、运行时生命周期、轻量观测接口、检索服务门面
     - 更新 infra/rag/config/config.go:补齐 Milvus / Embed / Reranker 全部配置项与默认值
     - 更新 infra/rag/embed/eino_embedder.go:增强 Eino embedding 适配,支持 BaseURL / APIKey 环境变量 / 超时 /
  维度等参数
     - 更新 infra/rag/store/milvus_store.go:完整实现 Milvus 向量存储(建集合 / 建 Index / Upsert / Search /
  Delete),支持 COSINE / L2 / IP 度量
     - 更新 infra/rag/core/pipeline.go:适配 Runtime 接口,Pipeline 由 factory 注入而非手动拼装
     - 更新 infra/rag/corpus/memory_corpus.go / vector_store.go:对接 Memory 模块数据源与 Store 接口扩展
  3. Memory 模块从 Day1 骨架升级为 Day2 完整可运行态
     - 新建 memory/module.go:统一门面 Module,对外封装 EnqueueExtract / ReadService / ManageService / WithTx /
  StartWorker,启动层只依赖这一个入口
     - 新建 memory/orchestrator/llm_write_orchestrator.go:LLM 驱动的记忆抽取编排器,替代原 mock 抽取
     - 新建 memory/service/read_service.go:按用户开关过滤 + 轻量重排 + 访问时间刷新的读取链路
     - 新建 memory/service/manage_service.go:记忆管理面能力(列出 / 软删除 / 开关读写),删除同步写审计日志
     - 新建 memory/service/common.go:服务层公共工具
     - 新建 memory/worker/loop.go:后台轮询循环 RunPollingLoop,定时抢占 pending 任务并推进
     - 新建 memory/utils/audit.go / settings.go:审计日志构造、用户设置过滤等纯函数
     - 更新 memory/model/item.go / job.go / settings.go / config.go / status.go:补齐 DTO 字段与状态常量
     - 更新 memory/repo/item_repo.go / job_repo.go / audit_repo.go / settings_repo.go:补齐 CRUD 与查询能力
     - 更新 memory/worker/runner.go:Runner 对接 Module 与 LLM 抽取器,任务状态机完整化
     - 更新 memory/README.md:同步模块现状说明
  4. newAgent 接入 Memory 读取注入与工具注册依赖预埋
     - 新建 service/agentsvc/agent_memory.go:定义 MemoryReader 接口 + injectMemoryContext,在 graph
  执行前统一补充记忆上下文
     - 更新 service/agentsvc/agent.go:新增 memoryReader 字段与 SetMemoryReader 方法
     - 更新 service/agentsvc/agent_newagent.go:调用 injectMemoryContext 注入 pinned block,检索失败仅降级不阻断主链路
     - 更新 newAgent/tools/registry.go:新增 DefaultRegistryDeps(含 RAGRuntime),工具注册表支持依赖注入
  5. 启动流程与事件处理器接线更新
     - 更新 cmd/start.go:初始化 RAG Runtime → Memory Module → 注册事件处理器 → 启动 Worker 后台轮询
     - 更新 service/events/memory_extract_requested.go:改用 memory.Module.WithTx(tx) 统一门面,事件处理器不再直接依赖
  repo/service 内部包
  6. 缓存插件与配置同步
     - 更新 middleware/cache_deleter.go:静默忽略 MemoryJob / MemoryItem / MemoryAuditLog / MemoryUserSetting
  等新模型,避免日志刷屏;清理冗余注释
     - 更新 config.example.yaml:补齐 rag / memory / websearch 配置段及默认值
     - 更新 go.mod / go.sum:新增 eino-ext/openai / json-patch / go-openai 依赖
  前端:无 仓库:无
2026-04-10 23:17:38 +08:00

319 lines
7.7 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package core
import (
"context"
"errors"
"fmt"
"log"
"strings"
"time"
)
// Fallback values applied when a caller leaves the matching option unset or
// supplies an invalid value.
const (
// defaultTopK is the result count used by Retrieve when the request's TopK is <= 0.
defaultTopK = 8
// defaultThreshold is the minimum score used by Retrieve when the request's
// Threshold is negative; 0 means no candidate with a non-negative score is dropped.
defaultThreshold = 0
// defaultChunkSize replaces a non-positive ChunkSize in normalizeChunkOption.
// NOTE(review): the unit (characters vs. tokens) is decided by the Chunker — confirm.
defaultChunkSize = 400
// defaultChunkOvLap replaces a ChunkOverlap that is >= ChunkSize in
// normalizeChunkOption, provided it is itself smaller than ChunkSize.
defaultChunkOvLap = 80
)
// Pipeline is the RAG core orchestrator.
//
// Responsibilities:
// 1. Drives the unified chunk / embed / retrieve / rerank flow;
// 2. Owns the failure-degradation semantics (e.g. rerank fallback);
// 3. Carries no business-specific semantics (those come from CorpusAdapter).
type Pipeline struct {
// chunker splits each source document into chunks during ingestion.
chunker Chunker
// embedder turns chunk texts and queries into vectors.
embedder Embedder
// store persists vector rows (Upsert) and serves similarity search (Search).
store VectorStore
// reranker is optional; when nil, Retrieve returns the store ordering as-is.
reranker Reranker
// logger is only used for the rerank fallback message when observer is nil.
logger *log.Logger
// observer receives structured events such as the rerank fallback warning.
observer Observer
}
// NewPipeline assembles a Pipeline from the supplied components.
//
// The logger starts as the standard library's default logger and the observer
// as a no-op observer; use SetLogger / SetObserver to replace them. None of the
// arguments are validated here: nil dependencies are reported later by
// Ingest / IngestDocuments / Retrieve.
func NewPipeline(chunker Chunker, embedder Embedder, store VectorStore, reranker Reranker) *Pipeline {
	p := new(Pipeline)
	p.chunker, p.embedder = chunker, embedder
	p.store, p.reranker = store, reranker
	p.logger = log.Default()
	p.observer = NewNopObserver()
	return p
}
// SetLogger replaces the logger used by the Pipeline.
//
// The call is a no-op when the receiver or the given logger is nil, so the
// previously configured logger is kept in that case.
func (p *Pipeline) SetLogger(logger *log.Logger) {
	if p != nil && logger != nil {
		p.logger = logger
	}
}
// SetObserver replaces the observer used by the Pipeline.
//
// The call is a no-op when the receiver or the given observer is nil, so the
// previously configured observer is kept in that case.
func (p *Pipeline) SetObserver(observer Observer) {
	if p != nil && observer != nil {
		p.observer = observer
	}
}
// Ingest runs the unified ingestion flow for a raw business input.
//
// Steps:
//  1. The CorpusAdapter converts the input into standard documents so that
//     every corpus enters the pipeline in the same shape;
//  2. The documents are handed to IngestDocuments, which chunks, embeds and
//     upserts them.
//
// It returns ErrNilDependency when the pipeline or its chunker, embedder or
// store is nil, an error when corpus is nil, and otherwise propagates any
// error from the adapter or from IngestDocuments unchanged so the caller can
// decide whether to retry.
func (p *Pipeline) Ingest(
	ctx context.Context,
	corpus CorpusAdapter,
	input any,
	opt IngestOption,
) (*IngestResult, error) {
	switch {
	case p == nil, p.chunker == nil, p.embedder == nil, p.store == nil:
		return nil, ErrNilDependency
	case corpus == nil:
		return nil, errors.New("nil corpus adapter")
	}

	docs, buildErr := corpus.BuildIngestDocuments(ctx, input)
	if buildErr != nil {
		return nil, buildErr
	}

	name := corpus.Name()
	return p.IngestDocuments(ctx, name, docs, opt)
}
// IngestDocuments runs the unified ingestion flow for already-standardized
// documents.
//
// Responsibilities:
//  1. Handles documents that have already been mapped by a CorpusAdapter;
//  2. Chunks, embeds and upserts them in one pass;
//  3. Does not parse business input, so callers that already hold the
//     documents do not need to build them a second time.
//
// It returns ErrNilDependency when the pipeline or its chunker, embedder or
// store is nil. An empty docs slice, or documents that yield no chunks, return
// a result with ChunkCount 0 without calling the embedder or the store. Any
// chunking, embedding or upsert error is returned as-is.
func (p *Pipeline) IngestDocuments(
	ctx context.Context,
	corpusName string,
	docs []SourceDocument,
	opt IngestOption,
) (*IngestResult, error) {
	ready := p != nil && p.chunker != nil && p.embedder != nil && p.store != nil
	if !ready {
		return nil, ErrNilDependency
	}

	docCount := len(docs)
	if docCount == 0 {
		return &IngestResult{DocumentCount: 0, ChunkCount: 0}, nil
	}

	// Chunk every document independently; abort on the first failure so that
	// nothing half-processed is ever written to the store.
	chunkOpt := normalizeChunkOption(opt.Chunk)
	allChunks := make([]Chunk, 0, docCount*2)
	for i := range docs {
		pieces, chunkErr := p.chunker.Chunk(ctx, docs[i], chunkOpt)
		if chunkErr != nil {
			return nil, chunkErr
		}
		allChunks = append(allChunks, pieces...)
	}

	chunkCount := len(allChunks)
	if chunkCount == 0 {
		return &IngestResult{DocumentCount: docCount, ChunkCount: 0}, nil
	}

	texts := make([]string, chunkCount)
	for i := range allChunks {
		texts[i] = allChunks[i].Text
	}

	// A blank action falls back to "add".
	action := "add"
	if trimmed := strings.TrimSpace(opt.Action); trimmed != "" {
		action = trimmed
	}

	vectors, embedErr := p.embedder.Embed(ctx, texts, action)
	if embedErr != nil {
		return nil, embedErr
	}
	if len(vectors) != chunkCount {
		return nil, fmt.Errorf("embedding result length mismatch: chunks=%d vectors=%d", chunkCount, len(vectors))
	}

	// All rows of one ingestion share the same creation/update timestamp.
	stamp := time.Now()
	rows := make([]VectorRow, chunkCount)
	for i := range allChunks {
		piece := allChunks[i]

		// Work on a copy so the chunk's own metadata map is never mutated.
		meta := cloneMap(piece.Metadata)
		meta["corpus"] = corpusName
		meta["document_id"] = piece.DocumentID
		meta["chunk_order"] = piece.Order

		rows[i] = VectorRow{
			ID:        piece.ID,
			Vector:    vectors[i],
			Text:      piece.Text,
			Metadata:  meta,
			CreatedAt: stamp,
			UpdatedAt: stamp,
		}
	}

	if upsertErr := p.store.Upsert(ctx, rows); upsertErr != nil {
		return nil, upsertErr
	}

	return &IngestResult{
		DocumentCount: docCount,
		ChunkCount:    chunkCount,
	}, nil
}
// Retrieve runs the unified retrieval flow.
//
// Steps:
//  1. Embed the query and run a vector search (optionally scoped by the
//     corpus adapter's filter plus the corpus name);
//  2. Drop candidates whose score is below the threshold;
//  3. Optionally rerank; if reranking fails the original ordering is kept,
//     the result is flagged as a fallback and the failure is reported.
//
// It returns ErrNilDependency when the pipeline or its embedder or store is
// nil, and ErrInvalidQuery when the query is blank after trimming. A TopK <= 0
// is replaced by defaultTopK and a negative Threshold by defaultThreshold.
// Errors from the corpus filter, the embedder or the store are returned
// unchanged; a rerank error is never returned to the caller.
func (p *Pipeline) Retrieve(
	ctx context.Context,
	corpus CorpusAdapter,
	req RetrieveRequest,
) (*RetrieveResult, error) {
	switch {
	case p == nil, p.embedder == nil, p.store == nil:
		return nil, ErrNilDependency
	}

	query := strings.TrimSpace(req.Query)
	if query == "" {
		return nil, ErrInvalidQuery
	}

	limit := req.TopK
	if limit <= 0 {
		limit = defaultTopK
	}

	minScore := req.Threshold
	if minScore < 0 {
		minScore = defaultThreshold
	}

	searchFilter := cloneMap(req.Filter)
	if corpus != nil {
		// Add the corpus-specific conditions first so that results never leak
		// across corpora; the corpus name always wins over caller-supplied keys.
		extra, filterErr := corpus.BuildRetrieveFilter(ctx, req.CorpusInput)
		if filterErr != nil {
			return nil, filterErr
		}
		searchFilter = mergeMap(searchFilter, extra)
		searchFilter["corpus"] = corpus.Name()
	}

	// A blank action falls back to "search".
	action := "search"
	if trimmed := strings.TrimSpace(req.Action); trimmed != "" {
		action = trimmed
	}

	vectors, embedErr := p.embedder.Embed(ctx, []string{query}, action)
	if embedErr != nil {
		return nil, embedErr
	}
	if len(vectors) != 1 {
		return nil, fmt.Errorf("embedding query length mismatch: %d", len(vectors))
	}

	hits, searchErr := p.store.Search(ctx, VectorSearchRequest{
		QueryVector: vectors[0],
		TopK:        limit,
		Filter:      searchFilter,
	})
	if searchErr != nil {
		return nil, searchErr
	}

	// Threshold filtering: keep only hits that are not below the minimum score.
	kept := make([]ScoredChunk, 0, len(hits))
	for _, hit := range hits {
		if hit.Score < minScore {
			continue
		}
		row := hit.Row
		kept = append(kept, ScoredChunk{
			ChunkID:    row.ID,
			DocumentID: asString(row.Metadata["document_id"]),
			Text:       row.Text,
			Score:      hit.Score,
			Metadata:   cloneMap(row.Metadata),
		})
	}

	result := &RetrieveResult{
		Items:        kept,
		RawCount:     len(hits),
		FallbackUsed: false,
	}

	// Nothing to rerank, or no reranker configured: return the store ordering.
	if len(kept) == 0 || p.reranker == nil {
		return result, nil
	}

	reranked, rerankErr := p.reranker.Rerank(ctx, query, kept, limit)
	if rerankErr == nil {
		result.Items = reranked
		return result, nil
	}

	// A rerank failure does not abort retrieval: degrade to the original
	// ordering and report it through the observer, or the logger when no
	// observer is set.
	result.FallbackUsed = true
	result.FallbackReason = FallbackReasonRerankFailed
	switch {
	case p.observer != nil:
		p.observer.Observe(ctx, ObserveEvent{
			Level:     ObserveLevelWarn,
			Component: "pipeline",
			Operation: "rerank_fallback",
			Fields: map[string]any{
				"status":          "fallback",
				"fallback_reason": FallbackReasonRerankFailed,
				"candidate_count": len(kept),
				"top_k":           limit,
				"error":           rerankErr,
				"error_code":      ClassifyErrorCode(rerankErr),
			},
		})
	case p.logger != nil:
		p.logger.Printf("rag rerank fallback: reason=%s err=%v", FallbackReasonRerankFailed, rerankErr)
	}
	return result, nil
}
// normalizeChunkOption returns a copy of opt with out-of-range values replaced.
//
// Rules:
//   - a non-positive ChunkSize becomes defaultChunkSize;
//   - a negative ChunkOverlap becomes 0;
//   - a ChunkOverlap that is >= ChunkSize becomes defaultChunkOvLap, or
//     ChunkSize/5 when even the default would not fit inside the chunk.
func normalizeChunkOption(opt ChunkOption) ChunkOption {
	if opt.ChunkSize <= 0 {
		opt.ChunkSize = defaultChunkSize
	}

	// ChunkSize is strictly positive from here on, so a clamped overlap of 0
	// can never trip the ">= ChunkSize" rule; the two cases are exclusive.
	switch {
	case opt.ChunkOverlap < 0:
		opt.ChunkOverlap = 0
	case opt.ChunkOverlap >= opt.ChunkSize:
		if defaultChunkOvLap < opt.ChunkSize {
			opt.ChunkOverlap = defaultChunkOvLap
		} else {
			opt.ChunkOverlap = opt.ChunkSize / 5
		}
	}
	return opt
}
// cloneMap returns a shallow copy of src.
//
// The result is always a fresh, non-nil map (empty when src is nil or empty),
// so callers may write to it without affecting src. Values are copied by
// assignment only; nested maps or slices remain shared with src.
func cloneMap(src map[string]any) map[string]any {
	out := make(map[string]any, len(src))
	for k, v := range src {
		out[k] = v
	}
	return out
}
// mergeMap copies every entry of ext into base and returns base.
//
// base is modified in place; on key collisions the value from ext wins. When
// base is nil a new map is allocated, so the returned map is never nil.
func mergeMap(base map[string]any, ext map[string]any) map[string]any {
	merged := base
	if merged == nil {
		merged = make(map[string]any, len(ext))
	}
	for k, v := range ext {
		merged[k] = v
	}
	return merged
}
// asString renders an arbitrary value as a string using fmt's default
// formatting. A nil value yields the empty string rather than "<nil>".
func asString(v any) string {
	if v != nil {
		return fmt.Sprint(v)
	}
	return ""
}