Version: 0.9.14.dev.260410

后端:
  1. LLM 客户端从 newAgent/llm 提升为 infra/llm 基础设施层
     - 删除 backend/newAgent/llm/(ark.go / ark_adapter.go / client.go / json.go)
     - 等价迁移至 backend/infra/llm/,所有 newAgent node 与 service 统一改为引用 infrallm
     - 消除 newAgent 对模型客户端的私有依赖,为 memory / websearch 等多模块复用铺路
  2. RAG 基础设施完成可运行态接入(factory / runtime / observer / service 四层成型)
     - 新建 backend/infra/rag/factory.go / runtime.go / observe.go / observer.go /
  service.go:工厂创建、运行时生命周期、轻量观测接口、检索服务门面
     - 更新 infra/rag/config/config.go:补齐 Milvus / Embed / Reranker 全部配置项与默认值
     - 更新 infra/rag/embed/eino_embedder.go:增强 Eino embedding 适配,支持 BaseURL / APIKey 环境变量 / 超时 /
  维度等参数
     - 更新 infra/rag/store/milvus_store.go:完整实现 Milvus 向量存储(建集合 / 建 Index / Upsert / Search /
  Delete),支持 COSINE / L2 / IP 度量
     - 更新 infra/rag/core/pipeline.go:适配 Runtime 接口,Pipeline 由 factory 注入而非手动拼装
     - 更新 infra/rag/corpus/memory_corpus.go / vector_store.go:对接 Memory 模块数据源与 Store 接口扩展
  3. Memory 模块从 Day1 骨架升级为 Day2 完整可运行态
     - 新建 memory/module.go:统一门面 Module,对外封装 EnqueueExtract / ReadService / ManageService / WithTx /
  StartWorker,启动层只依赖这一个入口
     - 新建 memory/orchestrator/llm_write_orchestrator.go:LLM 驱动的记忆抽取编排器,替代原 mock 抽取
     - 新建 memory/service/read_service.go:按用户开关过滤 + 轻量重排 + 访问时间刷新的读取链路
     - 新建 memory/service/manage_service.go:记忆管理面能力(列出 / 软删除 / 开关读写),删除同步写审计日志
     - 新建 memory/service/common.go:服务层公共工具
     - 新建 memory/worker/loop.go:后台轮询循环 RunPollingLoop,定时抢占 pending 任务并推进
     - 新建 memory/utils/audit.go / settings.go:审计日志构造、用户设置过滤等纯函数
     - 更新 memory/model/item.go / job.go / settings.go / config.go / status.go:补齐 DTO 字段与状态常量
     - 更新 memory/repo/item_repo.go / job_repo.go / audit_repo.go / settings_repo.go:补齐 CRUD 与查询能力
     - 更新 memory/worker/runner.go:Runner 对接 Module 与 LLM 抽取器,任务状态机完整化
     - 更新 memory/README.md:同步模块现状说明
  4. newAgent 接入 Memory 读取注入与工具注册依赖预埋
     - 新建 service/agentsvc/agent_memory.go:定义 MemoryReader 接口 + injectMemoryContext,在 graph
  执行前统一补充记忆上下文
     - 更新 service/agentsvc/agent.go:新增 memoryReader 字段与 SetMemoryReader 方法
     - 更新 service/agentsvc/agent_newagent.go:调用 injectMemoryContext 注入 pinned block,检索失败仅降级不阻断主链路
     - 更新 newAgent/tools/registry.go:新增 DefaultRegistryDeps(含 RAGRuntime),工具注册表支持依赖注入
  5. 启动流程与事件处理器接线更新
     - 更新 cmd/start.go:初始化 RAG Runtime → Memory Module → 注册事件处理器 → 启动 Worker 后台轮询
     - 更新 service/events/memory_extract_requested.go:改用 memory.Module.WithTx(tx) 统一门面,事件处理器不再直接依赖
  repo/service 内部包
  6. 缓存插件与配置同步
     - 更新 middleware/cache_deleter.go:静默忽略 MemoryJob / MemoryItem / MemoryAuditLog / MemoryUserSetting
  等新模型,避免日志刷屏;清理冗余注释
     - 更新 config.example.yaml:补齐 rag / memory / websearch 配置段及默认值
     - 更新 go.mod / go.sum:新增 eino-ext/openai / json-patch / go-openai 依赖
  前端:无;仓库:无
This commit is contained in:
Losita
2026-04-10 23:17:38 +08:00
parent fae162162a
commit bf1f1defa5
53 changed files with 5875 additions and 231 deletions

View File

@@ -0,0 +1,380 @@
package rag
import (
"context"
"fmt"
"strings"
"time"
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/infra/rag/corpus"
)
// runtime is the concrete Runtime implementation. It fronts the core
// pipeline with per-corpus adapters (memory, web) and emits lightweight
// observation events around every ingest/retrieve call.
type runtime struct {
	// cfg holds the resolved RAG configuration (top-k, threshold,
	// chunking and retrieve-timeout settings are read from it below).
	cfg ragconfig.Config
	// pipeline is injected by the factory and performs the actual
	// ingest/retrieve work; nil-checked before every use.
	pipeline *core.Pipeline
	// memoryCorpus adapts memory items to pipeline documents.
	memoryCorpus *corpus.MemoryCorpus
	// webCorpus adapts web pages to pipeline documents.
	webCorpus *corpus.WebCorpus
	// observer receives ObserveEvents; newRuntime guarantees it is
	// never nil (nop fallback).
	observer Observer
}
// newRuntime wires a runtime together with fresh corpus adapters.
// A nil observer is replaced with a no-op implementation so callers may
// pass nil without every observe site needing a guard.
func newRuntime(cfg ragconfig.Config, pipeline *core.Pipeline, observer Observer) Runtime {
	obs := observer
	if obs == nil {
		obs = NewNopObserver()
	}
	rt := &runtime{
		cfg:      cfg,
		pipeline: pipeline,
		observer: obs,
	}
	rt.memoryCorpus = corpus.NewMemoryCorpus()
	rt.webCorpus = corpus.NewWebCorpus()
	return rt
}
// IngestMemory is the single entry point for writing memory corpus data.
// It maps each request item onto the corpus ingest representation and
// delegates to the shared ingest path under the "memory" corpus name.
func (r *runtime) IngestMemory(ctx context.Context, req MemoryIngestRequest) (*IngestResult, error) {
	converted := make([]corpus.MemoryIngestItem, len(req.Items))
	for i := range req.Items {
		src := req.Items[i]
		converted[i] = corpus.MemoryIngestItem{
			MemoryID:         src.MemoryID,
			UserID:           src.UserID,
			ConversationID:   src.ConversationID,
			AssistantID:      src.AssistantID,
			RunID:            src.RunID,
			MemoryType:       src.MemoryType,
			Title:            src.Title,
			Content:          src.Content,
			Confidence:       src.Confidence,
			Importance:       src.Importance,
			SensitivityLevel: src.SensitivityLevel,
			IsExplicit:       src.IsExplicit,
			Status:           src.Status,
			TTLAt:            src.TTLAt,
			CreatedAt:        src.CreatedAt,
		}
	}
	return r.ingestWithCorpus(ctx, req.TraceID, "memory", r.memoryCorpus, converted, req.Action)
}
// RetrieveMemory runs a retrieval against the memory corpus. A single
// requested memory type is pushed down as an equality filter; multiple
// types are post-filtered here after retrieval (see note below).
func (r *runtime) RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest) (*RetrieveResult, error) {
	input := corpus.MemoryRetrieveInput{
		UserID:         req.UserID,
		ConversationID: req.ConversationID,
		AssistantID:    req.AssistantID,
		RunID:          req.RunID,
	}
	if len(req.MemoryTypes) == 1 {
		input.MemoryType = req.MemoryTypes[0]
	}
	result, err := r.retrieveWithCorpus(ctx, req.TraceID, "memory", r.memoryCorpus, core.RetrieveRequest{
		Query:       req.Query,
		TopK:        normalizeTopK(req.TopK, r.cfg.TopK),
		Threshold:   normalizeThreshold(req.Threshold, r.cfg.Threshold),
		Action:      normalizeAction(req.Action, "search"),
		CorpusInput: input,
	})
	if err != nil {
		return nil, err
	}
	if len(req.MemoryTypes) <= 1 {
		return result, nil
	}
	// Multi-type requests are filtered in the runtime instead of pushing a
	// "memory_type in (...)" predicate down into every Store implementation,
	// since the underlying filters are currently equality-only. Revisit and
	// push this down once the store layer gains unified multi-value filters.
	allowed := make(map[string]struct{}, len(req.MemoryTypes))
	for _, raw := range req.MemoryTypes {
		if t := strings.TrimSpace(strings.ToLower(raw)); t != "" {
			allowed[t] = struct{}{}
		}
	}
	kept := make([]RetrieveHit, 0, len(result.Items))
	for _, hit := range result.Items {
		typ := strings.TrimSpace(strings.ToLower(asString(hit.Metadata["memory_type"])))
		if len(allowed) > 0 {
			if _, ok := allowed[typ]; !ok {
				continue
			}
		}
		kept = append(kept, hit)
	}
	result.Items = kept
	// Re-apply the caller's explicit top-k after filtering shrank the list.
	if req.TopK > 0 && len(result.Items) > req.TopK {
		result.Items = result.Items[:req.TopK]
	}
	return result, nil
}
// IngestWeb is the single entry point for writing web corpus data.
// It maps each request item onto the corpus ingest representation and
// delegates to the shared ingest path under the "web" corpus name.
func (r *runtime) IngestWeb(ctx context.Context, req WebIngestRequest) (*IngestResult, error) {
	converted := make([]corpus.WebIngestItem, len(req.Items))
	for i := range req.Items {
		src := req.Items[i]
		converted[i] = corpus.WebIngestItem{
			URL:         src.URL,
			Title:       src.Title,
			Content:     src.Content,
			Snippet:     src.Snippet,
			Domain:      src.Domain,
			QueryID:     src.QueryID,
			SessionID:   src.SessionID,
			PublishedAt: src.PublishedAt,
			FetchedAt:   src.FetchedAt,
			SourceRank:  src.SourceRank,
		}
	}
	return r.ingestWithCorpus(ctx, req.TraceID, "web", r.webCorpus, converted, req.Action)
}
// RetrieveWeb runs a retrieval against the web corpus with normalized
// top-k / threshold / action values.
func (r *runtime) RetrieveWeb(ctx context.Context, req WebRetrieveRequest) (*RetrieveResult, error) {
	input := corpus.WebRetrieveInput{
		QueryID:   req.QueryID,
		SessionID: req.SessionID,
		Domain:    req.Domain,
	}
	coreReq := core.RetrieveRequest{
		Query:       req.Query,
		TopK:        normalizeTopK(req.TopK, r.cfg.TopK),
		Threshold:   normalizeThreshold(req.Threshold, r.cfg.Threshold),
		Action:      normalizeAction(req.Action, "search"),
		CorpusInput: input,
	}
	return r.retrieveWithCorpus(ctx, req.TraceID, "web", r.webCorpus, coreReq)
}
// ingestWithCorpus is the shared ingest path: it builds documents via the
// corpus adapter, runs them through the pipeline with the configured chunk
// settings, and emits one observation event per outcome (build failure,
// pipeline failure, or success).
func (r *runtime) ingestWithCorpus(
	ctx context.Context,
	traceID string,
	corpusName string,
	adapter core.CorpusAdapter,
	input any,
	action string,
) (*IngestResult, error) {
	started := time.Now()
	if r == nil || r.pipeline == nil || adapter == nil {
		return nil, core.ErrNilDependency
	}
	action = normalizeAction(action, "add")
	obsCtx := newObserveContext(ctx, traceID, corpusName, action)

	// fail emits a single error-level ingest event, merging phase-specific
	// fields into the common status/latency/error set.
	fail := func(cause error, extra map[string]any) {
		fields := map[string]any{
			"status":     "failed",
			"latency_ms": time.Since(started).Milliseconds(),
			"error":      cause,
			"error_code": core.ClassifyErrorCode(cause),
		}
		for k, v := range extra {
			fields[k] = v
		}
		r.observe(obsCtx, ObserveEvent{
			Level:     ObserveLevelError,
			Component: "runtime",
			Operation: "ingest",
			Fields:    fields,
		})
	}

	docs, err := adapter.BuildIngestDocuments(obsCtx, input)
	if err != nil {
		fail(err, map[string]any{
			"phase":       "build_documents",
			"input_count": estimateInputCount(input),
		})
		return nil, err
	}
	ids := make([]string, len(docs))
	for i, doc := range docs {
		ids[i] = doc.ID
	}
	result, err := r.pipeline.IngestDocuments(obsCtx, adapter.Name(), docs, core.IngestOption{
		Chunk: core.ChunkOption{
			ChunkSize:    r.cfg.ChunkSize,
			ChunkOverlap: r.cfg.ChunkOverlap,
		},
		Action: action,
	})
	if err != nil {
		fail(err, map[string]any{"document_count": len(docs)})
		return nil, err
	}
	r.observe(obsCtx, ObserveEvent{
		Level:     ObserveLevelInfo,
		Component: "runtime",
		Operation: "ingest",
		Fields: map[string]any{
			"status":         "success",
			"latency_ms":     time.Since(started).Milliseconds(),
			"document_count": result.DocumentCount,
			"chunk_count":    result.ChunkCount,
		},
	})
	return &IngestResult{
		DocumentCount: result.DocumentCount,
		ChunkCount:    result.ChunkCount,
		DocumentIDs:   ids,
	}, nil
}
// retrieveWithCorpus is the shared retrieval path: it applies an optional
// retrieve timeout, runs the pipeline, copies hits into the public result
// shape (with cloned metadata), and emits one observation event per outcome.
func (r *runtime) retrieveWithCorpus(
	ctx context.Context,
	traceID string,
	corpusName string,
	adapter core.CorpusAdapter,
	req core.RetrieveRequest,
) (*RetrieveResult, error) {
	started := time.Now()
	if r == nil || r.pipeline == nil || adapter == nil {
		return nil, core.ErrNilDependency
	}
	req.Action = normalizeAction(req.Action, "search")
	obsCtx := newObserveContext(ctx, traceID, corpusName, req.Action)

	// The pipeline call gets an optional deadline; observation events keep
	// the non-deadlined context so they are still emitted after a timeout.
	callCtx := obsCtx
	cancel := func() {}
	if ms := r.cfg.RetrieveTimeoutMS; ms > 0 {
		callCtx, cancel = context.WithTimeout(obsCtx, time.Duration(ms)*time.Millisecond)
	}
	defer cancel()

	result, err := r.pipeline.Retrieve(callCtx, adapter, req)
	if err != nil {
		r.observe(obsCtx, ObserveEvent{
			Level:     ObserveLevelError,
			Component: "runtime",
			Operation: "retrieve",
			Fields: map[string]any{
				"status":     "failed",
				"latency_ms": time.Since(started).Milliseconds(),
				"query_len":  len(strings.TrimSpace(req.Query)),
				"top_k":      req.TopK,
				"threshold":  req.Threshold,
				"error":      err,
				"error_code": core.ClassifyErrorCode(err),
			},
		})
		return nil, err
	}
	hits := make([]RetrieveHit, len(result.Items))
	for i, it := range result.Items {
		hits[i] = RetrieveHit{
			ChunkID:    it.ChunkID,
			DocumentID: it.DocumentID,
			Text:       it.Text,
			Score:      it.Score,
			Metadata:   cloneMap(it.Metadata),
		}
	}
	r.observe(obsCtx, ObserveEvent{
		Level:     ObserveLevelInfo,
		Component: "runtime",
		Operation: "retrieve",
		Fields: map[string]any{
			"status":          "success",
			"latency_ms":      time.Since(started).Milliseconds(),
			"query_len":       len(strings.TrimSpace(req.Query)),
			"top_k":           req.TopK,
			"threshold":       req.Threshold,
			"raw_count":       result.RawCount,
			"hit_count":       len(result.Items),
			"fallback_used":   result.FallbackUsed,
			"fallback_reason": result.FallbackReason,
		},
	})
	return &RetrieveResult{
		Items:          hits,
		RawCount:       result.RawCount,
		FallbackUsed:   result.FallbackUsed,
		FallbackReason: result.FallbackReason,
	}, nil
}
// observe forwards an event to the configured observer, tolerating a nil
// receiver or nil observer so call sites never need their own guard.
func (r *runtime) observe(ctx context.Context, event ObserveEvent) {
	if r != nil && r.observer != nil {
		r.observer.Observe(ctx, event)
	}
}
// newObserveContext attaches the corpus/action observation fields to ctx,
// plus trace_id when the caller supplied a non-blank trace identifier.
func newObserveContext(ctx context.Context, traceID string, corpusName string, action string) context.Context {
	fields := map[string]any{
		"corpus": corpusName,
		"action": action,
	}
	if id := strings.TrimSpace(traceID); id != "" {
		fields["trace_id"] = id
	}
	return core.WithObserveFields(ctx, fields)
}
// estimateInputCount reports how many ingest items a raw ingest payload
// carries; payloads of unknown shape count as zero.
func estimateInputCount(input any) int {
	if items, ok := input.([]corpus.MemoryIngestItem); ok {
		return len(items)
	}
	if items, ok := input.([]corpus.WebIngestItem); ok {
		return len(items)
	}
	return 0
}
// normalizeAction trims the action string and substitutes fallback when the
// trimmed value is empty. Note: the returned non-empty value is the trimmed
// form, matching the original trim-then-compare behavior.
func normalizeAction(action string, fallback string) string {
	if trimmed := strings.TrimSpace(action); trimmed != "" {
		return trimmed
	}
	return fallback
}
// normalizeTopK resolves the effective top-k: an explicit positive value
// wins, then a positive configured fallback, then the built-in default of 8.
func normalizeTopK(topK int, fallback int) int {
	switch {
	case topK > 0:
		return topK
	case fallback > 0:
		return fallback
	default:
		return 8
	}
}
// normalizeThreshold resolves the similarity threshold: a non-negative
// explicit value wins (zero included), then a non-negative fallback, else 0.
// NOTE(review): unlike normalizeTopK, zero counts as an explicit value here,
// so a caller passing 0 bypasses the configured fallback — confirm intended.
func normalizeThreshold(threshold float64, fallback float64) float64 {
	switch {
	case threshold >= 0:
		return threshold
	case fallback >= 0:
		return fallback
	default:
		return 0
	}
}
// cloneMap returns a shallow copy of src. Nil or empty input yields a fresh
// empty (non-nil) map so callers can write to the result unconditionally.
func cloneMap(src map[string]any) map[string]any {
	dst := make(map[string]any, len(src))
	for k, v := range src {
		dst[k] = v
	}
	return dst
}
// asString renders any value via %v and trims surrounding whitespace;
// a nil value maps to the empty string.
func asString(v any) string {
	if v == nil {
		return ""
	}
	rendered := fmt.Sprintf("%v", v)
	return strings.TrimSpace(rendered)
}