Version: 0.9.16.dev.260413

后端：
1. RAG embedding 接入修正，并兼容 Ark 多模态 embedding 链路
   - 更新 backend/infra/rag/embed/eino_embedder.go：文本 embedding 继续走 Eino OpenAI 兼容链路；`doubao-embedding-vision-*` 模型切到 Ark 原生 `/embeddings/multimodal`
   - 增加 embedding baseURL 归一化：兼容把 `.../embeddings` 或 `.../embeddings/multimodal` 误填进配置的情况，统一回退到 `/api/v3`
   - 为第三方 embedding 调用增加 panic recover，避免向量检索/写入异常直接打崩主进程
2. RAG runtime / pipeline / store 稳定性加固，统一降级为 error 语义
   - 更新 backend/infra/rag/runtime.go：runtime 对外入口增加 panic recover 与观测打点
   - 更新 backend/infra/rag/core/pipeline.go：ingest / retrieve 编排边界增加 panic recover
   - 更新 backend/infra/rag/retrieve/vector_retriever.go：向量检索边界补充 panic recover
   - 更新 backend/infra/rag/store/milvus_store.go、backend/infra/rag/store/inmemory_store.go：补齐未初始化保护，避免 nil 依赖直接异常退出
3. RAG embedding 配置口径与普通 LLM 链路对齐
   - 更新 backend/infra/rag/factory.go：RAG embedding API Key 不再走 `apiKeyEnv` 间接映射，统一直接读取 `ARK_API_KEY`
   - 更新 backend/infra/rag/config/config.go：删除 `rag.embed.apiKeyEnv` 配置字段，收敛配置分叉
   - 更新 backend/config.example.yaml：示例配置切到当前联调口径，保持 `rag.enabled=true`、`memory.rag.enabled=true`，并对齐 Milvus / embed 配置
4. Memory + RAG 联调链路可运行态修正
   - 当前已验证 memory 抽取写库、RAG ingest 写入 Milvus、后续语义召回链路可继续联调
   - 检索失败场景已从“直接 panic”收敛为“记录日志并降级”，不再阻断主聊天链路

前端：无

仓库：无

未完成（undo）：
1. MySQL 记忆在增删改查时的去重尚未实现
2. 从用户话语中提取记忆的过滤机制不足，筛选过于粗放
3. RAG 召回仍存在问题，待排查
2026-04-13 23:18:59 +08:00
parent 070d4c3459
commit 863cba4e4e
9 changed files with 297 additions and 53 deletions
--- a/backend/infra/rag/embed/eino_embedder.go
+++ b/backend/infra/rag/embed/eino_embedder.go
@@ -3,11 +3,15 @@ package embed
 import (
 	"context"
 	"errors"
+	"fmt"
+	"net/http"
 	"strings"
 	"time"

 	openaiembedding "github.com/cloudwego/eino-ext/libs/acl/openai"
 	einoembedding "github.com/cloudwego/eino/components/embedding"
+	"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
+	arkmodel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
 )

 // EinoConfig 描述 Eino embedding 运行参数。
@@ -22,14 +26,15 @@ type EinoConfig struct {
 // EinoEmbedder 是基于 Eino 的 embedding 适配器。
 //
 // 说明：
-// 1. 对 infra/rag 暴露统一 []float32 结果，屏蔽 Eino/OpenAI 兼容实现细节；
-// 2. 超时由该适配器自身收口，避免业务侧每次调用都手写超时控制；
-// 3. 当前底层走 Eino Ext 的 OpenAI 兼容 embedding client，便于接 Ark/OpenAI 兼容接口。
+// 1. 对 infra/rag 暴露统一 []float32 结果，屏蔽底层 SDK 的实现差异。
+// 2. 文本 embedding 继续走当前稳定的 OpenAI 兼容链路，避免无关模型受影响。
+// 3. 多模态 embedding 模型单独走 Ark 原生 `/embeddings/multimodal`，解决 vision 模型与标准 `/embeddings` 不兼容的问题。
 type EinoEmbedder struct {
-	client    einoembedding.Embedder
-	model     string
-	timeout   time.Duration
-	dimension int
+	textClient       einoembedding.Embedder
+	multimodalClient *arkruntime.Client
+	model            string
+	timeout          time.Duration
+	dimension        int
 }

 func NewEinoEmbedder(ctx context.Context, cfg EinoConfig) (*EinoEmbedder, error) {
@@ -40,10 +45,42 @@ func NewEinoEmbedder(ctx context.Context, cfg EinoConfig) (*EinoEmbedder, error)
 		return nil, errors.New("eino embedder model is empty")
 	}

+	timeout := 1200 * time.Millisecond
+	if cfg.TimeoutMS > 0 {
+		timeout = time.Duration(cfg.TimeoutMS) * time.Millisecond
+	}
+
+	baseURL := normalizeEmbeddingBaseURL(cfg.BaseURL)
+	model := strings.TrimSpace(cfg.Model)
+	httpClient := &http.Client{Timeout: timeout}
+
+	// 1. `doubao-embedding-vision-*` 这类模型不支持标准 `/embeddings`。
+	// 2. 这里直接切到 Ark 原生多模态 embedding API，避免再依赖错误 endpoint 拼接。
+	// 3. 之所以仍保留文本链路，是为了不影响普通 text embedding 模型的既有行为。
+	if isMultimodalEmbeddingModel(model) {
+		arkOptions := []arkruntime.ConfigOption{
+			arkruntime.WithHTTPClient(httpClient),
+		}
+		if baseURL != "" {
+			arkOptions = append(arkOptions, arkruntime.WithBaseUrl(baseURL))
+		}
+
+		return &EinoEmbedder{
+			multimodalClient: arkruntime.NewClientWithApiKey(
+				strings.TrimSpace(cfg.APIKey),
+				arkOptions...,
+			),
+			model:     model,
+			timeout:   timeout,
+			dimension: cfg.Dimension,
+		}, nil
+	}
+
 	clientCfg := &openaiembedding.EmbeddingConfig{
-		APIKey:  strings.TrimSpace(cfg.APIKey),
-		BaseURL: strings.TrimSpace(cfg.BaseURL),
-		Model:   strings.TrimSpace(cfg.Model),
+		APIKey:     strings.TrimSpace(cfg.APIKey),
+		BaseURL:    baseURL,
+		Model:      model,
+		HTTPClient: httpClient,
 	}
 	if cfg.Dimension > 0 {
 		clientCfg.Dimensions = &cfg.Dimension
@@ -54,21 +91,16 @@ func NewEinoEmbedder(ctx context.Context, cfg EinoConfig) (*EinoEmbedder, error)
 		return nil, err
 	}

-	timeout := 1200 * time.Millisecond
-	if cfg.TimeoutMS > 0 {
-		timeout = time.Duration(cfg.TimeoutMS) * time.Millisecond
-	}
-
 	return &EinoEmbedder{
-		client:    client,
-		model:     strings.TrimSpace(cfg.Model),
-		timeout:   timeout,
-		dimension: cfg.Dimension,
+		textClient: client,
+		model:      model,
+		timeout:    timeout,
+		dimension:  cfg.Dimension,
 	}, nil
 }

-func (e *EinoEmbedder) Embed(ctx context.Context, texts []string, _ string) ([][]float32, error) {
-	if e == nil || e.client == nil {
+func (e *EinoEmbedder) Embed(ctx context.Context, texts []string, _ string) (result [][]float32, err error) {
+	if e == nil {
 		return nil, errors.New("eino embedder is not initialized")
 	}
 	if len(texts) == 0 {
@@ -82,12 +114,29 @@ func (e *EinoEmbedder) Embed(ctx context.Context, texts []string, _ string) ([][
 	}
 	defer cancel()

-	vectors, err := e.client.EmbedStrings(callCtx, texts, einoembedding.WithModel(e.model))
+	// 1. 第三方 SDK 一旦 panic，不应该穿透到 RAG 主链路。
+	// 2. 这里统一在模型调用边界 recover，并转成 error 交给上层做降级。
+	// 3. 这样 memory 主写链路和 agent 主回复链路都不会因为向量同步失败被直接打崩。
+	defer func() {
+		if recovered := recover(); recovered != nil {
+			err = fmt.Errorf("eino embedder panic recovered: %v", recovered)
+			result = nil
+		}
+	}()
+
+	if e.multimodalClient != nil {
+		return e.embedTextsWithMultimodalAPI(callCtx, texts)
+	}
+	if e.textClient == nil {
+		return nil, errors.New("eino embedder client is not initialized")
+	}
+
+	vectors, err := e.textClient.EmbedStrings(callCtx, texts, einoembedding.WithModel(e.model))
 	if err != nil {
 		return nil, err
 	}

-	result := make([][]float32, 0, len(vectors))
+	result = make([][]float32, 0, len(vectors))
 	for _, vector := range vectors {
 		converted := make([]float32, len(vector))
 		for i, value := range vector {
@@ -97,3 +146,63 @@ func (e *EinoEmbedder) Embed(ctx context.Context, texts []string, _ string) ([][
 	}
 	return result, nil
 }
+
+func (e *EinoEmbedder) embedTextsWithMultimodalAPI(ctx context.Context, texts []string) ([][]float32, error) { // embeds each text with its own Ark multimodal request; vectors are returned in input order
+	if e.multimodalClient == nil {
+		return nil, errors.New("eino multimodal embedder client is not initialized")
+	}
+
+	vectors := make([][]float32, 0, len(texts))
+	for _, text := range texts {
+		text := text // per-iteration copy, because its address is stored in the request below
+		req := arkmodel.MultiModalEmbeddingRequest{
+			Model: e.model,
+			Input: []arkmodel.MultimodalEmbeddingInput{
+				{
+					Type: arkmodel.MultiModalEmbeddingInputTypeText,
+					Text: &text,
+				},
+			},
+		}
+		if e.dimension > 0 { // only send an explicit dimension when one was configured
+			req.Dimensions = &e.dimension
+		}
+
+		// 1. Ark's multimodal embedding request body models "one sample made up of multiple parts".
+		// 2. RAG only passes text here, so each text is sent in its own request to avoid merging several texts into a single multimodal sample.
+		// 3. If batched multimodal embedding is ever needed, add a dedicated batch interface instead of silently changing the semantics here.
+		resp, err := e.multimodalClient.CreateMultiModalEmbeddings(ctx, req)
+		if err != nil {
+			return nil, err // the first failure aborts the whole call; no partial result is returned
+		}
+
+		converted := make([]float32, len(resp.Data.Embedding))
+		copy(converted, resp.Data.Embedding) // copy into a fresh slice rather than retaining the response's slice
+		vectors = append(vectors, converted)
+	}
+	return vectors, nil
+}
+
+func isMultimodalEmbeddingModel(model string) bool { // reports whether model, after trimming and lower-casing, starts with "doubao-embedding-vision-"
+	return strings.HasPrefix(strings.ToLower(strings.TrimSpace(model)), "doubao-embedding-vision-")
+}
+
+func normalizeEmbeddingBaseURL(raw string) string { // trims raw and strips a trailing "/embeddings" or "/embeddings/multimodal" (case-insensitive); returns "" for blank input
+	baseURL := strings.TrimRight(strings.TrimSpace(raw), "/") // drop surrounding whitespace, then any trailing slashes
+	if baseURL == "" {
+		return ""
+	}
+
+	lowerBaseURL := strings.ToLower(baseURL) // lower-cased copy used only for suffix matching
+
+	// 1. The config is expected to hold the Ark service root path, not a concrete embedding endpoint.
+	// 2. Two common misconfigurations are tolerated here: `/embeddings` and `/embeddings/multimodal`.
+	// 3. Stripping the suffix falls back to the root path (expected to be `/api/v3`), so the SDK in use can append its own suffix without duplicating it in the final URL.
+	if strings.HasSuffix(lowerBaseURL, "/embeddings/multimodal") {
+		return strings.TrimSuffix(baseURL, baseURL[len(baseURL)-len("/embeddings/multimodal"):]) // cut by length from the original-case string, so the remaining prefix keeps its casing
+	}
+	if strings.HasSuffix(lowerBaseURL, "/embeddings") {
+		return strings.TrimSuffix(baseURL, baseURL[len(baseURL)-len("/embeddings"):]) // same length-based cut for the shorter suffix
+	}
+	return baseURL
+}