Version: 0.9.16.dev.260413
后端: 1. RAG embedding 接入修正,并兼容 Ark 多模态 embedding 链路 - 更新 backend/infra/rag/embed/eino_embedder.go:文本 embedding 继续走 Eino OpenAI 兼容链路;`doubao-embedding-vision-*` 模型切到 Ark 原生 `/embeddings/multimodal` - 增加 embedding baseURL 归一化:兼容把 `.../embeddings` 或 `.../embeddings/multimodal` 误填进配置的情况,统一回退到 `/api/v3` - 为第三方 embedding 调用增加 panic recover,避免向量检索/写入异常直接打崩主进程 2. RAG runtime / pipeline / store 稳定性加固,统一降级为 error 语义 - 更新 backend/infra/rag/runtime.go:runtime 对外入口增加 panic recover 与观测打点 - 更新 backend/infra/rag/core/pipeline.go:ingest / retrieve 编排边界增加 panic recover - 更新 backend/infra/rag/retrieve/vector_retriever.go:向量检索边界补充 panic recover - 更新 backend/infra/rag/store/milvus_store.go、backend/infra/rag/store/inmemory_store.go:补齐未初始化保护,避免 nil 依赖直接异常退出 3. RAG embedding 配置口径与普通 LLM 链路对齐 - 更新 backend/infra/rag/factory.go:RAG embedding API Key 不再走 `apiKeyEnv` 间接映射,统一直接读取 `ARK_API_KEY` - 更新 backend/infra/rag/config/config.go:删除 `rag.embed.apiKeyEnv` 配置字段,收敛配置分叉 - 更新 backend/config.example.yaml:示例配置切到当前联调口径,保持 `rag.enabled=true`、`memory.rag.enabled=true`,并对齐 Milvus / embed 配置 4. Memory + RAG 联调链路可运行态修正 - 当前已验证 memory 抽取写库、RAG ingest 写入 Milvus、后续语义召回链路可继续联调 - 检索失败场景已从“直接 panic”收敛为“记录日志并降级”,不再阻断主聊天链路 前端:无 仓库:无 TODO(未完成): 1. 增删改查的 mysql 记忆去重没实现 2. 提取用户话为记忆的过滤机制不足,有点无脑 3. RAG 召回也有问题
This commit is contained in:
@@ -3,6 +3,7 @@ package rag
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -33,7 +34,9 @@ func newRuntime(cfg ragconfig.Config, pipeline *core.Pipeline, observer Observer
|
||||
}
|
||||
|
||||
// IngestMemory 统一承接记忆语料入库。
|
||||
func (r *runtime) IngestMemory(ctx context.Context, req MemoryIngestRequest) (*IngestResult, error) {
|
||||
func (r *runtime) IngestMemory(ctx context.Context, req MemoryIngestRequest) (result *IngestResult, err error) {
|
||||
defer r.recoverPublicPanic(ctx, req.TraceID, "memory", normalizeAction(req.Action, "add"), "ingest", &err)
|
||||
|
||||
items := make([]corpus.MemoryIngestItem, 0, len(req.Items))
|
||||
for _, item := range req.Items {
|
||||
items = append(items, corpus.MemoryIngestItem{
|
||||
@@ -58,7 +61,9 @@ func (r *runtime) IngestMemory(ctx context.Context, req MemoryIngestRequest) (*I
|
||||
}
|
||||
|
||||
// RetrieveMemory 统一承接记忆语料检索。
|
||||
func (r *runtime) RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest) (*RetrieveResult, error) {
|
||||
func (r *runtime) RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest) (result *RetrieveResult, err error) {
|
||||
defer r.recoverPublicPanic(ctx, req.TraceID, "memory", normalizeAction(req.Action, "search"), "retrieve", &err)
|
||||
|
||||
corpusInput := corpus.MemoryRetrieveInput{
|
||||
UserID: req.UserID,
|
||||
ConversationID: req.ConversationID,
|
||||
@@ -69,7 +74,7 @@ func (r *runtime) RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest)
|
||||
corpusInput.MemoryType = req.MemoryTypes[0]
|
||||
}
|
||||
|
||||
result, err := r.retrieveWithCorpus(ctx, req.TraceID, "memory", r.memoryCorpus, core.RetrieveRequest{
|
||||
result, err = r.retrieveWithCorpus(ctx, req.TraceID, "memory", r.memoryCorpus, core.RetrieveRequest{
|
||||
Query: req.Query,
|
||||
TopK: normalizeTopK(req.TopK, r.cfg.TopK),
|
||||
Threshold: normalizeThreshold(req.Threshold, r.cfg.Threshold),
|
||||
@@ -113,7 +118,9 @@ func (r *runtime) RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest)
|
||||
}
|
||||
|
||||
// IngestWeb 统一承接网页语料入库。
|
||||
func (r *runtime) IngestWeb(ctx context.Context, req WebIngestRequest) (*IngestResult, error) {
|
||||
func (r *runtime) IngestWeb(ctx context.Context, req WebIngestRequest) (result *IngestResult, err error) {
|
||||
defer r.recoverPublicPanic(ctx, req.TraceID, "web", normalizeAction(req.Action, "add"), "ingest", &err)
|
||||
|
||||
items := make([]corpus.WebIngestItem, 0, len(req.Items))
|
||||
for _, item := range req.Items {
|
||||
items = append(items, corpus.WebIngestItem{
|
||||
@@ -133,7 +140,9 @@ func (r *runtime) IngestWeb(ctx context.Context, req WebIngestRequest) (*IngestR
|
||||
}
|
||||
|
||||
// RetrieveWeb 统一承接网页语料检索。
|
||||
func (r *runtime) RetrieveWeb(ctx context.Context, req WebRetrieveRequest) (*RetrieveResult, error) {
|
||||
func (r *runtime) RetrieveWeb(ctx context.Context, req WebRetrieveRequest) (result *RetrieveResult, err error) {
|
||||
defer r.recoverPublicPanic(ctx, req.TraceID, "web", normalizeAction(req.Action, "search"), "retrieve", &err)
|
||||
|
||||
return r.retrieveWithCorpus(ctx, req.TraceID, "web", r.webCorpus, core.RetrieveRequest{
|
||||
Query: req.Query,
|
||||
TopK: normalizeTopK(req.TopK, r.cfg.TopK),
|
||||
@@ -311,6 +320,41 @@ func (r *runtime) observe(ctx context.Context, event ObserveEvent) {
|
||||
r.observer.Observe(ctx, event)
|
||||
}
|
||||
|
||||
func (r *runtime) recoverPublicPanic(
|
||||
ctx context.Context,
|
||||
traceID string,
|
||||
corpusName string,
|
||||
action string,
|
||||
operation string,
|
||||
errPtr *error,
|
||||
) {
|
||||
recovered := recover()
|
||||
if recovered == nil || errPtr == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// 1. runtime 是 RAG Infra 对业务侧暴露的最终方法面,任何下层 panic 都不应再穿透到业务协程。
|
||||
// 2. 这里统一把 panic 转成 error,并补一条结构化观测,方便继续排查是哪一层依赖失控。
|
||||
// 3. 保留 stack 是为了在“进程不崩”的前提下仍能定位根因,避免只剩一句 recovered 无法复盘。
|
||||
panicErr := fmt.Errorf("rag runtime panic recovered: corpus=%s operation=%s panic=%v", corpusName, operation, recovered)
|
||||
*errPtr = panicErr
|
||||
|
||||
observeCtx := newObserveContext(ctx, traceID, corpusName, action)
|
||||
r.observe(observeCtx, ObserveEvent{
|
||||
Level: ObserveLevelError,
|
||||
Component: "runtime",
|
||||
Operation: operation + "_panic_recovered",
|
||||
Fields: map[string]any{
|
||||
"status": "failed",
|
||||
"panic": fmt.Sprintf("%v", recovered),
|
||||
"panic_type": fmt.Sprintf("%T", recovered),
|
||||
"error": panicErr,
|
||||
"error_code": core.ClassifyErrorCode(panicErr),
|
||||
"stack": string(debug.Stack()),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func newObserveContext(ctx context.Context, traceID string, corpusName string, action string) context.Context {
|
||||
fields := map[string]any{
|
||||
"corpus": corpusName,
|
||||
|
||||
Reference in New Issue
Block a user