Version: 0.9.14.dev.260410

Backend:
  1. Promoted the LLM client from newAgent/llm into the infra/llm infrastructure layer
     - Deleted backend/newAgent/llm/ (ark.go / ark_adapter.go / client.go / json.go)
     - Migrated it unchanged to backend/infra/llm/; all newAgent nodes and services now import infrallm
     - Removes newAgent's private dependency on the model client, paving the way for reuse by memory / websearch and other modules
  2. RAG infrastructure reaches a runnable, integrable state (factory / runtime / observer / service layers in place)
     - Added backend/infra/rag/factory.go / runtime.go / observe.go / observer.go / service.go: factory construction, runtime lifecycle, lightweight observability interface, retrieval service facade
     - Updated infra/rag/config/config.go: filled in all Milvus / Embed / Reranker config options and defaults
     - Updated infra/rag/embed/eino_embedder.go: strengthened the Eino embedding adapter with BaseURL / API-key env var / timeout / dimension parameters
     - Updated infra/rag/store/milvus_store.go: full Milvus vector store implementation (create collection / create index / Upsert / Search / Delete) supporting COSINE / L2 / IP metrics
     - Updated infra/rag/core/pipeline.go: adapted to the Runtime interface; the Pipeline is now injected by the factory instead of assembled by hand
     - Updated infra/rag/corpus/memory_corpus.go / vector_store.go: wired to the Memory module data source and extended the Store interface
  3. Memory module upgraded from the Day 1 skeleton to a fully runnable Day 2 state
     - Added memory/module.go: unified Module facade exposing EnqueueExtract / ReadService / ManageService / WithTx / StartWorker; the startup layer depends on this single entry point
     - Added memory/orchestrator/llm_write_orchestrator.go: LLM-driven memory-extraction orchestrator replacing the mock extractor
     - Added memory/service/read_service.go: read path with per-user-toggle filtering + lightweight rerank + access-time refresh
     - Added memory/service/manage_service.go: memory management capabilities (list / soft delete / read-write toggles); deletions also write audit logs synchronously
     - Added memory/service/common.go: shared service-layer utilities
     - Added memory/worker/loop.go: background polling loop RunPollingLoop that periodically claims pending jobs and advances them
     - Added memory/utils/audit.go / settings.go: pure functions for audit-log construction and user-settings filtering
     - Updated memory/model/item.go / job.go / settings.go / config.go / status.go: filled in DTO fields and status constants
     - Updated memory/repo/item_repo.go / job_repo.go / audit_repo.go / settings_repo.go: filled in CRUD and query capabilities
     - Updated memory/worker/runner.go: the Runner now wires to the Module and the LLM extractor; the job state machine is complete
     - Updated memory/README.md: synced the module status notes
  4. newAgent wired up Memory read injection and pre-staged tool-registry dependencies
     - Added service/agentsvc/agent_memory.go: defines the MemoryReader interface + injectMemoryContext, which supplements memory context uniformly before graph execution
     - Updated service/agentsvc/agent.go: added the memoryReader field and the SetMemoryReader method
     - Updated service/agentsvc/agent_newagent.go: calls injectMemoryContext to inject a pinned block; retrieval failures only degrade and never block the main path
     - Updated newAgent/tools/registry.go: added DefaultRegistryDeps (including RAGRuntime); the tool registry now supports dependency injection
  5. Startup flow and event-handler wiring updates
     - Updated cmd/start.go: initialize the RAG Runtime → Memory Module → register event handlers → start the Worker background polling
     - Updated service/events/memory_extract_requested.go: switched to the memory.Module.WithTx(tx) facade; the event handler no longer depends directly on internal repo/service packages
  6. Cache plugin and config sync
     - Updated middleware/cache_deleter.go: silently ignore new models such as MemoryJob / MemoryItem / MemoryAuditLog / MemoryUserSetting to avoid log spam; removed redundant comments
     - Updated config.example.yaml: filled in the rag / memory / websearch config sections and their defaults
     - Updated go.mod / go.sum: added the eino-ext/openai / json-patch / go-openai dependencies
Frontend: none. Repository: none.
Author: Losita
Date: 2026-04-10 23:17:38 +08:00
parent fae162162a
commit bf1f1defa5
53 changed files with 5875 additions and 231 deletions

backend/cmd/start.go

@@ -8,8 +8,12 @@ import (
 	"github.com/LoveLosita/smartflow/backend/api"
 	"github.com/LoveLosita/smartflow/backend/dao"
 	kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka"
+	infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
 	outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
+	infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
+	ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
 	"github.com/LoveLosita/smartflow/backend/inits"
+	"github.com/LoveLosita/smartflow/backend/memory"
 	"github.com/LoveLosita/smartflow/backend/middleware"
 	newagentconv "github.com/LoveLosita/smartflow/backend/newAgent/conv"
 	newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
@@ -51,6 +55,30 @@ func Start() {
 		log.Fatalf("Failed to initialize Eino: %v", err)
 	}
 
+	ragCfg := ragconfig.LoadFromViper()
+	var ragRuntime infrarag.Runtime
+	if ragCfg.Enabled {
+		// 1. The project has no global observability platform yet, so inject a lightweight Observer here.
+		// 2. RAG internals depend only on the Observer interface; if a project-wide logging/metrics stack lands later, only this spot changes.
+		// 3. This keeps RAG from building a siloed logging infrastructure of its own.
+		ragLogger := log.Default()
+		ragRuntime, err = infrarag.NewRuntimeFromConfig(context.Background(), ragCfg, infrarag.FactoryDeps{
+			Logger:   ragLogger,
+			Observer: infrarag.NewLoggerObserver(ragLogger),
+		})
+		if err != nil {
+			log.Fatalf("Failed to initialize RAG runtime: %v", err)
+		}
+		log.Printf("RAG runtime initialized: store=%s embed=%s reranker=%s", ragCfg.Store, ragCfg.EmbedProvider, ragCfg.RerankerProvider)
+	} else {
+		log.Println("RAG runtime is disabled")
+	}
+
+	// 1. The memory module exposes a single facade to the startup layer.
+	// 2. If a unified DI container arrives later, inject this facade too, rather than exposing internal repos/services.
+	memoryCfg := memory.LoadConfigFromViper()
+	memoryModule := memory.NewModule(db, infrallm.WrapArkClient(aiHub.Worker), ragRuntime, memoryCfg)
+
 	// DAO layer initialization.
 	cacheRepo := dao.NewCacheDAO(rdb)
 	agentCacheRepo := dao.NewAgentCache(rdb)
@@ -67,7 +95,7 @@ func Start() {
 	// outbox generic event-bus wiring (phase two):
 	// 1. read the Kafka config;
 	// 2. create the infra-level EventBus;
-	// 3. explicitly register the "chat persistence" event handler;
+	// 3. explicitly register the business event handlers;
 	// 4. start the bus's background dispatch/consume loops.
 	kafkaCfg := kafkabus.LoadConfig()
 	eventBus, err := outboxinfra.NewEventBus(outboxRepo, kafkaCfg)
@@ -75,9 +103,8 @@ func Start() {
 		log.Fatalf("Failed to initialize outbox event bus: %v", err)
 	}
 	if eventBus != nil {
-		// 3. Finish registering the "business event handlers" before starting.
-		// 3.1 service/events is called explicitly here so the infra layer carries no business semantics;
-		// 3.2 abort startup on registration failure to avoid the hidden fault of "messages enqueued with no consumer".
+		// 1. Finish registering the business event handlers before starting.
+		// 2. The memory event handler is also wired through memoryModule, so the startup layer never sees its internals.
 		if err = eventsvc.RegisterChatHistoryPersistHandler(eventBus, outboxRepo, manager); err != nil {
 			log.Fatalf("Failed to register chat history event handler: %v", err)
 		}
@@ -90,7 +117,7 @@ func Start() {
 		if err = eventsvc.RegisterAgentStateSnapshotHandler(eventBus, outboxRepo, manager); err != nil {
 			log.Fatalf("Failed to register agent state snapshot event handler: %v", err)
 		}
-		if err = eventsvc.RegisterMemoryExtractRequestedHandler(eventBus, outboxRepo); err != nil {
+		if err = eventsvc.RegisterMemoryExtractRequestedHandler(eventBus, outboxRepo, memoryModule); err != nil {
 			log.Fatalf("Failed to register memory extract event handler: %v", err)
 		}
 		eventBus.Start(context.Background())
@@ -100,6 +127,8 @@ func Start() {
 		log.Println("Outbox event bus is disabled")
 	}
 
+	memoryModule.StartWorker(context.Background())
+
 	// Service layer initialization.
 	userService := service.NewUserService(userRepo, cacheRepo)
 	taskSv := service.NewTaskService(taskRepo, cacheRepo, eventBus)
@@ -110,9 +139,12 @@ func Start() {
 	// newAgent dependency wiring.
 	agentService.SetAgentStateStore(dao.NewAgentStateStoreAdapter(cacheRepo))
-	agentService.SetToolRegistry(newagenttools.NewDefaultRegistry())
+	agentService.SetToolRegistry(newagenttools.NewDefaultRegistryWithDeps(newagenttools.DefaultRegistryDeps{
+		RAGRuntime: ragRuntime,
+	}))
 	agentService.SetScheduleProvider(newagentconv.NewScheduleProvider(scheduleRepo, taskClassRepo))
 	agentService.SetSchedulePersistor(newagentconv.NewSchedulePersistorAdapter(manager))
+	agentService.SetMemoryReader(memoryModule)
 
 	// API layer initialization.
 	userApi := api.NewUserHandler(userService)
backend/config.example.yaml

@@ -54,3 +54,54 @@ agent:
   baseURL: "https://ark.cn-beijing.volces.com/api/v3" # base URL of the Worker service; adjust to your deployment
   dailyRefineConcurrency: 3 # concurrency for intra-day refinement; tune to your model quota
   weeklyAdjustBudget: 5 # weekly cross-day rebalancing budget cap; prevents over-adjustment
+rag:
+  enabled: false
+  store: "inmemory" # options: inmemory / milvus
+  topK: 8
+  threshold: 0.55
+  retrieve:
+    timeoutMs: 1500
+  ingest:
+    chunkSize: 400
+    chunkOverlap: 80
+  embed:
+    provider: "mock" # options: mock / eino
+    model: "" # e.g. an Ark/OpenAI-compatible embedding model name
+    baseURL: "https://ark.cn-beijing.volces.com/api/v3"
+    apiKeyEnv: "ARK_API_KEY"
+    timeoutMs: 1200
+    dimension: 1024
+  reranker:
+    enabled: false
+    provider: "noop" # noop by default for now; extensible later
+    timeoutMs: 1200
+  milvus:
+    address: "http://localhost:19530" # Milvus REST endpoint; per integration testing, do NOT use the 9091 health-check port
+    token: "root:Milvus"
+    dbName: ""
+    collectionName: "smartflow_rag_chunks"
+    metricType: "COSINE"
+    requestTimeoutMs: 1500
+memory:
+  enabled: true
+  rag:
+    enabled: false
+  prompt:
+    extract: ""
+    decision: ""
+  threshold: 0.55
+  enableReranker: false
+  llm:
+    temperature: 0.1
+    topP: 0.2
+  job:
+    maxRetry: 6
+  worker:
+    pollEvery: 2s
+    claimBatch: 1
+websearch:
+  rag:
+    enabled: false
backend/go.mod

@@ -5,6 +5,7 @@ go 1.24.0
 require (
 	github.com/cloudwego/eino v0.7.13
 	github.com/cloudwego/eino-ext/components/model/ark v0.1.64
+	github.com/cloudwego/eino-ext/libs/acl/openai v0.1.13
 	github.com/gin-gonic/gin v1.11.0
 	github.com/go-redis/redis/v8 v8.11.5
 	github.com/golang-jwt/jwt/v4 v4.5.2
@@ -30,6 +31,7 @@ require (
 	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/eino-contrib/jsonschema v1.0.3 // indirect
+	github.com/evanphx/json-patch v0.5.2 // indirect
 	github.com/fsnotify/fsnotify v1.9.0 // indirect
 	github.com/gabriel-vasile/mimetype v1.4.8 // indirect
 	github.com/gin-contrib/sse v1.1.0 // indirect
@@ -50,6 +52,7 @@ require (
 	github.com/leodido/go-urn v1.4.0 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/meguminnnnnnnnn/go-openai v0.1.1 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/nikolalohinski/gonja v1.5.3 // indirect

backend/go.sum

@@ -31,6 +31,8 @@ github.com/cloudwego/eino v0.7.13 h1:Ku7hY+83gGJJjf4On3UgqjC57UcA+DXe0tqAZiNDDew
 github.com/cloudwego/eino v0.7.13/go.mod h1:nA8Vacmuqv3pqKBQbTWENBLQ8MmGmPt/WqiyLeB8ohQ=
 github.com/cloudwego/eino-ext/components/model/ark v0.1.64 h1:ecsP4xWhOGi6NYxl2NOemEoTNpNuLT7ING8gOZ7CArI=
 github.com/cloudwego/eino-ext/components/model/ark v0.1.64/go.mod h1:aabMR15RTXBSi9Eu13CWavzE+no5BQO4FJUEEdqImbg=
+github.com/cloudwego/eino-ext/libs/acl/openai v0.1.13 h1:z0bI5TH3nE+uDQiRhxBQMvk2HswlDUM3xP38+VSgpSQ=
+github.com/cloudwego/eino-ext/libs/acl/openai v0.1.13/go.mod h1:1xMQZ8eE11pkEoTAEy8UlaAY817qGVMvjpDPGSIO3Ns=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -42,6 +44,8 @@ github.com/eino-contrib/jsonschema v1.0.3 h1:2Kfsm1xlMV0ssY2nuxshS4AwbLFuqmPmzIj
 github.com/eino-contrib/jsonschema v1.0.3/go.mod h1:cpnX4SyKjWjGC7iN2EbhxaTdLqGjCi0e9DxpLYxddD4=
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
+github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ=
 github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
 github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
@@ -106,6 +110,7 @@ github.com/goph/emperror v0.17.2/go.mod h1:+ZbQ+fUNO/6FNiUo0ujtMjhgad9Xa6fQL9KhH
 github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g=
 github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k=
 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
+github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
 github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
 github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
 github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
@@ -141,6 +146,8 @@ github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx
 github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/meguminnnnnnnnn/go-openai v0.1.1 h1:u/IMMgrj/d617Dh/8BKAwlcstD74ynOJzCtVl+y8xAs=
+github.com/meguminnnnnnnnn/go-openai v0.1.1/go.mod h1:qs96ysDmxhE4BZoU45I43zcyfnaYxU3X+aRzLko/htY=
 github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b h1:j7+1HpAFS1zy5+Q4qx1fWh90gTKwiN4QCGoY9TWyyO4=
 github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=

backend/infra/llm/ark.go

@@ -1,14 +1,8 @@
-// Transitional legacy file.
-//
-// CallArkText / CallArkJSON exist so that old agent code (route/quicknote, etc.)
-// could keep holding *ark.ChatModel directly until it migrated to the unified Client.
-//
-// Replacement paths:
-// - CallArkText → WrapArkClient(arkModel) + client.GenerateText(...)
-// - CallArkJSON → WrapArkClient(arkModel) + GenerateJSON[T](...)
-//
-// Once all old agent code converges on the Client interface, this file can be deleted outright.
-package newagentllm
+// Transitional unified Ark call wrapper.
+//
+// CallArkText / CallArkJSON are kept so call sites that still hold *ark.ChatModel
+// directly can migrate to the unified Client step by step. memory can also reuse this neutral layer later.
+package llm
 
 import (
 	"context"
@@ -24,9 +18,9 @@ import (
 // ArkCallOptions are generic call options built on ark.ChatModel.
 //
 // Design intent:
-// 1. route / quicknote still hold *ark.ChatModel directly;
-// 2. before they fully converge on the more abstract Client, factor the repeated ark call boilerplate into a shared layer;
-// 3. which lets this round already delete the duplicated Generate boilerplate inside route/quicknote.
+// 1. first factor the Ark call boilerplate into a shared layer;
+// 2. then let WrapArkClient provide the unified Client;
+// 3. so upper layers only care about business prompts and structured results.
 type ArkCallOptions struct {
 	Temperature float64
 	MaxTokens   int

backend/infra/llm/ark_adapter.go

@@ -1,4 +1,4 @@
-package newagentllm
+package llm
 
 import (
 	"context"
@@ -11,12 +11,12 @@ import (
 	arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
 )
 
-// WrapArkClient adapts an ark.ChatModel into newAgent's unified Client.
+// WrapArkClient adapts an ark.ChatModel into the unified Client.
 //
 // Responsibility boundaries:
 // 1. generateText: calls ark.ChatModel.Generate (non-streaming), used by GenerateJSON;
-// 2. streamText: calls ark.ChatModel.Stream (streaming), used by EmitPseudoAssistantText and friends;
-// 3. both share buildArkStreamOptions to construct call options.
+// 2. streamText: calls ark.ChatModel.Stream (streaming), used wherever streaming output is needed;
+// 3. both share the same options conversion.
 func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
 	if arkChatModel == nil {
 		return nil
@@ -48,7 +48,7 @@ func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
 	return NewClient(generateFunc, streamFunc)
 }
 
-// buildArkStreamOptions converts newAgent's GenerateOptions into ark streaming call options.
+// buildArkStreamOptions converts the unified GenerateOptions into ark streaming call options.
 func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
 	thinkingEnabled := options.Thinking == ThinkingModeEnabled

backend/infra/llm/client.go

@@ -1,4 +1,4 @@
-package newagentllm
+package llm
 
 import (
 	"context"
@@ -23,11 +23,11 @@ const (
 	ThinkingModeDisabled ThinkingMode = "disabled"
 )
 
-// GenerateOptions are the Agent's internal unified model-call options.
+// GenerateOptions are the unified model-call options.
 //
 // Design intent:
-// 1. first fold "the parameters every skill keeps re-passing" into one struct;
-// 2. so the node layer only states "what I want" instead of re-assembling options itself;
+// 1. first fold "the parameters every skill / worker keeps re-passing" into one struct;
+// 2. so upper layers only state "what I want" instead of re-assembling options themselves;
 // 3. no attempt yet to cover every provider parameter; just the most common shared knobs.
 type GenerateOptions struct {
 	Temperature float64
@@ -49,7 +49,7 @@ type TextResult struct {
 // StreamReader abstracts "a streaming reader that can Recv chunk by chunk".
 //
-// It avoids depending on any concrete SDK reader type because the Agent is still at the skeleton stage;
+// It avoids depending on any concrete SDK reader type because the skeleton is still being consolidated;
 // ark, an OpenAI-compatible layer, or any other provider can be adapted onto this minimal interface later.
 type StreamReader interface {
 	Recv() (*schema.Message, error)
@@ -62,10 +62,10 @@ type TextGenerateFunc func(ctx context.Context, messages []*schema.Message, opti
 // StreamGenerateFunc is the unified adapter signature for streaming generation.
 type StreamGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error)
 
-// Client is the Agent's unified model-client facade.
+// Client is the unified model-client facade.
 //
 // Responsibility boundaries:
-// 1. folds the node layer's "model-call intent" into a single entry point;
+// 1. folds callers' "model-call intent" into a single entry point;
 // 2. owns unified parameter validation, empty-response defense, and GenerateJSON reuse;
 // 3. does not write prompts, do business fallbacks, or hold vendor SDK details directly.
 type Client struct {
@@ -89,7 +89,7 @@ func NewClient(generateText TextGenerateFunc, streamText StreamGenerateFunc) *Cl
 // 3. not responsible for business prompt assembly, nor for mapping text back into business structs.
 func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) {
 	if c == nil || c.generateText == nil {
-		return nil, errors.New("agent llm client is not ready")
+		return nil, errors.New("llm client is not ready")
 	}
 	if len(messages) == 0 {
 		return nil, errors.New("llm messages is empty")
@@ -111,10 +111,9 @@ func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, o
 // GenerateJSON runs unified text generation first, then unified JSON parsing.
 //
 // Design notes:
-// 1. in the old agent, every skill wrote its own "Generate -> extract JSON -> deserialize";
-// 2. that whole chain is folded into a shared path here, for quicknote/taskquery/schedule to reuse directly;
-// 3. returns parsed + rawResult so callers get structured fields and can keep the raw text for logging/fallback.
-// 4. it is a generic function rather than a method because Go does not support type parameters on methods.
+// 1. fold the shared "Generate -> extract JSON -> deserialize" chain into one place;
+// 2. upper layers care only about their business struct and never re-implement parsing boilerplate;
+// 3. returns parsed + rawResult so the raw text survives for logging/fallback.
 func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema.Message, options GenerateOptions) (*T, *TextResult, error) {
 	result, err := client.GenerateText(ctx, messages, options)
 	if err != nil {
@@ -136,7 +135,7 @@ func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema
 // 3. not responsible for accumulating the full text, nor for persisting token stats.
 func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) {
 	if c == nil || c.streamText == nil {
-		return nil, errors.New("agent llm stream client is not ready")
+		return nil, errors.New("llm stream client is not ready")
 	}
 	if len(messages) == 0 {
 		return nil, errors.New("llm messages is empty")
@@ -147,9 +146,9 @@ func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options
 // BuildSystemUserMessages builds the most common "system + history + user" message list.
 //
 // Design notes:
-// 1. this was a high-frequency repeated fragment in the old agent; nearly every skill assembled it once;
-// 2. the most stable message arrangement is settled here to cut node-layer boilerplate;
-// 3. it only assembles message slices; it does not generate prompts.
+// 1. settle the most stable message arrangement here to cut boilerplate across business domains;
+// 2. it only assembles message slices; it does not generate prompts;
+// 3. reusable by agent / memory and other capability domains.
 func BuildSystemUserMessages(systemPrompt string, history []*schema.Message, userPrompt string) []*schema.Message {
 	messages := make([]*schema.Message, 0, len(history)+2)
 	if strings.TrimSpace(systemPrompt) != "" {

backend/infra/llm/json.go

@@ -1,4 +1,4 @@
-package newagentllm
+package llm
 
 import (
 	"encoding/json"
@@ -12,7 +12,7 @@ import (
 // Responsibility boundaries:
 // 1. handles the common case of "explanatory text / markdown code fences wrapped around the model output";
 // 2. extracts the outermost JSON object and deserializes it into the target struct;
-// 3. does no business field validation (e.g. whether priority is within 1~4); upper-layer nodes must re-check.
+// 3. does no business field validation; upper-layer callers must check for themselves.
 func ParseJSONObject[T any](raw string) (*T, error) {
 	clean := strings.TrimSpace(raw)
 	if clean == "" {
@@ -36,7 +36,7 @@ func ParseJSONObject[T any](raw string) (*T, error) {
 // Design notes:
 // 1. LLMs readily emit semi-structured text like "here is the result: {...}";
 // 2. brace counting is used instead of regex, so deeply nested objects are not truncated;
-// 3. only objects are extracted, not arrays, because the agent's current routing/planning contracts are all objects.
+// 3. only objects are extracted, not arrays, because the current contracts are essentially all objects.
 func ExtractJSONObject(text string) string {
 	clean := trimMarkdownCodeFence(strings.TrimSpace(text))
 	if clean == "" {

View File

@@ -0,0 +1,640 @@
# HANDOFFRAG Infra 一步到位接入方案
## 1. 文档目的
本文用于把 `backend/infra/rag` 从“可运行骨架”推进到“可被业务正式接入的共享基础设施”。
本文重点回答 4 个问题:
1. 当前 `RAG Infra` 已经做到了什么,还缺什么。
2. 什么样的状态,才算“合格、可接入、可灰度、可回滚”的 `RAG Infra`
3. 如何以“依赖注入 + 对外只暴露方法入口”的方式收口,避免业务侧直接依赖底层实现细节。
4. 如何在不打断现有业务的前提下,把 `memory``websearch` 并行迁移到统一 `RAG Infra`
---
## 2. 当前现状
## 2.1 已完成部分
当前 `backend/infra/rag` 已经具备共享骨架,主要包括:
1. 通用接口与类型:
- `core/interfaces.go`
- `core/types.go`
- `core/errors.go`
2. 通用编排器:
- `core/pipeline.go`
3. 默认切块器:
- `chunk/text_chunker.go`
4. 语料适配器:
- `corpus/memory_corpus.go`
- `corpus/web_corpus.go`
5. 默认可运行实现:
- `embed/mock_embedder.go`
- `rerank/noop_reranker.go`
- `store/inmemory_store.go`
6. 配置骨架:
- `config/config.go`
这说明项目已经完成了“共享 RAG Core 的第一阶段搭骨架”,不再是单纯的设计想法。
## 2.2 当前存在的问题
虽然骨架已经有了,但距离“可正式接入的 Infra”还差关键几步
1. 运行时没有正式装配入口。
- 当前仍主要依赖 `rag.NewDefaultPipeline()`
- 启动阶段没有统一按配置组装 `embedder / store / reranker / corpus runtime`
2. 真实底层实现还是占位。
- `embed/eino_embedder.go` 未实现。
- `rerank/eino_reranker.go` 未实现。
- `store/milvus_store.go` 未实现。
3. 配置虽有结构,但还未真正接入运行链路。
- `rag/config/config.go` 定义了 `rag.*` 配置。
- `backend/cmd/start.go` 尚未实例化并注入 `RAG Runtime`
4. 业务尚未真正切流。
- `memory` 读取链路还没有正式走 `Pipeline.Retrieve`
- `websearch` 还没有通过 `WebCorpus + Pipeline` 形成正式 WebRAG 路径。
5. 工程化能力不完整。
- 缺统一 timeout。
- 缺统一日志字段。
- 缺基础指标。
- 缺单元测试与集成测试。
6. 还存在潜在重复实现风险。
- `retrieve/vector_retriever.go``core/pipeline.go` 都承载部分检索逻辑。
- 若后续两套逻辑并存,容易出现行为漂移与维护成本上升。
## 2.3 当前状态结论
当前 `RAG Infra` 的状态,更准确地说是:
1. 已经完成“共享骨架搭建”。
2. 还没有完成“统一装配、真实实现、正式接入、工程化收口”。
3. 目前适合继续扩展,但还不适合直接作为长期稳定的业务依赖面。
---
## 3. 目标定义:什么叫“合格的 RAG Infra”
本轮改造完成后,`backend/infra/rag` 应满足以下标准:
1. 启动时可统一构造并注入,不再靠业务模块自行拼装底层依赖。
2. 对外只暴露稳定方法入口,不暴露底层 `Pipeline / Store / Embedder / Reranker` 的装配细节。
3. 支持按配置切换实现:
- `inmemory / milvus`
- `mock / eino`
- `noop / eino`
4. 支持 `memory``websearch` 两类语料复用同一套 `chunk / embed / retrieve / rerank / fallback` 流程。
5. 支持灰度开关与回滚,不要求业务“一次性硬切流”。
6. 支持基础观测:
- 延迟
- 命中数
- fallback 原因
- 错误码
7. 具备最小可依赖测试集,保证公共层改动不会悄悄破坏业务。
---
## 4. 核心改造原则
## 4.1 原则一:依赖注入统一由 Infra 自己负责
`RAG Infra` 必须自己承接“底层实现装配”,业务侧不应感知:
1. 当前用的是 `Milvus` 还是 `InMemoryStore`
2. 当前用的是 `MockEmbedder` 还是 `EinoEmbedder`
3. 当前是否开启 `Reranker`
4. 当前超时、阈值、切块参数是多少。
业务只拿到一个已经注入好的 `RAG Runtime``RAG Service`,直接调用方法。
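A minimal sketch of what that looks like from the consuming side, assuming constructor injection; the `ReadService` shape here is illustrative, not the final API:

```go
package service

import rag "github.com/LoveLosita/smartflow/backend/infra/rag"

// ReadService is a hypothetical memory read service: it receives an
// already-assembled rag.Runtime and never news a store, embedder,
// or reranker itself.
type ReadService struct {
	rag rag.Runtime // nil means the RAG path stays disabled
}

func NewReadService(ragRuntime rag.Runtime) *ReadService {
	return &ReadService{rag: ragRuntime}
}
```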
### 4.2 Principle 2: expose methods, not parts

Business layers should not depend directly on these fine-grained objects:

1. `core.Pipeline`
2. `core.VectorStore`
3. `core.Embedder`
4. `core.Reranker`
5. `corpus.MemoryCorpus`
6. `corpus.WebCorpus`

These objects are internal assembly details of `infra/rag`.

Business layers should only call methods such as:

1. `IngestMemory`
2. `RetrieveMemory`
3. `IngestWeb`
4. `RetrieveWeb`

The benefits:

1. A more stable business dependency surface.
2. Swapping the low-level implementation later does not spread changes across business modules.
3. Easier unified logging, monitoring, degradation, and permission boundaries.

### 4.3 Principle 3: business semantics stay in business layers; generic RAG steps sink into the Infra

What sinks into `infra/rag`:

1. Chunking
2. Embedding
3. Vector storage
4. Recall
5. Rerank
6. Threshold filtering
7. Fallback semantics
8. Unified logs and metrics

What stays in business layers:

1. `memory`'s injection priorities, gating rules, explicit/implicit policies
2. `websearch`'s provider search, query rewriting, time filtering, domain allowlists, fetch strategy
3. Which evidence ultimately goes to the model, how much, and how citations are organized

### 4.4 Principle 4: migrate in parallel; do not delete the old path in one step

Even though this round aims to "finish the Infra in one shot", the cutover must stay parallel:

1. Once the new Infra is up, `memory` integrates first while the old logic remains as a safety net.
2. Then `websearch` integrates while the V1 path remains as a safety net.
3. Only after observed stability are the old branches deleted.

---

## 5. Target Architecture

### 5.1 Recommended external structure

Add a unified external facade under `backend/infra/rag`, for example:

1. `runtime.go`
2. `factory.go`
3. `service.go`

Converge the formal external dependency surface into one interface, for example:

```go
type Runtime interface {
	IngestMemory(ctx context.Context, input MemoryIngestRequest) (*IngestResult, error)
	RetrieveMemory(ctx context.Context, input MemoryRetrieveRequest) (*RetrieveResult, error)
	IngestWeb(ctx context.Context, input WebIngestRequest) (*IngestResult, error)
	RetrieveWeb(ctx context.Context, input WebRetrieveRequest) (*RetrieveResult, error)
}
```

Notes:

1. The business side depends only on `Runtime`.
2. Internally, `Runtime` calls `Pipeline + CorpusAdapter + Store + Embedder + Reranker`.
3. This guarantees business never imports the low-level details under `core`; a call-site example follows.
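An illustrative call site, using the request/result names from the `runtime.go` that ships alongside this plan (error handling simplified, `legacyRead` is a stand-in name):

```go
// Hypothetical read path: ask the Runtime for memory hits and degrade to
// the legacy path on any error instead of blocking the reply.
res, err := ragRuntime.RetrieveMemory(ctx, rag.MemoryRetrieveRequest{
	UserID: userID,
	Query:  query,
	TopK:   8, // 0 would fall back to the configured rag.topK default
})
if err != nil {
	return legacyRead(ctx, userID, query)
}
for _, hit := range res.Items {
	_ = hit.Metadata["memory_type"] // metadata carries the scalar filter fields
}
```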
### 5.2 Recommended internal structure

Internally, split responsibilities as follows:

1. `factory.go`
   - Creates `Embedder / Store / Reranker / Pipeline` from config (signature sketched below)
2. `runtime.go`
   - Holds `Pipeline + MemoryCorpus + WebCorpus + Logger + Metrics`
3. `service.go`
   - Defines the `Runtime` interface and the external methods
4. `core/`
   - Keeps the low-level generic orchestration logic
5. `corpus/`
   - Only maps "corpus -> standard documents" and "business filters -> standard filters"
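The factory this maps to in the `factory.go` that lands with this commit, abridged:

```go
// NewRuntimeFromConfig is the single assembly point: it picks the embedder,
// store, and reranker from config, builds the core Pipeline, and hands back
// only the Runtime facade.
func NewRuntimeFromConfig(ctx context.Context, cfg ragconfig.Config, deps FactoryDeps) (Runtime, error) {
	embedder, err := buildEmbedder(ctx, cfg) // mock | eino
	if err != nil {
		return nil, err
	}
	store, err := buildStore(cfg, deps.Logger, deps.Observer) // inmemory | milvus
	if err != nil {
		return nil, err
	}
	reranker, err := buildReranker(cfg, deps.Observer) // noop | eino (falls back to noop)
	if err != nil {
		return nil, err
	}
	pipeline := core.NewPipeline(ragchunk.NewTextChunker(), embedder, store, reranker)
	return newRuntime(cfg, pipeline, deps.Observer), nil
}
```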
### 5.3 Recommended dependency-injection style

In `backend/cmd/start.go`, build the `RAG Runtime` once at startup:

1. Read the `rag.*` config
2. Construct the `RAGFactory`
3. Produce the `RAGRuntime`
4. Inject it into:
   - the `memory service`
   - the `newAgent web tools`

The business side only receives the assembled object and never news any low-level implementation itself (abridged wiring below).
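Abridged from the `cmd/start.go` wiring in this commit, the shape is:

```go
ragCfg := ragconfig.LoadFromViper()
var ragRuntime infrarag.Runtime // stays nil when rag.enabled=false
if ragCfg.Enabled {
	ragLogger := log.Default()
	ragRuntime, err = infrarag.NewRuntimeFromConfig(context.Background(), ragCfg, infrarag.FactoryDeps{
		Logger:   ragLogger,
		Observer: infrarag.NewLoggerObserver(ragLogger),
	})
	if err != nil {
		log.Fatalf("Failed to initialize RAG runtime: %v", err)
	}
}

// Both consumers receive the same Runtime; neither can reach the internals.
memoryModule := memory.NewModule(db, infrallm.WrapArkClient(aiHub.Worker), ragRuntime, memoryCfg)
agentService.SetToolRegistry(newagenttools.NewDefaultRegistryWithDeps(newagenttools.DefaultRegistryDeps{
	RAGRuntime: ragRuntime,
}))
```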
---

## 6. External Method Surface

### 6.1 Memory methods

Recommended external methods:

1. `IngestMemory`
   - Input: a normalized memory ingest request
   - Output: document count, chunk count, sync result
2. `RetrieveMemory`
   - Input: user, conversation, assistant, run, query, topK, threshold
   - Output: a standard `RetrieveResult`

Note:

1. The `memory` business layer must not call `MemoryCorpus` directly.
2. The `memory` business layer must not assemble vector filter conditions itself.
3. All filter conditions are converted uniformly inside `RetrieveMemory` (request shape sketched below).
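For reference, the retrieve request in the accompanying `runtime.go` carries roughly these fields; this is reconstructed from its call sites, so treat exact types as indicative:

```go
// MemoryRetrieveRequest, as consumed by runtime.RetrieveMemory.
type MemoryRetrieveRequest struct {
	TraceID        string   // propagated into observation fields
	UserID         string   // mandatory isolation key
	ConversationID string
	AssistantID    string
	RunID          string
	Query          string
	TopK           int      // <=0 falls back to rag.topK
	Threshold      float64  // <=0 falls back to rag.threshold
	MemoryTypes    []string // >1 entries are second-pass filtered inside the Runtime
	Action         string   // defaults to "search"
}
```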
### 6.2 Web methods

Recommended external methods:

1. `IngestWeb`
   - Input: fetch results (`url/title/snippet/content/domain/query_id/session_id`)
   - Output: a unified ingest summary
2. `RetrieveWeb`
   - Input: query, query_id/session_id, domain, topK, threshold
   - Output: a standard `RetrieveResult`

Note:

1. The `websearch` business layer must not hold `WebCorpus` directly.
2. The `websearch` business layer only "gets the page content" and "decides whether RAG needs calling at all".
3. Actual vector ingestion, retrieval, and rerank are handled uniformly by `infra/rag`.

### 6.3 Method-layer boundaries

What the method layer is responsible for:

1. Parameter validation
2. Internal filter assembly
3. Calling `Pipeline.Ingest / Retrieve`
4. Unified logging, metrics, fallback

What it is not responsible for:

1. `websearch provider` search
2. HTML fetching
3. Prompt injection
4. Business-specific ranking preferences

---

## 7. Concrete Plan

### 7.1 Part 1: complete the RAG Infra itself

#### Goal

Make `backend/infra/rag` shared infrastructure that is formally injectable, formally switchable, and formally dependable.

#### Work items

1. Add the formal runtime and factory:
   - `backend/infra/rag/runtime.go`
   - `backend/infra/rag/factory.go`
   - if needed, `backend/infra/rag/service.go`
2. Extend the config:
   - `rag.enabled`
   - `rag.store`
   - `rag.embed.provider`
   - `rag.embed.model`
   - `rag.embed.timeoutMs`
   - `rag.embed.dimension`
   - `rag.reranker.provider`
   - `rag.reranker.timeoutMs`
   - `rag.retrieve.timeoutMs`
   - `rag.ingest.chunkSize`
   - `rag.ingest.chunkOverlap`
3. Converge the runtime entry point:
   - keep `rag.NewDefaultPipeline()` only as a local fallback
   - formal business integration goes through `NewRuntimeFromConfig(...)`
4. Eliminate the duplicate retrieval path:
   - make `Pipeline` the official retrieval entry point
   - `retrieve/vector_retriever.go` either becomes an internal detail or gets deleted later; no dual track

#### Acceptance

1. Startup can construct the `RAG Runtime` from config.
2. Business never assembles `Pipeline / Store / Embedder / Reranker` itself.
3. The external surface is stable and the underlying implementations are swappable.

### 7.2 Part 2: fill in the real low-level implementations

#### Goal

Give the `RAG Infra` genuinely usable vector capability instead of staying on mocks.

#### Work items

1. Implement `embed/eino_embedder.go`:
   - embedding calls
   - embedding timeouts
   - error wrapping and unified logging
2. Implement `rerank/eino_reranker.go`:
   - rerank calls
   - rerank timeouts
   - degrade to the original ordering on failure
3. Implement `store/milvus_store.go`:
   - `Upsert`
   - `Search`
   - `Delete`
   - `Get`
4. Milvus metadata design advice (field sketch below):
   - high-frequency filter fields should be explicit scalar fields; do not push everything through one big JSON filter
   - the key fields:
     - `corpus`
     - `user_id`
     - `assistant_id`
     - `conversation_id`
     - `run_id`
     - `memory_type`
     - `query_id`
     - `session_id`
     - `domain`
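A sketch of the intended row shape, assuming one scalar column per high-frequency filter field; this is illustrative, not the actual collection DDL:

```go
// chunkRow models one Milvus row. Every field the business filters on
// frequently becomes a scalar column; only low-frequency leftovers go
// into the catch-all metadata JSON.
type chunkRow struct {
	ID             string    // primary key: chunk id
	Vector         []float32 // embedding, dimension = rag.embed.dimension
	Corpus         string    // "memory" | "web"
	UserID         string    // memory corpus
	AssistantID    string    // memory corpus
	ConversationID string    // memory corpus
	RunID          string    // memory corpus
	MemoryType     string    // memory corpus
	QueryID        string    // web corpus
	SessionID      string    // web corpus
	Domain         string    // web corpus
	Metadata       map[string]any
}
```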
#### Acceptance

1. `MilvusStore` completes writes and retrieval reliably in the prepared Docker environment.
2. `EinoEmbedder` and `EinoReranker` can be enabled by config.
3. When a provider wobbles, the main path still falls back.

### 7.3 Part 3: engineering hygiene

#### Goal

Give the `RAG Infra` the "observable, testable, revertible" properties of real infrastructure.

#### Work items

1. Timeout wiring:
   - embedding timeout
   - retrieve timeout
   - rerank timeout
2. Unified log fields (emission sketched below):
   - `trace_id`
   - `corpus`
   - `action`
   - `provider`
   - `latency_ms`
   - `hit_count`
   - `fallback_reason`
3. Metrics:
   - `rag_ingest_count`
   - `rag_retrieve_count`
   - `rag_hit_count`
   - `rag_fallback_rate`
   - `rag_latency_ms`
4. Tests:
   - `chunker` unit tests
   - `corpus filter` unit tests
   - `pipeline fallback` unit tests
   - `MilvusStore` integration tests
   - `memory/web` filter-isolation tests
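With the Observer interface added in `core/observe.go` alongside this plan, emitting those fields looks like this (field values illustrative, and `traceID`/`elapsed` assumed to be threaded in by the caller):

```go
observer.Observe(ctx, core.ObserveEvent{
	Level:     core.ObserveLevelInfo,
	Component: "pipeline",
	Operation: "retrieve",
	Fields: map[string]any{
		"trace_id":   traceID,
		"corpus":     "memory",
		"provider":   "eino",
		"latency_ms": elapsed.Milliseconds(),
		"hit_count":  len(result.Items),
	},
})
```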
#### Acceptance

1. Given a retrieval problem, logs can pinpoint whether it was:
   - no hits
   - a timeout
   - a rerank degradation
   - over-strict filtering
2. Shared-layer tests reliably cover the key paths.

### 7.4 Part 4: integrate Memory

#### Goal

Make `memory` the first business domain formally on the `RAG Infra`.

#### Work items

1. Write path:
   - after the memory worker successfully writes `memory_items`, call `RAGRuntime.IngestMemory`
   - reuse `memory_items.vector_status/vector_id`
2. Read path:
   - add a `RetrieveMemory` path in `memory/service/read_service.go`
   - mandatory filters:
     - `user_id`
     - `assistant_id`
     - `conversation_id`
     - `run_id`
3. Switches:
   - `memory.rag.enabled=false` by default
   - once on, roll the new path out gradually
4. Degradation (sketched below):
   - `RAG` retrieval failure -> fall back to the old read path
   - `Reranker` failure -> keep the original ordering
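Put together, the gated read path with degradation is roughly as follows; the helper names `legacyRead`/`toRequest`/`toItems` and the `ReadQuery`/`MemoryItem` types are placeholders:

```go
func (s *ReadService) Read(ctx context.Context, q ReadQuery) ([]MemoryItem, error) {
	// Switch off, or no Runtime injected: behave exactly as today.
	if s.rag == nil || !s.cfg.RAGEnabled {
		return s.legacyRead(ctx, q)
	}
	res, err := s.rag.RetrieveMemory(ctx, toRequest(q))
	if err != nil {
		// A RAG failure degrades to the old read path and never blocks the reply.
		return s.legacyRead(ctx, q)
	}
	return toItems(res), nil
}
```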
#### Acceptance

1. With the switch off, behavior is identical to today.
2. With the switch on, memory recall works reliably.
3. Failures never affect the main reply path.

### 7.5 Part 5: integrate WebSearch

#### Goal

Make `websearch` the second business domain formally on the `RAG Infra`, reusing `WebCorpus`.

#### Work items

1. Keep the V1 path:
   - `web_search` does provider search
   - `web_fetch` does body fetching and cleaning
2. Add the V2 path:
   - map fetch results to `WebIngestItem`
   - call `RAGRuntime.IngestWeb`
   - then call `RAGRuntime.RetrieveWeb`
3. Hard filter constraint (guard sketched below):
   - at least one of `query_id` and `session_id` must be present
   - prevents recall bleeding across queries/sessions
4. Switches:
   - `websearch.rag.enabled=false` by default
5. Degradation:
   - `web_rag_search` failure -> fall back to `web_search + web_fetch`
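The hard constraint in item 3 can be enforced right at the method surface; a minimal guard, assuming the request carries `QueryID`/`SessionID` fields:

```go
// validateWebScope rejects retrieval requests that could bleed across
// queries/sessions: at least one scoping key must be present.
func validateWebScope(req WebRetrieveRequest) error {
	if strings.TrimSpace(req.QueryID) == "" && strings.TrimSpace(req.SessionID) == "" {
		return errors.New("rag web retrieve: query_id or session_id is required")
	}
	return nil
}
```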
#### Acceptance

1. Old and new paths coexist without interfering.
2. The new path never mixes data across queries/sessions.
3. On failure, it can fall back to V1 immediately.

### 7.6 Part 6: startup wiring and unified management

#### Goal

Make the `RAG Runtime` a dependency assembled and managed uniformly at startup.

#### Work items

1. In `backend/cmd/start.go`:
   - read the `rag.*` config
   - construct the `RAG Runtime`
   - inject it into `memory` and the `newAgent web tools`
2. The startup layer manages the dependency lifecycle:
   - initialization
   - health checks
   - shutdown cleanup
3. Business layers are forbidden from newing low-level implementations:
   - no business-built `MilvusStore`
   - no business-built `EinoEmbedder`
   - no business-assembled `Pipeline`

#### Acceptance

1. Dependency management is centralized in the startup layer.
2. Business code depends only on method entry points and never touches the low-level implementations.
3. Swapping implementations later requires no broad business-layer changes.

---

## 8. Recommended Directory Changes

Add or adjust the following files:

1. `backend/infra/rag/runtime.go`
2. `backend/infra/rag/factory.go`
3. `backend/infra/rag/service.go`
4. `backend/infra/rag/README.md`, or keep appending to this document
5. `backend/infra/rag/embed/eino_embedder.go`
6. `backend/infra/rag/rerank/eino_reranker.go`
7. `backend/infra/rag/store/milvus_store.go`
8. `backend/infra/rag/core/pipeline_test.go`
9. `backend/infra/rag/chunk/text_chunker_test.go`
10. `backend/infra/rag/corpus/memory_corpus_test.go`
11. `backend/infra/rag/corpus/web_corpus_test.go`
12. `backend/infra/rag/store/milvus_store_integration_test.go`

Companion changes:

1. `backend/cmd/start.go`
2. `backend/config.example.yaml`
3. `backend/memory/service/read_service.go`
4. `backend/newAgent/tools/registry.go`
5. `backend/agent/通用能力接入文档.md`

---

## 9. Config Recommendation

Recommended config structure:
```yaml
rag:
enabled: true
store: "milvus"
topK: 8
threshold: 0.55
retrieve:
timeoutMs: 1500
ingest:
chunkSize: 400
chunkOverlap: 80
embed:
provider: "eino"
model: ""
timeoutMs: 1200
dimension: 1024
reranker:
enabled: true
provider: "eino"
timeoutMs: 1200
memory:
rag:
enabled: false
websearch:
rag:
enabled: false
```
Notes:

1. `rag.enabled` controls whether the shared layer is on at all.
2. `memory.rag.enabled` and `websearch.rag.enabled` control per-business cutover.
3. Even with `rag.enabled=true`, no business defaults onto the new path immediately.

---

## 10. Rollback Strategy

Recommended rollback order:

1. First turn off the business-level switches:
   - `memory.rag.enabled=false`
   - `websearch.rag.enabled=false`
2. Then turn off rerank:
   - `rag.reranker.enabled=false`
3. Then switch the low-level implementations:
   - `rag.store=inmemory`
   - `rag.embed.provider=mock`
   - `rag.reranker.provider=noop`
4. If still abnormal, fall back to the old business paths.

This achieves:

1. No single provider wobble breaks the main flow.
2. A minimal usable capability is always preserved.
3. Finer-grained fault isolation.

---

## 11. Risks and Mitigations

1. Risk: Milvus filter capabilities mismatch the existing metadata shape.
   - Mitigation: model high-frequency filter fields explicitly; do not lean on crude big-JSON filtering.
2. Risk: embedding/rerank provider wobble affects latency.
   - Mitigation: timeouts + fallback + business-level switches.
3. Risk: business layers bypass the Infra and depend on low-level implementations directly.
   - Mitigation: converge on the `Runtime` method surface; code review forbids going around it.
4. Risk: old and new retrieval paths coexisting long term raises maintenance cost.
   - Mitigation: keep the safety net this round, then explicitly delete the old implementation once stable.
5. Risk: recall bleeding across queries/sessions.
   - Mitigation: `WebRetrieve` enforces that at least one of `query_id/session_id` is present.

---

## 12. Minimal Landing Order

Prioritizing "a dependable, integrable Infra as fast as possible", the recommended order is:

1. First `runtime/factory/service`: close off dependency injection and the method surface.
2. Then implement `MilvusStore + EinoEmbedder + EinoReranker`.
3. Then add timeouts, logs, metrics, tests.
4. Then integrate `memory` first.
5. Finally integrate `websearch`.

Why:

1. Integrating business before closing off the method surface leaks low-level details into business layers.
2. Integrating websearch before memory leaves the shared Infra's value too diffuse, and the portfolio narrative incomplete.

---

## 13. Expected Gains

After this plan lands, the project gains:

1. `memory` and `websearch` share one genuinely runnable RAG infrastructure.
2. Business code stops re-implementing chunking, recall, rerank, and degradation logic.
3. `infra/rag` becomes a formal shared capability with unified dependency injection and management.
4. Adding a new corpus domain later only needs a `CorpusAdapter + method surface`, never another copy of the RAG chain.
5. The project narrative becomes more complete:
   - "abstracted and implemented a shared RAG Infra"
   - "unified Memory/WebSearch retrieval and rerank capabilities"
   - "closed off low-level complexity behind dependency injection and a facade"

---

## 14. Bottom Line

This round's goal should be stated as:

1. **Not** "add a few more placeholder implementations to RAG".
2. **But** "turn `backend/infra/rag` into formally integrable shared infrastructure in one pass".

Two sentences capture the key:

1. Dependency injection is owned by `infra/rag` itself.
2. Only method entry points are exposed; the business side never touches low-level implementation details.

Hold those two lines, and `memory`, `websearch`, and any future corpus domain stay clearly more manageable.

backend/infra/rag/config/config.go

@@ -5,30 +5,63 @@ import "github.com/spf13/viper"
 
 // Config is the RAG Core runtime configuration.
 type Config struct {
 	Enabled bool
+	Store string
 	TopK int
 	Threshold float64
+	EmbedProvider string
+	EmbedModel string
+	EmbedBaseURL string
+	EmbedAPIKeyEnv string
+	EmbedTimeoutMS int
+	EmbedDimension int
 	RerankerEnabled bool
+	RerankerProvider string
 	RerankerTimeoutMS int
 	ChunkSize int
 	ChunkOverlap int
 	RetrieveTimeoutMS int
+	MilvusAddress string
+	MilvusToken string
+	MilvusDBName string
+	MilvusCollectionName string
+	MilvusMetricType string
+	MilvusRequestTimeoutMS int
 }
 
 // LoadFromViper reads the rag config and fills in defaults.
 func LoadFromViper() Config {
 	cfg := Config{
 		Enabled: viper.GetBool("rag.enabled"),
+		Store: viper.GetString("rag.store"),
 		TopK: viper.GetInt("rag.topK"),
 		Threshold: viper.GetFloat64("rag.threshold"),
+		EmbedProvider: viper.GetString("rag.embed.provider"),
+		EmbedModel: viper.GetString("rag.embed.model"),
+		EmbedBaseURL: viper.GetString("rag.embed.baseURL"),
+		EmbedAPIKeyEnv: viper.GetString("rag.embed.apiKeyEnv"),
+		EmbedTimeoutMS: viper.GetInt("rag.embed.timeoutMs"),
+		EmbedDimension: viper.GetInt("rag.embed.dimension"),
 		RerankerEnabled: viper.GetBool("rag.reranker.enabled"),
+		RerankerProvider: viper.GetString("rag.reranker.provider"),
 		RerankerTimeoutMS: viper.GetInt("rag.reranker.timeoutMs"),
 		ChunkSize: viper.GetInt("rag.ingest.chunkSize"),
 		ChunkOverlap: viper.GetInt("rag.ingest.chunkOverlap"),
 		RetrieveTimeoutMS: viper.GetInt("rag.retrieve.timeoutMs"),
+		MilvusAddress: viper.GetString("rag.milvus.address"),
+		MilvusToken: viper.GetString("rag.milvus.token"),
+		MilvusDBName: viper.GetString("rag.milvus.dbName"),
+		MilvusCollectionName: viper.GetString("rag.milvus.collectionName"),
+		MilvusMetricType: viper.GetString("rag.milvus.metricType"),
+		MilvusRequestTimeoutMS: viper.GetInt("rag.milvus.requestTimeoutMs"),
 	}
+	if cfg.Store == "" {
+		cfg.Store = "inmemory"
+	}
 	if cfg.TopK <= 0 {
 		cfg.TopK = 8
@@ -36,6 +69,24 @@ func LoadFromViper() Config {
 	if cfg.Threshold < 0 {
 		cfg.Threshold = 0
 	}
+	if cfg.EmbedProvider == "" {
+		cfg.EmbedProvider = "mock"
+	}
+	if cfg.EmbedBaseURL == "" {
+		cfg.EmbedBaseURL = viper.GetString("agent.baseURL")
+	}
+	if cfg.EmbedAPIKeyEnv == "" {
+		cfg.EmbedAPIKeyEnv = "ARK_API_KEY"
+	}
+	if cfg.EmbedTimeoutMS <= 0 {
+		cfg.EmbedTimeoutMS = 1200
+	}
+	if cfg.EmbedDimension <= 0 {
+		cfg.EmbedDimension = 1024
+	}
+	if cfg.RerankerProvider == "" {
+		cfg.RerankerProvider = "noop"
+	}
 	if cfg.RerankerTimeoutMS <= 0 {
 		cfg.RerankerTimeoutMS = 1200
 	}
@@ -48,5 +99,20 @@ func LoadFromViper() Config {
 	if cfg.RetrieveTimeoutMS <= 0 {
 		cfg.RetrieveTimeoutMS = 1500
 	}
+	if cfg.MilvusAddress == "" {
+		cfg.MilvusAddress = "http://localhost:19530"
+	}
+	if cfg.MilvusToken == "" {
+		cfg.MilvusToken = "root:Milvus"
+	}
+	if cfg.MilvusCollectionName == "" {
+		cfg.MilvusCollectionName = "smartflow_rag_chunks"
+	}
+	if cfg.MilvusMetricType == "" {
+		cfg.MilvusMetricType = "COSINE"
+	}
+	if cfg.MilvusRequestTimeoutMS <= 0 {
+		cfg.MilvusRequestTimeoutMS = 1500
+	}
 	return cfg
 }

backend/infra/rag/core/observe.go

@@ -0,0 +1,190 @@
package core
import (
"context"
"errors"
"fmt"
"log"
"sort"
"strings"
)
// ObserveLevel is the severity level of an observation event.
type ObserveLevel string
const (
ObserveLevelInfo ObserveLevel = "info"
ObserveLevelWarn ObserveLevel = "warn"
ObserveLevelError ObserveLevel = "error"
)
// ObserveEvent describes one unified observation event.
//
// Responsibility boundaries:
// 1. carries only the RAG Infra's structured runtime information;
// 2. is not bound to any concrete logging, metrics, or tracing implementation;
// 3. field contents should stay as stable as possible, so a global observability platform can be plugged in later.
type ObserveEvent struct {
Level ObserveLevel
Component string
Operation string
Fields map[string]any
}
// Observer is the minimal observability interface of the RAG Infra.
//
// Responsibility boundaries:
// 1. consumes structured events;
// 2. does not decide whether business logic keeps executing;
// 3. no implementation may feed back into and destabilize the main path.
type Observer interface {
Observe(ctx context.Context, event ObserveEvent)
}
// ObserverFunc lets a plain function satisfy Observer.
type ObserverFunc func(ctx context.Context, event ObserveEvent)
func (f ObserverFunc) Observe(ctx context.Context, event ObserveEvent) {
if f == nil {
return
}
f(ctx, event)
}
// NewNopObserver returns a no-op implementation, a safe default before a unified observability platform exists.
func NewNopObserver() Observer {
return ObserverFunc(func(context.Context, ObserveEvent) {})
}
// NewLoggerObserver returns a standard-log adapter.
//
// Notes:
// 1. while the project has no unified logging platform, print the structured fields in a stable form first;
// 2. if a unified logger/metrics/tracing stack lands later, only this injected Observer implementation needs replacing;
// 3. the adapter keeps single-line output by default, to stay close to the existing log style.
func NewLoggerObserver(logger *log.Logger) Observer {
if logger == nil {
logger = log.Default()
}
return &loggerObserver{logger: logger}
}
type loggerObserver struct {
logger *log.Logger
}
func (o *loggerObserver) Observe(ctx context.Context, event ObserveEvent) {
if o == nil || o.logger == nil {
return
}
level := strings.TrimSpace(string(event.Level))
if level == "" {
level = string(ObserveLevelInfo)
}
component := strings.TrimSpace(event.Component)
if component == "" {
component = "unknown"
}
operation := strings.TrimSpace(event.Operation)
if operation == "" {
operation = "unknown"
}
fields := ObserveFieldsFromContext(ctx)
for key, value := range event.Fields {
key = strings.TrimSpace(key)
if key == "" || !shouldKeepObserveField(value) {
continue
}
fields[key] = value
}
parts := []string{
"rag",
fmt.Sprintf("level=%s", level),
fmt.Sprintf("component=%s", component),
fmt.Sprintf("operation=%s", operation),
}
keys := make([]string, 0, len(fields))
for key := range fields {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
parts = append(parts, fmt.Sprintf("%s=%v", key, fields[key]))
}
o.logger.Print(strings.Join(parts, " "))
}
type observeFieldsContextKey struct{}
// WithObserveFields attaches shared observation fields to the context for downstream reuse.
//
// Step by step:
// 1. read any existing context fields first, so Runtime / Pipeline / Store can enrich them layer by layer;
// 2. later writes overwrite same-named values, so downstream sees the freshest semantics;
// 3. keep only "meaningful" fields, to avoid logs accumulating piles of empty values.
func WithObserveFields(ctx context.Context, fields map[string]any) context.Context {
if len(fields) == 0 {
return ctx
}
if ctx == nil {
ctx = context.Background()
}
merged := ObserveFieldsFromContext(ctx)
for key, value := range fields {
key = strings.TrimSpace(key)
if key == "" || !shouldKeepObserveField(value) {
continue
}
merged[key] = value
}
if len(merged) == 0 {
return ctx
}
return context.WithValue(ctx, observeFieldsContextKey{}, merged)
}
// ObserveFieldsFromContext extracts the observation fields already accumulated on the context.
func ObserveFieldsFromContext(ctx context.Context) map[string]any {
if ctx == nil {
return map[string]any{}
}
raw, ok := ctx.Value(observeFieldsContextKey{}).(map[string]any)
if !ok || len(raw) == 0 {
return map[string]any{}
}
result := make(map[string]any, len(raw))
for key, value := range raw {
result[key] = value
}
return result
}
// ClassifyErrorCode folds common errors into stable error codes, easing future integration with a global observability platform.
func ClassifyErrorCode(err error) string {
switch {
case err == nil:
return ""
case errors.Is(err, context.DeadlineExceeded):
return "DEADLINE_EXCEEDED"
case errors.Is(err, context.Canceled):
return "CANCELED"
default:
return "RAG_ERROR"
}
}
func shouldKeepObserveField(value any) bool {
if value == nil {
return false
}
if text, ok := value.(string); ok {
return strings.TrimSpace(text) != ""
}
return true
}

backend/infra/rag/core/pipeline.go

@@ -28,6 +28,7 @@ type Pipeline struct {
 	store    VectorStore
 	reranker Reranker
 	logger   *log.Logger
+	observer Observer
 }
 
 func NewPipeline(chunker Chunker, embedder Embedder, store VectorStore, reranker Reranker) *Pipeline {
@@ -37,9 +38,26 @@ func NewPipeline(chunker Chunker, embedder Embedder, store VectorStore, reranker
 		store:    store,
 		reranker: reranker,
 		logger:   log.Default(),
+		observer: NewNopObserver(),
 	}
 }
 
+// SetLogger sets the logger used by the Pipeline.
+func (p *Pipeline) SetLogger(logger *log.Logger) {
+	if p == nil || logger == nil {
+		return
+	}
+	p.logger = logger
+}
+
+// SetObserver sets the unified observer used by the Pipeline.
+func (p *Pipeline) SetObserver(observer Observer) {
+	if p == nil || observer == nil {
+		return
+	}
+	p.observer = observer
+}
+
 // Ingest runs the unified ingest flow.
 //
 // Step by step:
@@ -63,6 +81,24 @@ func (p *Pipeline) Ingest(
 	if err != nil {
 		return nil, err
 	}
+	return p.IngestDocuments(ctx, corpus.Name(), docs, opt)
+}
+
+// IngestDocuments runs the unified ingest flow for already-normalized documents.
+//
+// Responsibility boundaries:
+// 1. handles documents already mapped by a CorpusAdapter;
+// 2. owns unified chunking, embedding, and Upsert;
+// 3. does not re-parse business input, so the Runtime never re-builds documents just to get a document_id.
+func (p *Pipeline) IngestDocuments(
+	ctx context.Context,
+	corpusName string,
+	docs []SourceDocument,
+	opt IngestOption,
+) (*IngestResult, error) {
+	if p == nil || p.chunker == nil || p.embedder == nil || p.store == nil {
+		return nil, ErrNilDependency
+	}
 	if len(docs) == 0 {
 		return &IngestResult{DocumentCount: 0, ChunkCount: 0}, nil
 	}
@@ -102,7 +138,7 @@ func (p *Pipeline) Ingest(
 	now := time.Now()
 	for i, chunk := range chunks {
 		metadata := cloneMap(chunk.Metadata)
-		metadata["corpus"] = corpus.Name()
+		metadata["corpus"] = corpusName
 		metadata["document_id"] = chunk.DocumentID
 		metadata["chunk_order"] = chunk.Order
 		rows = append(rows, VectorRow{
@@ -214,7 +250,23 @@ func (p *Pipeline) Retrieve(
 		// 2. a rerank failure does not abort the main flow; degrade uniformly to the original ordering.
 		result.FallbackUsed = true
 		result.FallbackReason = FallbackReasonRerankFailed
+		if p.observer != nil {
+			p.observer.Observe(ctx, ObserveEvent{
+				Level:     ObserveLevelWarn,
+				Component: "pipeline",
+				Operation: "rerank_fallback",
+				Fields: map[string]any{
+					"status":          "fallback",
+					"fallback_reason": FallbackReasonRerankFailed,
+					"candidate_count": len(candidates),
+					"top_k":           topK,
+					"error":           rerankErr,
+					"error_code":      ClassifyErrorCode(rerankErr),
+				},
+			})
+		} else if p.logger != nil {
			p.logger.Printf("rag rerank fallback: reason=%s err=%v", FallbackReasonRerankFailed, rerankErr)
+		}
 		return result, nil
 	}
 	result.Items = reranked

backend/infra/rag/corpus/memory_corpus.go

@@ -22,7 +22,11 @@ type MemoryIngestItem struct {
 	MemoryType string
 	Title      string
 	Content    string
+	Confidence float64
+	Importance float64
 	SensitivityLevel int
+	IsExplicit bool
+	Status     string
 	TTLAt     *time.Time
 	CreatedAt *time.Time
 }
@@ -71,7 +75,12 @@ func (c *MemoryCorpus) BuildIngestDocuments(_ context.Context, input any) ([]cor
 	"assistant_id": strings.TrimSpace(item.AssistantID),
 	"run_id":       strings.TrimSpace(item.RunID),
 	"memory_type":  strings.TrimSpace(strings.ToLower(item.MemoryType)),
+	"title":      strings.TrimSpace(item.Title),
+	"confidence": item.Confidence,
+	"importance": item.Importance,
 	"sensitivity_level": item.SensitivityLevel,
+	"is_explicit": item.IsExplicit,
+	"status":      strings.TrimSpace(item.Status),
 	}
 	if item.TTLAt != nil {
 		metadata["ttl_at"] = item.TTLAt.Format(time.RFC3339)

backend/infra/rag/embed/eino_embedder.go

@@ -3,19 +3,97 @@ package embed
 
 import (
 	"context"
 	"errors"
+	"strings"
+	"time"
+
+	openaiembedding "github.com/cloudwego/eino-ext/libs/acl/openai"
+	einoembedding "github.com/cloudwego/eino/components/embedding"
 )
 
-// EinoEmbedder is the placeholder implementation for Eino embedding.
-//
-// Notes:
-// 1. this round only stubs the interface, to avoid premature coupling to a concrete provider;
-// 2. when real embedding lands, only this file's internals get replaced.
-type EinoEmbedder struct{}
-
-func NewEinoEmbedder() *EinoEmbedder {
-	return &EinoEmbedder{}
-}
-
-func (e *EinoEmbedder) Embed(_ context.Context, _ []string, _ string) ([][]float32, error) {
-	return nil, errors.New("eino embedder is not implemented yet")
-}
+// EinoConfig describes the Eino embedding runtime parameters.
+type EinoConfig struct {
+	APIKey    string
+	BaseURL   string
+	Model     string
+	TimeoutMS int
+	Dimension int
+}
+
+// EinoEmbedder is the Eino-based embedding adapter.
+//
+// Notes:
+// 1. exposes a uniform [][]float32 result to infra/rag, hiding the Eino/OpenAI-compatible implementation details;
+// 2. timeouts are closed off inside this adapter, so the business side never hand-rolls timeout control per call;
+// 3. the current backend is Eino Ext's OpenAI-compatible embedding client, convenient for Ark/OpenAI-compatible endpoints.
+type EinoEmbedder struct {
+	client    einoembedding.Embedder
+	model     string
+	timeout   time.Duration
+	dimension int
+}
+
+func NewEinoEmbedder(ctx context.Context, cfg EinoConfig) (*EinoEmbedder, error) {
+	if strings.TrimSpace(cfg.APIKey) == "" {
+		return nil, errors.New("eino embedder api key is empty")
+	}
+	if strings.TrimSpace(cfg.Model) == "" {
+		return nil, errors.New("eino embedder model is empty")
+	}
+	clientCfg := &openaiembedding.EmbeddingConfig{
+		APIKey:  strings.TrimSpace(cfg.APIKey),
+		BaseURL: strings.TrimSpace(cfg.BaseURL),
+		Model:   strings.TrimSpace(cfg.Model),
+	}
+	if cfg.Dimension > 0 {
+		clientCfg.Dimensions = &cfg.Dimension
+	}
+	client, err := openaiembedding.NewEmbeddingClient(ctx, clientCfg)
+	if err != nil {
+		return nil, err
+	}
+	timeout := 1200 * time.Millisecond
+	if cfg.TimeoutMS > 0 {
+		timeout = time.Duration(cfg.TimeoutMS) * time.Millisecond
+	}
+	return &EinoEmbedder{
+		client:    client,
+		model:     strings.TrimSpace(cfg.Model),
+		timeout:   timeout,
+		dimension: cfg.Dimension,
+	}, nil
+}
+
+func (e *EinoEmbedder) Embed(ctx context.Context, texts []string, _ string) ([][]float32, error) {
+	if e == nil || e.client == nil {
+		return nil, errors.New("eino embedder is not initialized")
+	}
+	if len(texts) == 0 {
+		return nil, nil
+	}
+	callCtx := ctx
+	cancel := func() {}
+	if e.timeout > 0 {
+		callCtx, cancel = context.WithTimeout(ctx, e.timeout)
+	}
+	defer cancel()
+	vectors, err := e.client.EmbedStrings(callCtx, texts, einoembedding.WithModel(e.model))
+	if err != nil {
+		return nil, err
+	}
+	result := make([][]float32, 0, len(vectors))
+	for _, vector := range vectors {
+		converted := make([]float32, len(vector))
+		for i, value := range vector {
+			converted[i] = float32(value)
+		}
+		result = append(result, converted)
+	}
+	return result, nil
+}

backend/infra/rag/factory.go

@@ -0,0 +1,139 @@
package rag
import (
"context"
"fmt"
"log"
"os"
"strings"
ragchunk "github.com/LoveLosita/smartflow/backend/infra/rag/chunk"
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
ragembed "github.com/LoveLosita/smartflow/backend/infra/rag/embed"
ragrerank "github.com/LoveLosita/smartflow/backend/infra/rag/rerank"
ragstore "github.com/LoveLosita/smartflow/backend/infra/rag/store"
)
// FactoryDeps describes the optional dependencies the Runtime factory needs.
//
// Notes:
// 1. Logger is only the default sink while the project has no unified logging system;
// 2. Observer is the formal unified observability slot, replaceable later by a project-level logger/metrics/tracing adapter;
// 3. the business side still receives only a Runtime and never touches the low-level assembly details.
type FactoryDeps struct {
Logger *log.Logger
Observer Observer
}
// NewRuntimeFromConfig assembles the RAG Runtime from config in one place.
//
// Design notes:
// 1. every low-level implementation choice converges here; business code no longer news stores/embedders/rerankers itself;
// 2. even as more providers arrive, extend this factory first rather than scattering selection logic into business modules;
// 3. observability is injected here too, so runtime/store/pipeline don't each print logs on the side.
func NewRuntimeFromConfig(ctx context.Context, cfg ragconfig.Config, deps FactoryDeps) (Runtime, error) {
logger, observer := normalizeFactoryDeps(deps)
embedder, err := buildEmbedder(ctx, cfg)
if err != nil {
return nil, err
}
store, err := buildStore(cfg, logger, observer)
if err != nil {
return nil, err
}
reranker, err := buildReranker(cfg, observer)
if err != nil {
return nil, err
}
pipeline := core.NewPipeline(ragchunk.NewTextChunker(), embedder, store, reranker)
pipeline.SetLogger(logger)
pipeline.SetObserver(observer)
return newRuntime(cfg, pipeline, observer), nil
}
func normalizeFactoryDeps(deps FactoryDeps) (*log.Logger, Observer) {
logger := deps.Logger
if logger == nil {
logger = log.Default()
}
observer := deps.Observer
if observer == nil {
observer = NewLoggerObserver(logger)
}
return logger, observer
}
func buildEmbedder(ctx context.Context, cfg ragconfig.Config) (core.Embedder, error) {
switch strings.ToLower(strings.TrimSpace(cfg.EmbedProvider)) {
case "", "mock":
return ragembed.NewMockEmbedder(cfg.EmbedDimension), nil
case "eino":
apiKey := strings.TrimSpace(os.Getenv(cfg.EmbedAPIKeyEnv))
if apiKey == "" {
return nil, fmt.Errorf("rag embed api key is empty: env=%s", cfg.EmbedAPIKeyEnv)
}
return ragembed.NewEinoEmbedder(ctx, ragembed.EinoConfig{
APIKey: apiKey,
BaseURL: cfg.EmbedBaseURL,
Model: cfg.EmbedModel,
TimeoutMS: cfg.EmbedTimeoutMS,
Dimension: cfg.EmbedDimension,
})
default:
return nil, fmt.Errorf("unsupported rag embed provider: %s", cfg.EmbedProvider)
}
}
func buildStore(cfg ragconfig.Config, logger *log.Logger, observer Observer) (core.VectorStore, error) {
switch strings.ToLower(strings.TrimSpace(cfg.Store)) {
case "", "inmemory":
return ragstore.NewInMemoryVectorStore(), nil
case "milvus":
return ragstore.NewMilvusStore(ragstore.MilvusConfig{
Address: cfg.MilvusAddress,
Token: cfg.MilvusToken,
DBName: cfg.MilvusDBName,
CollectionName: cfg.MilvusCollectionName,
RequestTimeoutMS: cfg.MilvusRequestTimeoutMS,
Dimension: cfg.EmbedDimension,
MetricType: cfg.MilvusMetricType,
Logger: logger,
Observer: observer,
})
default:
return nil, fmt.Errorf("unsupported rag store: %s", cfg.Store)
}
}
func buildReranker(cfg ragconfig.Config, observer Observer) (core.Reranker, error) {
if !cfg.RerankerEnabled {
return ragrerank.NewNoopReranker(), nil
}
switch strings.ToLower(strings.TrimSpace(cfg.RerankerProvider)) {
case "", "noop":
return ragrerank.NewNoopReranker(), nil
case "eino":
if observer != nil {
observer.Observe(context.Background(), ObserveEvent{
Level: ObserveLevelWarn,
Component: "factory",
Operation: "reranker_fallback",
Fields: map[string]any{
"provider": "eino",
"status": "fallback",
"fallback_target": "noop",
"reason": "reranker_not_implemented",
},
})
}
return ragrerank.NewNoopReranker(), nil
default:
return nil, fmt.Errorf("unsupported rag reranker provider: %s", cfg.RerankerProvider)
}
}

backend/infra/rag/observer.go

@@ -0,0 +1,32 @@
package rag
import (
"log"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
)
// ObserveLevel re-exports the unified observation-level alias, so the startup layer never depends on core details.
type ObserveLevel = core.ObserveLevel
const (
ObserveLevelInfo = core.ObserveLevelInfo
ObserveLevelWarn = core.ObserveLevelWarn
ObserveLevelError = core.ObserveLevelError
)
// ObserveEvent re-exports the unified observation-event alias.
type ObserveEvent = core.ObserveEvent
// Observer re-exports the unified observation-interface alias.
type Observer = core.Observer
// NewNopObserver returns the no-op implementation.
func NewNopObserver() Observer {
return core.NewNopObserver()
}
// NewLoggerObserver returns the standard-log adapter.
func NewLoggerObserver(logger *log.Logger) Observer {
return core.NewLoggerObserver(logger)
}

backend/infra/rag/runtime.go

@@ -0,0 +1,380 @@
package rag
import (
"context"
"fmt"
"strings"
"time"
ragconfig "github.com/LoveLosita/smartflow/backend/infra/rag/config"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
"github.com/LoveLosita/smartflow/backend/infra/rag/corpus"
)
type runtime struct {
cfg ragconfig.Config
pipeline *core.Pipeline
memoryCorpus *corpus.MemoryCorpus
webCorpus *corpus.WebCorpus
observer Observer
}
func newRuntime(cfg ragconfig.Config, pipeline *core.Pipeline, observer Observer) Runtime {
if observer == nil {
observer = NewNopObserver()
}
return &runtime{
cfg: cfg,
pipeline: pipeline,
memoryCorpus: corpus.NewMemoryCorpus(),
webCorpus: corpus.NewWebCorpus(),
observer: observer,
}
}
// IngestMemory is the unified entry point for ingesting memory corpus data.
func (r *runtime) IngestMemory(ctx context.Context, req MemoryIngestRequest) (*IngestResult, error) {
items := make([]corpus.MemoryIngestItem, 0, len(req.Items))
for _, item := range req.Items {
items = append(items, corpus.MemoryIngestItem{
MemoryID: item.MemoryID,
UserID: item.UserID,
ConversationID: item.ConversationID,
AssistantID: item.AssistantID,
RunID: item.RunID,
MemoryType: item.MemoryType,
Title: item.Title,
Content: item.Content,
Confidence: item.Confidence,
Importance: item.Importance,
SensitivityLevel: item.SensitivityLevel,
IsExplicit: item.IsExplicit,
Status: item.Status,
TTLAt: item.TTLAt,
CreatedAt: item.CreatedAt,
})
}
return r.ingestWithCorpus(ctx, req.TraceID, "memory", r.memoryCorpus, items, req.Action)
}
// RetrieveMemory is the unified entry point for retrieving from the memory corpus.
func (r *runtime) RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest) (*RetrieveResult, error) {
corpusInput := corpus.MemoryRetrieveInput{
UserID: req.UserID,
ConversationID: req.ConversationID,
AssistantID: req.AssistantID,
RunID: req.RunID,
}
if len(req.MemoryTypes) == 1 {
corpusInput.MemoryType = req.MemoryTypes[0]
}
result, err := r.retrieveWithCorpus(ctx, req.TraceID, "memory", r.memoryCorpus, core.RetrieveRequest{
Query: req.Query,
TopK: normalizeTopK(req.TopK, r.cfg.TopK),
Threshold: normalizeThreshold(req.Threshold, r.cfg.Threshold),
Action: normalizeAction(req.Action, "search"),
CorpusInput: corpusInput,
})
if err != nil {
return nil, err
}
if len(req.MemoryTypes) <= 1 {
return result, nil
}
// 1. Low-level filtering is still mostly equality-based, so the Runtime keeps this secondary multi-type filter for now;
// 2. This avoids leaking the "memory_type in (...)" implementation detail into every Store;
// 3. Once low-level filtering is unified, consider pushing this logic further down (a usage sketch follows this file).
allowed := make(map[string]struct{}, len(req.MemoryTypes))
for _, item := range req.MemoryTypes {
value := strings.TrimSpace(strings.ToLower(item))
if value == "" {
continue
}
allowed[value] = struct{}{}
}
filtered := make([]RetrieveHit, 0, len(result.Items))
for _, item := range result.Items {
memoryType := strings.TrimSpace(strings.ToLower(asString(item.Metadata["memory_type"])))
if len(allowed) > 0 {
if _, ok := allowed[memoryType]; !ok {
continue
}
}
filtered = append(filtered, item)
}
result.Items = filtered
if req.TopK > 0 && len(result.Items) > req.TopK {
result.Items = result.Items[:req.TopK]
}
return result, nil
}
// IngestWeb is the unified entry point for ingesting web corpus data.
func (r *runtime) IngestWeb(ctx context.Context, req WebIngestRequest) (*IngestResult, error) {
items := make([]corpus.WebIngestItem, 0, len(req.Items))
for _, item := range req.Items {
items = append(items, corpus.WebIngestItem{
URL: item.URL,
Title: item.Title,
Content: item.Content,
Snippet: item.Snippet,
Domain: item.Domain,
QueryID: item.QueryID,
SessionID: item.SessionID,
PublishedAt: item.PublishedAt,
FetchedAt: item.FetchedAt,
SourceRank: item.SourceRank,
})
}
return r.ingestWithCorpus(ctx, req.TraceID, "web", r.webCorpus, items, req.Action)
}
// RetrieveWeb is the unified entry point for retrieving from the web corpus.
func (r *runtime) RetrieveWeb(ctx context.Context, req WebRetrieveRequest) (*RetrieveResult, error) {
return r.retrieveWithCorpus(ctx, req.TraceID, "web", r.webCorpus, core.RetrieveRequest{
Query: req.Query,
TopK: normalizeTopK(req.TopK, r.cfg.TopK),
Threshold: normalizeThreshold(req.Threshold, r.cfg.Threshold),
Action: normalizeAction(req.Action, "search"),
CorpusInput: corpus.WebRetrieveInput{
QueryID: req.QueryID,
SessionID: req.SessionID,
Domain: req.Domain,
},
})
}
func (r *runtime) ingestWithCorpus(
ctx context.Context,
traceID string,
corpusName string,
adapter core.CorpusAdapter,
input any,
action string,
) (*IngestResult, error) {
start := time.Now()
if r == nil || r.pipeline == nil || adapter == nil {
return nil, core.ErrNilDependency
}
action = normalizeAction(action, "add")
observeCtx := newObserveContext(ctx, traceID, corpusName, action)
docs, err := adapter.BuildIngestDocuments(observeCtx, input)
if err != nil {
r.observe(observeCtx, ObserveEvent{
Level: ObserveLevelError,
Component: "runtime",
Operation: "ingest",
Fields: map[string]any{
"status": "failed",
"latency_ms": time.Since(start).Milliseconds(),
"phase": "build_documents",
"error": err,
"error_code": core.ClassifyErrorCode(err),
"input_count": estimateInputCount(input),
},
})
return nil, err
}
docIDs := make([]string, 0, len(docs))
for _, doc := range docs {
docIDs = append(docIDs, doc.ID)
}
result, err := r.pipeline.IngestDocuments(observeCtx, adapter.Name(), docs, core.IngestOption{
Chunk: core.ChunkOption{
ChunkSize: r.cfg.ChunkSize,
ChunkOverlap: r.cfg.ChunkOverlap,
},
Action: action,
})
if err != nil {
r.observe(observeCtx, ObserveEvent{
Level: ObserveLevelError,
Component: "runtime",
Operation: "ingest",
Fields: map[string]any{
"status": "failed",
"latency_ms": time.Since(start).Milliseconds(),
"document_count": len(docs),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
r.observe(observeCtx, ObserveEvent{
Level: ObserveLevelInfo,
Component: "runtime",
Operation: "ingest",
Fields: map[string]any{
"status": "success",
"latency_ms": time.Since(start).Milliseconds(),
"document_count": result.DocumentCount,
"chunk_count": result.ChunkCount,
},
})
return &IngestResult{
DocumentCount: result.DocumentCount,
ChunkCount: result.ChunkCount,
DocumentIDs: docIDs,
}, nil
}
func (r *runtime) retrieveWithCorpus(
ctx context.Context,
traceID string,
corpusName string,
adapter core.CorpusAdapter,
req core.RetrieveRequest,
) (*RetrieveResult, error) {
start := time.Now()
if r == nil || r.pipeline == nil || adapter == nil {
return nil, core.ErrNilDependency
}
action := normalizeAction(req.Action, "search")
req.Action = action
observeCtx := newObserveContext(ctx, traceID, corpusName, action)
timeoutCtx := observeCtx
cancel := func() {}
if r.cfg.RetrieveTimeoutMS > 0 {
timeoutCtx, cancel = context.WithTimeout(observeCtx, time.Duration(r.cfg.RetrieveTimeoutMS)*time.Millisecond)
}
defer cancel()
result, err := r.pipeline.Retrieve(timeoutCtx, adapter, req)
if err != nil {
r.observe(observeCtx, ObserveEvent{
Level: ObserveLevelError,
Component: "runtime",
Operation: "retrieve",
Fields: map[string]any{
"status": "failed",
"latency_ms": time.Since(start).Milliseconds(),
"query_len": len(strings.TrimSpace(req.Query)),
"top_k": req.TopK,
"threshold": req.Threshold,
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
items := make([]RetrieveHit, 0, len(result.Items))
for _, item := range result.Items {
items = append(items, RetrieveHit{
ChunkID: item.ChunkID,
DocumentID: item.DocumentID,
Text: item.Text,
Score: item.Score,
Metadata: cloneMap(item.Metadata),
})
}
r.observe(observeCtx, ObserveEvent{
Level: ObserveLevelInfo,
Component: "runtime",
Operation: "retrieve",
Fields: map[string]any{
"status": "success",
"latency_ms": time.Since(start).Milliseconds(),
"query_len": len(strings.TrimSpace(req.Query)),
"top_k": req.TopK,
"threshold": req.Threshold,
"raw_count": result.RawCount,
"hit_count": len(result.Items),
"fallback_used": result.FallbackUsed,
"fallback_reason": result.FallbackReason,
},
})
return &RetrieveResult{
Items: items,
RawCount: result.RawCount,
FallbackUsed: result.FallbackUsed,
FallbackReason: result.FallbackReason,
}, nil
}
func (r *runtime) observe(ctx context.Context, event ObserveEvent) {
if r == nil || r.observer == nil {
return
}
r.observer.Observe(ctx, event)
}
func newObserveContext(ctx context.Context, traceID string, corpusName string, action string) context.Context {
fields := map[string]any{
"corpus": corpusName,
"action": action,
}
if traceID = strings.TrimSpace(traceID); traceID != "" {
fields["trace_id"] = traceID
}
return core.WithObserveFields(ctx, fields)
}
func estimateInputCount(input any) int {
switch value := input.(type) {
case []corpus.MemoryIngestItem:
return len(value)
case []corpus.WebIngestItem:
return len(value)
default:
return 0
}
}
func normalizeAction(action string, fallback string) string {
action = strings.TrimSpace(action)
if action == "" {
return fallback
}
return action
}
func normalizeTopK(topK int, fallback int) int {
if topK > 0 {
return topK
}
if fallback > 0 {
return fallback
}
return 8
}
func normalizeThreshold(threshold float64, fallback float64) float64 {
if threshold >= 0 {
return threshold
}
if fallback >= 0 {
return fallback
}
return 0
}
func cloneMap(src map[string]any) map[string]any {
if len(src) == 0 {
return map[string]any{}
}
dst := make(map[string]any, len(src))
for key, value := range src {
dst[key] = value
}
return dst
}
func asString(v any) string {
if v == nil {
return ""
}
return strings.TrimSpace(fmt.Sprintf("%v", v))
}
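
To make the multi-type second pass in RetrieveMemory concrete, a hedged usage sketch (the request values are invented; rt is any Runtime):

// With more than one memory type, the corpus-level filter omits memory_type;
// the runtime then drops non-matching hits and re-applies TopK client-side.
result, err := rt.RetrieveMemory(ctx, MemoryRetrieveRequest{
	TraceID:     "trace-demo", // illustrative
	Query:       "deadline preferences",
	TopK:        5,
	UserID:      42,
	MemoryTypes: []string{"preference", "constraint"}, // triggers the second pass
})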

View File

@@ -0,0 +1,117 @@
package rag
import (
"context"
"time"
)
// Runtime is the only stable method surface the RAG infra exposes to the business side.
//
// Responsibility boundaries:
// 1. Serves as the unified ingest and retrieve entry point for the memory and web corpora;
// 2. Hides the assembly details of the underlying Pipeline / Store / Embedder / Reranker;
// 3. Does not own business semantics such as provider search, HTML fetching, or prompt injection.
type Runtime interface {
IngestMemory(ctx context.Context, req MemoryIngestRequest) (*IngestResult, error)
RetrieveMemory(ctx context.Context, req MemoryRetrieveRequest) (*RetrieveResult, error)
IngestWeb(ctx context.Context, req WebIngestRequest) (*IngestResult, error)
RetrieveWeb(ctx context.Context, req WebRetrieveRequest) (*RetrieveResult, error)
}
// IngestResult summarizes one unified ingest execution.
type IngestResult struct {
DocumentCount int
ChunkCount int
DocumentIDs []string
}
// RetrieveHit is the unified hit item exposed to the business side.
type RetrieveHit struct {
ChunkID string
DocumentID string
Text string
Score float64
Metadata map[string]any
}
// RetrieveResult summarizes one retrieve execution.
type RetrieveResult struct {
Items []RetrieveHit
RawCount int
FallbackUsed bool
FallbackReason string
}
// MemoryIngestItem is a memory-corpus ingest item.
type MemoryIngestItem struct {
MemoryID int64
UserID int
ConversationID string
AssistantID string
RunID string
MemoryType string
Title string
Content string
Confidence float64
Importance float64
SensitivityLevel int
IsExplicit bool
Status string
TTLAt *time.Time
CreatedAt *time.Time
}
// MemoryIngestRequest describes one memory vector ingest request.
type MemoryIngestRequest struct {
TraceID string
Action string
Items []MemoryIngestItem
}
// MemoryRetrieveRequest describes one memory retrieve request.
type MemoryRetrieveRequest struct {
TraceID string
Query string
TopK int
Threshold float64
Action string
UserID int
ConversationID string
AssistantID string
RunID string
MemoryTypes []string
}
// WebIngestItem is a web-corpus ingest item.
type WebIngestItem struct {
URL string
Title string
Content string
Snippet string
Domain string
QueryID string
SessionID string
PublishedAt *time.Time
FetchedAt *time.Time
SourceRank int
}
// WebIngestRequest describes one web-corpus ingest request.
type WebIngestRequest struct {
TraceID string
Action string
Items []WebIngestItem
}
// WebRetrieveRequest describes one web retrieve request.
type WebRetrieveRequest struct {
TraceID string
Query string
TopK int
Threshold float64
Action string
QueryID string
SessionID string
Domain string
}
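
For the ingest side, a matching hedged sketch using the request types above (all values invented; rt is any Runtime):

// An empty Action falls back to "add" inside ingestWithCorpus.
res, err := rt.IngestWeb(ctx, WebIngestRequest{
	TraceID: "trace-demo",
	Items: []WebIngestItem{{
		URL:     "https://example.com/post",
		Title:   "Example post",
		Content: "full page text ...",
		Domain:  "example.com",
	}},
})
// res.DocumentIDs lists the document IDs built by the corpus adapter.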

View File

@@ -1,35 +1,894 @@
package store
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"net/http"
"strconv"
"strings"
"sync"
"time"
"github.com/LoveLosita/smartflow/backend/infra/rag/core"
)
// MilvusConfig describes the configuration of the Milvus REST store.
type MilvusConfig struct {
// Address should point to the Milvus REST entry.
// In this project's integration testing that is 19530; 9091 only serves health/metrics and does not carry the REST API this implementation uses.
Address string
Token string
DBName string
CollectionName string
RequestTimeoutMS int
Dimension int
MetricType string
Logger *log.Logger
Observer core.Observer
}
// MilvusStore is a vector store implementation backed by the Milvus REST API.
//
// Design notes:
// 1. This implementation prioritizes being easy to adopt, manage, and roll out gradually within the project, without requiring an extra SDK;
// 2. Fixed columns plus a metadata JSON field balance filtering capability against metadata completeness;
// 3. The collection is created automatically on first write, so startup needs no extra initialization script.
type MilvusStore struct {
cfg MilvusConfig
client *http.Client
observer core.Observer
mu sync.Mutex
ensured bool
}
const (
milvusPrimaryField = "id"
milvusVectorField = "vector"
milvusTextField = "text"
milvusMetadataField = "metadata"
milvusCorpusField = "corpus"
milvusDocumentField = "document_id"
milvusUserIDField = "user_id"
milvusAssistantField = "assistant_id"
milvusConvField = "conversation_id"
milvusRunField = "run_id"
milvusMemoryType = "memory_type"
milvusQueryIDField = "query_id"
milvusSessionField = "session_id"
milvusDomainField = "domain"
milvusChunkOrder = "chunk_order"
milvusUpdatedAtField = "updated_at"
)
var milvusFilterFieldMap = map[string]string{
"corpus": milvusCorpusField,
"document_id": milvusDocumentField,
"user_id": milvusUserIDField,
"assistant_id": milvusAssistantField,
"conversation_id": milvusConvField,
"run_id": milvusRunField,
"memory_type": milvusMemoryType,
"query_id": milvusQueryIDField,
"session_id": milvusSessionField,
"domain": milvusDomainField,
"chunk_order": milvusChunkOrder,
}
func NewMilvusStore(cfg MilvusConfig) (*MilvusStore, error) {
cfg.Address = strings.TrimRight(strings.TrimSpace(cfg.Address), "/")
if cfg.Address == "" {
return nil, errors.New("milvus address is empty")
}
if cfg.CollectionName == "" {
cfg.CollectionName = "smartflow_rag_chunks"
}
if cfg.MetricType == "" {
cfg.MetricType = "COSINE"
}
if cfg.RequestTimeoutMS <= 0 {
cfg.RequestTimeoutMS = 1500
}
if cfg.Logger == nil {
cfg.Logger = log.Default()
}
if cfg.Observer == nil {
cfg.Observer = core.NewLoggerObserver(cfg.Logger)
}
return &MilvusStore{
cfg: cfg,
client: &http.Client{Timeout: time.Duration(cfg.RequestTimeoutMS) * time.Millisecond},
observer: cfg.Observer,
}, nil
}
func (s *MilvusStore) Upsert(ctx context.Context, rows []core.VectorRow) error {
start := time.Now()
if len(rows) == 0 {
return nil
}
if err := s.ensureCollection(ctx, len(rows[0].Vector)); err != nil {
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "upsert",
Fields: map[string]any{
"status": "failed",
"row_count": len(rows),
"vector_dim": len(rows[0].Vector),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return err
}
data := make([]map[string]any, 0, len(rows))
for _, row := range rows {
item := mapRowToMilvusEntity(row)
data = append(data, item)
}
_, err := s.postJSON(ctx, "/v2/vectordb/entities/upsert", map[string]any{
"collectionName": s.cfg.CollectionName,
"data": data,
"dbName": blankToNil(s.cfg.DBName),
})
if err != nil {
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "upsert",
Fields: map[string]any{
"status": "failed",
"row_count": len(rows),
"vector_dim": len(rows[0].Vector),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return err
}
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelInfo,
Component: "store",
Operation: "upsert",
Fields: map[string]any{
"status": "success",
"row_count": len(rows),
"vector_dim": len(rows[0].Vector),
"latency_ms": time.Since(start).Milliseconds(),
},
})
return err
}
func (s *MilvusStore) Search(ctx context.Context, req core.VectorSearchRequest) ([]core.ScoredVectorRow, error) {
start := time.Now()
if len(req.QueryVector) == 0 {
return nil, nil
}
if err := s.ensureCollection(ctx, len(req.QueryVector)); err != nil {
if isMilvusCollectionMissing(err) {
return nil, nil
}
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "search",
Fields: map[string]any{
"status": "failed",
"top_k": req.TopK,
"filter_count": len(req.Filter),
"vector_dim": len(req.QueryVector),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
filterExpr, err := buildMilvusFilter(req.Filter)
if err != nil {
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "search",
Fields: map[string]any{
"status": "failed",
"top_k": req.TopK,
"filter_count": len(req.Filter),
"vector_dim": len(req.QueryVector),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
body := map[string]any{
"collectionName": s.cfg.CollectionName,
"data": [][]float32{req.QueryVector},
"annsField": milvusVectorField,
"limit": normalizeMilvusTopK(req.TopK),
"outputFields": milvusOutputFields(false),
}
if filterExpr != "" {
body["filter"] = filterExpr
}
if s.cfg.DBName != "" {
body["dbName"] = s.cfg.DBName
}
respBody, err := s.postJSON(ctx, "/v2/vectordb/entities/search", body)
if err != nil {
if isMilvusCollectionMissing(err) {
return nil, nil
}
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "search",
Fields: map[string]any{
"status": "failed",
"top_k": req.TopK,
"filter_count": len(req.Filter),
"vector_dim": len(req.QueryVector),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
var resp milvusSearchResponse
if err = json.Unmarshal(respBody, &resp); err != nil {
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "search",
Fields: map[string]any{
"status": "failed",
"top_k": req.TopK,
"filter_count": len(req.Filter),
"vector_dim": len(req.QueryVector),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
if resp.Code != 0 && resp.Code != 200 {
err = fmt.Errorf("milvus search failed: code=%d message=%s", resp.Code, resp.Message)
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "search",
Fields: map[string]any{
"status": "failed",
"top_k": req.TopK,
"filter_count": len(req.Filter),
"vector_dim": len(req.QueryVector),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
result := make([]core.ScoredVectorRow, 0, len(resp.Data))
for _, item := range resp.Data {
row, score := item.toVectorRow()
result = append(result, core.ScoredVectorRow{
Row: row,
Score: score,
})
}
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelInfo,
Component: "store",
Operation: "search",
Fields: map[string]any{
"status": "success",
"top_k": req.TopK,
"filter_count": len(req.Filter),
"vector_dim": len(req.QueryVector),
"result_count": len(result),
"latency_ms": time.Since(start).Milliseconds(),
},
})
return result, nil
}
func (s *MilvusStore) Delete(ctx context.Context, ids []string) error {
start := time.Now()
if len(ids) == 0 {
return nil
}
filter := fmt.Sprintf(`%s in [%s]`, milvusPrimaryField, joinQuotedStrings(ids))
_, err := s.postJSON(ctx, "/v2/vectordb/entities/delete", map[string]any{
"collectionName": s.cfg.CollectionName,
"filter": filter,
"dbName": blankToNil(s.cfg.DBName),
})
if isMilvusCollectionMissing(err) {
return nil
}
if err != nil {
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "delete",
Fields: map[string]any{
"status": "failed",
"id_count": len(ids),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return err
}
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelInfo,
Component: "store",
Operation: "delete",
Fields: map[string]any{
"status": "success",
"id_count": len(ids),
"latency_ms": time.Since(start).Milliseconds(),
},
})
return err
}
func (s *MilvusStore) Get(ctx context.Context, ids []string) ([]core.VectorRow, error) {
start := time.Now()
if len(ids) == 0 {
return nil, nil
}
respBody, err := s.postJSON(ctx, "/v2/vectordb/entities/get", map[string]any{
"collectionName": s.cfg.CollectionName,
"id": ids,
"outputFields": milvusOutputFields(true),
"dbName": blankToNil(s.cfg.DBName),
})
if err != nil {
if isMilvusCollectionMissing(err) {
return nil, nil
}
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "get",
Fields: map[string]any{
"status": "failed",
"id_count": len(ids),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
var resp milvusGetResponse
if err = json.Unmarshal(respBody, &resp); err != nil {
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "get",
Fields: map[string]any{
"status": "failed",
"id_count": len(ids),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
if resp.Code != 0 && resp.Code != 200 {
err = fmt.Errorf("milvus get failed: code=%d message=%s", resp.Code, resp.Message)
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "get",
Fields: map[string]any{
"status": "failed",
"id_count": len(ids),
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return nil, err
}
rows := make([]core.VectorRow, 0, len(resp.Data))
for _, item := range resp.Data {
rows = append(rows, mapMilvusRow(item, true))
}
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelInfo,
Component: "store",
Operation: "get",
Fields: map[string]any{
"status": "success",
"id_count": len(ids),
"row_count": len(rows),
"latency_ms": time.Since(start).Milliseconds(),
},
})
return rows, nil
}
func (s *MilvusStore) ensureCollection(ctx context.Context, dimension int) error {
start := time.Now()
if dimension <= 0 {
dimension = s.cfg.Dimension
}
if dimension <= 0 {
return errors.New("milvus vector dimension is invalid")
}
s.mu.Lock()
defer s.mu.Unlock()
if s.ensured {
return nil
}
payload := map[string]any{
"collectionName": s.cfg.CollectionName,
"schema": map[string]any{
"autoId": false,
"enabledDynamicField": false,
"fields": []map[string]any{
buildVarcharField(milvusPrimaryField, true, 256),
buildVectorField(milvusVectorField, dimension),
buildVarcharField(milvusTextField, false, 65535),
{"fieldName": milvusMetadataField, "dataType": "JSON"},
buildVarcharField(milvusCorpusField, false, 64),
buildVarcharField(milvusDocumentField, false, 256),
{"fieldName": milvusUserIDField, "dataType": "Int64"},
buildVarcharField(milvusAssistantField, false, 128),
buildVarcharField(milvusConvField, false, 128),
buildVarcharField(milvusRunField, false, 128),
buildVarcharField(milvusMemoryType, false, 64),
buildVarcharField(milvusQueryIDField, false, 128),
buildVarcharField(milvusSessionField, false, 128),
buildVarcharField(milvusDomainField, false, 128),
{"fieldName": milvusChunkOrder, "dataType": "Int64"},
{"fieldName": milvusUpdatedAtField, "dataType": "Int64"},
},
},
"indexParams": []map[string]any{
{
"fieldName": milvusVectorField,
"indexName": milvusVectorField,
"metricType": s.cfg.MetricType,
"indexType": "AUTOINDEX",
},
},
}
if s.cfg.DBName != "" {
payload["dbName"] = s.cfg.DBName
}
_, err := s.postJSON(ctx, "/v2/vectordb/collections/create", payload)
if err != nil {
if isMilvusAlreadyExists(err) {
s.ensured = true
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelInfo,
Component: "store",
Operation: "ensure_collection",
Fields: map[string]any{
"status": "already_exists",
"vector_dim": dimension,
"metric_type": s.cfg.MetricType,
"latency_ms": time.Since(start).Milliseconds(),
},
})
return nil
}
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelError,
Component: "store",
Operation: "ensure_collection",
Fields: map[string]any{
"status": "failed",
"vector_dim": dimension,
"metric_type": s.cfg.MetricType,
"latency_ms": time.Since(start).Milliseconds(),
"error": err,
"error_code": core.ClassifyErrorCode(err),
},
})
return err
}
s.ensured = true
s.observe(ctx, core.ObserveEvent{
Level: core.ObserveLevelInfo,
Component: "store",
Operation: "ensure_collection",
Fields: map[string]any{
"status": "created",
"vector_dim": dimension,
"metric_type": s.cfg.MetricType,
"latency_ms": time.Since(start).Milliseconds(),
},
})
return nil
}
func (s *MilvusStore) postJSON(ctx context.Context, path string, payload map[string]any) ([]byte, error) {
body, err := json.Marshal(payload)
if err != nil {
return nil, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, s.cfg.Address+path, bytes.NewReader(body))
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "application/json")
if token := strings.TrimSpace(s.cfg.Token); token != "" {
req.Header.Set("Authorization", "Bearer "+token)
}
resp, err := s.client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
respBody, readErr := io.ReadAll(resp.Body)
if readErr != nil {
return nil, readErr
}
if resp.StatusCode >= 400 {
return nil, fmt.Errorf("milvus http failed: status=%d body=%s", resp.StatusCode, string(respBody))
}
var basic milvusBasicResponse
if jsonErr := json.Unmarshal(respBody, &basic); jsonErr == nil {
if basic.Code != 0 && basic.Code != 200 {
return nil, fmt.Errorf("milvus api failed: code=%d message=%s", basic.Code, basic.Message)
}
}
return respBody, nil
}
func (s *MilvusStore) observe(ctx context.Context, event core.ObserveEvent) {
if s == nil || s.observer == nil {
return
}
fields := cloneMap(event.Fields)
fields["store"] = "milvus"
fields["collection"] = s.cfg.CollectionName
if strings.TrimSpace(s.cfg.DBName) != "" {
fields["db_name"] = s.cfg.DBName
}
s.observer.Observe(ctx, core.ObserveEvent{
Level: event.Level,
Component: event.Component,
Operation: event.Operation,
Fields: fields,
})
}
func mapRowToMilvusEntity(row core.VectorRow) map[string]any {
metadata := cloneMap(row.Metadata)
entity := map[string]any{
milvusPrimaryField: row.ID,
milvusVectorField: row.Vector,
milvusTextField: row.Text,
milvusMetadataField: metadata,
milvusCorpusField: asString(metadata["corpus"]),
milvusDocumentField: asString(metadata["document_id"]),
milvusUpdatedAtField: func() int64 {
if row.UpdatedAt.IsZero() {
return time.Now().UnixMilli()
}
return row.UpdatedAt.UnixMilli()
}(),
}
assignMilvusScalar(entity, milvusUserIDField, metadata["user_id"])
assignMilvusScalar(entity, milvusAssistantField, metadata["assistant_id"])
assignMilvusScalar(entity, milvusConvField, metadata["conversation_id"])
assignMilvusScalar(entity, milvusRunField, metadata["run_id"])
assignMilvusScalar(entity, milvusMemoryType, metadata["memory_type"])
assignMilvusScalar(entity, milvusQueryIDField, metadata["query_id"])
assignMilvusScalar(entity, milvusSessionField, metadata["session_id"])
assignMilvusScalar(entity, milvusDomainField, metadata["domain"])
assignMilvusScalar(entity, milvusChunkOrder, metadata["chunk_order"])
return entity
}
func assignMilvusScalar(target map[string]any, field string, value any) {
if value == nil {
return
}
switch field {
case milvusUserIDField, milvusChunkOrder:
if parsed, ok := toInt64(value); ok {
target[field] = parsed
}
default:
if text := asString(value); text != "" {
target[field] = text
}
}
}
func buildMilvusFilter(filter map[string]any) (string, error) {
if len(filter) == 0 {
return "", nil
}
parts := make([]string, 0, len(filter))
for key, value := range filter {
field, ok := milvusFilterFieldMap[key]
if !ok {
return "", fmt.Errorf("unsupported milvus filter key: %s", key)
}
switch field {
case milvusUserIDField, milvusChunkOrder:
parsed, parseOK := toInt64(value)
if !parseOK {
return "", fmt.Errorf("milvus filter key=%s expects integer", key)
}
parts = append(parts, fmt.Sprintf("%s == %d", field, parsed))
default:
text := escapeMilvusString(asString(value))
parts = append(parts, fmt.Sprintf(`%s == "%s"`, field, text))
}
}
return strings.Join(parts, " and "), nil
}
func buildVarcharField(name string, isPrimary bool, maxLength int) map[string]any {
field := map[string]any{
"fieldName": name,
"dataType": "VarChar",
"elementTypeParams": map[string]any{"max_length": maxLength},
}
if isPrimary {
field["isPrimary"] = true
}
return field
}
func buildVectorField(name string, dimension int) map[string]any {
return map[string]any{
"fieldName": name,
"dataType": "FloatVector",
"elementTypeParams": map[string]any{"dim": dimension},
}
}
func milvusOutputFields(includeVector bool) []string {
fields := []string{
milvusTextField,
milvusMetadataField,
milvusCorpusField,
milvusDocumentField,
milvusUserIDField,
milvusAssistantField,
milvusConvField,
milvusRunField,
milvusMemoryType,
milvusQueryIDField,
milvusSessionField,
milvusDomainField,
milvusChunkOrder,
milvusUpdatedAtField,
}
if includeVector {
fields = append(fields, milvusVectorField)
}
return fields
}
func normalizeMilvusTopK(topK int) int {
if topK <= 0 {
return 8
}
return topK
}
func blankToNil(v string) any {
v = strings.TrimSpace(v)
if v == "" {
return nil
}
return v
}
func escapeMilvusString(v string) string {
v = strings.ReplaceAll(v, `\`, `\\`)
return strings.ReplaceAll(v, `"`, `\"`)
}
func joinQuotedStrings(values []string) string {
parts := make([]string, 0, len(values))
for _, value := range values {
parts = append(parts, fmt.Sprintf(`"%s"`, escapeMilvusString(value)))
}
return strings.Join(parts, ",")
}
func cloneMap(src map[string]any) map[string]any {
if len(src) == 0 {
return map[string]any{}
}
dst := make(map[string]any, len(src))
for key, value := range src {
dst[key] = value
}
return dst
}
func asString(v any) string {
if v == nil {
return ""
}
return strings.TrimSpace(fmt.Sprintf("%v", v))
}
func toInt64(v any) (int64, bool) {
switch value := v.(type) {
case int:
return int64(value), true
case int32:
return int64(value), true
case int64:
return value, true
case float64:
return int64(value), true
case json.Number:
parsed, err := value.Int64()
return parsed, err == nil
case string:
parsed, err := strconv.ParseInt(strings.TrimSpace(value), 10, 64)
return parsed, err == nil
default:
return 0, false
}
}
func isMilvusAlreadyExists(err error) bool {
if err == nil {
return false
}
text := strings.ToLower(err.Error())
return strings.Contains(text, "already exist") || strings.Contains(text, "already exists")
}
func isMilvusCollectionMissing(err error) bool {
if err == nil {
return false
}
text := strings.ToLower(err.Error())
return strings.Contains(text, "can't find collection") || strings.Contains(text, "collection not found")
}
type milvusBasicResponse struct {
Code int `json:"code"`
Message string `json:"message"`
}
type milvusSearchResponse struct {
Code int `json:"code"`
Message string `json:"message"`
Data []milvusSearchItem `json:"data"`
}
type milvusSearchItem map[string]any
func (m milvusSearchItem) toVectorRow() (core.VectorRow, float64) {
row := mapMilvusRow(map[string]any(m), false)
score := 0.0
if value, ok := m["distance"].(float64); ok {
score = value
}
return row, score
}
type milvusGetResponse struct {
Code int `json:"code"`
Message string `json:"message"`
Data []map[string]any `json:"data"`
}
func mapMilvusRow(raw map[string]any, includeVector bool) core.VectorRow {
metadata := cloneMap(readMetadataMap(raw[milvusMetadataField]))
assignMetadataIfPresent(metadata, "corpus", raw[milvusCorpusField])
assignMetadataIfPresent(metadata, "document_id", raw[milvusDocumentField])
assignMetadataIfPresent(metadata, "user_id", raw[milvusUserIDField])
assignMetadataIfPresent(metadata, "assistant_id", raw[milvusAssistantField])
assignMetadataIfPresent(metadata, "conversation_id", raw[milvusConvField])
assignMetadataIfPresent(metadata, "run_id", raw[milvusRunField])
assignMetadataIfPresent(metadata, "memory_type", raw[milvusMemoryType])
assignMetadataIfPresent(metadata, "query_id", raw[milvusQueryIDField])
assignMetadataIfPresent(metadata, "session_id", raw[milvusSessionField])
assignMetadataIfPresent(metadata, "domain", raw[milvusDomainField])
assignMetadataIfPresent(metadata, "chunk_order", raw[milvusChunkOrder])
row := core.VectorRow{
ID: asString(raw[milvusPrimaryField]),
Text: asString(raw[milvusTextField]),
Metadata: metadata,
}
if row.ID == "" {
row.ID = asString(raw["id"])
}
if includeVector {
row.Vector = readFloat32Vector(raw[milvusVectorField])
}
return row
}
func readMetadataMap(value any) map[string]any {
switch data := value.(type) {
case map[string]any:
return data
default:
return map[string]any{}
}
}
func readFloat32Vector(value any) []float32 {
switch vector := value.(type) {
case []float32:
return vector
case []any:
result := make([]float32, 0, len(vector))
for _, item := range vector {
switch number := item.(type) {
case float64:
result = append(result, float32(number))
case float32:
result = append(result, number)
}
}
return result
default:
return nil
}
}
func assignMetadataIfPresent(target map[string]any, key string, value any) {
if value == nil {
return
}
switch typed := value.(type) {
case string:
if strings.TrimSpace(typed) == "" {
return
}
target[key] = strings.TrimSpace(typed)
default:
target[key] = typed
}
}
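
As a concrete illustration of the whitelist mapping in buildMilvusFilter above, a small input/output sketch (clause order depends on Go's map iteration):

expr, err := buildMilvusFilter(map[string]any{
	"user_id":     42,           // integer field -> user_id == 42
	"memory_type": "preference", // string field  -> memory_type == "preference"
})
// err == nil; expr is, in some order:
//   user_id == 42 and memory_type == "preference"
// Unknown keys (anything outside milvusFilterFieldMap) return an error
// instead of being silently dropped.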

View File

@@ -5,4 +5,5 @@ import "github.com/LoveLosita/smartflow/backend/infra/rag/core"
// EnsureCompile statically verifies that implementations satisfy the interface.
func EnsureCompile() {
var _ core.VectorStore = (*InMemoryVectorStore)(nil)
var _ core.VectorStore = (*MilvusStore)(nil)
}

View File

@@ -1,28 +1,76 @@
# Memory Module: Current State
## The loop that now works end to end
1. When a user message is persisted to chat history, `memory.extract.requested` is published through the outbox.
2. The event consumer only writes the request idempotently into `memory_jobs`; no heavy LLM work runs inside the consume callback.
3. Startup launches the `memory worker`, which polls `memory_jobs` in the background.
4. After claiming a job, the worker calls the memory-extraction orchestrator driven by `backend/infra/llm`.
5. Extraction results are normalized into `memory_items`, with matching rows written to `memory_audit_logs`.
6. Once everything is persisted, the job advances to `success`; failures go through the retryable state machine.
## Directory responsibilities
- `module.go`: the unified external facade; assembles repo / service / worker / orchestrator.
- `model/`: memory-module DTOs, status constants, and config objects.
- `repo/`: access layer for `memory_jobs / memory_items / memory_audit_logs / memory_user_settings`.
- `service/`: job enqueueing, read reranking, management operations, and config loading.
- `orchestrator/`: memory-extraction orchestration.
  - `write_orchestrator.go` is a pure local fallback.
  - `llm_write_orchestrator.go` is the LLM extractor currently in use.
- `worker/`: job runner plus the background polling loop.
- `utils/`: pure-function helpers such as JSON extraction, candidate-fact normalization, settings filtering, and audit construction.
## Internal capabilities now in place
1. `Module`:
   - Assembles repo / service / worker / orchestrator into a unified facade.
   - External code should now depend on `memory.Module` instead of hand-wiring internal components.
   - Supports `WithTx(tx)`, so it plugs into the existing unified transaction manager.
2. `EnqueueService`:
   - Converts `memory.extract.requested` events into `memory_jobs`; no heavy LLM work.
3. `Runner + RunPollingLoop`:
   - Polls jobs in the background, calls the extractor, writes `memory_items`, and appends `memory_audit_logs`.
4. `ReadService`:
   - Handles, inside memory, "filter by user switches + light rerank + refresh last-access time".
   - Not yet wired into the `newAgent` prompt-injection side; that cutover point is deliberately left open.
5. `ManageService`:
   - Memory management surface: list memories, soft-delete memories, read/update the user's memory switches.
   - Deletions synchronously write an audit log, guaranteeing "every change leaves an audit trail".
## Recommended integration
1. Create once at startup:
   - `memoryModule := memory.NewModule(db, llmClient, memory.LoadConfigFromViper())`
2. Start the background worker:
   - `memoryModule.StartWorker(ctx)`
3. Enqueue a memory job inside a transaction:
   - `memoryModule.WithTx(tx).EnqueueExtract(ctx, payload, eventID)`
4. Later, on the agent read path:
   - call `memoryModule.Retrieve(...)` directly (a combined sketch follows this list)
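
A minimal wiring sketch combining the four calls above, assuming the call shapes listed here (`db`, `llmClient`, `tx`, `payload`, and `eventID` are placeholders, and the error handling is illustrative):

```go
// Startup: build the module once and start the background worker.
memoryModule := memory.NewModule(db, llmClient, memory.LoadConfigFromViper())
memoryModule.StartWorker(ctx)

// Inside an existing transaction: enqueue an extract job.
if err := memoryModule.WithTx(tx).EnqueueExtract(ctx, payload, eventID); err != nil {
    return err // rolls back with the surrounding transaction
}

// Later, on the agent read path (query shape is a placeholder):
hits, err := memoryModule.Retrieve(ctx, query)
```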
## Current implementation boundaries
1. The async write path is done, and memory-internal read/management capabilities are in place, but "read recall + prompt injection" is not wired up yet.
2. MySQL fact persistence is done, but Milvus vector sync is not connected yet.
3. LLM extraction and basic audit logging are done, but decision-style conflict resolution (`ADD/UPDATE/DELETE/NONE`) is not.
4. The current priority is closing memory's own loop first; agent injection, vector retrieval, and conflict updates come next.
## Recommended verification
1. Send a user message and confirm the outbox produces `memory.extract.requested`.
2. After the event is consumed, confirm `memory_jobs` shows a `pending` row, or one already claimed by the worker as `processing`.
3. After the background worker runs, confirm:
   - `memory_jobs.status = success`
   - a new memory appears in `memory_items`
   - a matching `create` row appears in `memory_audit_logs`
4. Call `ManageService` directly:
   - `ListItems` lists active/archived memories
   - `DeleteItem` flips the status to `deleted` and adds a `delete` audit row
   - `GetUserSetting / UpsertUserSetting` return and update the user's memory switches
## Next steps
1. Wire `ReadService` into `newAgent`, injecting the three high-value memory types first: preferences / constraints / recent `todo_hint`.
2. Add vector recall and rerank so "fact memories relevant to the current topic" join the candidate set.
3. Then add `ADD/UPDATE/DELETE/NONE` decisions to solve "synonymous memory dedup" and "stale memory updates".

853
backend/memory/log.txt Normal file
View File

@@ -0,0 +1,853 @@
GOROOT=C:\Program Files\Go #gosetup
GOPATH=C:\Users\Dev\go #gosetup
"C:\Program Files\Go\bin\go.exe" build -o C:\Users\Dev\AppData\Local\JetBrains\GoLand2025.3\tmp\GoLand\___6go_build_main_go.exe D:\SmartFlow-Agent\backend\main.go #gosetup
C:\Users\Dev\AppData\Local\JetBrains\GoLand2025.3\tmp\GoLand\___6go_build_main_go.exe #gosetup
2026/04/10 22:43:49 Config loaded successfully
2026/04/10 22:43:57 Database connected successfully
2026/04/10 22:43:57 Database auto migration completed
2026/04/10 22:43:57 RAG runtime is disabled
2026/04/10 22:43:57 outbox engine starting: topic=smartflow.agent.outbox brokers=[localhost:9092] retry_scan=1s batch=100
2026/04/10 22:43:57 Kafka topic is ready: smartflow.agent.outbox
2026/04/10 22:43:57 Outbox event bus started
2026/04/10 22:43:57 Memory worker started
2026/04/10 22:43:57 Routes setup completed
2026/04/10 22:43:57 Server starting on port 8080...
[GIN-debug] [WARNING] Creating an Engine instance with the Logger and Recovery middleware already attached.
[GIN-debug] [WARNING] Running in "debug" mode. Switch to "release" mode in production.
- using env: export GIN_MODE=release
- using code: gin.SetMode(gin.ReleaseMode)
[GIN-debug] GET /api/v1/health --> github.com/LoveLosita/smartflow/backend/routers.RegisterRouters.func1 (3 handlers)
[GIN-debug] POST /api/v1/user/register --> github.com/LoveLosita/smartflow/backend/api.(*UserHandler).UserRegister-fm (3 handlers)
[GIN-debug] POST /api/v1/user/login --> github.com/LoveLosita/smartflow/backend/api.(*UserHandler).UserLogin-fm (3 handlers)
[GIN-debug] POST /api/v1/user/refresh-token --> github.com/LoveLosita/smartflow/backend/api.(*UserHandler).RefreshTokenHandler-fm (3 handlers)
[GIN-debug] POST /api/v1/user/logout --> github.com/LoveLosita/smartflow/backend/api.(*UserHandler).UserLogout-fm (5 handlers)
[GIN-debug] POST /api/v1/task/create --> github.com/LoveLosita/smartflow/backend/api.(*TaskHandler).AddTask-fm (6 handlers)
[GIN-debug] PUT /api/v1/task/complete --> github.com/LoveLosita/smartflow/backend/api.(*TaskHandler).CompleteTask-fm (6 handlers)
[GIN-debug] PUT /api/v1/task/undo-complete --> github.com/LoveLosita/smartflow/backend/api.(*TaskHandler).UndoCompleteTask-fm (6 handlers)
[GIN-debug] GET /api/v1/task/get --> github.com/LoveLosita/smartflow/backend/api.(*TaskHandler).GetUserTasks-fm (5 handlers)
[GIN-debug] POST /api/v1/course/validate --> github.com/LoveLosita/smartflow/backend/api.(*CourseHandler).CheckUserCourse-fm (5 handlers)
[GIN-debug] POST /api/v1/course/import --> github.com/LoveLosita/smartflow/backend/api.(*CourseHandler).AddUserCourses-fm (6 handlers)
[GIN-debug] POST /api/v1/task-class/add --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserAddTaskClass-fm (6 handlers)
[GIN-debug] GET /api/v1/task-class/list --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserGetTaskClassInfos-fm (5 handlers)
[GIN-debug] GET /api/v1/task-class/get --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserGetCompleteTaskClass-fm (5 handlers)
[GIN-debug] PUT /api/v1/task-class/update --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserUpdateTaskClass-fm (6 handlers)
[GIN-debug] POST /api/v1/task-class/insert-into-schedule --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserAddTaskClassItemIntoSchedule-fm (6 handlers)
[GIN-debug] DELETE /api/v1/task-class/delete-item --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).DeleteTaskClassItem-fm (6 handlers)
[GIN-debug] DELETE /api/v1/task-class/delete-class --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).DeleteTaskClass-fm (6 handlers)
[GIN-debug] PUT /api/v1/task-class/apply-batch-into-schedule --> github.com/LoveLosita/smartflow/backend/api.(*TaskClassHandler).UserInsertBatchTaskClassItemsIntoSchedule-fm (6 handlers)
[GIN-debug] GET /api/v1/schedule/today --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).GetUserTodaySchedule-fm (5 handlers)
[GIN-debug] GET /api/v1/schedule/week --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).GetUserWeeklySchedule-fm (5 handlers)
[GIN-debug] DELETE /api/v1/schedule/delete --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).DeleteScheduleEvent-fm (6 handlers)
[GIN-debug] GET /api/v1/schedule/recent-completed --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).GetUserRecentCompletedSchedules-fm (5 handlers)
[GIN-debug] GET /api/v1/schedule/current --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).GetUserOngoingSchedule-fm (5 handlers)
[GIN-debug] DELETE /api/v1/schedule/undo-task-item --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).UserRevocateTaskItemFromSchedule-fm (6 handlers)
[GIN-debug] GET /api/v1/schedule/smart-planning --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).SmartPlanning-fm (5 handlers)
[GIN-debug] POST /api/v1/schedule/smart-planning-multi --> github.com/LoveLosita/smartflow/backend/api.(*ScheduleAPI).SmartPlanningMulti-fm (5 handlers)
[GIN-debug] POST /api/v1/agent/chat --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).ChatAgent-fm (6 handlers)
[GIN-debug] GET /api/v1/agent/conversation-meta --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).GetConversationMeta-fm (5 handlers)
[GIN-debug] GET /api/v1/agent/conversation-list --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).GetConversationList-fm (5 handlers)
[GIN-debug] GET /api/v1/agent/conversation-history --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).GetConversationHistory-fm (5 handlers)
[GIN-debug] GET /api/v1/agent/schedule-preview --> github.com/LoveLosita/smartflow/backend/api.(*AgentHandler).GetSchedulePlanPreview-fm (5 handlers)
[GIN-debug] [WARNING] You trusted all proxies, this is NOT safe. We recommend you to set a value.
Please check https://github.com/gin-gonic/gin/blob/master/docs/doc.md#dont-trust-all-proxies for details.
[GIN-debug] Listening and serving HTTP on :8080
2026/04/10 22:43:57 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[3.151ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:43:57.526')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:43:59 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.599ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:43:59.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:01 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.046ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:01.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:03 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.652ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:03.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:05 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[0.918ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:05.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:07 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.113ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:07.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:09 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.325ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:09.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:11 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.483ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:11.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:13 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.070ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:13.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:15 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.599ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:15.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:17 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[0.523ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:17.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:19 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.107ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:19.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:21 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.056ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:21.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:23 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.285ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:23.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:25 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[0.920ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:25.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:27 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.565ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:27.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:29 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.119ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:29.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:31 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.254ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:31.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:33 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.475ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:33.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:35 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.489ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:35.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:37 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.128ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:37.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:39 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.532ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:39.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:41 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.111ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:41.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:43 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.596ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:43.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:45 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.047ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:45.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:47 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.141ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:47.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:49 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.091ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:49.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:51 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.118ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:51.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:53 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.068ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:53.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:55 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.112ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:55.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:57 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.612ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:57.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:44:59 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.474ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:44:59.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:01 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.514ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:01.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:03 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.485ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:03.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:05 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.543ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:05.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:07 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.649ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:07.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:09 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.035ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:09.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:11 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.046ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:11.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:13 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.055ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:13.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:15 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.177ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:15.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:17 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.250ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:17.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:19 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.074ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:19.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:21 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.903ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:21.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:23 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.081ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:23.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:25 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.147ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:25.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:27 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.064ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:27.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:29 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.056ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:29.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:31 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.577ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:31.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:33 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.464ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:33.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:35 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.467ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:35.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:37 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.541ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:37.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:39 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.457ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:39.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:41 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.545ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:41.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:43 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.342ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:43.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:45 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[0.577ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:45.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:47 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.538ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:47.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:49 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.219ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:49.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:51 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.073ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:51.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:53 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.101ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:53.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:55 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.099ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:55.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:57 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.549ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:57.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:45:59 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.098ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:45:59.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:01 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[0.993ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:46:01.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:03 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.203ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:46:03.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:05 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.514ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:46:05.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:07 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.033ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:46:07.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:09 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.586ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:46:09.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:11 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.123ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:46:11.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:11 D:/SmartFlow-Agent/backend/dao/agent.go:306 record not found
[44.927ms] [rows:0] SELECT * FROM `agent_chats` WHERE user_id = 1 AND chat_id = '325b37d1-3483-4c6f-b755-44532a4dbe3c' ORDER BY `agent_chats`.`id` LIMIT 1
2026/04/10 22:46:11 [DEBUG] loadOrCreateRuntimeState chatID=325b37d1-3483-4c6f-b755-44532a4dbe3c ok=false err=<nil> hasRuntime=false hasPending=false hasCtx=false hasSchedule=false hasOriginal=false
2026/04/10 22:46:11 [GORM-Cache] Invalidated conversation history cache for user 1 conversation 325b37d1-3483-4c6f-b755-44532a4dbe3c
2026/04/10 22:46:12 D:/SmartFlow-Agent/backend/memory/repo/settings_repo.go:40 record not found
[48.854ms] [rows:0] SELECT * FROM `memory_user_settings` WHERE user_id = 1 ORDER BY `memory_user_settings`.`user_id` LIMIT 1
22:46:13、22:46:15 两次 memory_jobs 轮询同样 record not found略。
2026/04/10 22:46:15 [DEBUG] chat routing chat=325b37d1-3483-4c6f-b755-44532a4dbe3c route=execute needs_rough_build=true needs_refine_after_rough_build=true allow_reorder=false has_rough_build_done=false task_class_count=4 reason=批量排课需求有任务类ID且给出明确微调偏好避开早八和晚10
2026/04/10 22:46:16 [DEBUG] rough_build scope_task_classes=[2 3 4 5] placements=44 applied=44 day_mapping_miss=0 task_item_match_miss=0 pending_in_scope=0 total_tasks=105 window_days=42
2026/04/10 22:46:16 [DEBUG] execute LLM context begin chat=325b37d1-3483-4c6f-b755-44532a4dbe3c round=1 message_count=4
----- message[0] -----
role: system
content:
你叫 SmartFlow是专为重邮CQUPT学子打造的智能排程专家。
你的回复应当专业、干练,偶尔可以带一点程序员式的冷幽默。
重要约束:你无法直接写入数据库。除非系统明确告知“任务已落库成功”,否则禁止使用“已安排/已记录/已帮你记下”等完成态表述。
你是 SmartFlow NewAgent 的执行器,当前处于自由执行模式(无预定义 plan 步骤)。
阶段事实(强约束):
1. 若上下文给出“粗排已完成/rough_build_done”表示目标任务类已经进入 suggested/existing不是待排入状态。
2. 当前阶段目标是“微调”,不是“重新粗排”。
3. 若上下文明确“当前未收到明确微调偏好/本轮先收口”,应直接结束而不是继续优化循环。
4. 若用户提出了二次微调方向,本轮优先目标就是满足该方向。
你可以做什么:
1. 你可以基于用户给定的二次微调方向,对 suggested 做定向微调。
2. existing 属于已安排事实层,可用于冲突判断和参考,不作为 move/batch_move/spread_even 的目标。
3. 你可以先调用读工具补充必要事实(例如 get_overview/list_tasks/query_target_tasks/query_available_slots/get_task_info
4. 你可以在需要改动时提出 confirmmove/swap/unplace/batch_move/spread_even
5. 只有用户明确允许打乱顺序时,才可使用 min_context_switch。
6. 多任务处理默认使用队列链路:先 query_target_tasks(enqueue=true) 入队,再 queue_pop_head 逐项处理。
你不要做什么:
1. 不要假设任务还没排进去,然后改成逐个手动 place。
2. 不要伪造工具结果。
3. 不要重复做同类查询而没有新增结论连续两轮同类读查询后必须转入执行、ask_user或明确阻塞原因。
4. list_tasks 的 status 只允许单值all / existing / suggested / pending。禁止使用 "existing,suggested" 这类拼接值。
5. 若工具结果与已知事实明显冲突如无写操作却从“有任务”变成“0任务”先自我纠错并重查一次不要直接 ask_user。
6. 不要连续两轮调用“同一读工具 + 等价 arguments”若上一轮已成功返回下一轮必须换工具或进入 confirm。
7. list_tasks.category 只接受任务类名称,不接受 task_class_ids如 "1,2,3")。
8. 若已明确“本轮先收口”,不要继续调用 list_tasks/query_available_slots/move 做无目标微调。
9. 若用户明确了微调方向,不要只做“局部看起来更空”的随机调整;每次改动都要能对应到该方向。
10. 若顺序策略为“保持顺序”,禁止调用 min_context_switch。
11. 不要在同一轮构造大规模 batch_movebatch_move 最多 2 条,超过请走队列逐项处理。
12. 未调用 queue_pop_head 获取 current 前,不要调用 queue_apply_head_move。
13. 工具参数必须严格使用 schema 字段,禁止自造别名;例如 day_from/day_to 非法,必须改用 day_start/day_end。
执行规则:
1. 只输出严格 JSON不要输出 markdown不要在 JSON 外补充文本。
2. 读操作action=continue + tool_call。
3. 写操作action=confirm + tool_call。
4. 缺关键上下文且无法通过工具补齐action=ask_user。
5. 任务完成action=done并在 goal_check 总结完成证据。
6. 流程应正式终止action=abort。
补充 JSON 约束:
1. 只输出当前 action 真正需要的字段;无关字段直接省略,不要用 ""、{}、[]、null 占位。
2. 若输出 tool_call参数字段名只能是 arguments禁止写成 parameters。
3. tool_call 只能是单个对象:{"name":"工具名","arguments":{...}},不能输出数组。
4. 只有 action=abort 时才允许输出 abort 字段;非 abort 动作不要输出 abort。
5. action=continue / ask_user / confirm 时speak 必须是非空自然语言。
可用工具(简表):
1. batch_move原子性批量移动多个任务仅 suggested最多2条全部成功才生效。若含 existing/pending 或任一冲突将整批失败回滚。
参数moves(必填,array)
返回类型string自然语言文本
返回示例批量移动完成2个任务全部成功。单次最多2条
2. get_overview获取规划窗口总览任务视角全量返回保留课程占位统计展开任务清单过滤课程明细
参数:{}
返回类型string自然语言文本
返回示例规划窗口共27天...课程占位条目34个...任务清单(全量,已过滤课程)...
3. get_task_info查询单个任务详细信息包括类别、状态、占用时段、嵌入关系。
参数task_id(必填,int)
返回类型string自然语言文本
返回示例:[35]第一章随机事件与概率 | 状态:已预排(suggested) | 占用时段第3天第5-6节
4. list_tasks列出任务清单可按类别和状态过滤。category 传任务类名称status 仅支持单值 all/existing/suggested/pending。
参数category(可选,string)status(可选,string:all/existing/suggested/pending)
返回类型string自然语言文本
返回示例已预排任务共24个 [35]第一章随机事件与概率 — 已预排至 第3天第5-6节...
5. min_context_switch在指定任务集合内重排 suggested 任务尽量让同类任务连续以减少上下文切换。仅在用户明确允许打乱顺序时使用。task_ids 必填(兼容 task_id
参数task_id(可选,int)task_ids(必填,array)
返回类型string自然语言文本
返回示例:最少上下文切换重排完成:共处理 6 个任务,上下文切换次数 5 -> 2。
6. move将一个已预排任务仅 suggested移动到新位置。existing 属于已安排事实层,不参与 move。task_id/new_day/new_slot_start 必填。
参数new_day(必填,int)new_slot_start(必填,int)task_id(必填,int)
返回类型string自然语言文本
返回示例:已将 [35]... 从第3天第5-6节移至第5天第3-4节。
7. place将一个待安排任务预排到指定位置。自动检测可嵌入宿主。task_id/day/slot_start 必填。
参数day(必填,int)slot_start(必填,int)task_id(必填,int)
返回类型string自然语言文本
返回示例:已将 [35]... 预排到第5天第3-4节。
8. query_available_slots查询候选空位池先返回纯空位不足再补可嵌入位适合 move 前的落点筛选。
参数after_section(可选,int)allow_embed(可选,bool)before_section(可选,int)day(可选,int)day_end(可选,int)day_of_week(可选,array)day_scope(可选,string:all/workday/weekend)day_start(可选,int)duration(可选,int)exclude_sections(可选,array)limit(可选,int)section_from(可选,int)section_to(可选,int)slot_type(可选,string)slot_types(可选,array)span(可选,int)week(可选,int)week_filter(可选,array)week_from(可选,int)week_to(可选,int)
返回类型stringJSON字符串
返回示例:{"tool":"query_available_slots","count":12,"strict_count":8,"embedded_count":4,"slots":[{"day":5,"week":12,"day_of_week":3,"slot_start":1,"slot_end":2,"slot_type":"empty"}]}
9. query_range查看某天或某时段的细粒度占用详情。day 必填slot_start/slot_end 选填(不填查整天)。
参数day(必填,int)slot_end(可选,int)slot_start(可选,int)
返回类型string自然语言文本
返回示例第5天第3-6节第3节空、第4节空...
10. query_target_tasks查询候选任务集合可按 status/week/day/task_id/category 筛选;默认自动入队,供后续 queue_pop_head 逐项处理。
参数category(可选,string)day(可选,int)day_end(可选,int)day_of_week(可选,array)day_scope(可选,string:all/workday/weekend)day_start(可选,int)enqueue(可选,bool)limit(可选,int)reset_queue(可选,bool)status(可选,string:all/existing/suggested/pending)task_id(可选,int)task_ids(可选,array)task_item_id(可选,int)task_item_ids(可选,array)week(可选,int)week_filter(可选,array)week_from(可选,int)week_to(可选,int)
返回类型stringJSON字符串
返回示例:{"tool":"query_target_tasks","count":6,"status":"suggested","enqueue":true,"enqueued":6,"queue":{"pending_count":6},"items":[{"task_id":35,"name":"示例任务","status":"suggested","slots":[{"day":3,"week":12,"day_of_week":1,"slot_start":5,"slot_end":6}]}]}
11. queue_apply_head_move将当前队首任务移动到指定位置并自动出队。仅作用于 current不接受 task_id。new_day/new_slot_start 必填。
参数new_day(必填,int)new_slot_start(必填,int)
返回类型stringJSON字符串
返回示例:{"tool":"queue_apply_head_move","success":true,"task_id":35,"pending_count":4,"completed_count":2,"result":"已将 [35]... 从第3天第5-6节移至第5天第3-4节。"}
12. queue_pop_head弹出并返回当前队首任务若已有 current 则复用,保证一次只处理一个任务。
参数:{}
返回类型stringJSON字符串
返回示例:{"tool":"queue_pop_head","has_head":true,"pending_count":5,"current":{"task_id":35,"name":"示例任务","status":"suggested","slots":[{"day":3,"week":12,"day_of_week":1,"slot_start":5,"slot_end":6}]}}
13. queue_skip_head跳过当前队首任务不改日程将其标记为 skipped 并继续后续队列。
参数reason(可选,string)
返回类型stringJSON字符串
返回示例:{"tool":"queue_skip_head","success":true,"skipped_task_id":35,"pending_count":4,"skipped_count":1}
14. queue_status查看当前待处理队列状态pending/current/completed/skipped
参数:{}
返回类型stringJSON字符串
返回示例:{"tool":"queue_status","pending_count":5,"completed_count":1,"skipped_count":0,"current_task_id":35,"current_attempt":1}
15. spread_even在给定任务集合内做均匀化铺开先按筛选条件收集候选坑位再规划并原子落地。task_ids 必填(兼容 task_id
参数after_section(可选,int)allow_embed(可选,bool)before_section(可选,int)day(可选,int)day_end(可选,int)day_of_week(可选,array)day_scope(可选,string:all/workday/weekend)day_start(可选,int)exclude_sections(可选,array)limit(可选,int)slot_type(可选,string)slot_types(可选,array)task_id(可选,int)task_ids(必填,array)week(可选,int)week_filter(可选,array)week_from(可选,int)week_to(可选,int)
返回类型string自然语言文本
返回示例:均匀化调整完成:共处理 6 个任务,候选坑位 24 个。
16. swap交换两个已落位任务的位置。两个任务必须时长相同。task_a/task_b 必填。
参数task_a(必填,int)task_b(必填,int)
返回类型string自然语言文本
返回示例:交换完成:[35]... ↔ [36]...
17. unplace将一个已落位任务移除恢复为待安排状态。会自动清理嵌入关系。task_id 必填。
参数task_id(必填,int)
返回类型string自然语言文本
返回示例:已将 [35]... 移除,恢复为待安排状态。
----- message[1] -----
role: assistant
content:
历史上下文(仅供参考):
- 用户目标帮我排一下这些任务类直接排不要早八和晚10
- 阶段锚点:粗排已完成,本轮仅做微调,不重新 place。
- 历史归档 ReAct 摘要:暂无。
- 历史归档 ReAct 窗口:暂无。
- 当前循环早期摘要:暂无。
----- message[2] -----
role: assistant
content:
当轮 ReAct Loop 记录(窗口):
- 已清空(新一轮 loop 准备中)。
----- message[3] -----
role: system
content:
当前执行状态:
- 当前轮次1/60
- 当前模式:自由执行(无预定义步骤)
执行锚点:
- 当前用户诉求帮我排一下这些任务类直接排不要早八和晚10
- 目标任务类task_class_ids=[2,3,4,5]
- 啥时候结束Loop你可以根据工具调用记录自行判断。
- 非目标:不重新粗排、不修改无关任务类。
- 阶段约束:粗排已完成,本轮只微调 suggestedexisting 仅作已安排事实参考,不作为可移动目标。
- 参数纪律:工具参数必须严格使用 schema 字段;若返回“参数非法”,需先改参再继续。
- 顺序策略:默认保持 suggested 相对顺序,禁止调用 min_context_switch。
本轮指令:请继续当前任务的执行阶段,严格输出 JSON。
[DEBUG] execute LLM context end chat=325b37d1-3483-4c6f-b755-44532a4dbe3c round=1
22:46:17 至 22:46:29 间 7 次 memory_jobs 轮询均 record not found略。
2026/04/10 22:46:30 [DEBUG] execute LLM 响应 chat=325b37d1-3483-4c6f-b755-44532a4dbe3c round=1 action=continue speak_len=103 raw_len=186 raw_preview={"action":"continue","tool_call":{"name":"get_overview","arguments":{}},"speak":"我先看看当前粗排完成后的总览,找出需要调整的早八/晚十时段的目标任务。"}
2026/04/10 22:46:30 [DEBUG] execute tool chat=325b37d1-3483-4c6f-b755-44532a4dbe3c round=1 tool=get_overview args={} before=tasks=105 pending=0 suggested=44 existing=61 task_item_with_slot=44 event_with_slot=52 after=tasks=105 pending=0 suggested=44 existing=61 task_item_with_slot=44 event_with_slot=52 result_preview=规划窗口共42天每天12个时段总计504个时段。 当前已占用197个空闲307个。课程占位条目61个仅用于占位统计任务条目已安排(existing)0个、已预排(suggested)44个、待安排(pending)0个。 每日概况: 第1天(星期3)总占3/12课程占3/12任务占0/12 — 任务:无 第2天(星期4)总占6/12课程占6/12任务占0/12
2026/04/10 22:46:30 [DEBUG] execute LLM context begin chat=325b37d1-3483-4c6f-b755-44532a4dbe3c round=2 message_count=4
----- message[0] -----
role: system
content:
(与 round=1 的 system prompt 完全一致:同一份人设约束、阶段事实、可做/不可做清单、执行规则及 17 个工具简表,此处不再重复。)
----- message[1] -----
role: assistant
content:
历史上下文(仅供参考):
- 用户目标帮我排一下这些任务类直接排不要早八和晚10
- 阶段锚点:粗排已完成,本轮仅做微调,不重新 place。
- 历史归档 ReAct 摘要:暂无。
- 历史归档 ReAct 窗口:暂无。
- 当前循环早期摘要:无(当前窗口已覆盖全部 ReAct 记录)。
----- message[2] -----
role: assistant
content:
当轮 ReAct Loop 记录(窗口):
1) thought/reason我先看看当前粗排完成后的总览找出需要调整的早八/晚十时段的目标任务。
tool_callget_overview({})
observation规划窗口共42天每天12个时段总计504个时段。
当前已占用197个空闲307个。课程占位条目61个仅用于占位统计任务条目已安排(existing)0个、已预排(suggested)44个、待安排(pending)0个。
每日概况:
第1天(星期3)总占3/12课程占3/12任务占0/12 — 任务:无
第2天(星期4)总占6/12课程占6/12任务占0/12 — 任务:无
第3天(星期5)总占6/12课程占4/12任务占2/12 — 任务:[94]线性表(顺序表/链表)与复杂度分析(suggested,9-10节)
第4天(星期6)总占4/12课程占0/12任务占4/12 — 任务:[70]数制与码制、逻辑代数基础(suggested,3-4节) [82]命题逻辑与等值演算(suggested,5-6节)
第5天(星期7)总占0/12课程占0/12任务占0/12 — 任务:无
第6天(星期1)总占6/12课程占2/12任务占4/12 — 任务:[71]组合逻辑电路分析方法(suggested,7-8节) [95]栈与队列及典型应用(suggested,9-10节)
第7天(星期2)总占12/12课程占10/12任务占2/12 — 任务:[83]谓词逻辑与量词推理(suggested,3-4节)
第8天(星期3)总占5/12课程占5/12任务占0/12 — 任务:无
第9天(星期4)总占8/12课程占6/12任务占2/12 — 任务:[72]组合逻辑电路设计方法(含卡诺图)(suggested,9-10节)
第10天(星期5)总占6/12课程占2/12任务占4/12 — 任务:[96]串与模式匹配KMP(suggested,7-8节) [84]集合与关系基本性质(suggested,9-10节)
第11天(星期6)总占0/12课程占0/12任务占0/12 — 任务:无
第12天(星期7)总占2/12课程占0/12任务占2/12 — 任务:[73]译码器、编码器、多路选择器综合应用(suggested,7-8节)
第13天(星期1)总占6/12课程占2/12任务占4/12 — 任务:[97]数组与广义表、稀疏矩阵(suggested,5-6节) [85]关系闭包与等价关系/偏序关系(suggested,7-8节)
第14天(星期2)总占10/12课程占10/12任务占0/12 — 任务:无
第15天(星期3)总占7/12课程占3/12任务占4/12 — 任务:[74]触发器工作原理与时序特性(suggested,3-4节) [62]第一章 随机事件与概率(suggested,5-6节)
第16天(星期4)总占6/12课程占4/12任务占2/12 — 任务:[98]树与二叉树遍历、线索化(suggested,9-10节)
第17天(星期5)总占6/12课程占4/12任务占2/12 — 任务:[86]函数与映射(单射满射双射)(suggested,5-6节)
第18天(星期6)总占4/12课程占0/12任务占4/12 — 任务:[63]第二章 条件概率与全概率公式(suggested,7-8节) [75]计数器设计与分析(suggested,9-10节)
第19天(星期7)总占0/12课程占0/12任务占0/12 — 任务:无
第20天(星期1)总占6/12课程占2/12任务占4/12 — 任务:[87]代数系统与群环域入门(suggested,3-4节) [99]二叉排序树、AVL、红黑树概念(suggested,5-6节)
第21天(星期2)总占14/12课程占10/12任务占4/12 — 任务:[64]第三章 随机变量及其分布(suggested,3-4节) [76]寄存器与移位寄存器(suggested,7-8节)
第22天(星期3)总占5/12课程占5/12任务占0/12 — 任务:无
第23天(星期4)总占6/12课程占4/12任务占2/12 — 任务:[88]图的基本概念与图的表示(suggested,9-10节)
第24天(星期5)总占6/12课程占2/12任务占4/12 — 任务:[100]堆与优先队列(suggested,5-6节) [65]第四章 多维随机变量(suggested,7-8节)
第25天(星期6)总占2/12课程占0/12任务占2/12 — 任务:[77]时序逻辑电路设计(同步/异步)(suggested,5-6节)
第26天(星期7)总占0/12课程占0/12任务占0/12 — 任务:无
第27天(星期1)总占8/12课程占2/12任务占6/12 — 任务:[66]第五章 数字特征与大数定律(suggested,3-4节) [89]欧拉图、哈密顿图、最短路(suggested,5-6节) [101]图的存储与遍历DFS/BFS(suggested,7-8节)
第28天(星期2)总占12/12课程占10/12任务占2/12 — 任务:[78]状态机建模与化简(suggested,3-4节)
第29天(星期3)总占5/12课程占3/12任务占2/12 — 任务:[67]第六章 中心极限定理与参数估计(suggested,7-8节)
第30天(星期4)总占4/12课程占2/12任务占2/12 — 任务:[90]树与生成树、最小生成树(suggested,9-10节)
第31天(星期5)总占8/12课程占4/12任务占4/12 — 任务:[102]最短路径与拓扑排序(suggested,5-6节) [79]A/D 与 D/A 基础电路(suggested,7-8节)
第32天(星期6)总占2/12课程占0/12任务占2/12 — 任务:[68]综合刷题与错题回顾(suggested,9-10节)
第33天(星期7)总占0/12课程占0/12任务占0/12 — 任务:无
第34天(星期1)总占6/12课程占0/12任务占6/12 — 任务:[91]组合计数(加法乘法原理)(suggested,3-4节) [80]历年真题专项(组合逻辑)(suggested,5-6节) [103]查找(顺序/折半/散列)(suggested,7-8节)
第35天(星期2)总占4/12课程占2/12任务占2/12 — 任务:[69]模拟考试与查漏补缺(suggested,5-6节)
第36天(星期3)总占2/12课程占2/12任务占0/12 — 任务:无
第37天(星期4)总占6/12课程占0/12任务占6/12 — 任务:[81]历年真题专项(时序逻辑)(suggested,5-6节) [92]递推关系与母函数基础(suggested,7-8节) [104]排序(插入/交换/选择/归并/快排)(suggested,9-10节)
第38天(星期5)总占0/12课程占0/12任务占0/12 — 任务:无
第39天(星期6)总占0/12课程占0/12任务占0/12 — 任务:无
第40天(星期7)总占4/12课程占0/12任务占4/12 — 任务:[93]离散综合题与证明题训练(suggested,3-4节) [105]综合算法题实战与代码模板整理(suggested,5-6节)
第41天(星期1)总占0/12课程占0/12任务占0/12 — 任务:无
第42天(星期2)总占0/12课程占0/12任务占0/12 — 任务:无
任务清单(全量,已过滤课程):
[62]第一章 随机事件与概率 | 状态:suggested | 类别:复习概率论 | task_class_id:2 | 时段:第15天(星期3)第5-6节
[63]第二章 条件概率与全概率公式 | 状态:suggested | 类别:复习概率论 | task_class_id:2 | 时段:第18天(星期6)第7-8节
[64]第三章 随机变量及其分布 | 状态:suggested | 类别:复习概率论 | task_class_id:2 | 时段:第21天(星期2)第3-4节
[65]第四章 多维随机变量 | 状态:suggested | 类别:复习概率论 | task_class_id:2 | 时段:第24天(星期5)第7-8节
[66]第五章 数字特征与大数定律 | 状态:suggested | 类别:复习概率论 | task_class_id:2 | 时段:第27天(星期1)第3-4节
[67]第六章 中心极限定理与参数估计 | 状态:suggested | 类别:复习概率论 | task_class_id:2 | 时段:第29天(星期3)第7-8节
[68]综合刷题与错题回顾 | 状态:suggested | 类别:复习概率论 | task_class_id:2 | 时段:第32天(星期6)第9-10节
[69]模拟考试与查漏补缺 | 状态:suggested | 类别:复习概率论 | task_class_id:2 | 时段:第35天(星期2)第5-6节
[70]数制与码制、逻辑代数基础 | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第4天(星期6)第3-4节
[71]组合逻辑电路分析方法 | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第6天(星期1)第7-8节
[72]组合逻辑电路设计方法(含卡诺图) | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第9天(星期4)第9-10节
[73]译码器、编码器、多路选择器综合应用 | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第12天(星期7)第7-8节
[74]触发器工作原理与时序特性 | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第15天(星期3)第3-4节
[75]计数器设计与分析 | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第18天(星期6)第9-10节
[76]寄存器与移位寄存器 | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第21天(星期2)第7-8节
[77]时序逻辑电路设计(同步/异步) | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第25天(星期6)第5-6节
[78]状态机建模与化简 | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第28天(星期2)第3-4节
[79]A/D 与 D/A 基础电路 | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第31天(星期5)第7-8节
[80]历年真题专项(组合逻辑) | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第34天(星期1)第5-6节
[81]历年真题专项(时序逻辑) | 状态:suggested | 类别:数电期末复习 | task_class_id:3 | 时段:第37天(星期4)第5-6节
[82]命题逻辑与等值演算 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第4天(星期6)第5-6节
[83]谓词逻辑与量词推理 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第7天(星期2)第3-4节
[84]集合与关系基本性质 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第10天(星期5)第9-10节
[85]关系闭包与等价关系/偏序关系 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第13天(星期1)第7-8节
[86]函数与映射(单射满射双射) | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第17天(星期5)第5-6节
[87]代数系统与群环域入门 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第20天(星期1)第3-4节
[88]图的基本概念与图的表示 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第23天(星期4)第9-10节
[89]欧拉图、哈密顿图、最短路 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第27天(星期1)第5-6节
[90]树与生成树、最小生成树 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第30天(星期4)第9-10节
[91]组合计数(加法乘法原理) | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第34天(星期1)第3-4节
[92]递推关系与母函数基础 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第37天(星期4)第7-8节
[93]离散综合题与证明题训练 | 状态:suggested | 类别:离散数学期末复习 | task_class_id:4 | 时段:第40天(星期7)第3-4节
[94]线性表(顺序表/链表)与复杂度分析 | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第3天(星期5)第9-10节
[95]栈与队列及典型应用 | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第6天(星期1)第9-10节
[96]串与模式匹配KMP | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第10天(星期5)第7-8节
[97]数组与广义表、稀疏矩阵 | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第13天(星期1)第5-6节
[98]树与二叉树遍历、线索化 | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第16天(星期4)第9-10节
[99]二叉排序树、AVL、红黑树概念 | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第20天(星期1)第5-6节
[100]堆与优先队列 | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第24天(星期5)第5-6节
[101]图的存储与遍历DFS/BFS | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第27天(星期1)第7-8节
[102]最短路径与拓扑排序 | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第31天(星期5)第5-6节
[103]查找(顺序/折半/散列) | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第34天(星期1)第7-8节
[104]排序(插入/交换/选择/归并/快排) | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第37天(星期4)第9-10节
[105]综合算法题实战与代码模板整理 | 状态:suggested | 类别:数据结构期末复习 | task_class_id:5 | 时段:第40天(星期7)第5-6节
任务类约束(排课时请遵守):
[复习概率论] 策略=均匀分布 总预算=16节 允许嵌水课=是 排除时段=[1,6]
[数电期末复习] 策略=均匀分布 总预算=30节 允许嵌水课=是 排除时段=[1,6]
[离散数学期末复习] 策略=均匀分布 总预算=26节 允许嵌水课=是 排除时段=[1,6]
[数据结构期末复习] 策略=均匀分布 总预算=30节 允许嵌水课=是 排除时段=[1,6]
----- message[3] -----
role: system
content:
(与 round=1 的执行状态块完全一致仅「当前轮次」由 1/60 变为 2/60。)
[DEBUG] execute LLM context end chat=325b37d1-3483-4c6f-b755-44532a4dbe3c round=2
22:46:31 至 22:46:47 间 9 次 memory_jobs 轮询均 record not found略。
2026/04/10 22:46:49 [DEBUG] execute LLM 响应 chat=325b37d1-3483-4c6f-b755-44532a4dbe3c round=2 action=done speak_len=119 raw_len=296 raw_preview={"action":"done","speak":"检查完毕所有已预排任务均避开了早八第1-2节和晚十第11-12节时段微调完成","goal_check":"当前所有suggested状态的任务共44个均安排在第3-10节无早八/晚十时段冲突,符合用户微调需求。"}
22:46:49 至 22:46:53 间 3 次 memory_jobs 轮询均 record not found略。
2026/04/10 22:46:54 [DEBUG] schedule preview write chat=325b37d1-3483-4c6f-b755-44532a4dbe3c user=1 state=tasks=105 pending=0 suggested=44 existing=61 task_item_with_slot=44 event_with_slot=52 preview=entries=96 existing=52 suggested=44 task_type=44 course_type=52 generated_at=2026-04-10T22:46:54+08:00
[GIN] 2026/04/10 - 22:46:55 | 200 | 43.3002757s | 127.0.0.1 | POST "/api/v1/agent/chat"
2026/04/10 22:46:55 outbox due messages=3, start dispatch
2026/04/10 22:46:55 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[0.984ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:46:55.578')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:56 [GORM-Cache] Invalidated conversation history cache for user 1 conversation 325b37d1-3483-4c6f-b755-44532a4dbe3c
2026/04/10 22:46:57 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.039ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:46:57.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:57 [GORM-Cache] Invalidated conversation history cache for user 1 conversation 325b37d1-3483-4c6f-b755-44532a4dbe3c
2026/04/10 22:46:58 outbox due messages=1, start dispatch
2026/04/10 22:46:58 [GORM-Cache] No logic defined for model: model.AgentStateSnapshotRecord
2026/04/10 22:46:59 异步生成会话标题失败(模型生成失败) chat=325b37d1-3483-4c6f-b755-44532a4dbe3c err=failed to create chat completion: context deadline exceeded
2026/04/10 22:46:59 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[1.138ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:46:59.579')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
2026/04/10 22:46:59 [GORM-Cache] No logic defined for model: model.MemoryJob
2026/04/10 22:47:01 [GORM-Cache] No logic defined for model: model.MemoryJob
2026/04/10 22:47:01 D:/SmartFlow-Agent/backend/memory/repo/settings_repo.go:40 record not found
[0.596ms] [rows:0] SELECT * FROM `memory_user_settings` WHERE user_id = 1 ORDER BY `memory_user_settings`.`user_id` LIMIT 1
2026/04/10 22:47:10 [GORM-Cache] No logic defined for model: model.MemoryItem
2026/04/10 22:47:10 [GORM-Cache] No logic defined for model: model.MemoryAuditLog
2026/04/10 22:47:10 [GORM-Cache] No logic defined for model: model.MemoryJob
2026/04/10 22:47:10 memory worker run once success: job_id=1 extracted_facts=1
2026/04/10 22:47:10 D:/SmartFlow-Agent/backend/memory/repo/job_repo.go:96 record not found
[0.918ms] [rows:0] SELECT * FROM `memory_jobs` WHERE job_type = 'extract' AND status IN ('pending','failed') AND ((next_retry_at IS NULL OR next_retry_at <= '2026-04-10 22:47:10.174')) ORDER BY id ASC,`memory_jobs`.`id` LIMIT 1 FOR UPDATE
(队列清空后恢复空轮询22:47:11 至 22:47:57 间的 24 次 memory_jobs 轮询均 record not found重复日志略直至进程被 Ctrl+C 终止。)
进程已完成,退出代码为 -1073741510 (0xC000013A: interrupted by Ctrl+C)

View File

@@ -9,6 +9,7 @@ import "time"
// 2. 允许启动期统一注入,避免业务层直接依赖配置中心。
type Config struct {
    Enabled        bool
    RAGEnabled     bool
    ExtractPrompt  string
    DecisionPrompt string

View File

@@ -25,3 +25,51 @@ type ItemDTO struct {
    CreatedAt *time.Time
    UpdatedAt *time.Time
}

// ItemQuery 描述 memory_items 的通用查询条件。
//
// 职责边界:
// 1. 只表达 memory 仓储层需要的过滤条件;
// 2. 不直接承载注入策略、重排策略等上层业务语义;
// 3. IncludeGlobal 用于“会话级 + 全局级”混合读取场景。
type ItemQuery struct {
    UserID         int
    ConversationID string
    AssistantID    string
    RunID          string
    Statuses       []string
    MemoryTypes    []string
    IncludeGlobal  bool
    OnlyUnexpired  bool
    Limit          int
    Now            time.Time
}

// RetrieveRequest 描述“供提示词注入前读取”所需的最小参数。
type RetrieveRequest struct {
    Query          string
    UserID         int
    ConversationID string
    AssistantID    string
    RunID          string
    MemoryTypes    []string
    Limit          int
    Now            time.Time
}

// ListItemsRequest 描述记忆管理页列表查询参数。
type ListItemsRequest struct {
    UserID         int
    ConversationID string
    Statuses       []string
    MemoryTypes    []string
    Limit          int
}

// DeleteItemRequest 描述软删除一条记忆时所需的最小参数。
type DeleteItemRequest struct {
    UserID       int
    MemoryID     int64
    Reason       string
    OperatorType string
}
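
上面新增的 DTO 中ItemQuery.IncludeGlobal 负责“会话级 + 全局级”混合读取。给一个构造示意最小草图仅演示字段用法status/memory_type 的取值为假设,实际以 model 包常量为准):

    // 会话内读取,同时带上不绑定会话的全局记忆
    q := memorymodel.ItemQuery{
        UserID:         1,
        ConversationID: "325b37d1-3483-4c6f-b755-44532a4dbe3c",
        Statuses:       []string{"active"},     // 假设的状态取值
        MemoryTypes:    []string{"preference"}, // 假设的类型取值
        IncludeGlobal:  true, // 额外命中 conversation_id 为空的全局条目
        OnlyUnexpired:  true, // 结合 Now 过滤已过期条目
        Limit:          20,
        Now:            time.Now(),
    }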

View File

@@ -26,6 +26,8 @@ type FactCandidate struct {
    Title            string
    Content          string
    Confidence       float64
    Importance       float64
    SensitivityLevel int
    IsExplicit       bool
}
@@ -37,5 +39,7 @@
    NormalizedContent string
    ContentHash       string
    Confidence        float64
    Importance        float64
    SensitivityLevel  int
    IsExplicit        bool
}

View File

@@ -10,3 +10,11 @@ type UserSettingDTO struct {
    SensitiveMemoryEnabled bool
    UpdatedAt              *time.Time
}

// UpdateUserSettingRequest 描述记忆开关写入请求。
type UpdateUserSettingRequest struct {
    UserID                 int
    MemoryEnabled          bool
    ImplicitMemoryEnabled  bool
    SensitiveMemoryEnabled bool
}

backend/memory/module.go新增文件+171 行)
View File

@@ -0,0 +1,171 @@
package memory

import (
    "context"
    "errors"
    "log"

    infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
    infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
    memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
    memoryorchestrator "github.com/LoveLosita/smartflow/backend/memory/orchestrator"
    memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
    memoryservice "github.com/LoveLosita/smartflow/backend/memory/service"
    memoryworker "github.com/LoveLosita/smartflow/backend/memory/worker"

    "gorm.io/gorm"
)

// Module 是 memory 模块对外暴露的统一门面。
//
// 职责边界:
// 1. 负责把 repo、service、worker、orchestrator 组装成一个稳定入口;
// 2. 负责对外暴露“写入 / 读取 / 管理 / 启动 worker”这些高层意图
// 3. 不负责替代应用层 DI也不负责替代上层事务管理器事务边界仍由调用方掌控。
type Module struct {
    db  *gorm.DB
    cfg memorymodel.Config

    llmClient  *infrallm.Client
    ragRuntime infrarag.Runtime

    jobRepo      *memoryrepo.JobRepo
    itemRepo     *memoryrepo.ItemRepo
    auditRepo    *memoryrepo.AuditRepo
    settingsRepo *memoryrepo.SettingsRepo

    enqueueService *memoryservice.EnqueueService
    readService    *memoryservice.ReadService
    manageService  *memoryservice.ManageService

    runner *memoryworker.Runner
}

// LoadConfigFromViper 复用 memory 子包里的配置加载逻辑,对外收口一个统一入口。
func LoadConfigFromViper() memorymodel.Config {
    return memoryservice.LoadConfigFromViper()
}

// NewModule 创建 memory 模块门面。
//
// 设计说明:
// 1. 这里做的是“轻组装”,不引入额外容器概念,方便先接进现有项目;
// 2. llmClient 允许为 nil此时写入链路会自动回退到本地 fallback 抽取;
// 3. ragRuntime 允许为 nil此时读取/向量同步自动回退旧逻辑;
// 4. 若后续接入统一 DI 容器,也应优先注册这个 Module而不是把内部 repo/service 继续向外泄漏。
func NewModule(db *gorm.DB, llmClient *infrallm.Client, ragRuntime infrarag.Runtime, cfg memorymodel.Config) *Module {
    return wireModule(db, llmClient, ragRuntime, cfg)
}

// WithTx 返回绑定到指定事务连接的同构门面。
//
// 步骤化说明:
// 1. 上层事务管理器先创建 tx
// 2. 再通过 WithTx(tx) 把 memory 内部所有 repo/service 一次性切到同一个事务连接;
// 3. 这样外部无需重新 new 一堆 repo也不会破坏既有跨表事务边界。
func (m *Module) WithTx(tx *gorm.DB) *Module {
    if m == nil {
        return nil
    }
    if tx == nil {
        return m
    }
    return wireModule(tx, m.llmClient, m.ragRuntime, m.cfg)
}

// EnqueueExtract 把一次记忆抽取请求入队到 memory_jobs。
func (m *Module) EnqueueExtract(
    ctx context.Context,
    payload memorymodel.ExtractJobPayload,
    sourceEventID string,
) error {
    if m == nil || m.enqueueService == nil {
        return errors.New("memory module enqueue service is nil")
    }
    return m.enqueueService.EnqueueExtractJob(ctx, payload, sourceEventID)
}

// Retrieve 读取后续可供 prompt 注入使用的候选记忆。
func (m *Module) Retrieve(ctx context.Context, req memorymodel.RetrieveRequest) ([]memorymodel.ItemDTO, error) {
    if m == nil || m.readService == nil {
        return nil, errors.New("memory module read service is nil")
    }
    return m.readService.Retrieve(ctx, req)
}

// ListItems 列出用户当前可管理的记忆条目。
func (m *Module) ListItems(ctx context.Context, req memorymodel.ListItemsRequest) ([]memorymodel.ItemDTO, error) {
    if m == nil || m.manageService == nil {
        return nil, errors.New("memory module manage service is nil")
    }
    return m.manageService.ListItems(ctx, req)
}

// DeleteItem 软删除一条记忆,并补写审计日志。
func (m *Module) DeleteItem(ctx context.Context, req memorymodel.DeleteItemRequest) (*memorymodel.ItemDTO, error) {
    if m == nil || m.manageService == nil {
        return nil, errors.New("memory module manage service is nil")
    }
    return m.manageService.DeleteItem(ctx, req)
}

// GetUserSetting 读取用户当前生效的记忆开关。
func (m *Module) GetUserSetting(ctx context.Context, userID int) (memorymodel.UserSettingDTO, error) {
    if m == nil || m.manageService == nil {
        return memorymodel.UserSettingDTO{}, errors.New("memory module manage service is nil")
    }
    return m.manageService.GetUserSetting(ctx, userID)
}

// UpsertUserSetting 写入用户记忆开关。
func (m *Module) UpsertUserSetting(ctx context.Context, req memorymodel.UpdateUserSettingRequest) (memorymodel.UserSettingDTO, error) {
    if m == nil || m.manageService == nil {
        return memorymodel.UserSettingDTO{}, errors.New("memory module manage service is nil")
    }
    return m.manageService.UpsertUserSetting(ctx, req)
}

// StartWorker 启动 memory 后台 worker。
//
// 说明:
// 1. 这里只负责按当前配置拉起轮询循环;
// 2. 若 memory.enabled=false则直接记录日志并返回
// 3. 当前不做重复启动保护,生命周期仍假设由应用启动层统一掌控。
func (m *Module) StartWorker(ctx context.Context) {
    if m == nil || m.runner == nil {
        log.Println("Memory worker is not initialized")
        return
    }
    if !m.cfg.Enabled {
        log.Println("Memory worker is disabled")
        return
    }
    go memoryworker.RunPollingLoop(ctx, m.runner, m.cfg.WorkerPollEvery, m.cfg.WorkerClaimBatch)
    log.Println("Memory worker started")
}

func wireModule(db *gorm.DB, llmClient *infrallm.Client, ragRuntime infrarag.Runtime, cfg memorymodel.Config) *Module {
    jobRepo := memoryrepo.NewJobRepo(db)
    itemRepo := memoryrepo.NewItemRepo(db)
    auditRepo := memoryrepo.NewAuditRepo(db)
    settingsRepo := memoryrepo.NewSettingsRepo(db)

    enqueueService := memoryservice.NewEnqueueService(jobRepo)
    readService := memoryservice.NewReadService(itemRepo, settingsRepo, ragRuntime, cfg)
    manageService := memoryservice.NewManageService(db, itemRepo, auditRepo, settingsRepo)

    extractor := memoryorchestrator.NewLLMWriteOrchestrator(llmClient, cfg)
    runner := memoryworker.NewRunner(db, jobRepo, itemRepo, auditRepo, settingsRepo, extractor, ragRuntime)

    return &Module{
        db:             db,
        cfg:            cfg,
        llmClient:      llmClient,
        ragRuntime:     ragRuntime,
        jobRepo:        jobRepo,
        itemRepo:       itemRepo,
        auditRepo:      auditRepo,
        settingsRepo:   settingsRepo,
        enqueueService: enqueueService,
        readService:    readService,
        manageService:  manageService,
        runner:         runner,
    }
}
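
补充一段启动侧接线的最小示意(假设性示例,函数名与调用方式仅作说明,并非本提交中的真实代码;真实接线见 cmd/start.go):
func bootMemoryModule(ctx context.Context, db *gorm.DB, llmClient *infrallm.Client, ragRuntime infrarag.Runtime) *Module {
// 1. 复用包内统一入口读取 memory 配置。
cfg := LoadConfigFromViper()
// 2. 轻组装门面:llmClient / ragRuntime 传 nil 也能构造,内部链路自动降级。
mod := NewModule(db, llmClient, ragRuntime, cfg)
// 3. 拉起后台轮询;memory.enabled=false 时 StartWorker 只打日志直接返回。
mod.StartWorker(ctx)
return mod
}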

View File

@@ -0,0 +1,299 @@
package orchestrator
import (
"context"
"encoding/json"
"fmt"
"log"
"strings"
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
)
const (
defaultMemoryExtractMaxTokens = 1200
defaultMemoryExtractMaxFacts = 5
)
// LLMWriteOrchestrator 负责把单条对话消息转成可入库的记忆候选。
//
// 职责边界:
// 1. 负责调用 LLM 做抽取、把输出标准化成 memory_facts
// 2. 不负责落库,不负责任务状态机推进;
// 3. 当 LLM 不可用或输出异常时,回退到保守的本地抽取,保证链路不完全断。
type LLMWriteOrchestrator struct {
client *infrallm.Client
cfg memorymodel.Config
logger *log.Logger
}
// NewLLMWriteOrchestrator 构造 LLM 版记忆写入编排器。
func NewLLMWriteOrchestrator(client *infrallm.Client, cfg memorymodel.Config) *LLMWriteOrchestrator {
return &LLMWriteOrchestrator{
client: client,
cfg: cfg,
logger: log.Default(),
}
}
// ExtractFacts 从单条消息中抽取可入库事实。
//
// 返回语义:
// 1. 成功时返回标准化后的候选事实;
// 2. 即使 LLM 失败,也尽量返回保守的 fallback 结果,避免 worker 空转报错;
// 3. 只有输入本身为空时才返回空结果。
func (o *LLMWriteOrchestrator) ExtractFacts(ctx context.Context, payload memorymodel.ExtractJobPayload) ([]memorymodel.NormalizedFact, error) {
sourceText := strings.TrimSpace(payload.SourceText)
if sourceText == "" {
return nil, nil
}
if o == nil || o.client == nil {
return fallbackNormalizedFacts(payload), nil
}
messages := infrallm.BuildSystemUserMessages(
buildMemoryExtractSystemPrompt(o.cfg.ExtractPrompt),
nil,
buildMemoryExtractUserPrompt(payload),
)
resp, rawResult, err := infrallm.GenerateJSON[memoryExtractResponse](
ctx,
o.client,
messages,
infrallm.GenerateOptions{
Temperature: clampTemperature(o.cfg.LLMTemperature),
MaxTokens: defaultMemoryExtractMaxTokens,
Thinking: infrallm.ThinkingModeDisabled,
Metadata: map[string]any{
"stage": "memory_extract",
"user_id": payload.UserID,
"conversation_id": payload.ConversationID,
},
},
)
if err != nil {
if o.logger != nil {
o.logger.Printf("[WARN] memory extract llm failed user_id=%d conversation_id=%s err=%v raw=%s",
payload.UserID, payload.ConversationID, err, truncateForLog(rawResult))
}
return fallbackNormalizedFacts(payload), nil
}
facts := convertExtractResponse(resp)
normalized := memoryutils.NormalizeFacts(facts)
if len(normalized) == 0 {
return fallbackNormalizedFacts(payload), nil
}
return normalized, nil
}
type memoryExtractResponse struct {
Facts []memoryExtractFact `json:"facts"`
}
type memoryExtractFact struct {
MemoryType string `json:"memory_type"`
Title string `json:"title"`
Content string `json:"content"`
Confidence float64 `json:"confidence"`
Importance float64 `json:"importance"`
SensitivityLevel int `json:"sensitivity_level"`
IsExplicit bool `json:"is_explicit"`
}
type memoryExtractPromptInput struct {
UserID int `json:"user_id"`
ConversationID string `json:"conversation_id"`
AssistantID string `json:"assistant_id,omitempty"`
RunID string `json:"run_id,omitempty"`
SourceMessageID int64 `json:"source_message_id,omitempty"`
SourceRole string `json:"source_role"`
SourceText string `json:"source_text"`
OccurredAt string `json:"occurred_at"`
TraceID string `json:"trace_id,omitempty"`
}
func buildMemoryExtractSystemPrompt(override string) string {
override = strings.TrimSpace(override)
if override != "" {
return override
}
return strings.TrimSpace(`你是一个“记忆抽取器”。
你的任务是从单条用户消息中抽取值得长期记住的事实、偏好、约束、待办线索。
请只输出 JSON 对象,不要输出解释、不要输出 markdown。
输出格式:
{
"facts": [
{
"memory_type": "preference|constraint|fact|todo_hint",
"title": "短标题",
"content": "完整事实内容",
"confidence": 0.0,
"importance": 0.0,
"sensitivity_level": 0,
"is_explicit": false
}
]
}
规则:
1. 最多输出 5 条事实。
2. 只保留稳定、未来可能复用的信息,闲聊、寒暄、一次性噪声不要记。
3. 用户明确说“记住”或“以后提醒我”时,is_explicit 设为 true。
4. confidence 表示这条事实是否真的值得记,取 0 到 1。
5. importance 表示对后续提醒/陪伴的价值,取 0 到 1。
6. sensitivity_level 取 0 到 2,数字越大越敏感。
7. 不确定就少记,不要编造。`)
}
func buildMemoryExtractUserPrompt(payload memorymodel.ExtractJobPayload) string {
request := memoryExtractPromptInput{
UserID: payload.UserID,
ConversationID: payload.ConversationID,
AssistantID: payload.AssistantID,
RunID: payload.RunID,
SourceMessageID: payload.SourceMessageID,
SourceRole: payload.SourceRole,
SourceText: payload.SourceText,
OccurredAt: payload.OccurredAt.Format("2006-01-02 15:04:05"),
TraceID: payload.TraceID,
}
raw, err := json.MarshalIndent(request, "", " ")
if err != nil {
return fmt.Sprintf("请从这条消息中抽取可长期记住的信息:%s", payload.SourceText)
}
return fmt.Sprintf("请从下面这条用户消息中抽取可长期记住的信息,最多 %d 条。\n输入\n%s",
defaultMemoryExtractMaxFacts, string(raw))
}
func convertExtractResponse(resp *memoryExtractResponse) []memorymodel.FactCandidate {
if resp == nil || len(resp.Facts) == 0 {
return nil
}
result := make([]memorymodel.FactCandidate, 0, len(resp.Facts))
for _, fact := range resp.Facts {
memoryType := memorymodel.NormalizeMemoryType(fact.MemoryType)
if memoryType == "" {
continue
}
content := strings.TrimSpace(fact.Content)
if content == "" {
continue
}
confidence := clamp01(fact.Confidence)
if confidence == 0 {
confidence = 0.6
}
importance := clamp01(fact.Importance)
if importance == 0 {
importance = defaultImportanceByType(memoryType)
}
result = append(result, memorymodel.FactCandidate{
MemoryType: memoryType,
Title: strings.TrimSpace(fact.Title),
Content: content,
Confidence: confidence,
Importance: importance,
SensitivityLevel: clampInt(fact.SensitivityLevel, 0, 2),
IsExplicit: fact.IsExplicit,
})
}
return result
}
func fallbackNormalizedFacts(payload memorymodel.ExtractJobPayload) []memorymodel.NormalizedFact {
sourceText := strings.TrimSpace(payload.SourceText)
if sourceText == "" {
return nil
}
return memoryutils.NormalizeFacts([]memorymodel.FactCandidate{
{
MemoryType: memorymodel.MemoryTypeFact,
Title: buildFallbackTitle(sourceText),
Content: sourceText,
Confidence: 0.55,
Importance: defaultImportanceByType(memorymodel.MemoryTypeFact),
SensitivityLevel: 0,
IsExplicit: false,
},
})
}
func buildFallbackTitle(sourceText string) string {
runes := []rune(strings.TrimSpace(sourceText))
if len(runes) == 0 {
return "用户提到"
}
if len(runes) > 24 {
runes = runes[:24]
}
return "用户提到:" + string(runes)
}
func clampTemperature(v float64) float64 {
if v <= 0 {
return 0.1
}
if v > 1 {
return 1
}
return v
}
func clamp01(v float64) float64 {
if v < 0 {
return 0
}
if v > 1 {
return 1
}
return v
}
func clampInt(v, minValue, maxValue int) int {
if v < minValue {
return minValue
}
if v > maxValue {
return maxValue
}
return v
}
func defaultImportanceByType(memoryType string) float64 {
switch memoryType {
case memorymodel.MemoryTypePreference:
return 0.85
case memorymodel.MemoryTypeConstraint:
return 0.95
case memorymodel.MemoryTypeTodoHint:
return 0.8
default:
return 0.6
}
}
func truncateForLog(raw *infrallm.TextResult) string {
if raw == nil {
return ""
}
text := strings.TrimSpace(raw.Text)
// 按 rune 截断,避免日志里出现被截半的多字节字符。
runes := []rune(text)
if len(runes) <= 200 {
return text
}
return string(runes[:200]) + "..."
}
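
补充一段行为示意(假设性示例,payload 内容仅作演示):client 为 nil 时 ExtractFacts 不会报错,而是直接走本地 fallback。
func demoExtractFallback(ctx context.Context) {
o := NewLLMWriteOrchestrator(nil, memorymodel.Config{})
facts, err := o.ExtractFacts(ctx, memorymodel.ExtractJobPayload{
UserID: 1,
SourceText: "以后每周五提醒我交周报",
})
// err == nil;facts 为一条 memory_type=fact、confidence=0.55 的保守候选,
// 写入链路不会因为模型缺失而中断。
_, _ = facts, err
}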

View File

@@ -8,36 +8,33 @@ import (
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils" memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
) )
// WriteOrchestrator 是写入链路编排器Day1 首版) // WriteOrchestrator 是 Day1 的本地回退版本
// //
// 职责边界: // 职责边界:
// 1. Day1 只做 mock 抽取 + 标准化,不接 LLM 决策 // 1. 只做最保守的“从 source_text 直接生成一条候选事实”
// 2. Day2/Day3 再引入冲突消解、重排与向量召回。 // 2. 不依赖 LLM便于在模型不可用时保底
// 3. 后续会逐步被 LLM 版编排器取代,但不会直接删掉,方便回退。
type WriteOrchestrator struct{} type WriteOrchestrator struct{}
func NewWriteOrchestrator() *WriteOrchestrator { func NewWriteOrchestrator() *WriteOrchestrator {
return &WriteOrchestrator{} return &WriteOrchestrator{}
} }
// ExtractFacts 执行“候选事实抽取 -> 标准化”链路。 // ExtractFacts 执行最小回退链路。
//
// Day1 策略:
// 1. 先用 source_text 直接构造候选事实,确保链路可跑通;
// 2. 后续再替换成 LLM 抽取与结构化决策。
func (o *WriteOrchestrator) ExtractFacts(_ context.Context, payload memorymodel.ExtractJobPayload) ([]memorymodel.NormalizedFact, error) { func (o *WriteOrchestrator) ExtractFacts(_ context.Context, payload memorymodel.ExtractJobPayload) ([]memorymodel.NormalizedFact, error) {
sourceText := strings.TrimSpace(payload.SourceText) sourceText := strings.TrimSpace(payload.SourceText)
if sourceText == "" { if sourceText == "" {
return nil, nil return nil, nil
} }
candidates := []memorymodel.FactCandidate{ candidates := []memorymodel.FactCandidate{{
{
MemoryType: memorymodel.MemoryTypeFact, MemoryType: memorymodel.MemoryTypeFact,
Title: "用户近期提及", Title: "用户提到",
Content: sourceText, Content: sourceText,
Confidence: 0.6, Confidence: 0.6,
Importance: 0.6,
SensitivityLevel: 0,
IsExplicit: false, IsExplicit: false,
}, }}
}
return memoryutils.NormalizeFacts(candidates), nil return memoryutils.NormalizeFacts(candidates), nil
} }

View File

@@ -17,6 +17,10 @@ func NewAuditRepo(db *gorm.DB) *AuditRepo {
return &AuditRepo{db: db}
}
func (r *AuditRepo) WithTx(tx *gorm.DB) *AuditRepo {
return &AuditRepo{db: tx}
}
func (r *AuditRepo) Create(ctx context.Context, log model.MemoryAuditLog) error {
if r == nil || r.db == nil {
return errors.New("memory audit repo is nil")

View File

@@ -3,12 +3,20 @@ package repo
import (
"context"
"errors"
"strings"
"time"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
"github.com/LoveLosita/smartflow/backend/model"
"gorm.io/gorm"
)
-// ItemRepo 封装 memory_items 的数据访问(Day1 先占位)。
// ItemRepo 封装 memory_items 的数据访问。
//
// 职责边界:
// 1. 只负责表级读写,不承载注入、重排、审计决策;
// 2. 查询条件统一由 ItemQuery 表达,避免 service 层拼装 SQL;
// 3. 软删除、访问时间刷新等状态变更也收敛到这里。
type ItemRepo struct {
db *gorm.DB
}
@@ -17,11 +25,11 @@ func NewItemRepo(db *gorm.DB) *ItemRepo {
return &ItemRepo{db: db}
}
-// UpsertItems 预留给 Day2/Day3 的写入链路。
-//
-// Day1 约束:
-// 1. 先完成任务入队与状态机闭环;
-// 2. 不在本阶段引入复杂冲突消解与向量写入。
func (r *ItemRepo) WithTx(tx *gorm.DB) *ItemRepo {
return &ItemRepo{db: tx}
}
// UpsertItems 批量写入记忆条目。
func (r *ItemRepo) UpsertItems(ctx context.Context, items []model.MemoryItem) error {
if r == nil || r.db == nil {
return errors.New("memory item repo is nil")
@@ -29,5 +37,186 @@ func (r *ItemRepo) UpsertItems(ctx context.Context, items []model.MemoryItem) er
if len(items) == 0 {
return nil
}
-return r.db.WithContext(ctx).Create(&items).Error
for i := range items {
if err := r.db.WithContext(ctx).Create(&items[i]).Error; err != nil {
return err
}
}
return nil
}
// FindByQuery 按统一过滤条件读取记忆条目。
//
// 步骤化说明:
// 1. 先强制 user_id 过滤,避免跨用户串记忆;
// 2. 再按会话/助手/run 维度补充过滤,IncludeGlobal=true 时允许读取对应全局条目;
// 3. 最后补状态、类型、过期时间和 limit,返回稳定排序结果。
func (r *ItemRepo) FindByQuery(ctx context.Context, query memorymodel.ItemQuery) ([]model.MemoryItem, error) {
if r == nil || r.db == nil {
return nil, errors.New("memory item repo is nil")
}
if query.UserID <= 0 {
return nil, errors.New("memory item query user_id is invalid")
}
db := r.db.WithContext(ctx).Model(&model.MemoryItem{}).Where("user_id = ?", query.UserID)
db = applyScopedEquality(db, "conversation_id", query.ConversationID, query.IncludeGlobal)
db = applyScopedEquality(db, "assistant_id", query.AssistantID, query.IncludeGlobal)
db = applyScopedEquality(db, "run_id", query.RunID, query.IncludeGlobal)
if len(query.Statuses) > 0 {
db = db.Where("status IN ?", query.Statuses)
}
if len(query.MemoryTypes) > 0 {
db = db.Where("memory_type IN ?", query.MemoryTypes)
}
if query.OnlyUnexpired {
now := query.Now
if now.IsZero() {
now = time.Now()
}
db = db.Where("(ttl_at IS NULL OR ttl_at > ?)", now)
}
if query.Limit > 0 {
db = db.Limit(query.Limit)
}
var items []model.MemoryItem
err := db.
Order("is_explicit DESC").
Order("importance DESC").
Order("updated_at DESC").
Find(&items).Error
return items, err
}
// GetByIDForUser 读取某个用户的一条记忆条目。
func (r *ItemRepo) GetByIDForUser(ctx context.Context, userID int, memoryID int64) (*model.MemoryItem, error) {
if r == nil || r.db == nil {
return nil, errors.New("memory item repo is nil")
}
if userID <= 0 || memoryID <= 0 {
return nil, errors.New("memory item query params is invalid")
}
var item model.MemoryItem
err := r.db.WithContext(ctx).
Where("id = ? AND user_id = ?", memoryID, userID).
First(&item).Error
if err != nil {
return nil, err
}
return &item, nil
}
// UpdateStatusByID 更新某条记忆的状态。
func (r *ItemRepo) UpdateStatusByID(ctx context.Context, userID int, memoryID int64, status string) error {
return r.UpdateStatusByIDAt(ctx, userID, memoryID, status, time.Now())
}
// UpdateStatusByIDAt 更新某条记忆的状态,并允许上层显式指定更新时间。
//
// 这样做的原因:
// 1. 管理侧删除时,需要让“库内更新时间”和“审计 after 快照时间”保持一致;
// 2. 读取侧若只是刷新 last_access_at,不应该误改 updated_at;
// 3. 因此把“更新时间来源”收口到 repo,避免 service 层自己拼 SQL。
func (r *ItemRepo) UpdateStatusByIDAt(
ctx context.Context,
userID int,
memoryID int64,
status string,
updatedAt time.Time,
) error {
if r == nil || r.db == nil {
return errors.New("memory item repo is nil")
}
if userID <= 0 || memoryID <= 0 {
return errors.New("memory item update params is invalid")
}
status = strings.TrimSpace(status)
if status == "" {
return errors.New("memory item status is empty")
}
if updatedAt.IsZero() {
updatedAt = time.Now()
}
return r.db.WithContext(ctx).
Model(&model.MemoryItem{}).
Where("id = ? AND user_id = ?", memoryID, userID).
Updates(map[string]any{
"status": status,
"updated_at": updatedAt,
}).Error
}
// TouchLastAccessAt 批量刷新记忆访问时间。
//
// 说明:
// 1. 这里只更新 last_access_at,不更新 updated_at;
// 2. 因为 updated_at 代表“内容被修改”的时间,不能被一次普通读取污染;
// 3. 否则后续读取重排会把“最近被读过的旧记忆”误判成“最近被更新的记忆”。
func (r *ItemRepo) TouchLastAccessAt(ctx context.Context, ids []int64, accessedAt time.Time) error {
if r == nil || r.db == nil {
return errors.New("memory item repo is nil")
}
if len(ids) == 0 {
return nil
}
if accessedAt.IsZero() {
accessedAt = time.Now()
}
return r.db.WithContext(ctx).
Model(&model.MemoryItem{}).
Where("id IN ?", ids).
Updates(map[string]any{
"last_access_at": accessedAt,
}).Error
}
// UpdateVectorStateByID 更新单条记忆的向量同步桥接状态。
//
// 说明:
// 1. 这里只更新 vector_status/vector_id,不更新 updated_at;
// 2. 因为向量同步属于索引层状态,不代表记忆内容本身被修改;
// 3. 若误改 updated_at,会污染读取侧的时间排序语义。
func (r *ItemRepo) UpdateVectorStateByID(
ctx context.Context,
memoryID int64,
vectorStatus string,
vectorID *string,
) error {
if r == nil || r.db == nil {
return errors.New("memory item repo is nil")
}
if memoryID <= 0 {
return errors.New("memory item vector update id is invalid")
}
vectorStatus = strings.TrimSpace(vectorStatus)
if vectorStatus == "" {
return errors.New("memory item vector status is empty")
}
return r.db.WithContext(ctx).
Model(&model.MemoryItem{}).
Where("id = ?", memoryID).
UpdateColumns(map[string]any{
"vector_status": vectorStatus,
"vector_id": vectorID,
}).Error
}
func applyScopedEquality(db *gorm.DB, column, value string, includeGlobal bool) *gorm.DB {
value = strings.TrimSpace(value)
if value == "" {
return db
}
if includeGlobal {
return db.Where("("+column+" = ? OR "+column+" IS NULL)", value)
}
return db.Where(column+" = ?", value)
}
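
补充一段查询语义示意(假设性示例,参数值仅作演示):IncludeGlobal=true 时,会话/助手/run 维度的条件都会放宽成“等值 OR IS NULL”,从而同时命中会话内记忆与全局记忆。
func demoFindScoped(ctx context.Context, repo *ItemRepo) ([]model.MemoryItem, error) {
// 等价 SQL 大致为:
//   WHERE user_id = 42
//     AND (conversation_id = 'c1' OR conversation_id IS NULL)
//     AND status IN ('active')
//     AND (ttl_at IS NULL OR ttl_at > NOW())
return repo.FindByQuery(ctx, memorymodel.ItemQuery{
UserID: 42,
ConversationID: "c1",
Statuses: []string{model.MemoryItemStatusActive},
IncludeGlobal: true,
OnlyUnexpired: true,
Limit: 10,
})
}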

View File

@@ -87,18 +87,19 @@ func (r *JobRepo) ClaimNextRunnableExtractJob(ctx context.Context, now time.Time
var claimed *model.MemoryJob
err := r.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
var job model.MemoryJob
-queryErr := tx.
query := tx.
Clauses(clause.Locking{Strength: "UPDATE"}).
Where("job_type = ?", model.MemoryJobTypeExtract).
Where("status IN ?", []string{model.MemoryJobStatusPending, model.MemoryJobStatusFailed}).
Where("(next_retry_at IS NULL OR next_retry_at <= ?)", now).
Order("id ASC").
-First(&job).Error
-if queryErr != nil {
-if errors.Is(queryErr, gorm.ErrRecordNotFound) {
-return nil
-}
-return queryErr
-}
Limit(1).
Find(&job)
if query.Error != nil {
return query.Error
}
if query.RowsAffected == 0 {
return nil
}
updates := map[string]any{
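
这次把 First 换成 Limit(1).Find 值得单独点一下:GORM 的 First 在未命中时会返回 gorm.ErrRecordNotFound,必须用 errors.Is 特判;Find 未命中不产生错误,只把 RowsAffected 置 0,分支更直白。最小示意如下(假设性示例,条件做了简化):
func claimDemo(tx *gorm.DB, now time.Time) (*model.MemoryJob, error) {
var job model.MemoryJob
res := tx.
Where("job_type = ?", model.MemoryJobTypeExtract).
Where("(next_retry_at IS NULL OR next_retry_at <= ?)", now).
Order("id ASC").
Limit(1).
Find(&job)
if res.Error != nil {
return nil, res.Error // 真正的数据库错误
}
if res.RowsAffected == 0 {
return nil, nil // 未命中:Find 不会返回 gorm.ErrRecordNotFound
}
return &job, nil
}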

View File

@@ -18,6 +18,36 @@ func NewSettingsRepo(db *gorm.DB) *SettingsRepo {
return &SettingsRepo{db: db}
}
func (r *SettingsRepo) WithTx(tx *gorm.DB) *SettingsRepo {
return &SettingsRepo{db: tx}
}
// GetByUserID 读取用户记忆设置。
//
// 返回语义:
// 1. 命中时返回真实记录;
// 2. 未命中时返回 (nil, nil),由上层决定是否走默认开关;
// 3. 不在仓储层偷偷补默认值,避免写路径和读路径语义不一致。
func (r *SettingsRepo) GetByUserID(ctx context.Context, userID int) (*model.MemoryUserSetting, error) {
if r == nil || r.db == nil {
return nil, errors.New("memory settings repo is nil")
}
if userID <= 0 {
return nil, errors.New("memory settings user_id is invalid")
}
var setting model.MemoryUserSetting
query := r.db.WithContext(ctx).Where("user_id = ?", userID).Limit(1).Find(&setting)
if query.Error != nil {
return nil, query.Error
}
if query.RowsAffected == 0 {
return nil, nil
}
return &setting, nil
}
// Upsert 写入用户记忆设置。
func (r *SettingsRepo) Upsert(ctx context.Context, setting model.MemoryUserSetting) error {
if r == nil || r.db == nil {
return errors.New("memory settings repo is nil")

View File

@@ -0,0 +1,119 @@
package service
import (
"strings"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
"github.com/LoveLosita/smartflow/backend/model"
)
func toItemDTO(item model.MemoryItem) memorymodel.ItemDTO {
return memorymodel.ItemDTO{
ID: item.ID,
UserID: item.UserID,
ConversationID: strValue(item.ConversationID),
AssistantID: strValue(item.AssistantID),
RunID: strValue(item.RunID),
MemoryType: item.MemoryType,
Title: item.Title,
Content: item.Content,
Confidence: item.Confidence,
Importance: item.Importance,
SensitivityLevel: item.SensitivityLevel,
IsExplicit: item.IsExplicit,
Status: item.Status,
TTLAt: item.TTLAt,
CreatedAt: item.CreatedAt,
UpdatedAt: item.UpdatedAt,
}
}
func toItemDTOs(items []model.MemoryItem) []memorymodel.ItemDTO {
if len(items) == 0 {
return nil
}
result := make([]memorymodel.ItemDTO, 0, len(items))
for _, item := range items {
result = append(result, toItemDTO(item))
}
return result
}
func toUserSettingDTO(setting model.MemoryUserSetting) memorymodel.UserSettingDTO {
return memorymodel.UserSettingDTO{
UserID: setting.UserID,
MemoryEnabled: setting.MemoryEnabled,
ImplicitMemoryEnabled: setting.ImplicitMemoryEnabled,
SensitiveMemoryEnabled: setting.SensitiveMemoryEnabled,
UpdatedAt: setting.UpdatedAt,
}
}
func normalizeMemoryTypes(raw []string) []string {
if len(raw) == 0 {
return nil
}
result := make([]string, 0, len(raw))
seen := make(map[string]struct{}, len(raw))
for _, item := range raw {
normalized := memorymodel.NormalizeMemoryType(item)
if normalized == "" {
continue
}
if _, exists := seen[normalized]; exists {
continue
}
seen[normalized] = struct{}{}
result = append(result, normalized)
}
return result
}
func normalizeManageStatuses(raw []string) []string {
if len(raw) == 0 {
return []string{
model.MemoryItemStatusActive,
model.MemoryItemStatusArchived,
}
}
result := make([]string, 0, len(raw))
seen := make(map[string]struct{}, len(raw))
for _, item := range raw {
status := strings.ToLower(strings.TrimSpace(item))
if status != model.MemoryItemStatusActive &&
status != model.MemoryItemStatusArchived &&
status != model.MemoryItemStatusDeleted {
continue
}
if _, exists := seen[status]; exists {
continue
}
seen[status] = struct{}{}
result = append(result, status)
}
if len(result) == 0 {
return []string{
model.MemoryItemStatusActive,
model.MemoryItemStatusArchived,
}
}
return result
}
func normalizeLimit(limit, defaultValue, maxValue int) int {
if limit <= 0 {
limit = defaultValue
}
if maxValue > 0 && limit > maxValue {
return maxValue
}
return limit
}
func strValue(v *string) string {
if v == nil {
return ""
}
return strings.TrimSpace(*v)
}

View File

@@ -16,6 +16,7 @@ import (
func LoadConfigFromViper() memorymodel.Config {
cfg := memorymodel.Config{
Enabled: viper.GetBool("memory.enabled"),
RAGEnabled: viper.GetBool("memory.rag.enabled"),
ExtractPrompt: viper.GetString("memory.prompt.extract"),
DecisionPrompt: viper.GetString("memory.prompt.decision"),
Threshold: viper.GetFloat64("memory.threshold"),

View File

@@ -0,0 +1,203 @@
package service
import (
"context"
"errors"
"strings"
"time"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
"github.com/LoveLosita/smartflow/backend/model"
"gorm.io/gorm"
)
const (
defaultManageListLimit = 20
maxManageListLimit = 100
)
// ManageService 负责 memory 模块内部的管理面能力。
//
// 职责边界:
// 1. 负责“列出记忆 / 删除记忆 / 读取与更新用户开关”这类维护动作;
// 2. 负责把用户主动管理行为补充进 memory_audit_logs
// 3. 不负责 prompt 注入、不负责向量召回,也不负责后台抽取任务执行。
type ManageService struct {
db *gorm.DB
itemRepo *memoryrepo.ItemRepo
auditRepo *memoryrepo.AuditRepo
settingsRepo *memoryrepo.SettingsRepo
}
func NewManageService(
db *gorm.DB,
itemRepo *memoryrepo.ItemRepo,
auditRepo *memoryrepo.AuditRepo,
settingsRepo *memoryrepo.SettingsRepo,
) *ManageService {
return &ManageService{
db: db,
itemRepo: itemRepo,
auditRepo: auditRepo,
settingsRepo: settingsRepo,
}
}
// ListItems 列出某个用户当前可管理的记忆条目。
//
// 说明:
// 1. 这里面向“管理视角”,不会按用户开关再做二次过滤;
// 2. 即便用户暂时关闭 memory,总览页仍需要看见已有记忆,便于手动删除或核对;
// 3. 默认只返回 active/archived,除非显式传入 deleted。
func (s *ManageService) ListItems(ctx context.Context, req memorymodel.ListItemsRequest) ([]memorymodel.ItemDTO, error) {
if s == nil || s.itemRepo == nil {
return nil, errors.New("memory manage service is nil")
}
if req.UserID <= 0 {
return nil, nil
}
conversationID := strings.TrimSpace(req.ConversationID)
query := memorymodel.ItemQuery{
UserID: req.UserID,
ConversationID: conversationID,
Statuses: normalizeManageStatuses(req.Statuses),
MemoryTypes: normalizeMemoryTypes(req.MemoryTypes),
IncludeGlobal: conversationID != "",
OnlyUnexpired: false,
Limit: normalizeLimit(req.Limit, defaultManageListLimit, maxManageListLimit),
}
items, err := s.itemRepo.FindByQuery(ctx, query)
if err != nil {
return nil, err
}
return toItemDTOs(items), nil
}
// DeleteItem 软删除一条记忆,并补写审计日志。
//
// 步骤化说明:
// 1. 先在事务里读取当前条目快照,确保审计前镜像和实际删除对象一致;
// 2. 若该条目已是 deleted,则直接按幂等语义返回,避免重复写多条删除审计;
// 3. 状态更新成功后再写 audit log,保证“有删除就有审计”,失败时整笔事务回滚。
func (s *ManageService) DeleteItem(ctx context.Context, req memorymodel.DeleteItemRequest) (*memorymodel.ItemDTO, error) {
if s == nil || s.db == nil || s.itemRepo == nil || s.auditRepo == nil {
return nil, errors.New("memory manage service is not initialized")
}
if req.UserID <= 0 || req.MemoryID <= 0 {
return nil, nil
}
now := time.Now()
operatorType := memoryutils.NormalizeOperatorType(req.OperatorType)
reason := normalizeDeleteReason(req.Reason)
var deletedItem model.MemoryItem
err := s.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
itemRepo := s.itemRepo.WithTx(tx)
auditRepo := s.auditRepo.WithTx(tx)
current, err := itemRepo.GetByIDForUser(ctx, req.UserID, req.MemoryID)
if err != nil {
return err
}
if current.Status == model.MemoryItemStatusDeleted {
deletedItem = *current
return nil
}
before := *current
after := before
after.Status = model.MemoryItemStatusDeleted
after.UpdatedAt = &now
if err = itemRepo.UpdateStatusByIDAt(ctx, req.UserID, req.MemoryID, model.MemoryItemStatusDeleted, now); err != nil {
return err
}
audit := memoryutils.BuildItemAuditLog(
req.MemoryID,
req.UserID,
memoryutils.AuditOperationDelete,
operatorType,
reason,
&before,
&after,
)
if err = auditRepo.Create(ctx, audit); err != nil {
return err
}
deletedItem = after
return nil
})
if err != nil {
return nil, err
}
if deletedItem.ID <= 0 {
return nil, nil
}
result := toItemDTO(deletedItem)
return &result, nil
}
// GetUserSetting 返回用户当前生效的记忆开关。
//
// 返回语义:
// 1. 若数据库中还没有记录,返回系统默认开关,而不是 nil
// 2. 这样前端/上层调用方始终拿到完整结构,避免再做一层判空补默认值;
// 3. 这里只读 settings,不附带修改动作。
func (s *ManageService) GetUserSetting(ctx context.Context, userID int) (memorymodel.UserSettingDTO, error) {
if s == nil || s.settingsRepo == nil {
return memorymodel.UserSettingDTO{}, errors.New("memory manage service is nil")
}
if userID <= 0 {
return memorymodel.UserSettingDTO{}, nil
}
setting, err := s.settingsRepo.GetByUserID(ctx, userID)
if err != nil {
return memorymodel.UserSettingDTO{}, err
}
return toUserSettingDTO(memoryutils.EffectiveUserSetting(setting, userID)), nil
}
// UpsertUserSetting 写入用户记忆开关。
//
// 说明:
// 1. 当前阶段先直接覆盖三类开关,不做 patch 语义;
// 2. 这样便于前端把整块设置表单一次性提交,接口语义更稳定;
// 3. 若后续需要记录设置变更审计,再单独扩展 setting audit,而不是复用 item audit。
func (s *ManageService) UpsertUserSetting(ctx context.Context, req memorymodel.UpdateUserSettingRequest) (memorymodel.UserSettingDTO, error) {
if s == nil || s.settingsRepo == nil {
return memorymodel.UserSettingDTO{}, errors.New("memory manage service is nil")
}
if req.UserID <= 0 {
return memorymodel.UserSettingDTO{}, nil
}
now := time.Now()
setting := model.MemoryUserSetting{
UserID: req.UserID,
MemoryEnabled: req.MemoryEnabled,
ImplicitMemoryEnabled: req.ImplicitMemoryEnabled,
SensitiveMemoryEnabled: req.SensitiveMemoryEnabled,
UpdatedAt: &now,
}
if err := s.settingsRepo.Upsert(ctx, setting); err != nil {
return memorymodel.UserSettingDTO{}, err
}
return toUserSettingDTO(setting), nil
}
func normalizeDeleteReason(reason string) string {
reason = strings.TrimSpace(reason)
if reason == "" {
return "用户删除记忆"
}
return reason
}
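
补充一段管理面的调用示意(假设性示例,字段值仅作演示):
func demoDeleteMemory(ctx context.Context, svc *ManageService) {
dto, err := svc.DeleteItem(ctx, memorymodel.DeleteItemRequest{
UserID: 42,
MemoryID: 1001,
OperatorType: "user",
Reason: "用户在管理页手动删除",
})
// 首次删除:事务内完成软删 + 写一条 delete 审计;
// 重复删除:按幂等语义直接返回已删除快照,不再追加审计。
_, _ = dto, err
}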

View File

@@ -0,0 +1,347 @@
package service
import (
"context"
"fmt"
"sort"
"strconv"
"strings"
"time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
"github.com/LoveLosita/smartflow/backend/model"
)
const (
defaultRetrieveLimit = 5
maxRetrieveLimit = 20
)
// ReadService 负责 memory 模块内部的读取、门控与轻量重排。
//
// 职责边界:
// 1. 负责把 memory_items 读出来并做用户设置过滤;
// 2. 负责最小可用的排序与截断,为后续 prompt 注入提供稳定入口;
// 3. 不直接依赖 newAgent,不负责真正把记忆拼进 prompt。
type ReadService struct {
itemRepo *memoryrepo.ItemRepo
settingsRepo *memoryrepo.SettingsRepo
ragRuntime infrarag.Runtime
cfg memorymodel.Config
}
func NewReadService(
itemRepo *memoryrepo.ItemRepo,
settingsRepo *memoryrepo.SettingsRepo,
ragRuntime infrarag.Runtime,
cfg memorymodel.Config,
) *ReadService {
return &ReadService{
itemRepo: itemRepo,
settingsRepo: settingsRepo,
ragRuntime: ragRuntime,
cfg: cfg,
}
}
// Retrieve 读取可供后续注入使用的候选记忆。
func (s *ReadService) Retrieve(ctx context.Context, req memorymodel.RetrieveRequest) ([]memorymodel.ItemDTO, error) {
if s == nil || s.itemRepo == nil || s.settingsRepo == nil {
return nil, nil
}
if req.UserID <= 0 {
return nil, nil
}
now := req.Now
if now.IsZero() {
now = time.Now()
}
setting, err := s.settingsRepo.GetByUserID(ctx, req.UserID)
if err != nil {
return nil, err
}
effectiveSetting := memoryutils.EffectiveUserSetting(setting, req.UserID)
if !effectiveSetting.MemoryEnabled {
return nil, nil
}
limit := normalizeLimit(req.Limit, defaultRetrieveLimit, maxRetrieveLimit)
if s.cfg.RAGEnabled && s.ragRuntime != nil && strings.TrimSpace(req.Query) != "" {
items, ragErr := s.retrieveByRAG(ctx, req, effectiveSetting, limit, now)
if ragErr == nil && len(items) > 0 {
return items, nil
}
}
return s.retrieveByLegacy(ctx, req, limit, now, effectiveSetting)
}
func (s *ReadService) retrieveByLegacy(
ctx context.Context,
req memorymodel.RetrieveRequest,
limit int,
now time.Time,
effectiveSetting model.MemoryUserSetting,
) ([]memorymodel.ItemDTO, error) {
if !effectiveSetting.MemoryEnabled {
return nil, nil
}
query := memorymodel.ItemQuery{
UserID: req.UserID,
ConversationID: req.ConversationID,
AssistantID: req.AssistantID,
RunID: req.RunID,
Statuses: []string{model.MemoryItemStatusActive},
MemoryTypes: normalizeRetrieveMemoryTypes(req.MemoryTypes),
IncludeGlobal: true,
OnlyUnexpired: true,
Limit: normalizeLimit(limit*3, limit*3, maxRetrieveLimit*3),
Now: now,
}
items, err := s.itemRepo.FindByQuery(ctx, query)
if err != nil {
return nil, err
}
items = memoryutils.FilterItemsBySetting(items, effectiveSetting)
if len(items) == 0 {
return nil, nil
}
sort.SliceStable(items, func(i, j int) bool {
left := scoreRetrievedItem(items[i], now, req.ConversationID)
right := scoreRetrievedItem(items[j], now, req.ConversationID)
if left == right {
return items[i].ID > items[j].ID
}
return left > right
})
if len(items) > limit {
items = items[:limit]
}
_ = s.itemRepo.TouchLastAccessAt(ctx, collectMemoryIDs(items), now)
return toItemDTOs(items), nil
}
func (s *ReadService) retrieveByRAG(
ctx context.Context,
req memorymodel.RetrieveRequest,
effectiveSetting model.MemoryUserSetting,
limit int,
now time.Time,
) ([]memorymodel.ItemDTO, error) {
if !effectiveSetting.MemoryEnabled {
return nil, nil
}
result, err := s.ragRuntime.RetrieveMemory(ctx, infrarag.MemoryRetrieveRequest{
Query: req.Query,
TopK: limit,
Threshold: s.cfg.Threshold,
Action: "search",
UserID: req.UserID,
ConversationID: req.ConversationID,
AssistantID: req.AssistantID,
RunID: req.RunID,
MemoryTypes: normalizeRetrieveMemoryTypes(req.MemoryTypes),
})
if err != nil || result == nil || len(result.Items) == 0 {
return nil, err
}
items := make([]memorymodel.ItemDTO, 0, len(result.Items))
ids := make([]int64, 0, len(result.Items))
for _, hit := range result.Items {
dto, memoryID := buildMemoryDTOFromRetrieveHit(hit)
if !effectiveSetting.ImplicitMemoryEnabled && !dto.IsExplicit {
continue
}
if !effectiveSetting.SensitiveMemoryEnabled && dto.SensitivityLevel > 0 {
continue
}
if dto.ID <= 0 && memoryID > 0 {
dto.ID = memoryID
}
items = append(items, dto)
if dto.ID > 0 {
ids = append(ids, dto.ID)
}
}
if len(items) > limit {
items = items[:limit]
}
_ = s.itemRepo.TouchLastAccessAt(ctx, ids, now)
return items, nil
}
func normalizeRetrieveMemoryTypes(raw []string) []string {
normalized := normalizeMemoryTypes(raw)
if len(normalized) > 0 {
return normalized
}
return []string{
memorymodel.MemoryTypeConstraint,
memorymodel.MemoryTypePreference,
memorymodel.MemoryTypeTodoHint,
memorymodel.MemoryTypeFact,
}
}
func scoreRetrievedItem(item model.MemoryItem, now time.Time, conversationID string) float64 {
score := 0.35*clamp01(item.Importance) + 0.3*clamp01(item.Confidence) + 0.2*recencyScore(item, now)
if item.IsExplicit {
score += 0.1
}
if strValue(item.ConversationID) != "" && strValue(item.ConversationID) == conversationID {
score += 0.08
}
switch item.MemoryType {
case memorymodel.MemoryTypeConstraint:
score += 0.12
case memorymodel.MemoryTypePreference:
score += 0.08
case memorymodel.MemoryTypeTodoHint:
score += 0.05
}
return score
}
func recencyScore(item model.MemoryItem, now time.Time) float64 {
base := item.UpdatedAt
if base == nil {
base = item.CreatedAt
}
if base == nil || now.Before(*base) {
return 0.5
}
age := now.Sub(*base)
switch {
case age <= 24*time.Hour:
return 1
case age <= 7*24*time.Hour:
return 0.85
case age <= 30*24*time.Hour:
return 0.65
case age <= 90*24*time.Hour:
return 0.45
default:
return 0.25
}
}
func clamp01(v float64) float64 {
if v < 0 {
return 0
}
if v > 1 {
return 1
}
return v
}
func collectMemoryIDs(items []model.MemoryItem) []int64 {
if len(items) == 0 {
return nil
}
ids := make([]int64, 0, len(items))
for _, item := range items {
if item.ID <= 0 {
continue
}
ids = append(ids, item.ID)
}
return ids
}
func buildMemoryDTOFromRetrieveHit(hit infrarag.RetrieveHit) (memorymodel.ItemDTO, int64) {
memoryID := parseMemoryIDFromDocumentID(hit.DocumentID)
metadata := hit.Metadata
dto := memorymodel.ItemDTO{
ID: memoryID,
UserID: int(readFloatLike(metadata["user_id"])),
ConversationID: readString(metadata["conversation_id"]),
AssistantID: readString(metadata["assistant_id"]),
RunID: readString(metadata["run_id"]),
MemoryType: readString(metadata["memory_type"]),
Title: readString(metadata["title"]),
Content: strings.TrimSpace(hit.Text),
Confidence: readFloatLike(metadata["confidence"]),
Importance: readFloatLike(metadata["importance"]),
SensitivityLevel: int(readFloatLike(metadata["sensitivity_level"])),
IsExplicit: readBoolLike(metadata["is_explicit"]),
Status: readString(metadata["status"]),
TTLAt: readTimeLike(metadata["ttl_at"]),
}
return dto, memoryID
}
func parseMemoryIDFromDocumentID(documentID string) int64 {
documentID = strings.TrimSpace(documentID)
if !strings.HasPrefix(documentID, "memory:") {
return 0
}
raw := strings.TrimPrefix(documentID, "memory:")
if strings.HasPrefix(raw, "uid:") {
return 0
}
parsed, err := strconv.ParseInt(raw, 10, 64)
if err != nil {
return 0
}
return parsed
}
func readString(v any) string {
if v == nil {
return ""
}
return strings.TrimSpace(fmt.Sprintf("%v", v))
}
func readFloatLike(v any) float64 {
switch value := v.(type) {
case float64:
return value
case float32:
return float64(value)
case int:
return float64(value)
case int64:
return float64(value)
case string:
parsed, err := strconv.ParseFloat(strings.TrimSpace(value), 64)
if err == nil {
return parsed
}
}
return 0
}
func readBoolLike(v any) bool {
switch value := v.(type) {
case bool:
return value
case string:
return strings.EqualFold(strings.TrimSpace(value), "true")
default:
return false
}
}
func readTimeLike(v any) *time.Time {
text := readString(v)
if text == "" {
return nil
}
parsed, err := time.Parse(time.RFC3339, text)
if err != nil {
return nil
}
return &parsed
}
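
补充一段 DocumentID 约定的解析示意(按现有实现,"memory:uid:…" 这类用户级聚合文档不映射回单条记忆):
func demoParseDocumentID() {
fmt.Println(parseMemoryIDFromDocumentID("memory:1001"))   // 1001:单条记忆
fmt.Println(parseMemoryIDFromDocumentID("memory:uid:42")) // 0:uid 前缀被显式排除
fmt.Println(parseMemoryIDFromDocumentID("doc:abc"))       // 0:非 memory 前缀
}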

View File

@@ -0,0 +1,71 @@
package utils
import (
"encoding/json"
"strings"
"github.com/LoveLosita/smartflow/backend/model"
)
const (
// AuditOperationCreate 表示系统新建一条记忆。
AuditOperationCreate = "create"
// AuditOperationDelete 表示对已有记忆做软删除。
AuditOperationDelete = "delete"
)
// BuildItemAuditLog 构造记忆变更审计日志。
//
// 职责边界:
// 1. 负责把 before/after 快照统一序列化为审计日志结构;
// 2. 不负责决定“是否应该写审计”,该决策由上层 service/worker 控制;
// 3. 不负责落库,调用方仍需显式调用 AuditRepo。
func BuildItemAuditLog(
memoryID int64,
userID int,
operation string,
operatorType string,
reason string,
before *model.MemoryItem,
after *model.MemoryItem,
) model.MemoryAuditLog {
return model.MemoryAuditLog{
MemoryID: memoryID,
UserID: userID,
Operation: strings.TrimSpace(operation),
OperatorType: NormalizeOperatorType(operatorType),
Reason: strings.TrimSpace(reason),
BeforeJSON: marshalMemoryItemSnapshot(before),
AfterJSON: marshalMemoryItemSnapshot(after),
}
}
// NormalizeOperatorType 统一规整审计操作者类型。
//
// 规则说明:
// 1. 目前只接受 user/system 两类固定值;
// 2. 空值或未知值统一回退为 user,避免把脏值直接写进审计表;
// 3. 若后续扩展 admin/tool 等类型,再在这里集中放开即可。
func NormalizeOperatorType(raw string) string {
switch strings.ToLower(strings.TrimSpace(raw)) {
case "system":
return "system"
default:
return "user"
}
}
func marshalMemoryItemSnapshot(item *model.MemoryItem) *string {
if item == nil {
return nil
}
raw, err := json.Marshal(item)
if err != nil {
empty := "{}"
return &empty
}
value := string(raw)
return &value
}

View File

@@ -49,6 +49,11 @@ func NormalizeFacts(candidates []memorymodel.FactCandidate) []memorymodel.Normal
if confidence == 0 {
confidence = 0.6
}
importance := clamp01(candidate.Importance)
if importance == 0 {
importance = defaultImportanceByType(memoryType)
}
sensitivityLevel := clampInt(candidate.SensitivityLevel, 0, 2)
normalizedContent := strings.ToLower(content)
contentHash := hashContent(memoryType, normalizedContent)
@@ -65,6 +70,8 @@ func NormalizeFacts(candidates []memorymodel.FactCandidate) []memorymodel.Normal
NormalizedContent: normalizedContent,
ContentHash: contentHash,
Confidence: confidence,
Importance: importance,
SensitivityLevel: sensitivityLevel,
IsExplicit: candidate.IsExplicit,
})
}
@@ -96,6 +103,29 @@ func clamp01(v float64) float64 {
return v
}
func clampInt(v, minValue, maxValue int) int {
if v < minValue {
return minValue
}
if v > maxValue {
return maxValue
}
return v
}
func defaultImportanceByType(memoryType string) float64 {
switch memoryType {
case memorymodel.MemoryTypePreference:
return 0.85
case memorymodel.MemoryTypeConstraint:
return 0.95
case memorymodel.MemoryTypeTodoHint:
return 0.8
default:
return 0.6
}
}
func hashContent(memoryType, normalizedContent string) string {
sum := sha256.Sum256([]byte(memoryType + "::" + normalizedContent))
return hex.EncodeToString(sum[:])

View File

@@ -0,0 +1,62 @@
package utils
import (
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
"github.com/LoveLosita/smartflow/backend/model"
)
// EffectiveUserSetting 返回用户记忆设置的生效值。
//
// 规则说明:
// 1. 用户未显式配置时,走系统默认值;
// 2. 默认允许普通记忆和隐式记忆,但默认关闭敏感记忆;
// 3. 返回值始终是完整对象,方便调用方直接使用,不再分支判空。
func EffectiveUserSetting(setting *model.MemoryUserSetting, userID int) model.MemoryUserSetting {
if setting == nil {
return model.MemoryUserSetting{
UserID: userID,
MemoryEnabled: true,
ImplicitMemoryEnabled: true,
SensitiveMemoryEnabled: false,
}
}
return *setting
}
// FilterFactsBySetting 按用户记忆开关过滤候选事实。
func FilterFactsBySetting(facts []memorymodel.NormalizedFact, setting model.MemoryUserSetting) []memorymodel.NormalizedFact {
if !setting.MemoryEnabled || len(facts) == 0 {
return nil
}
result := make([]memorymodel.NormalizedFact, 0, len(facts))
for _, fact := range facts {
if !setting.ImplicitMemoryEnabled && !fact.IsExplicit {
continue
}
if !setting.SensitiveMemoryEnabled && fact.SensitivityLevel > 0 {
continue
}
result = append(result, fact)
}
return result
}
// FilterItemsBySetting 按用户记忆开关过滤已入库记忆。
func FilterItemsBySetting(items []model.MemoryItem, setting model.MemoryUserSetting) []model.MemoryItem {
if !setting.MemoryEnabled || len(items) == 0 {
return nil
}
result := make([]model.MemoryItem, 0, len(items))
for _, item := range items {
if !setting.ImplicitMemoryEnabled && !item.IsExplicit {
continue
}
if !setting.SensitiveMemoryEnabled && item.SensitivityLevel > 0 {
continue
}
result = append(result, item)
}
return result
}
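
行为示意(假设性示例):未配置记录时的默认开关,以及隐式/敏感两道过滤的组合效果。
func demoSettingFilter() []model.MemoryItem {
setting := EffectiveUserSetting(nil, 42)
// 默认:MemoryEnabled=true、ImplicitMemoryEnabled=true、SensitiveMemoryEnabled=false
setting.ImplicitMemoryEnabled = false
return FilterItemsBySetting([]model.MemoryItem{
{IsExplicit: true, SensitivityLevel: 0},  // 保留:显式且不敏感
{IsExplicit: false, SensitivityLevel: 0}, // 过滤:隐式记忆已被关闭
{IsExplicit: true, SensitivityLevel: 1},  // 过滤:敏感记忆默认关闭
}, setting) // 只剩第一条
}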

View File

@@ -0,0 +1,56 @@
package worker
import (
"context"
"log"
"time"
)
// RunPollingLoop 持续轮询 memory_jobs把异步 worker 真正跑起来。
//
// 职责边界:
// 1. 这里只负责“循环 + 轮询频率 + 批量触发”;
// 2. 不负责抽取逻辑,也不负责落库逻辑;
// 3. 任意一次 RunOnce 报错时只打日志并继续下一轮,避免整个后台循环退出。
func RunPollingLoop(ctx context.Context, runner *Runner, pollEvery time.Duration, claimBatch int) {
if runner == nil {
return
}
if runner.logger == nil {
runner.logger = log.Default()
}
if pollEvery <= 0 {
pollEvery = 2 * time.Second
}
if claimBatch <= 0 {
claimBatch = 1
}
runBatch := func() {
for i := 0; i < claimBatch; i++ {
result, err := runner.RunOnce(ctx)
if err != nil {
runner.logger.Printf("memory worker loop run once failed: %v", err)
return
}
if result == nil || !result.Claimed {
return
}
}
}
runBatch()
ticker := time.NewTicker(pollEvery)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
runner.logger.Printf("memory worker loop stopped: %v", ctx.Err())
return
case <-ticker.C:
runBatch()
}
}
}
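
生命周期示意(假设性示例):退出完全由上层 context 控制,循环自身不做重复启动保护。
func demoLoopLifecycle(runner *Runner) {
ctx, cancel := context.WithCancel(context.Background())
go RunPollingLoop(ctx, runner, 2*time.Second, 4) // 每 2s 触发一轮,单轮最多连续抢 4 个任务
// …… 进程收尾时:
cancel() // 循环在下一次 select 时退出,并打印 stopped 日志
}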

View File

@@ -6,14 +6,19 @@ import (
"errors" "errors"
"fmt" "fmt"
"log" "log"
"strconv"
"strings"
"time" "time"
infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model" memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo" memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
memoryutils "github.com/LoveLosita/smartflow/backend/memory/utils"
"github.com/LoveLosita/smartflow/backend/model" "github.com/LoveLosita/smartflow/backend/model"
"gorm.io/gorm"
) )
// RunOnceResult 描述次手工触发执行结果。 // RunOnceResult 描述次手工触发执行结果。
type RunOnceResult struct { type RunOnceResult struct {
Claimed bool Claimed bool
JobID int64 JobID int64
@@ -21,36 +26,57 @@ type RunOnceResult struct {
Facts int
}
-// Runner 是 Day1 首版任务执行器。
// Runner 负责把 memory_jobs 推进成 memory_items 和审计日志。
//
// 职责边界:
-// 1. 负责推进 memory_jobs 状态机;
-// 2. Day1 不做 memory_items 真正落库,仅做 mock 抽取与状态推进。
// 1. 负责任务抢占、抽取、落库和状态推进;
// 2. 不负责 outbox 消费,也不负责 LLM prompt 组装;
// 3. 失败时只做可恢复的状态回写,避免把业务错误直接抛到启动层。
type Runner struct {
db *gorm.DB
jobRepo *memoryrepo.JobRepo
itemRepo *memoryrepo.ItemRepo
auditRepo *memoryrepo.AuditRepo
settingsRepo *memoryrepo.SettingsRepo
extractor Extractor
ragRuntime infrarag.Runtime
logger *log.Logger
}
-func NewRunner(jobRepo *memoryrepo.JobRepo, extractor Extractor) *Runner {
// NewRunner 构造记忆 worker 执行器。
func NewRunner(
db *gorm.DB,
jobRepo *memoryrepo.JobRepo,
itemRepo *memoryrepo.ItemRepo,
auditRepo *memoryrepo.AuditRepo,
settingsRepo *memoryrepo.SettingsRepo,
extractor Extractor,
ragRuntime infrarag.Runtime,
) *Runner {
return &Runner{
db: db,
jobRepo: jobRepo,
itemRepo: itemRepo,
auditRepo: auditRepo,
settingsRepo: settingsRepo,
extractor: extractor,
ragRuntime: ragRuntime,
logger: log.Default(),
}
}
-// RunOnce 手工执行一次任务抢占与处理。
// RunOnce 手工执行一次任务处理。
//
// 返回语义:
-// 1. Claimed=false 表示当前无可执行任务;
// 1. Claimed=false 表示当前没有可执行任务;
-// 2. Claimed=true 且 Status=success/failed/dead 表示状态已推进完成。
// 2. Claimed=true 且 Status=success/failed/dead 表示本轮已经推进过一个任务;
// 3. 只有初始化缺失或数据库级错误才返回 error。
func (r *Runner) RunOnce(ctx context.Context) (*RunOnceResult, error) {
-if r == nil || r.jobRepo == nil || r.extractor == nil {
if r == nil || r.db == nil || r.jobRepo == nil || r.itemRepo == nil || r.auditRepo == nil || r.settingsRepo == nil || r.extractor == nil {
return nil, errors.New("memory worker runner is not initialized")
}
-// 1. 抢占一条可执行任务,避免并发 worker 重复处理同一记录。
// 1. 抢占一条可执行任务,避免多个 worker 重复处理同一记录。
job, err := r.jobRepo.ClaimNextRunnableExtractJob(ctx, time.Now())
if err != nil {
return nil, err
@@ -66,7 +92,7 @@ func (r *Runner) RunOnce(ctx context.Context) (*RunOnceResult, error) {
Facts: 0,
}
-// 2. 解析 payload_json。解析失败属于数据质量问题,走失败重试并打日志。
// 2. 解析任务载荷。这里属于数据质量问题,解析失败就直接标记为可重试失败。
var payload memorymodel.ExtractJobPayload
if err = json.Unmarshal([]byte(job.PayloadJSON), &payload); err != nil {
failReason := fmt.Sprintf("解析任务载荷失败: %v", err)
@@ -75,7 +101,22 @@ func (r *Runner) RunOnce(ctx context.Context) (*RunOnceResult, error) {
return result, nil
}
-// 3. 调用抽取器执行 mock 抽取。Day1 先保证“能推进状态”,不引入重计算。
// 3. 先读取用户记忆设置。总开关关闭时,任务直接成功结束,不再继续抽取和落库。
setting, err := r.settingsRepo.GetByUserID(ctx, payload.UserID)
if err != nil {
return nil, err
}
effectiveSetting := memoryutils.EffectiveUserSetting(setting, payload.UserID)
if !effectiveSetting.MemoryEnabled {
if err = r.jobRepo.MarkSuccess(ctx, job.ID); err != nil {
return nil, err
}
result.Status = model.MemoryJobStatusSuccess
r.logger.Printf("memory worker skipped by user setting: job_id=%d user_id=%d", job.ID, payload.UserID)
return result, nil
}
// 4. 调用抽取器。LLM 失败时由编排器做保守 fallback,worker 只关心最终结果。
facts, extractErr := r.extractor.ExtractFacts(ctx, payload)
if extractErr != nil {
failReason := fmt.Sprintf("抽取执行失败: %v", extractErr)
@@ -83,13 +124,213 @@ func (r *Runner) RunOnce(ctx context.Context) (*RunOnceResult, error) {
result.Status = model.MemoryJobStatusFailed
return result, nil
}
facts = memoryutils.FilterFactsBySetting(facts, effectiveSetting)
-// 4. 抽取成功后把任务置为 success。
if len(facts) == 0 {
if err = r.jobRepo.MarkSuccess(ctx, job.ID); err != nil {
return nil, err
}
result.Status = model.MemoryJobStatusSuccess
-result.Facts = len(facts)
-r.logger.Printf("memory worker run once success: job_id=%d extracted_facts=%d", job.ID, len(facts))
r.logger.Printf("memory worker run once noop: job_id=%d", job.ID)
return result, nil
}
items := buildMemoryItems(job, payload, facts)
if len(items) == 0 {
if err = r.jobRepo.MarkSuccess(ctx, job.ID); err != nil {
return nil, err
}
result.Status = model.MemoryJobStatusSuccess
r.logger.Printf("memory worker run once empty-after-normalize: job_id=%d", job.ID)
return result, nil
}
// 5. 先在事务里写入记忆条目和审计日志,再统一确认 job 成功。
if err = r.persistMemoryWrite(ctx, job.ID, items); err != nil {
failReason := fmt.Sprintf("记忆落库失败: %v", err)
_ = r.jobRepo.MarkFailed(ctx, job.ID, failReason)
result.Status = model.MemoryJobStatusFailed
return result, nil
}
result.Status = model.MemoryJobStatusSuccess
result.Facts = len(items)
r.syncMemoryVectors(ctx, items)
r.logger.Printf("memory worker run once success: job_id=%d extracted_facts=%d", job.ID, len(items))
return result, nil
}
func (r *Runner) persistMemoryWrite(ctx context.Context, jobID int64, items []model.MemoryItem) error {
return r.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
jobRepo := r.jobRepo.WithTx(tx)
itemRepo := r.itemRepo.WithTx(tx)
auditRepo := r.auditRepo.WithTx(tx)
if err := itemRepo.UpsertItems(ctx, items); err != nil {
return err
}
for i := range items {
audit := memoryutils.BuildItemAuditLog(
items[i].ID,
items[i].UserID,
memoryutils.AuditOperationCreate,
"system",
"LLM 提取入库",
nil,
&items[i],
)
if err := auditRepo.Create(ctx, audit); err != nil {
return err
}
}
return jobRepo.MarkSuccess(ctx, jobID)
})
}
func buildMemoryItems(job *model.MemoryJob, payload memorymodel.ExtractJobPayload, facts []memorymodel.NormalizedFact) []model.MemoryItem {
if job == nil || len(facts) == 0 {
return nil
}
items := make([]model.MemoryItem, 0, len(facts))
for _, fact := range facts {
items = append(items, model.MemoryItem{
UserID: payload.UserID,
ConversationID: strPtrOrNil(payload.ConversationID),
AssistantID: strPtrOrNil(payload.AssistantID),
RunID: strPtrOrNil(payload.RunID),
MemoryType: fact.MemoryType,
Title: fact.Title,
Content: fact.Content,
NormalizedContent: strPtrFromValue(fact.NormalizedContent),
ContentHash: strPtrFromValue(fact.ContentHash),
Confidence: fact.Confidence,
Importance: fact.Importance,
SensitivityLevel: fact.SensitivityLevel,
SourceMessageID: int64PtrOrNil(payload.SourceMessageID),
SourceEventID: job.SourceEventID,
IsExplicit: fact.IsExplicit,
Status: model.MemoryItemStatusActive,
TTLAt: resolveMemoryTTLAt(payload.OccurredAt, fact.MemoryType),
VectorStatus: "pending",
})
}
return items
}
func (r *Runner) syncMemoryVectors(ctx context.Context, items []model.MemoryItem) {
if r == nil || r.ragRuntime == nil || r.itemRepo == nil || len(items) == 0 {
return
}
requestItems := make([]infrarag.MemoryIngestItem, 0, len(items))
for _, item := range items {
requestItems = append(requestItems, infrarag.MemoryIngestItem{
MemoryID: item.ID,
UserID: item.UserID,
ConversationID: strValue(item.ConversationID),
AssistantID: strValue(item.AssistantID),
RunID: strValue(item.RunID),
MemoryType: item.MemoryType,
Title: item.Title,
Content: item.Content,
Confidence: item.Confidence,
Importance: item.Importance,
SensitivityLevel: item.SensitivityLevel,
IsExplicit: item.IsExplicit,
Status: item.Status,
TTLAt: item.TTLAt,
CreatedAt: item.CreatedAt,
})
}
result, err := r.ragRuntime.IngestMemory(ctx, infrarag.MemoryIngestRequest{
Action: "add",
Items: requestItems,
})
if err != nil {
r.logger.Printf("memory vector sync failed: err=%v", err)
for _, item := range items {
_ = r.itemRepo.UpdateVectorStateByID(ctx, item.ID, "failed", nil)
}
return
}
vectorIDMap := make(map[int64]string, len(result.DocumentIDs))
for _, documentID := range result.DocumentIDs {
memoryID := parseMemoryID(documentID)
if memoryID <= 0 {
continue
}
vectorIDMap[memoryID] = documentID
}
for _, item := range items {
vectorID := strPtrOrNil(vectorIDMap[item.ID])
_ = r.itemRepo.UpdateVectorStateByID(ctx, item.ID, "synced", vectorID)
}
}
func resolveMemoryTTLAt(base time.Time, memoryType string) *time.Time {
switch memoryType {
case memorymodel.MemoryTypeTodoHint:
t := base.Add(30 * 24 * time.Hour)
return &t
case memorymodel.MemoryTypeFact:
t := base.Add(180 * 24 * time.Hour)
return &t
default:
return nil
}
}
func strPtrFromValue(v string) *string {
v = strings.TrimSpace(v)
if v == "" {
return nil
}
value := v
return &value
}
func strPtrOrNil(v string) *string {
v = strings.TrimSpace(v)
if v == "" {
return nil
}
value := v
return &value
}
func int64PtrOrNil(v int64) *int64 {
if v <= 0 {
return nil
}
value := v
return &value
}
func strValue(v *string) string {
if v == nil {
return ""
}
return strings.TrimSpace(*v)
}
func parseMemoryID(documentID string) int64 {
documentID = strings.TrimSpace(documentID)
if !strings.HasPrefix(documentID, "memory:") {
return 0
}
raw := strings.TrimPrefix(documentID, "memory:")
if strings.HasPrefix(raw, "uid:") {
return 0
}
memoryID, err := strconv.ParseInt(raw, 10, 64)
if err != nil {
return 0
}
return memoryID
}
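
TTL 策略示意(假设性示例):todo_hint 30 天、fact 180 天,其余类型(如 preference/constraint)不设过期。
func demoTTL() {
base := time.Date(2026, 4, 10, 0, 0, 0, 0, time.Local)
fmt.Println(resolveMemoryTTLAt(base, memorymodel.MemoryTypeTodoHint))   // base + 30 天
fmt.Println(resolveMemoryTTLAt(base, memorymodel.MemoryTypeFact))       // base + 180 天
fmt.Println(resolveMemoryTTLAt(base, memorymodel.MemoryTypePreference)) // <nil>:长期保留
}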

View File

@@ -11,6 +11,12 @@ import (
"gorm.io/gorm" "gorm.io/gorm"
) )
// GormCachePlugin 负责在 GORM 写操作成功后,按模型类型触发对应缓存失效。
//
// 职责边界:
// 1. 只负责“识别模型 -> 调用对应缓存删除逻辑”;
// 2. 不负责业务事务提交,也不负责缓存回填;
// 3. 只处理当前项目真正依赖的前台读取缓存,未接缓存的模型应静默忽略。
type GormCachePlugin struct {
cacheDAO *dao.CacheDAO
}
@@ -21,14 +27,13 @@ func NewGormCachePlugin(dao *dao.CacheDAO) *GormCachePlugin {
}
}
-// Name 插件名
// Name 返回 GORM 插件名。
func (p *GormCachePlugin) Name() string {
return "GormCachePlugin"
}
-// Initialize 注册 GORM 钩子
// Initialize 注册 create/update/delete 成功后的统一失效钩子。
func (p *GormCachePlugin) Initialize(db *gorm.DB) error {
-// 在增、删、改成功后,统一触发清理逻辑
_ = db.Callback().Create().After("gorm:create").Register("clear_related_cache_after_create", p.afterWrite)
_ = db.Callback().Update().After("gorm:update").Register("clear_related_cache_after_update", p.afterWrite)
_ = db.Callback().Delete().After("gorm:delete").Register("clear_related_cache_after_delete", p.afterWrite)
@@ -40,24 +45,28 @@ func (p *GormCachePlugin) afterWrite(db *gorm.DB) {
return
}
-// 获取 Model 的真实值(剥掉所有指针)
// 1. 先剥掉所有指针,拿到真实模型值。
// 2. 若本次写入的是切片,按“切片元素类型”分发缓存逻辑即可。
val := reflect.Indirect(reflect.ValueOf(db.Statement.Model))
-// 如果是切片,拿切片里元素的类型
if val.Kind() == reflect.Slice {
if val.Len() > 0 {
-p.dispatchCacheLogic(val.Index(0).Interface(), db)
p.dispatchCacheLogic(val.Index(0).Interface())
}
-} else {
-p.dispatchCacheLogic(val.Interface(), db)
-}
return
}
p.dispatchCacheLogic(val.Interface())
}
-// 根据不同的 Model 类型,调用不同的缓存失效逻辑
-func (p *GormCachePlugin) dispatchCacheLogic(modelObj interface{}, db *gorm.DB) {
// dispatchCacheLogic 根据模型类型决定是否需要缓存失效。
//
// 步骤说明:
// 1. 先匹配真正有前台缓存读取依赖的模型,命中后执行对应删除逻辑;
// 2. 对已确认“不需要缓存失效”的模型显式静默忽略,避免正常链路反复刷屏;
// 3. 只有未知模型才打印日志,方便后续补齐遗漏的缓存策略。
func (p *GormCachePlugin) dispatchCacheLogic(modelObj interface{}) {
switch m := modelObj.(type) {
case model.Schedule:
-// 无论传的是 &s, s, 还是 &[]s,剥开后都是 model.Schedule
p.invalidScheduleCache(m.UserID, m.Week)
case model.TaskClass:
p.invalidTaskClassCache(*m.UserID)
@@ -69,10 +78,16 @@ func (p *GormCachePlugin) dispatchCacheLogic(modelObj interface{}, db *gorm.DB)
p.invalidConversationHistoryCache(m.UserID, m.ChatID)
case model.AgentChat:
p.invalidConversationHistoryCache(m.UserID, m.ChatID)
-case model.AgentOutboxMessage, model.User:
-// 这些模型目前没有定义缓存逻辑,先不处理
case model.AgentOutboxMessage,
model.User,
model.AgentStateSnapshotRecord,
model.MemoryJob,
model.MemoryItem,
model.MemoryAuditLog,
model.MemoryUserSetting:
// 这些模型当前没有前台缓存读取链路依赖,故意静默忽略。
return
default:
-// 只有真正没定义的模型才会到这里
log.Printf("[GORM-Cache] No logic defined for model: %T", modelObj)
}
}
@@ -81,13 +96,14 @@ func (p *GormCachePlugin) invalidScheduleCache(userID int, week int) {
if userID == 0 || week == 0 {
return
}

-// 3. 异步执行,不阻塞主业务事务
// 1. 异步删除缓存,避免阻塞主事务提交。
// 2. 周视图变化后,同时清今天/最近完成/进行中缓存,保证口径一致。
go func() {
-// 这里调用你的 CacheDAO 删缓存
_ = p.cacheDAO.DeleteUserWeeklyScheduleFromCache(context.Background(), userID, week)
-_ = p.cacheDAO.DeleteUserTodayScheduleFromCache(context.Background(), userID) // 同时删当天日程的缓存,确保数据一致
-_ = p.cacheDAO.DeleteUserRecentCompletedSchedulesFromCache(context.Background(), userID) // 同时删最近完成日程的缓存,确保数据一致
-_ = p.cacheDAO.DeleteUserOngoingScheduleFromCache(context.Background(), userID) // 同时删正在进行日程的缓存,确保数据一致
_ = p.cacheDAO.DeleteUserTodayScheduleFromCache(context.Background(), userID)
_ = p.cacheDAO.DeleteUserRecentCompletedSchedulesFromCache(context.Background(), userID)
_ = p.cacheDAO.DeleteUserOngoingScheduleFromCache(context.Background(), userID)
log.Printf("[GORM-Cache] Invalidated cache for user %d, week %d", userID, week)
}()
}
@@ -96,6 +112,7 @@ func (p *GormCachePlugin) invalidTaskClassCache(userID int) {
if userID == 0 {
return
}

go func() {
_ = p.cacheDAO.DeleteTaskClassList(context.Background(), userID)
log.Printf("[GORM-Cache] Invalidated task class list cache for user %d", userID)
@@ -106,6 +123,7 @@ func (p *GormCachePlugin) invalidTaskCache(userID int) {
if userID == 0 {
return
}

go func() {
_ = p.cacheDAO.DeleteUserTasksFromCache(context.Background(), userID)
log.Printf("[GORM-Cache] Invalidated task list cache for user %d", userID)
@@ -117,10 +135,10 @@ func (p *GormCachePlugin) invalidSchedulePlanPreviewCache(userID int, conversati
if userID == 0 || normalizedConversationID == "" { if userID == 0 || normalizedConversationID == "" {
return return
} }
go func() { go func() {
// 1. 这里的调用目的:当排程状态快照发生覆盖写入时,主动删除对应会话预览缓存 // 1. 排程快照被覆盖后,预览缓存必须同步删除,避免 Redis 里继续挂旧结果
// 2. 这样可以避免“Redis 里还是旧预览,但 MySQL 已经是新快照”的短暂口径不一致 // 2. 删除失败只记日志,不影响主事务,因为缓存永远是可回源的副本
// 3. 失败策略:缓存删除失败只记日志,不影响主事务提交。
if err := p.cacheDAO.DeleteSchedulePlanPreviewFromCache(context.Background(), userID, normalizedConversationID); err != nil { if err := p.cacheDAO.DeleteSchedulePlanPreviewFromCache(context.Background(), userID, normalizedConversationID); err != nil {
log.Printf("[GORM-Cache] Failed to invalidate schedule preview cache for user %d conversation %s: %v", userID, normalizedConversationID, err) log.Printf("[GORM-Cache] Failed to invalidate schedule preview cache for user %d conversation %s: %v", userID, normalizedConversationID, err)
return return
@@ -134,10 +152,10 @@ func (p *GormCachePlugin) invalidConversationHistoryCache(userID int, conversati
if userID == 0 || normalizedConversationID == "" { if userID == 0 || normalizedConversationID == "" {
return return
} }
go func() { go func() {
// 1. 这里的调用目的:当聊天历史写入或重试补种更新后,删除“前端历史视图缓存 // 1. 聊天历史写入或重试补种后,删除历史视图缓存,保证下次列表/详情能拿到最新版本
// 2. 这样下次访问 conversation-history 时会回源 DB并把最新 retry 版本完整回填缓存 // 2. 这里只清“前台历史视图缓存”,不碰 LLM 上下文热缓存,避免影响首 token 体验
// 3. 注意:这里只删历史视图缓存,不删 Agent 上下文热缓存,避免影响聊天首 token。
if err := p.cacheDAO.DeleteConversationHistoryFromCache(context.Background(), userID, normalizedConversationID); err != nil { if err := p.cacheDAO.DeleteConversationHistoryFromCache(context.Background(), userID, normalizedConversationID); err != nil {
log.Printf("[GORM-Cache] Failed to invalidate conversation history cache for user %d conversation %s: %v", userID, normalizedConversationID, err) log.Printf("[GORM-Cache] Failed to invalidate conversation history cache for user %d conversation %s: %v", userID, normalizedConversationID, err)
return return

View File

@@ -4,7 +4,7 @@ import (
 	"context"
 	"strings"
 
-	newagentllm "github.com/LoveLosita/smartflow/backend/newAgent/llm"
+	infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
 	newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
 	newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
 )
@@ -57,10 +57,10 @@ type WriteSchedulePreviewFunc func(ctx context.Context, state *newagenttools.Sch
 // 2. Chat/Plan/Execute/Deliver may each mount a different client, but may also share one client at first.
 // 3. ChunkEmitter uniformly carries stage hints, body text, tool events, confirmation requests, and other SSE output.
 type AgentGraphDeps struct {
-	ChatClient    *newagentllm.Client
-	PlanClient    *newagentllm.Client
-	ExecuteClient *newagentllm.Client
-	DeliverClient *newagentllm.Client
+	ChatClient    *infrallm.Client
+	PlanClient    *infrallm.Client
+	ExecuteClient *infrallm.Client
+	DeliverClient *infrallm.Client
 	ChunkEmitter  *newagentstream.ChunkEmitter
 	StateStore    AgentStateStore
 	ToolRegistry  *newagenttools.ToolRegistry
@@ -87,7 +87,7 @@ func (d *AgentGraphDeps) EnsureChunkEmitter() *newagentstream.ChunkEmitter {
 }
 
 // ResolveChatClient returns the model client usable in the chat stage.
-func (d *AgentGraphDeps) ResolveChatClient() *newagentllm.Client {
+func (d *AgentGraphDeps) ResolveChatClient() *infrallm.Client {
 	if d == nil {
 		return nil
 	}
@@ -100,7 +100,7 @@ func (d *AgentGraphDeps) ResolveChatClient() *newagentllm.Client {
 // 1. Prefer the explicitly injected PlanClient;
 // 2. fall back to ChatClient when none was injected separately;
 // 3. so the skeleton phase can run everything on one client, splitting strategist / worker later as needed.
-func (d *AgentGraphDeps) ResolvePlanClient() *newagentllm.Client {
+func (d *AgentGraphDeps) ResolvePlanClient() *infrallm.Client {
 	if d == nil {
 		return nil
 	}
@@ -111,7 +111,7 @@ }
 }
 
 // ResolveExecuteClient returns the model client usable in the execute stage.
-func (d *AgentGraphDeps) ResolveExecuteClient() *newagentllm.Client {
+func (d *AgentGraphDeps) ResolveExecuteClient() *infrallm.Client {
 	if d == nil {
 		return nil
 	}
@@ -125,7 +125,7 @@ func (d *AgentGraphDeps) ResolveExecuteClient() *newagentllm.Client {
 }
 
 // ResolveDeliverClient returns the model client usable in the deliver stage.
-func (d *AgentGraphDeps) ResolveDeliverClient() *newagentllm.Client {
+func (d *AgentGraphDeps) ResolveDeliverClient() *infrallm.Client {
 	if d == nil {
 		return nil
 	}
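Taken together, the Resolve* methods mean callers only ever have to fill ChatClient during the skeleton phase. A hedged wiring sketch under that assumption (sharedClient and emitter are placeholders for values built elsewhere):

    deps := &AgentGraphDeps{
    	ChatClient:   sharedClient, // Plan/Execute/Deliver left nil on purpose
    	ChunkEmitter: emitter,
    }
    // Each resolver falls back to ChatClient when its stage client is nil,
    // so all four stages run on the one shared client.
    planClient := deps.ResolvePlanClient()
    _ = planClient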

View File

@@ -9,7 +9,7 @@ import (
 	"github.com/cloudwego/eino/schema"
 
-	newagentllm "github.com/LoveLosita/smartflow/backend/newAgent/llm"
+	infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
 	newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
 	newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
 	newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
@@ -46,7 +46,7 @@ type ChatNodeInput struct {
 	ConversationContext *newagentmodel.ConversationContext
 	UserInput           string
 	ConfirmAction       string
-	Client              *newagentllm.Client
+	Client              *infrallm.Client
 	ChunkEmitter        *newagentstream.ChunkEmitter
 }
@@ -91,14 +91,14 @@ func RunChatNode(ctx context.Context, input ChatNodeInput) error {
 	}
 
 	messages := newagentprompt.BuildChatRoutingMessages(conversationContext, input.UserInput, flowState)
-	decision, rawResult, err := newagentllm.GenerateJSON[newagentmodel.ChatRoutingDecision](
+	decision, rawResult, err := infrallm.GenerateJSON[newagentmodel.ChatRoutingDecision](
 		ctx,
 		input.Client,
 		messages,
-		newagentllm.GenerateOptions{
+		infrallm.GenerateOptions{
 			Temperature: 0.1,
 			MaxTokens:   500,
-			Thinking:    newagentllm.ThinkingModeDisabled,
+			Thinking:    infrallm.ThinkingModeDisabled,
 			Metadata: map[string]any{
 				"stage": chatStageName,
 				"phase": "routing",
@@ -412,10 +412,10 @@ func handleDeepAnswer(
 	// 2. Second LLM call: thinking enabled, deep answer.
 	deepMessages := newagentprompt.BuildDeepAnswerMessages(conversationContext, input.UserInput)
-	deepResult, err := input.Client.GenerateText(ctx, deepMessages, newagentllm.GenerateOptions{
+	deepResult, err := input.Client.GenerateText(ctx, deepMessages, infrallm.GenerateOptions{
 		Temperature: 0.5,
 		MaxTokens:   2000,
-		Thinking:    newagentllm.ThinkingModeEnabled,
+		Thinking:    infrallm.ThinkingModeEnabled,
 		Metadata: map[string]any{
 			"stage": chatStageName,
 			"phase": "deep_answer",

View File

@@ -8,7 +8,7 @@ import (
 	"github.com/cloudwego/eino/schema"
 
-	newagentllm "github.com/LoveLosita/smartflow/backend/newAgent/llm"
+	infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
 	newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
 	newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
 	newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
@@ -30,7 +30,7 @@ const (
 type DeliverNodeInput struct {
 	RuntimeState        *newagentmodel.AgentRuntimeState
 	ConversationContext *newagentmodel.ConversationContext
-	Client              *newagentllm.Client
+	Client              *infrallm.Client
 	ChunkEmitter        *newagentstream.ChunkEmitter
 }
@@ -95,7 +95,7 @@ func RunDeliverNode(ctx context.Context, input DeliverNodeInput) error {
 // generateDeliverSummary tries an LLM-generated delivery summary and degrades to mechanical formatting on failure.
 func generateDeliverSummary(
 	ctx context.Context,
-	client *newagentllm.Client,
+	client *infrallm.Client,
 	flowState *newagentmodel.CommonState,
 	conversationContext *newagentmodel.ConversationContext,
 ) string {
@@ -116,10 +116,10 @@ func generateDeliverSummary(
 	result, err := client.GenerateText(
 		ctx,
 		messages,
-		newagentllm.GenerateOptions{
+		infrallm.GenerateOptions{
 			Temperature: 0.5,
 			MaxTokens:   800,
-			Thinking:    newagentllm.ThinkingModeDisabled,
+			Thinking:    infrallm.ThinkingModeDisabled,
 			Metadata: map[string]any{
 				"stage": deliverStageName,
 			},
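The degrade path promised by the doc comment sits outside this hunk; its shape is the usual "LLM first, mechanical fallback second". A hedged sketch, where formatMechanically is a hypothetical stand-in for the real fallback formatter:

    result, err := client.GenerateText(ctx, messages, opts)
    if err != nil || strings.TrimSpace(result) == "" {
    	// A failed or empty summary must never break delivery itself.
    	return formatMechanically(flowState)
    }
    return result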

View File

@@ -10,7 +10,7 @@ import (
 	"strings"
 	"time"
 
-	newagentllm "github.com/LoveLosita/smartflow/backend/newAgent/llm"
+	infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
 	newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
 	newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
 	newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
@@ -48,7 +48,7 @@ type ExecuteNodeInput struct {
 	RuntimeState        *newagentmodel.AgentRuntimeState
 	ConversationContext *newagentmodel.ConversationContext
 	UserInput           string
-	Client              *newagentllm.Client
+	Client              *infrallm.Client
 	ChunkEmitter        *newagentstream.ChunkEmitter
 	ResumeNode          string
 	ToolRegistry        *newagenttools.ToolRegistry
@@ -188,14 +188,14 @@ func RunExecuteNode(ctx context.Context, input ExecuteNodeInput) error {
 		flowState.ConversationID,
 		flowState.RoundUsed,
 	)
-	decision, rawResult, err := newagentllm.GenerateJSON[newagentmodel.ExecuteDecision](
+	decision, rawResult, err := infrallm.GenerateJSON[newagentmodel.ExecuteDecision](
 		ctx,
 		input.Client,
 		messages,
-		newagentllm.GenerateOptions{
+		infrallm.GenerateOptions{
 			Temperature: 1.0,   // thinking mode mandates temperature=1
 			MaxTokens:   16000, // leave enough budget for the thinking chain
-			Thinking:    newagentllm.ThinkingModeEnabled,
+			Thinking:    infrallm.ThinkingModeEnabled,
 			Metadata: map[string]any{
 				"stage":      executeStageName,
 				"step_index": flowState.CurrentStep,

View File

@@ -8,7 +8,7 @@ import (
 	"github.com/google/uuid"
 
-	newagentllm "github.com/LoveLosita/smartflow/backend/newAgent/llm"
+	infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
 	newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
 	newagentprompt "github.com/LoveLosita/smartflow/backend/newAgent/prompt"
 	newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
@@ -31,7 +31,7 @@ type PlanNodeInput struct {
 	RuntimeState        *newagentmodel.AgentRuntimeState
 	ConversationContext *newagentmodel.ConversationContext
 	UserInput           string
-	Client              *newagentllm.Client
+	Client              *infrallm.Client
 	ChunkEmitter        *newagentstream.ChunkEmitter
 	ResumeNode          string
 	AlwaysExecute       bool // when true, the plan auto-confirms after generation and skips the confirm node
@@ -70,14 +70,14 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 	messages := newagentprompt.BuildPlanMessages(flowState, conversationContext, input.UserInput)
 
 	// 3. Phase 1: quick assessment with thinking enabled, letting the LLM produce the complexity assessment and the planning result in one pass.
-	decision, rawResult, err := newagentllm.GenerateJSON[newagentmodel.PlanDecision](
+	decision, rawResult, err := infrallm.GenerateJSON[newagentmodel.PlanDecision](
 		ctx,
 		input.Client,
 		messages,
-		newagentllm.GenerateOptions{
+		infrallm.GenerateOptions{
 			Temperature: 0.2,
 			MaxTokens:   1600,
-			Thinking:    newagentllm.ThinkingModeEnabled,
+			Thinking:    infrallm.ThinkingModeEnabled,
 			Metadata: map[string]any{
 				"stage": planStageName,
 				"phase": "assessment",
@@ -108,14 +108,14 @@ func RunPlanNode(ctx context.Context, input PlanNodeInput) error {
 		return fmt.Errorf("深度规划状态推送失败: %w", err)
 	}
 
-	deepDecision, _, deepErr := newagentllm.GenerateJSON[newagentmodel.PlanDecision](
+	deepDecision, _, deepErr := infrallm.GenerateJSON[newagentmodel.PlanDecision](
 		ctx,
 		input.Client,
 		messages,
-		newagentllm.GenerateOptions{
+		infrallm.GenerateOptions{
 			Temperature: 0.2,
 			MaxTokens:   3200,
-			Thinking:    newagentllm.ThinkingModeEnabled,
+			Thinking:    infrallm.ThinkingModeEnabled,
 			Metadata: map[string]any{
 				"stage": planStageName,
 				"phase": "deep_planning",

View File

@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"sort"
 	"strings"
+
+	infrarag "github.com/LoveLosita/smartflow/backend/infra/rag"
 )
 
 // ToolHandler is the unified execution signature shared by all tools.
@@ -16,17 +18,34 @@ type ToolSchemaEntry struct {
 	SchemaText string
 }
 
+// DefaultRegistryDeps describes the optional dependencies of the default tool registry.
+//
+// Notes:
+// 1. This dependency-injection layer reserves a unified entry point for the upcoming websearch / memory tools;
+// 2. even while some dependencies are still unused, business code must not new up the underlying infra on its own;
+// 3. future read tools should extend the dependencies here rather than reach for package-level globals.
+type DefaultRegistryDeps struct {
+	RAGRuntime infrarag.Runtime
+}
+
 // ToolRegistry manages tool registration, lookup, and execution.
 type ToolRegistry struct {
 	handlers map[string]ToolHandler
 	schemas  []ToolSchemaEntry
+	deps     DefaultRegistryDeps
 }
 
 // NewToolRegistry creates an empty registry.
 func NewToolRegistry() *ToolRegistry {
+	return NewToolRegistryWithDeps(DefaultRegistryDeps{})
+}
+
+// NewToolRegistryWithDeps creates an empty registry with dependencies attached.
+func NewToolRegistryWithDeps(deps DefaultRegistryDeps) *ToolRegistry {
 	return &ToolRegistry{
 		handlers: make(map[string]ToolHandler),
 		schemas:  make([]ToolSchemaEntry, 0),
+		deps:     deps,
 	}
 }
@@ -93,7 +112,12 @@ var writeTools = map[string]bool{
 // NewDefaultRegistry creates the default schedule tool registry.
 func NewDefaultRegistry() *ToolRegistry {
-	r := NewToolRegistry()
+	return NewDefaultRegistryWithDeps(DefaultRegistryDeps{})
+}
+
+// NewDefaultRegistryWithDeps creates the default schedule tool registry with dependencies attached.
+func NewDefaultRegistryWithDeps(deps DefaultRegistryDeps) *ToolRegistry {
+	r := NewToolRegistryWithDeps(deps)
 
 	// --- Read tools ---
 	r.Register("get_overview",

View File

@@ -56,6 +56,7 @@ type AgentService struct {
 	scheduleProvider  newagentmodel.ScheduleStateProvider
 	schedulePersistor newagentmodel.SchedulePersistor
 	agentStateStore   newagentmodel.AgentStateStore
+	memoryReader      MemoryReader
 }
 
 // NewAgentService constructs an AgentService.

View File

@@ -0,0 +1,161 @@
+package agentsvc
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"strings"
+	"time"
+
+	memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
+	newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
+)
+
+const (
+	newAgentMemoryBlockKey      = "memory_context"
+	newAgentMemoryRetrieveLimit = 5
+	newAgentMemoryBlockTitle    = "相关记忆"
+	newAgentMemoryIntroLine     = "以下是与当前对话相关的用户记忆,仅在自然且确实有帮助时参考,不要生硬复述。"
+)
+
+// MemoryReader describes the minimal capability the newAgent main chain needs for reading memories.
+//
+// Responsibility boundaries:
+// 1. It only fetches candidate memories for the current input;
+// 2. it does no prompt assembly, and callers never need to know the memory module's internal repo/service layout;
+// 3. the return value reuses the memory DTO directly, so the service layer does not maintain a duplicate set of structs.
+type MemoryReader interface {
+	Retrieve(ctx context.Context, req memorymodel.RetrieveRequest) ([]memorymodel.ItemDTO, error)
+}
+
+// SetMemoryReader injects the thin interface the newAgent main chain uses to read memories.
+func (s *AgentService) SetMemoryReader(reader MemoryReader) {
+	s.memoryReader = reader
+}
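Since MemoryReader is a single-method interface, any value with a matching Retrieve satisfies it; per the commit description, the memory Module's read service is the intended producer. A wiring sketch under that assumption (the ReadService accessor is taken from the commit description, not shown in this diff):

    agentService.SetMemoryReader(memoryModule.ReadService()) // assumed facade accessor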
+
+// injectMemoryContext writes this round's relevant memories into the ConversationContext pinned block before the graph runs.
+//
+// Steps:
+// 1. Gate first: with no reader, no valid user, or a confirmation/acknowledgement-style short input, remove the old block outright so a stale snapshot cannot pollute this round's prompt.
+// 2. Then run memory retrieval: a failed query is only logged and never interrupts the main chain, keeping newAgent availability first.
+// 3. On success, render the results into stable Chinese text and overwrite under a fixed key, so each round refreshes the block instead of letting it accumulate.
+func (s *AgentService) injectMemoryContext(
+	ctx context.Context,
+	conversationContext *newagentmodel.ConversationContext,
+	userID int,
+	chatID string,
+	userMessage string,
+) {
+	if conversationContext == nil {
+		return
+	}
+	if s.memoryReader == nil || userID <= 0 || !shouldInjectMemoryForInput(userMessage) {
+		conversationContext.RemovePinnedBlock(newAgentMemoryBlockKey)
+		return
+	}
+
+	items, err := s.memoryReader.Retrieve(ctx, memorymodel.RetrieveRequest{
+		Query:          strings.TrimSpace(userMessage),
+		UserID:         userID,
+		ConversationID: strings.TrimSpace(chatID),
+		Limit:          newAgentMemoryRetrieveLimit,
+		Now:            time.Now(),
+	})
+	if err != nil {
+		conversationContext.RemovePinnedBlock(newAgentMemoryBlockKey)
+		log.Printf("读取记忆上下文失败 user=%d chat=%s err=%v", userID, chatID, err)
+		return
+	}
+
+	content := renderMemoryPinnedContent(items)
+	if content == "" {
+		conversationContext.RemovePinnedBlock(newAgentMemoryBlockKey)
+		return
+	}
+	conversationContext.UpsertPinnedBlock(newagentmodel.ContextBlock{
+		Key:     newAgentMemoryBlockKey,
+		Title:   newAgentMemoryBlockTitle,
+		Content: content,
+	})
+}
+
+// shouldInjectMemoryForInput decides whether the current input is worth a memory recall.
+//
+// Steps:
+// 1. Empty input is skipped outright;
+// 2. weak-semantics acknowledgements like "好 / 确认 / ok" are explicitly intercepted, so the legacy fallback cannot inject a batch of high-scoring yet irrelevant old memories when the query carries no retrieval value;
+// 3. everything else passes through, keeping the MVP usable first.
+func shouldInjectMemoryForInput(userMessage string) bool {
+	trimmed := strings.TrimSpace(userMessage)
+	if trimmed == "" {
+		return false
+	}
+	switch strings.ToLower(trimmed) {
+	case "好", "好的", "嗯", "嗯嗯", "行", "可以", "收到", "明白", "确认", "取消", "是", "不是", "对", "不对", "ok", "okay", "yes", "no":
+		return false
+	default:
+		return true
+	}
+}
+
+// renderMemoryPinnedContent turns recall results into a stable, compact natural-language text fit for prompt injection.
+func renderMemoryPinnedContent(items []memorymodel.ItemDTO) string {
+	if len(items) == 0 {
+		return ""
+	}
+
+	var sb strings.Builder
+	sb.WriteString(newAgentMemoryIntroLine)
+
+	seen := make(map[string]struct{}, len(items))
+	written := 0
+	for _, item := range items {
+		line := buildMemoryPinnedLine(item)
+		if line == "" {
+			continue
+		}
+		if _, exists := seen[line]; exists {
+			continue
+		}
+		seen[line] = struct{}{}
+		sb.WriteString("\n- ")
+		sb.WriteString(line)
+		written++
+	}
+	if written == 0 {
+		return ""
+	}
+	return strings.TrimSpace(sb.String())
+}
+
+// buildMemoryPinnedLine renders a single memory in the terse "[type] content" format.
+func buildMemoryPinnedLine(item memorymodel.ItemDTO) string {
+	text := strings.TrimSpace(item.Content)
+	if text == "" {
+		text = strings.TrimSpace(item.Title)
+	}
+	if text == "" {
+		return ""
+	}
+	return fmt.Sprintf("[%s] %s", localizeMemoryType(item.MemoryType), text)
+}
+
+// localizeMemoryType maps a memory type to the more natural Chinese label used in the prompt.
+func localizeMemoryType(memoryType string) string {
+	switch strings.TrimSpace(memoryType) {
+	case memorymodel.MemoryTypePreference:
+		return "偏好"
+	case memorymodel.MemoryTypeConstraint:
+		return "约束"
+	case memorymodel.MemoryTypeTodoHint:
+		return "待办线索"
+	case memorymodel.MemoryTypeFact:
+		return "事实"
+	default:
+		return "记忆"
+	}
+}
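Putting the three renderers together, a two-item recall produces a block like this (output derived from the functions above):

    items := []memorymodel.ItemDTO{
    	{MemoryType: memorymodel.MemoryTypePreference, Content: "偏好在上午安排深度工作"},
    	{MemoryType: memorymodel.MemoryTypeConstraint, Content: "周三晚上有固定课程"},
    }
    fmt.Println(renderMemoryPinnedContent(items))
    // 以下是与当前对话相关的用户记忆,仅在自然且确实有帮助时参考,不要生硬复述。
    // - [偏好] 偏好在上午安排深度工作
    // - [约束] 周三晚上有固定课程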

View File

@@ -7,9 +7,9 @@ import (
 	"strings"
 	"time"
 
+	infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
 	newagentconv "github.com/LoveLosita/smartflow/backend/newAgent/conv"
 	newagentgraph "github.com/LoveLosita/smartflow/backend/newAgent/graph"
-	newagentllm "github.com/LoveLosita/smartflow/backend/newAgent/llm"
 	newagentmodel "github.com/LoveLosita/smartflow/backend/newAgent/model"
 	newagentstream "github.com/LoveLosita/smartflow/backend/newAgent/stream"
 	newagenttools "github.com/LoveLosita/smartflow/backend/newAgent/tools"
@@ -107,6 +107,10 @@ func (s *AgentService) runNewAgentGraph(
 	} else {
 		conversationContext = s.loadConversationContext(requestCtx, chatID, userMessage)
 	}
+	// 5.1 Before the graph runs, uniformly attach the memory context relevant to the current input.
+	// 5.1.1 A pinned block is injected here so the chat / plan / execute / deliver stages all reuse it automatically.
+	// 5.1.2 A failed retrieval only degrades to "no memory injected this round" and never blocks the main chain.
+	s.injectMemoryContext(requestCtx, conversationContext, userID, chatID, userMessage)
 
 	// 5.5 If extra carries task_class_ids, validate them and write into CommonState (only on the first round / while still unset; persisted across rounds).
 	// Validation: LoadTaskClassMetas → GetCompleteTaskClassesByIDs checks that every ID exists and belongs to the current user;
@@ -141,10 +145,10 @@
 	graphRequest.Normalize()
 
 	// 7. Adapt the LLM clients: wrap AIHub's ark.ChatModel into the newAgent LLM Client.
-	chatClient := newagentllm.WrapArkClient(s.AIHub.Worker)
-	planClient := newagentllm.WrapArkClient(s.AIHub.Worker)
-	executeClient := newagentllm.WrapArkClient(s.AIHub.Worker)
-	deliverClient := newagentllm.WrapArkClient(s.AIHub.Worker)
+	chatClient := infrallm.WrapArkClient(s.AIHub.Worker)
+	planClient := infrallm.WrapArkClient(s.AIHub.Worker)
+	executeClient := infrallm.WrapArkClient(s.AIHub.Worker)
+	deliverClient := infrallm.WrapArkClient(s.AIHub.Worker)
 
 	// 8. Adapt the SSE emitter.
 	sseEmitter := newagentstream.NewSSEPayloadEmitter(outChan)

View File

@@ -13,9 +13,8 @@ import (
 	kafkabus "github.com/LoveLosita/smartflow/backend/infra/kafka"
 	outboxinfra "github.com/LoveLosita/smartflow/backend/infra/outbox"
+	"github.com/LoveLosita/smartflow/backend/memory"
 	memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
-	memoryrepo "github.com/LoveLosita/smartflow/backend/memory/repo"
-	memoryservice "github.com/LoveLosita/smartflow/backend/memory/service"
 	"github.com/LoveLosita/smartflow/backend/model"
 	"github.com/spf13/viper"
 	"gorm.io/gorm"
@@ -32,10 +31,11 @@ const (
 // Responsibility boundaries:
 // 1. Only turn the event into a memory_jobs task;
 // 2. never run LLM recomputation inside the consumer callback;
-// 3. use the generic outbox transaction to keep "task persisted + consumed advanced" atomically consistent.
+// 3. go through the memory.Module.WithTx(tx) facade, so the event handler no longer depends on the repo/service internals while the transaction boundary stays under outbox control.
 func RegisterMemoryExtractRequestedHandler(
 	bus *outboxinfra.EventBus,
 	outboxRepo *outboxinfra.Repository,
+	memoryModule *memory.Module,
 ) error {
 	if bus == nil {
 		return errors.New("event bus is nil")
@@ -43,6 +43,9 @@
 	if outboxRepo == nil {
 		return errors.New("outbox repository is nil")
 	}
+	if memoryModule == nil {
+		return errors.New("memory module is nil")
+	}
 
 	handler := func(ctx context.Context, envelope kafkabus.Envelope) error {
 		var payload model.MemoryExtractRequestedPayload
@@ -57,7 +60,6 @@ func RegisterMemoryExtractRequestedHandler(
 		}
 
 		return outboxRepo.ConsumeAndMarkConsumed(ctx, envelope.OutboxID, func(tx *gorm.DB) error {
-			enqueueService := memoryservice.NewEnqueueService(memoryrepo.NewJobRepo(tx))
 			jobPayload := memorymodel.ExtractJobPayload{
 				UserID:         payload.UserID,
 				ConversationID: strings.TrimSpace(payload.ConversationID),
@@ -70,7 +72,7 @@
 				TraceID:        strings.TrimSpace(payload.TraceID),
 				IdempotencyKey: strings.TrimSpace(payload.IdempotencyKey),
 			}
-			return enqueueService.EnqueueExtractJob(ctx, jobPayload, envelope.EventID)
+			return memoryModule.WithTx(tx).EnqueueExtract(ctx, jobPayload, envelope.EventID)
 		})
 	}
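The WithTx call is the load-bearing piece: the handler never opens a transaction of its own, it binds the module facade to the tx the outbox hands it, so enqueue failure and the consumed marker roll back together and the event is redelivered. Condensed, the flow is:

    return outboxRepo.ConsumeAndMarkConsumed(ctx, envelope.OutboxID, func(tx *gorm.DB) error {
    	// Runs inside the outbox's transaction: if EnqueueExtract fails,
    	// the consumed marker rolls back with it and the event is retried.
    	return memoryModule.WithTx(tx).EnqueueExtract(ctx, jobPayload, envelope.EventID)
    })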
@@ -80,8 +82,8 @@
 // EnqueueMemoryExtractRequestedInTx writes the memory.extract.requested outbox message inside a transaction.
 //
 // Design goals:
 // 1. have "chat message persisted" and "memory-extraction event enqueued" commit in the same transaction;
 // 2. if either step fails, everything rolls back, so no broken link appears in the chain.
 func EnqueueMemoryExtractRequestedInTx(
 	ctx context.Context,
 	outboxRepo *outboxinfra.Repository,
@@ -128,6 +130,7 @@ func buildMemoryExtractPayloadFromChat(chatPayload model.ChatHistoryPersistPaylo
 	if role != "user" {
 		return model.MemoryExtractRequestedPayload{}, false
 	}
+
 	sourceText := strings.TrimSpace(chatPayload.Message)
 	if sourceText == "" {
 		return model.MemoryExtractRequestedPayload{}, false
@@ -179,6 +182,7 @@ func truncateByRune(raw string, max int) string {
 	if max <= 0 {
 		return ""
 	}
+
 	runes := []rune(raw)
 	if len(runes) <= max {
 		return raw
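The []rune conversion is what keeps this truncation safe for the Chinese payloads flowing through this file; slicing the raw string by byte index could split a UTF-8 sequence. A small illustration of the difference:

    s := "记忆抽取"
    fmt.Println(len(s))                // 12: bytes, three per CJK character
    fmt.Println(len([]rune(s)))        // 4: characters, the unit truncateByRune counts
    fmt.Println(string([]rune(s)[:2])) // "记忆": a clean cut on a character boundary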