Add A_Memorix plugin v2.0.0 (runtime and documentation)
Introduce the A_Memorix plugin v2.0.0: a large set of new runtime components, storage/schema updates, retrieval improvements, admin tools, import/tuning workflows, and the accompanying documentation. Key additions include the lifecycle_orchestrator, the SDKMemoryKernel/runtime initializer, a new storage layer with metadata_store changes (SCHEMA_VERSION v8), retrieval enhancements (dual-path retrieval, graph-relation recall, sparse BM25), and a set of tool services (episode/person_profile/relation/segmentation/tuning/search execution). Also adds web import/digest importers and numerous maintenance scripts. Updates the plugin manifest, the embedding API adapter, plugin.py, requirements/pyproject, and the main entry point so the new plugin is wired into the project. This change prepares the 2.0.0 release, implementing a unified SDK Tool interface and extending overall runtime capabilities.
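The maintenance scripts added in this commit are plain CLI entry points. As a minimal sketch of the grey-release loop described in backfill_relation_vectors.py below (flags and paths are taken from the scripts in this diff; driving them via subprocess like this is an assumption, not part of the commit):

# Hypothetical driver: backfill missing relation vectors, then audit consistency in strict mode.
import subprocess
import sys

SCRIPTS = "plugins/A_memorix/scripts"

# 1) Fill vectors for relations whose vector_state is none/failed/pending.
subprocess.run(
    [sys.executable, f"{SCRIPTS}/backfill_relation_vectors.py", "--concurrency", "8"],
    check=True,
)

# 2) Audit coverage/orphans; with --strict a non-zero exit code signals drift.
audit = subprocess.run(
    [sys.executable, f"{SCRIPTS}/audit_vector_consistency.py", "--strict", "--json-out", "audit_report.json"],
)
print("consistency OK" if audit.returncode == 0 else "drift detected, see audit_report.json")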
213
plugins/A_memorix/scripts/audit_vector_consistency.py
Normal file
@@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""
A_Memorix 一致性审计脚本。

输出内容:
1. paragraph/entity/relation 向量覆盖率
2. relation vector_state 分布
3. 孤儿向量数量(向量存在但 metadata 不存在)
4. 状态与向量文件不一致统计
"""

from __future__ import annotations

import argparse
import json
import pickle
import sys
from pathlib import Path
from typing import Any, Dict, Set


CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
PROJECT_ROOT = PLUGIN_ROOT.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PLUGIN_ROOT))


def _build_arg_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="审计 A_Memorix 向量一致性")
    parser.add_argument(
        "--data-dir",
        default=str(PLUGIN_ROOT / "data"),
        help="A_Memorix 数据目录(默认: plugins/A_memorix/data)",
    )
    parser.add_argument("--json-out", default="", help="可选:输出 JSON 文件路径")
    parser.add_argument(
        "--strict",
        action="store_true",
        help="若发现一致性异常则返回非 0 退出码",
    )
    return parser


# --help/-h fast path: avoid heavy host/plugin bootstrap
if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
    _build_arg_parser().print_help()
    sys.exit(0)

try:
    from core.storage.vector_store import VectorStore
    from core.storage.metadata_store import MetadataStore
    from core.storage import QuantizationType
except Exception as e:  # pragma: no cover
    print(f"❌ 导入核心模块失败: {e}")
    sys.exit(1)


def _safe_ratio(numerator: int, denominator: int) -> float:
    if denominator <= 0:
        return 0.0
    return float(numerator) / float(denominator)


def _load_vector_store(data_dir: Path) -> VectorStore:
    meta_path = data_dir / "vectors" / "vectors_metadata.pkl"
    if not meta_path.exists():
        raise FileNotFoundError(f"未找到向量元数据文件: {meta_path}")

    with open(meta_path, "rb") as f:
        meta = pickle.load(f)
    dimension = int(meta.get("dimension", 1024))

    store = VectorStore(
        dimension=max(1, dimension),
        quantization_type=QuantizationType.INT8,
        data_dir=data_dir / "vectors",
    )
    if store.has_data():
        store.load()
    return store


def _load_metadata_store(data_dir: Path) -> MetadataStore:
    store = MetadataStore(data_dir=data_dir / "metadata")
    store.connect()
    return store


def _hash_set(metadata_store: MetadataStore, table: str) -> Set[str]:
    return {str(h) for h in metadata_store.list_hashes(table)}


def _relation_state_stats(metadata_store: MetadataStore) -> Dict[str, int]:
    return metadata_store.count_relations_by_vector_state()


def run_audit(data_dir: Path) -> Dict[str, Any]:
    vector_store = _load_vector_store(data_dir)
    metadata_store = _load_metadata_store(data_dir)
    try:
        paragraph_hashes = _hash_set(metadata_store, "paragraphs")
        entity_hashes = _hash_set(metadata_store, "entities")
        relation_hashes = _hash_set(metadata_store, "relations")

        known_hashes = set(getattr(vector_store, "_known_hashes", set()))
        live_vector_hashes = {h for h in known_hashes if h in vector_store}

        para_vector_hits = len(paragraph_hashes & live_vector_hashes)
        ent_vector_hits = len(entity_hashes & live_vector_hashes)
        rel_vector_hits = len(relation_hashes & live_vector_hashes)

        orphan_vector_hashes = sorted(
            live_vector_hashes - paragraph_hashes - entity_hashes - relation_hashes
        )

        relation_rows = metadata_store.get_relations()
        ready_but_missing = 0
        not_ready_but_present = 0
        for row in relation_rows:
            h = str(row.get("hash") or "")
            state = str(row.get("vector_state") or "none").lower()
            in_vector = h in live_vector_hashes
            if state == "ready" and not in_vector:
                ready_but_missing += 1
            if state != "ready" and in_vector:
                not_ready_but_present += 1

        relation_states = _relation_state_stats(metadata_store)
        rel_total = max(0, int(relation_states.get("total", len(relation_hashes))))
        ready_count = max(0, int(relation_states.get("ready", 0)))

        result = {
            "counts": {
                "paragraphs": len(paragraph_hashes),
                "entities": len(entity_hashes),
                "relations": len(relation_hashes),
                "vectors_live": len(live_vector_hashes),
            },
            "coverage": {
                "paragraph_vector_coverage": _safe_ratio(para_vector_hits, len(paragraph_hashes)),
                "entity_vector_coverage": _safe_ratio(ent_vector_hits, len(entity_hashes)),
                "relation_vector_coverage": _safe_ratio(rel_vector_hits, len(relation_hashes)),
                "relation_ready_coverage": _safe_ratio(ready_count, rel_total),
            },
            "relation_states": relation_states,
            "orphans": {
                "vector_only_count": len(orphan_vector_hashes),
                "vector_only_sample": orphan_vector_hashes[:30],
            },
            "consistency_checks": {
                "ready_but_missing_vector": ready_but_missing,
                "not_ready_but_vector_present": not_ready_but_present,
            },
        }
        return result
    finally:
        metadata_store.close()


def main() -> int:
    parser = _build_arg_parser()
    args = parser.parse_args()

    data_dir = Path(args.data_dir).resolve()
    if not data_dir.exists():
        print(f"❌ 数据目录不存在: {data_dir}")
        return 2

    try:
        result = run_audit(data_dir)
    except Exception as e:
        print(f"❌ 审计失败: {e}")
        return 2

    print("=== A_Memorix Vector Consistency Audit ===")
    print(f"data_dir: {data_dir}")
    print(f"paragraphs: {result['counts']['paragraphs']}")
    print(f"entities: {result['counts']['entities']}")
    print(f"relations: {result['counts']['relations']}")
    print(f"vectors_live: {result['counts']['vectors_live']}")
    print(
        "coverage: "
        f"paragraph={result['coverage']['paragraph_vector_coverage']:.3f}, "
        f"entity={result['coverage']['entity_vector_coverage']:.3f}, "
        f"relation={result['coverage']['relation_vector_coverage']:.3f}, "
        f"relation_ready={result['coverage']['relation_ready_coverage']:.3f}"
    )
    print(f"relation_states: {result['relation_states']}")
    print(
        "consistency_checks: "
        f"ready_but_missing_vector={result['consistency_checks']['ready_but_missing_vector']}, "
        f"not_ready_but_vector_present={result['consistency_checks']['not_ready_but_vector_present']}"
    )
    print(f"orphan_vectors: {result['orphans']['vector_only_count']}")

    if args.json_out:
        out_path = Path(args.json_out).resolve()
        out_path.parent.mkdir(parents=True, exist_ok=True)
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        print(f"json_out: {out_path}")

    has_anomaly = (
        result["orphans"]["vector_only_count"] > 0
        or result["consistency_checks"]["ready_but_missing_vector"] > 0
    )
    if args.strict and has_anomaly:
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
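For reference, the audit can also be driven programmatically. A minimal sketch, assuming sys.path is prepared exactly as the script above does so that the core.storage imports resolve; the 0.95 threshold is illustrative, not part of the plugin:

# Programmatic use of run_audit() from the script above.
from pathlib import Path

report = run_audit(Path("plugins/A_memorix/data"))
coverage = report["coverage"]["relation_vector_coverage"]
orphans = report["orphans"]["vector_only_count"]
if coverage < 0.95 or orphans > 0:
    # Drift detected: backfill_relation_vectors.py (next file) is the companion repair tool.
    print(f"relation coverage {coverage:.3f}, orphan vectors {orphans}")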
270
plugins/A_memorix/scripts/backfill_relation_vectors.py
Normal file
@@ -0,0 +1,270 @@
#!/usr/bin/env python3
"""
关系向量一次性回填脚本(灰度/离线执行)。

用途:
1. 对 relations 中 vector_state in (none, failed, pending) 的记录补齐向量。
2. 支持并发控制,降低总耗时。
3. 可作为灰度阶段验证工具,与 audit_vector_consistency.py 配合使用。
4. 可选自动纳入“ready 但向量缺失”的漂移记录进行修复。
"""

from __future__ import annotations

import argparse
import asyncio
import json
import sys
import time
from pathlib import Path
from typing import Any, Dict, List

import tomlkit


CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
PROJECT_ROOT = PLUGIN_ROOT.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PLUGIN_ROOT))


def _build_arg_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="关系向量一次性回填")
    parser.add_argument(
        "--config",
        default=str(PLUGIN_ROOT / "config.toml"),
        help="配置文件路径(默认 plugins/A_memorix/config.toml)",
    )
    parser.add_argument(
        "--data-dir",
        default=str(PLUGIN_ROOT / "data"),
        help="数据目录(默认 plugins/A_memorix/data)",
    )
    parser.add_argument(
        "--states",
        default="none,failed,pending",
        help="待处理状态列表,逗号分隔",
    )
    parser.add_argument("--limit", type=int, default=50000, help="最大处理数量")
    parser.add_argument("--concurrency", type=int, default=8, help="并发数")
    parser.add_argument("--max-retry", type=int, default=None, help="最大重试次数过滤")
    parser.add_argument(
        "--include-ready-missing",
        action="store_true",
        help="额外纳入 vector_state=ready 但向量缺失的关系",
    )
    parser.add_argument("--dry-run", action="store_true", help="仅统计候选,不写入")
    return parser


# --help/-h fast path: avoid heavy host/plugin bootstrap
if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
    _build_arg_parser().print_help()
    raise SystemExit(0)

from core.storage import (
    VectorStore,
    GraphStore,
    MetadataStore,
    QuantizationType,
    SparseMatrixFormat,
)
from core.embedding import create_embedding_api_adapter
from core.utils.relation_write_service import RelationWriteService


def _load_config(config_path: Path) -> Dict[str, Any]:
    with open(config_path, "r", encoding="utf-8") as f:
        raw = tomlkit.load(f)
    return dict(raw) if isinstance(raw, dict) else {}


def _build_vector_store(data_dir: Path, emb_cfg: Dict[str, Any]) -> VectorStore:
    q_type = str(emb_cfg.get("quantization_type", "int8")).lower()
    if q_type != "int8":
        raise ValueError(
            "embedding.quantization_type 在 vNext 仅允许 int8(SQ8)。"
            " 请先执行 scripts/release_vnext_migrate.py migrate。"
        )
    dim = int(emb_cfg.get("dimension", 1024))
    store = VectorStore(
        dimension=max(1, dim),
        quantization_type=QuantizationType.INT8,
        data_dir=data_dir / "vectors",
    )
    if store.has_data():
        store.load()
    return store


def _build_graph_store(data_dir: Path, graph_cfg: Dict[str, Any]) -> GraphStore:
    fmt = str(graph_cfg.get("sparse_matrix_format", "csr")).lower()
    fmt_map = {
        "csr": SparseMatrixFormat.CSR,
        "csc": SparseMatrixFormat.CSC,
    }
    store = GraphStore(
        matrix_format=fmt_map.get(fmt, SparseMatrixFormat.CSR),
        data_dir=data_dir / "graph",
    )
    if store.has_data():
        store.load()
    return store


def _build_metadata_store(data_dir: Path) -> MetadataStore:
    store = MetadataStore(data_dir=data_dir / "metadata")
    store.connect()
    return store


def _build_embedding_manager(emb_cfg: Dict[str, Any]):
    retry_cfg = emb_cfg.get("retry", {})
    if not isinstance(retry_cfg, dict):
        retry_cfg = {}
    return create_embedding_api_adapter(
        batch_size=int(emb_cfg.get("batch_size", 32)),
        max_concurrent=int(emb_cfg.get("max_concurrent", 5)),
        default_dimension=int(emb_cfg.get("dimension", 1024)),
        model_name=str(emb_cfg.get("model_name", "auto")),
        retry_config=retry_cfg,
    )


async def _process_rows(
    service: RelationWriteService,
    rows: List[Dict[str, Any]],
    concurrency: int,
) -> Dict[str, int]:
    semaphore = asyncio.Semaphore(max(1, int(concurrency)))
    stat = {"success": 0, "failed": 0, "skipped": 0}

    async def _worker(row: Dict[str, Any]) -> None:
        async with semaphore:
            result = await service.ensure_relation_vector(
                hash_value=str(row["hash"]),
                subject=str(row.get("subject", "")),
                predicate=str(row.get("predicate", "")),
                obj=str(row.get("object", "")),
            )
            if result.vector_state == "ready":
                if result.vector_written:
                    stat["success"] += 1
                else:
                    stat["skipped"] += 1
            else:
                stat["failed"] += 1

    await asyncio.gather(*[_worker(row) for row in rows])
    return stat


async def main_async(args: argparse.Namespace) -> int:
    config_path = Path(args.config).resolve()
    if not config_path.exists():
        print(f"❌ 配置文件不存在: {config_path}")
        return 2

    cfg = _load_config(config_path)
    emb_cfg = cfg.get("embedding", {}) if isinstance(cfg, dict) else {}
    graph_cfg = cfg.get("graph", {}) if isinstance(cfg, dict) else {}
    retrieval_cfg = cfg.get("retrieval", {}) if isinstance(cfg, dict) else {}
    rv_cfg = retrieval_cfg.get("relation_vectorization", {}) if isinstance(retrieval_cfg, dict) else {}
    if not isinstance(emb_cfg, dict):
        emb_cfg = {}
    if not isinstance(graph_cfg, dict):
        graph_cfg = {}
    if not isinstance(rv_cfg, dict):
        rv_cfg = {}

    data_dir = Path(args.data_dir).resolve()
    if not data_dir.exists():
        print(f"❌ 数据目录不存在: {data_dir}")
        return 2

    print(f"data_dir: {data_dir}")
    print(f"config: {config_path}")

    vector_store = _build_vector_store(data_dir, emb_cfg)
    graph_store = _build_graph_store(data_dir, graph_cfg)
    metadata_store = _build_metadata_store(data_dir)
    embedding_manager = _build_embedding_manager(emb_cfg)
    service = RelationWriteService(
        metadata_store=metadata_store,
        graph_store=graph_store,
        vector_store=vector_store,
        embedding_manager=embedding_manager,
    )

    try:
        states = [s.strip() for s in str(args.states).split(",") if s.strip()]
        if not states:
            states = ["none", "failed", "pending"]
        max_retry = int(args.max_retry) if args.max_retry is not None else int(rv_cfg.get("max_retry", 3))
        limit = int(args.limit)

        rows = metadata_store.list_relations_by_vector_state(
            states=states,
            limit=max(1, limit),
            max_retry=max(1, max_retry),
        )
        added_ready_missing = 0
        if args.include_ready_missing:
            ready_rows = metadata_store.list_relations_by_vector_state(
                states=["ready"],
                limit=max(1, limit),
                max_retry=max(1, max_retry),
            )
            ready_missing_rows = [
                row for row in ready_rows if str(row.get("hash", "")) not in vector_store
            ]
            added_ready_missing = len(ready_missing_rows)
            if ready_missing_rows:
                dedup: Dict[str, Dict[str, Any]] = {}
                for row in rows:
                    dedup[str(row.get("hash", ""))] = row
                for row in ready_missing_rows:
                    dedup.setdefault(str(row.get("hash", "")), row)
                rows = list(dedup.values())[: max(1, limit)]
        print(f"candidates: {len(rows)} (states={states}, max_retry={max_retry})")
        if args.include_ready_missing:
            print(f"ready_missing_candidates_added: {added_ready_missing}")
        if not rows:
            return 0

        if args.dry_run:
            print("dry_run=true,未执行写入。")
            return 0

        started = time.time()
        stat = await _process_rows(
            service=service,
            rows=rows,
            concurrency=int(args.concurrency),
        )
        elapsed = (time.time() - started) * 1000.0

        vector_store.save()
        graph_store.save()
        state_stats = metadata_store.count_relations_by_vector_state()
        output = {
            "processed": len(rows),
            "success": int(stat["success"]),
            "failed": int(stat["failed"]),
            "skipped": int(stat["skipped"]),
            "elapsed_ms": elapsed,
            "state_stats": state_stats,
        }
        print(json.dumps(output, ensure_ascii=False, indent=2))
        return 0 if stat["failed"] == 0 else 1
    finally:
        metadata_store.close()


def parse_args() -> argparse.Namespace:
    return _build_arg_parser().parse_args()


if __name__ == "__main__":
    arguments = parse_args()
    raise SystemExit(asyncio.run(main_async(arguments)))
73
plugins/A_memorix/scripts/backfill_temporal_metadata.py
Normal file
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""
回填段落时序字段。

默认策略:
1. 若段落缺失 event_time/event_time_start/event_time_end
2. 且存在 created_at
3. 写入 event_time=created_at, time_granularity=day, time_confidence=0.2
"""

from __future__ import annotations

import argparse
from pathlib import Path
import sys


CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
PROJECT_ROOT = PLUGIN_ROOT.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

from plugins.A_memorix.core.storage import MetadataStore  # noqa: E402


def backfill(
    data_dir: Path,
    dry_run: bool,
    limit: int,
    no_created_fallback: bool,
) -> int:
    store = MetadataStore(data_dir=data_dir)
    store.connect()
    summary = store.backfill_temporal_metadata_from_created_at(
        limit=limit,
        dry_run=dry_run,
        no_created_fallback=no_created_fallback,
    )
    store.close()
    if dry_run:
        print(f"[dry-run] candidates={summary['candidates']}")
        return int(summary["candidates"])
    if no_created_fallback:
        print(f"skip update (no-created-fallback), candidates={summary['candidates']}")
        return 0
    print(f"updated={summary['updated']}")
    return int(summary["updated"])


def main() -> int:
    parser = argparse.ArgumentParser(description="Backfill temporal metadata for A_Memorix paragraphs")
    parser.add_argument("--data-dir", default=str(PLUGIN_ROOT / "data"), help="数据目录")
    parser.add_argument("--dry-run", action="store_true", help="仅统计,不写入")
    parser.add_argument("--limit", type=int, default=100000, help="最大处理条数")
    parser.add_argument(
        "--no-created-fallback",
        action="store_true",
        help="不使用 created_at 回填,仅输出候选数量",
    )
    args = parser.parse_args()

    backfill(
        data_dir=Path(args.data_dir),
        dry_run=args.dry_run,
        limit=max(1, int(args.limit)),
        no_created_fallback=args.no_created_fallback,
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
@@ -46,9 +46,14 @@ if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
     _build_arg_parser().print_help()
     sys.exit(0)
 
-# 设置日志
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-logger = logging.getLogger("LPMM_Converter")
+# 设置日志:优先复用 MaiBot 统一日志体系,失败时回退到标准 logging。
+try:
+    from src.common.logger import get_logger
+
+    logger = get_logger("A_Memorix.LPMMConverter")
+except Exception:
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+    logger = logging.getLogger("A_Memorix.LPMMConverter")
 
 try:
     import networkx as nx
@@ -225,11 +230,11 @@ class LPMMConverter:
                 failed += 1
 
         logger.info(
-            "关系向量重建完成: total=%s success=%s skipped=%s failed=%s",
-            len(rows),
-            success,
-            skipped,
-            failed,
+            "关系向量重建完成: "
+            f"total={len(rows)} "
+            f"success={success} "
+            f"skipped={skipped} "
+            f"failed={failed}"
         )
 
     @staticmethod
@@ -317,8 +322,8 @@ class LPMMConverter:
             if p_type == "relation":
                 relation_count = self._import_relation_metadata_from_parquet(p_path)
                 logger.warning(
-                    "跳过 relation.parquet 向量导入(保持一致性);已导入关系元数据: %s",
-                    relation_count,
+                    "跳过 relation.parquet 向量导入(保持一致性);"
+                    f"已导入关系元数据: {relation_count}"
                 )
                 continue
 
172
plugins/A_memorix/scripts/import_lpmm_json.py
Normal file
@@ -0,0 +1,172 @@
#!/usr/bin/env python3
"""
LPMM OpenIE JSON 导入工具。

功能:
1. 读取符合 LPMM 规范的 OpenIE JSON 文件
2. 转换为 A_Memorix 的统一导入格式
3. 复用 `process_knowledge.py` 中的 `AutoImporter` 直接入库
"""

from __future__ import annotations

import argparse
import asyncio
import json
import sys
import traceback
from pathlib import Path
from typing import Any, Dict, List

from rich.console import Console
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn

console = Console()

CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
WORKSPACE_ROOT = PLUGIN_ROOT.parent
MAIBOT_ROOT = WORKSPACE_ROOT / "MaiBot"
for path in (CURRENT_DIR, WORKSPACE_ROOT, MAIBOT_ROOT, PLUGIN_ROOT):
    path_str = str(path)
    if path_str not in sys.path:
        sys.path.insert(0, path_str)


def _build_arg_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="将 LPMM OpenIE JSON 导入 A_Memorix")
    parser.add_argument("path", help="LPMM JSON 文件路径或目录")
    parser.add_argument("--force", action="store_true", help="强制重新导入")
    parser.add_argument("--concurrency", "-c", type=int, default=5, help="并发数")
    return parser


if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
    _build_arg_parser().print_help()
    raise SystemExit(0)


try:
    from process_knowledge import AutoImporter
    from A_memorix.core.utils.hash import compute_paragraph_hash
    from src.common.logger import get_logger
except ImportError as exc:  # pragma: no cover - script bootstrap
    print(f"导入模块失败,请确认 PYTHONPATH 与工作区结构: {exc}")
    raise SystemExit(1)


logger = get_logger("A_Memorix.LPMMImport")


class LPMMConverter:
    def convert_lpmm_to_memorix(self, lpmm_data: Dict[str, Any], filename: str) -> Dict[str, Any]:
        memorix_data = {"paragraphs": [], "entities": []}
        docs = lpmm_data.get("docs", []) or []
        if not docs:
            logger.warning(f"文件中未找到 docs 字段: {filename}")
            return memorix_data

        all_entities = set()
        for doc in docs:
            content = str(doc.get("passage", "") or "").strip()
            if not content:
                continue

            relations: List[Dict[str, str]] = []
            for triple in doc.get("extracted_triples", []) or []:
                if isinstance(triple, list) and len(triple) == 3:
                    relations.append(
                        {
                            "subject": str(triple[0] or "").strip(),
                            "predicate": str(triple[1] or "").strip(),
                            "object": str(triple[2] or "").strip(),
                        }
                    )

            entities = [str(item or "").strip() for item in doc.get("extracted_entities", []) or [] if str(item or "").strip()]
            all_entities.update(entities)
            for relation in relations:
                if relation["subject"]:
                    all_entities.add(relation["subject"])
                if relation["object"]:
                    all_entities.add(relation["object"])

            memorix_data["paragraphs"].append(
                {
                    "hash": compute_paragraph_hash(content),
                    "content": content,
                    "source": filename,
                    "entities": entities,
                    "relations": relations,
                }
            )

        memorix_data["entities"] = sorted(all_entities)
        return memorix_data


async def main() -> None:
    parser = _build_arg_parser()
    args = parser.parse_args()

    target_path = Path(args.path)
    if not target_path.exists():
        logger.error(f"路径不存在: {target_path}")
        return

    if target_path.is_dir():
        files_to_process = list(target_path.glob("*-openie.json")) or list(target_path.glob("*.json"))
    else:
        files_to_process = [target_path]

    if not files_to_process:
        logger.error("未找到可处理的 JSON 文件")
        return

    importer = AutoImporter(force=bool(args.force), concurrency=int(args.concurrency))
    if not await importer.initialize():
        logger.error("初始化存储失败")
        return

    converter = LPMMConverter()
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
        console=console,
        transient=False,
    ) as progress:
        for json_file in files_to_process:
            logger.info(f"正在转换并导入: {json_file.name}")
            try:
                with open(json_file, "r", encoding="utf-8") as handle:
                    lpmm_data = json.load(handle)
                memorix_data = converter.convert_lpmm_to_memorix(lpmm_data, json_file.name)
                total_items = len(memorix_data.get("paragraphs", []))
                if total_items <= 0:
                    logger.warning(f"转换结果为空: {json_file.name}")
                    continue

                task_id = progress.add_task(f"Importing {json_file.name}", total=total_items)

                def update_progress(step: int = 1) -> None:
                    progress.advance(task_id, advance=step)

                await importer.import_json_data(
                    memorix_data,
                    filename=f"lpmm_{json_file.name}",
                    progress_callback=update_progress,
                )
            except Exception as exc:
                logger.error(f"处理文件 {json_file.name} 失败: {exc}\n{traceback.format_exc()}")

    await importer.close()
    logger.info("全部处理完成")


if __name__ == "__main__":
    if sys.platform == "win32":  # pragma: no cover
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.run(main())
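For orientation, the input shape convert_lpmm_to_memorix expects can be sketched as follows. The field names ("docs", "passage", "extracted_triples", "extracted_entities") come straight from the reader above; the concrete values are invented for illustration:

# Illustrative LPMM OpenIE payload; only the fields read above matter.
lpmm_example = {
    "docs": [
        {
            "passage": "示例段落:A_Memorix 支持图关系召回。",
            "extracted_triples": [["A_Memorix", "支持", "图关系召回"]],
            "extracted_entities": ["A_Memorix", "图关系召回"],
        }
    ]
}
# LPMMConverter().convert_lpmm_to_memorix(lpmm_example, "demo-openie.json") yields one
# paragraph (with hash/content/source), its entities, and one subject/predicate/object relation.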
1714
plugins/A_memorix/scripts/migrate_maibot_memory.py
Normal file
File diff suppressed because it is too large
728
plugins/A_memorix/scripts/process_knowledge.py
Normal file
@@ -0,0 +1,728 @@
#!/usr/bin/env python3
"""
知识库自动导入脚本 (Strategy-Aware Version)

功能:
1. 扫描 plugins/A_memorix/data/raw 下的 .txt 文件
2. 检查 data/import_manifest.json 确认是否已导入
3. 使用 Strategy 模式处理文件 (Narrative/Factual/Quote)
4. 将生成的数据直接存入 VectorStore/GraphStore/MetadataStore
5. 更新 manifest
"""

import sys
import os
import json
import asyncio
import time
import random
import hashlib
import tomlkit
import argparse
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any, Optional
from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeElapsedColumn
from rich.console import Console
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

console = Console()


class LLMGenerationError(Exception):
    pass


# 路径设置
current_dir = Path(__file__).resolve().parent
plugin_root = current_dir.parent
workspace_root = plugin_root.parent
maibot_root = workspace_root / "MaiBot"
for path in (workspace_root, maibot_root, plugin_root):
    path_str = str(path)
    if path_str not in sys.path:
        sys.path.insert(0, path_str)

# 数据目录
DATA_DIR = plugin_root / "data"
RAW_DIR = DATA_DIR / "raw"
PROCESSED_DIR = DATA_DIR / "processed"
MANIFEST_PATH = DATA_DIR / "import_manifest.json"


def _build_arg_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="A_Memorix Knowledge Importer (Strategy-Aware)")
    parser.add_argument("--force", action="store_true", help="Force re-import")
    parser.add_argument("--clear-manifest", action="store_true", help="Clear manifest")
    parser.add_argument(
        "--type",
        "-t",
        default="auto",
        help="Target import strategy override (auto/narrative/factual/quote)",
    )
    parser.add_argument("--concurrency", "-c", type=int, default=5)
    parser.add_argument(
        "--chat-log",
        action="store_true",
        help="聊天记录导入模式:强制 narrative 策略,并使用 LLM 语义抽取 event_time/event_time_range",
    )
    parser.add_argument(
        "--chat-reference-time",
        default=None,
        help="chat_log 模式的相对时间参考点(如 2026/02/12 10:30);不传则使用当前本地时间",
    )
    return parser


# --help/-h fast path: avoid heavy host/plugin bootstrap
if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
    _build_arg_parser().print_help()
    sys.exit(0)


try:
    import A_memorix.core as core_module
    import A_memorix.core.storage as storage_module
    from src.common.logger import get_logger
    from src.services import llm_service as llm_api
    from src.config.config import global_config, model_config

    VectorStore = core_module.VectorStore
    GraphStore = core_module.GraphStore
    MetadataStore = core_module.MetadataStore
    ImportStrategy = core_module.ImportStrategy
    create_embedding_api_adapter = core_module.create_embedding_api_adapter
    RelationWriteService = getattr(core_module, "RelationWriteService", None)

    looks_like_quote_text = storage_module.looks_like_quote_text
    parse_import_strategy = storage_module.parse_import_strategy
    resolve_stored_knowledge_type = storage_module.resolve_stored_knowledge_type
    select_import_strategy = storage_module.select_import_strategy

    from A_memorix.core.utils.time_parser import normalize_time_meta
    from A_memorix.core.utils.import_payloads import normalize_paragraph_import_item
    from A_memorix.core.strategies.base import BaseStrategy, ProcessedChunk, KnowledgeType as StratKnowledgeType
    from A_memorix.core.strategies.narrative import NarrativeStrategy
    from A_memorix.core.strategies.factual import FactualStrategy
    from A_memorix.core.strategies.quote import QuoteStrategy

except ImportError as e:
    print(f"❌ 无法导入模块: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)

logger = get_logger("A_Memorix.AutoImport")


def _log_before_retry(retry_state) -> None:
    """使用项目统一日志风格记录重试信息。"""
    exc = None
    if getattr(retry_state, "outcome", None) is not None and retry_state.outcome.failed:
        exc = retry_state.outcome.exception()
    next_sleep = getattr(getattr(retry_state, "next_action", None), "sleep", None)
    logger.warning(
        "LLM 调用即将重试: "
        f"attempt={getattr(retry_state, 'attempt_number', '?')} "
        f"next_sleep={next_sleep} "
        f"error={exc}"
    )


class AutoImporter:
    def __init__(
        self,
        force: bool = False,
        clear_manifest: bool = False,
        target_type: str = "auto",
        concurrency: int = 5,
        chat_log: bool = False,
        chat_reference_time: Optional[str] = None,
    ):
        self.vector_store: Optional[VectorStore] = None
        self.graph_store: Optional[GraphStore] = None
        self.metadata_store: Optional[MetadataStore] = None
        self.embedding_manager = None
        self.relation_write_service = None
        self.plugin_config = {}
        self.manifest = {}
        self.force = force
        self.clear_manifest = clear_manifest
        self.chat_log = chat_log
        parsed_target_type = parse_import_strategy(target_type, default=ImportStrategy.AUTO)
        self.target_type = ImportStrategy.NARRATIVE.value if chat_log else parsed_target_type.value
        self.chat_reference_dt = self._parse_reference_time(chat_reference_time)
        if self.chat_log and parsed_target_type not in {ImportStrategy.AUTO, ImportStrategy.NARRATIVE}:
            logger.warning(
                f"chat_log 模式已启用,target_type={target_type} 将被覆盖为 narrative"
            )
        self.concurrency_limit = concurrency
        self.semaphore = None
        self.storage_lock = None

    async def initialize(self):
        logger.info(f"正在初始化... (并发数: {self.concurrency_limit})")
        self.semaphore = asyncio.Semaphore(self.concurrency_limit)
        self.storage_lock = asyncio.Lock()

        RAW_DIR.mkdir(parents=True, exist_ok=True)
        PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

        if self.clear_manifest:
            logger.info("🧹 清理 Manifest")
            self.manifest = {}
            self._save_manifest()
        elif MANIFEST_PATH.exists():
            try:
                with open(MANIFEST_PATH, "r", encoding="utf-8") as f:
                    self.manifest = json.load(f)
            except Exception:
                self.manifest = {}

        config_path = plugin_root / "config.toml"
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                self.plugin_config = tomlkit.load(f)
        except Exception as e:
            logger.error(f"加载插件配置失败: {e}")
            return False

        try:
            await self._init_stores()
        except Exception as e:
            logger.error(f"初始化存储失败: {e}")
            return False

        return True

    async def _init_stores(self):
        # ... (Same as original)
        self.embedding_manager = create_embedding_api_adapter(
            batch_size=self.plugin_config.get("embedding", {}).get("batch_size", 32),
            default_dimension=self.plugin_config.get("embedding", {}).get("dimension", 384),
            model_name=self.plugin_config.get("embedding", {}).get("model_name", "auto"),
            retry_config=self.plugin_config.get("embedding", {}).get("retry", {}),
        )
        try:
            dim = await self.embedding_manager._detect_dimension()
        except Exception:
            dim = self.embedding_manager.default_dimension

        q_type_str = str(self.plugin_config.get("embedding", {}).get("quantization_type", "int8") or "int8").lower()
        # Need to access QuantizationType from storage_module if not imported globally
        QuantizationType = storage_module.QuantizationType
        if q_type_str != "int8":
            raise ValueError(
                "embedding.quantization_type 在 vNext 仅允许 int8(SQ8)。"
                " 请先执行 scripts/release_vnext_migrate.py migrate。"
            )

        self.vector_store = VectorStore(
            dimension=dim,
            quantization_type=QuantizationType.INT8,
            data_dir=DATA_DIR / "vectors"
        )

        SparseMatrixFormat = storage_module.SparseMatrixFormat
        m_fmt_str = self.plugin_config.get("graph", {}).get("sparse_matrix_format", "csr")
        m_map = {"csr": SparseMatrixFormat.CSR, "csc": SparseMatrixFormat.CSC}

        self.graph_store = GraphStore(
            matrix_format=m_map.get(m_fmt_str, SparseMatrixFormat.CSR),
            data_dir=DATA_DIR / "graph"
        )

        self.metadata_store = MetadataStore(data_dir=DATA_DIR / "metadata")
        self.metadata_store.connect()

        if RelationWriteService is not None:
            self.relation_write_service = RelationWriteService(
                metadata_store=self.metadata_store,
                graph_store=self.graph_store,
                vector_store=self.vector_store,
                embedding_manager=self.embedding_manager,
            )

        if self.vector_store.has_data(): self.vector_store.load()
        if self.graph_store.has_data(): self.graph_store.load()

    def _should_write_relation_vectors(self) -> bool:
        retrieval_cfg = self.plugin_config.get("retrieval", {})
        if not isinstance(retrieval_cfg, dict):
            return False
        rv_cfg = retrieval_cfg.get("relation_vectorization", {})
        if not isinstance(rv_cfg, dict):
            return False
        return bool(rv_cfg.get("enabled", False)) and bool(rv_cfg.get("write_on_import", True))

    def load_file(self, file_path: Path) -> str:
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    def get_file_hash(self, content: str) -> str:
        return hashlib.md5(content.encode("utf-8")).hexdigest()

    def _parse_reference_time(self, value: Optional[str]) -> datetime:
        """解析 chat_log 模式的参考时间(用于相对时间语义解析)。"""
        if not value:
            return datetime.now()
        formats = [
            "%Y/%m/%d %H:%M:%S",
            "%Y/%m/%d %H:%M",
            "%Y-%m-%d %H:%M:%S",
            "%Y-%m-%d %H:%M",
            "%Y/%m/%d",
            "%Y-%m-%d",
        ]
        text = str(value).strip()
        for fmt in formats:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
        logger.warning(
            f"无法解析 chat_reference_time={value},将回退为当前本地时间"
        )
        return datetime.now()

    async def _extract_chat_time_meta_with_llm(
        self,
        text: str,
        model_config: Any,
    ) -> Optional[Dict[str, Any]]:
        """
        使用 LLM 从聊天文本语义中抽取时间信息。
        支持将相对时间表达转换为绝对时间。
        """
        if not text.strip():
            return None

        reference_now = self.chat_reference_dt.strftime("%Y/%m/%d %H:%M")
        prompt = f"""You are a time extraction engine for chat logs.
Extract temporal information from the following chat paragraph.

Rules:
1. Use semantic understanding, not regex matching.
2. Convert relative expressions (e.g., yesterday evening, last Friday morning) to absolute local datetime using reference_now.
3. If a time span exists, return event_time_start/event_time_end.
4. If only one point in time exists, return event_time.
5. If no reliable time can be inferred, return all time fields as null.
6. Output ONLY valid JSON. No markdown, no explanation.

reference_now: {reference_now}
timezone: local system timezone

Allowed output formats for time values:
- "YYYY/MM/DD"
- "YYYY/MM/DD HH:mm"

JSON schema:
{{
  "event_time": null,
  "event_time_start": null,
  "event_time_end": null,
  "time_range": null,
  "time_granularity": "day",
  "time_confidence": 0.0
}}

Chat paragraph:
\"\"\"{text}\"\"\"
"""
        try:
            result = await self._llm_call(prompt, model_config)
        except Exception as e:
            logger.warning(f"chat_log 时间语义抽取失败: {e}")
            return None

        if not isinstance(result, dict):
            return None

        raw_time_meta = {
            "event_time": result.get("event_time"),
            "event_time_start": result.get("event_time_start"),
            "event_time_end": result.get("event_time_end"),
            "time_range": result.get("time_range"),
            "time_granularity": result.get("time_granularity"),
            "time_confidence": result.get("time_confidence"),
        }
        try:
            normalized = normalize_time_meta(raw_time_meta)
        except Exception as e:
            logger.warning(f"chat_log 时间语义抽取结果不可用,已忽略: {e}")
            return None

        has_effective_time = any(
            key in normalized
            for key in ("event_time", "event_time_start", "event_time_end")
        )
        if not has_effective_time:
            return None

        return normalized

    def _determine_strategy(self, filename: str, content: str) -> BaseStrategy:
        """Layer 1: Global Strategy Routing"""
        strategy = select_import_strategy(
            content,
            override=self.target_type,
            chat_log=self.chat_log,
        )
        if self.chat_log:
            logger.info(f"chat_log 模式: {filename} 强制使用 NarrativeStrategy")
        elif strategy == ImportStrategy.QUOTE:
            logger.info(f"Auto-detected Quote/Lyric type for {filename}")

        if strategy == ImportStrategy.FACTUAL:
            return FactualStrategy(filename)
        if strategy == ImportStrategy.QUOTE:
            return QuoteStrategy(filename)
        return NarrativeStrategy(filename)

    def _chunk_rescue(self, chunk: ProcessedChunk, filename: str) -> Optional[BaseStrategy]:
        """Layer 2: Chunk-level rescue strategies"""
        # If we are already in Quote strategy, no need to rescue
        if chunk.type == StratKnowledgeType.QUOTE:
            return None

        if looks_like_quote_text(chunk.chunk.text):
            logger.info(f" > Rescuing chunk {chunk.chunk.index} as Quote")
            return QuoteStrategy(filename)

        return None

    async def process_and_import(self):
        if not await self.initialize(): return

        files = list(RAW_DIR.glob("*.txt"))
        logger.info(f"扫描到 {len(files)} 个文件 in {RAW_DIR}")

        if not files: return

        tasks = []
        for file_path in files:
            tasks.append(asyncio.create_task(self._process_single_file(file_path)))

        results = await asyncio.gather(*tasks, return_exceptions=True)

        success_count = sum(1 for r in results if r is True)
        logger.info(f"本次主处理完成,共成功处理 {success_count}/{len(files)} 个文件")

        if self.vector_store: self.vector_store.save()
        if self.graph_store: self.graph_store.save()

    async def _process_single_file(self, file_path: Path) -> bool:
        filename = file_path.name
        async with self.semaphore:
            try:
                content = self.load_file(file_path)
                file_hash = self.get_file_hash(content)

                if not self.force and filename in self.manifest:
                    record = self.manifest[filename]
                    if record.get("hash") == file_hash and record.get("imported"):
                        logger.info(f"跳过已导入文件: {filename}")
                        return False

                logger.info(f">>> 开始处理: {filename}")

                # 1. Strategy Selection
                strategy = self._determine_strategy(filename, content)
                logger.info(f"  策略: {strategy.__class__.__name__}")

                # 2. Split (Strategy-Aware)
                initial_chunks = strategy.split(content)
                logger.info(f"  初步分块: {len(initial_chunks)}")

                processed_data = {"paragraphs": [], "entities": [], "relations": []}

                # 3. Extract Loop
                model_config = await self._select_model()

                for i, chunk in enumerate(initial_chunks):
                    current_strategy = strategy
                    # Layer 2: Chunk Rescue
                    rescue_strategy = self._chunk_rescue(chunk, filename)
                    if rescue_strategy:
                        # Re-split? No, just re-process this text as a single chunk using the rescue strategy
                        # But rescue strategy might want to split it further?
                        # Simplification: Treat the whole chunk text as one block for the rescue strategy
                        # OR create a single chunk object for it.
                        # Creating a new chunk using rescue strategy logic might be complex if split behavior differs.
                        # Let's just instantiate a chunk of the new type manually
                        chunk.type = StratKnowledgeType.QUOTE
                        chunk.flags.verbatim = True
                        chunk.flags.requires_llm = False  # Quotes don't usually need LLM
                        current_strategy = rescue_strategy

                    # Extraction
                    if chunk.flags.requires_llm:
                        result_chunk = await current_strategy.extract(chunk, lambda p: self._llm_call(p, model_config))
                    else:
                        # For quotes, extract might be just pass through or regex
                        result_chunk = await current_strategy.extract(chunk)

                    time_meta = None
                    if self.chat_log:
                        time_meta = await self._extract_chat_time_meta_with_llm(
                            result_chunk.chunk.text,
                            model_config,
                        )

                    # Normalize Data
                    self._normalize_and_aggregate(
                        result_chunk,
                        processed_data,
                        time_meta=time_meta,
                    )

                    logger.info(f"  已处理块 {i+1}/{len(initial_chunks)}")

                # 4. Save Json
                json_path = PROCESSED_DIR / f"{file_path.stem}.json"
                with open(json_path, "w", encoding="utf-8") as f:
                    json.dump(processed_data, f, ensure_ascii=False, indent=2)

                # 5. Import to DB
                async with self.storage_lock:
                    await self._import_to_db(processed_data)

                self.manifest[filename] = {
                    "hash": file_hash,
                    "timestamp": time.time(),
                    "imported": True
                }
                self._save_manifest()
                self.vector_store.save()
                self.graph_store.save()
                logger.info(f"✅ 文件 {filename} 处理并导入完成")
                return True

            except Exception as e:
                logger.error(f"❌ 处理失败 {filename}: {e}")
                import traceback
                traceback.print_exc()
                return False

    def _normalize_and_aggregate(
        self,
        chunk: ProcessedChunk,
        all_data: Dict,
        time_meta: Optional[Dict[str, Any]] = None,
    ):
        """Convert strategy-specific data to unified generic format for storage."""
        # Generic fields
        para_item = {
            "content": chunk.chunk.text,
            "source": chunk.source.file,
            "knowledge_type": resolve_stored_knowledge_type(
                chunk.type.value,
                content=chunk.chunk.text,
            ).value,
            "entities": [],
            "relations": []
        }

        data = chunk.data

        # 1. Triples (Factual)
        if "triples" in data:
            for t in data["triples"]:
                para_item["relations"].append({
                    "subject": t.get("subject"),
                    "predicate": t.get("predicate"),
                    "object": t.get("object")
                })
                # Auto-add entities from triples
                para_item["entities"].extend([t.get("subject"), t.get("object")])

        # 2. Events & Relations (Narrative)
        if "events" in data:
            # Store events as content/metadata? Or entities?
            # For now maybe just keep them in logic, or add as 'Event' entities?
            # Creating entities for events is good.
            para_item["entities"].extend(data["events"])

        if "relations" in data:  # Narrative also outputs relations list
            para_item["relations"].extend(data["relations"])
            for r in data["relations"]:
                para_item["entities"].extend([r.get("subject"), r.get("object")])

        # 3. Verbatim Entities (Quote)
        if "verbatim_entities" in data:
            para_item["entities"].extend(data["verbatim_entities"])

        # Dedupe per paragraph
        para_item["entities"] = list(set([e for e in para_item["entities"] if e]))

        if time_meta:
            para_item["time_meta"] = time_meta

        all_data["paragraphs"].append(para_item)
        all_data["entities"].extend(para_item["entities"])
        if "relations" in para_item:
            all_data["relations"].extend(para_item["relations"])

    @retry(
        retry=retry_if_exception_type((LLMGenerationError, json.JSONDecodeError)),
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        before_sleep=_log_before_retry
    )
    async def _llm_call(self, prompt: str, model_config: Any) -> Dict:
        """Generic LLM Caller"""
        success, response, _, _ = await llm_api.generate_with_model(
            prompt=prompt,
            model_config=model_config,
            request_type="Script.ProcessKnowledge"
        )
        if success:
            txt = response.strip()
            if "```" in txt:
                txt = txt.split("```json")[-1].split("```")[0].strip()
            try:
                return json.loads(txt)
            except json.JSONDecodeError:
                # Fallback: try to find first { and last }
                start = txt.find('{')
                end = txt.rfind('}')
                if start != -1 and end != -1:
                    return json.loads(txt[start:end+1])
                raise
        else:
            raise LLMGenerationError("LLM generation failed")

    async def _select_model(self) -> Any:
        models = llm_api.get_available_models()
        if not models: raise ValueError("No LLM models")

        config_model = self.plugin_config.get("advanced", {}).get("extraction_model", "auto")
        if config_model != "auto" and config_model in models:
            return models[config_model]

        for task_key in ["lpmm_entity_extract", "lpmm_rdf_build", "embedding"]:
            if task_key in models: return models[task_key]

        return models[list(models.keys())[0]]

    # Re-use existing methods
    async def _add_entity_with_vector(self, name: str, source_paragraph: Optional[str] = None) -> str:
        # Same as before
        hash_value = self.metadata_store.add_entity(name, source_paragraph=source_paragraph)
        self.graph_store.add_nodes([name])
        try:
            emb = await self.embedding_manager.encode(name)
            try:
                self.vector_store.add(emb.reshape(1, -1), [hash_value])
            except ValueError: pass
        except Exception: pass
        return hash_value

    async def import_json_data(self, data: Dict, filename: str = "script_import", progress_callback=None):
        """Public import entrypoint for pre-processed JSON payloads."""
        if not self.storage_lock:
            raise RuntimeError("Importer is not initialized. Call initialize() first.")

        async with self.storage_lock:
            await self._import_to_db(data, progress_callback=progress_callback)
        self.manifest[filename] = {
            "hash": self.get_file_hash(json.dumps(data, ensure_ascii=False, sort_keys=True)),
            "timestamp": time.time(),
            "imported": True,
        }
        self._save_manifest()
        self.vector_store.save()
        self.graph_store.save()

    async def _import_to_db(self, data: Dict, progress_callback=None):
        # Same logic, but ensure robust
        with self.graph_store.batch_update():
            for item in data.get("paragraphs", []):
                paragraph = normalize_paragraph_import_item(
                    item,
                    default_source="script",
                )
                content = paragraph["content"]
                source = paragraph["source"]
                k_type_val = paragraph["knowledge_type"]

                h_val = self.metadata_store.add_paragraph(
                    content=content,
                    source=source,
                    knowledge_type=k_type_val,
                    time_meta=paragraph["time_meta"],
                )

                if h_val not in self.vector_store:
                    try:
                        emb = await self.embedding_manager.encode(content)
                        self.vector_store.add(emb.reshape(1, -1), [h_val])
                    except Exception as e:
                        logger.error(f"  Vector fail: {e}")

                para_entities = paragraph["entities"]
                for entity in para_entities:
                    if entity:
                        await self._add_entity_with_vector(entity, source_paragraph=h_val)

                para_relations = paragraph["relations"]
                for rel in para_relations:
                    s, p, o = rel.get("subject"), rel.get("predicate"), rel.get("object")
                    if s and p and o:
                        await self._add_entity_with_vector(s, source_paragraph=h_val)
                        await self._add_entity_with_vector(o, source_paragraph=h_val)
                        confidence = float(rel.get("confidence", 1.0) or 1.0)
                        rel_meta = rel.get("metadata", {})
                        write_vector = self._should_write_relation_vectors()
                        if self.relation_write_service is not None:
                            await self.relation_write_service.upsert_relation_with_vector(
                                subject=s,
                                predicate=p,
                                obj=o,
                                confidence=confidence,
                                source_paragraph=h_val,
                                metadata=rel_meta if isinstance(rel_meta, dict) else {},
                                write_vector=write_vector,
                            )
                        else:
                            rel_hash = self.metadata_store.add_relation(
                                s,
                                p,
                                o,
                                confidence=confidence,
                                source_paragraph=h_val,
                                metadata=rel_meta if isinstance(rel_meta, dict) else {},
                            )
                            self.graph_store.add_edges([(s, o)], relation_hashes=[rel_hash])
                            try:
                                self.metadata_store.set_relation_vector_state(rel_hash, "none")
                            except Exception:
                                pass

                if progress_callback: progress_callback(1)

    async def close(self):
        if self.metadata_store: self.metadata_store.close()

    def _save_manifest(self):
        with open(MANIFEST_PATH, "w", encoding="utf-8") as f:
            json.dump(self.manifest, f, ensure_ascii=False, indent=2)


async def main():
    parser = _build_arg_parser()
    args = parser.parse_args()

    if not global_config: return

    importer = AutoImporter(
        force=args.force,
        clear_manifest=args.clear_manifest,
        target_type=args.type,
        concurrency=args.concurrency,
        chat_log=args.chat_log,
        chat_reference_time=args.chat_reference_time,
    )
    await importer.process_and_import()
    await importer.close()


if __name__ == "__main__":
    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.run(main())
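For reference, the unified payload that import_json_data()/_import_to_db() consume (as assembled by _normalize_and_aggregate() above) can be sketched as follows. The field names mirror the code above; the "narrative" knowledge_type string and all concrete values are assumptions made for illustration:

# Illustrative pre-processed payload for AutoImporter.import_json_data().
payload_example = {
    "paragraphs": [
        {
            "content": "示例段落文本。",
            "source": "example.txt",
            "knowledge_type": "narrative",
            "entities": ["实体A", "实体B"],
            "relations": [
                {"subject": "实体A", "predicate": "关联", "object": "实体B", "confidence": 0.9}
            ],
            "time_meta": {"event_time": "2026/02/12 10:30", "time_granularity": "day", "time_confidence": 0.2},
        }
    ],
    "entities": ["实体A", "实体B"],
    "relations": [{"subject": "实体A", "predicate": "关联", "object": "实体B"}],
}
# Typical use (after `await importer.initialize()`):
#     await importer.import_json_data(payload_example, filename="manual_payload")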
127
plugins/A_memorix/scripts/rebuild_episodes.py
Normal file
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
"""Episode source 级重建工具。"""

from __future__ import annotations

import argparse
import asyncio
import sys
from pathlib import Path
from typing import Any, Dict, List

CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
WORKSPACE_ROOT = PLUGIN_ROOT.parent
MAIBOT_ROOT = WORKSPACE_ROOT / "MaiBot"
for path in (WORKSPACE_ROOT, MAIBOT_ROOT, PLUGIN_ROOT):
    path_str = str(path)
    if path_str not in sys.path:
        sys.path.insert(0, path_str)

try:
    import tomlkit  # type: ignore
except Exception:  # pragma: no cover
    tomlkit = None

from A_memorix.core.storage import MetadataStore
from A_memorix.core.utils.episode_service import EpisodeService


def _build_arg_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="Rebuild A_Memorix episodes by source")
    parser.add_argument("--data-dir", default=str(PLUGIN_ROOT / "data"), help="插件数据目录")
    parser.add_argument("--source", type=str, help="指定单个 source 入队/重建")
    parser.add_argument("--all", action="store_true", help="对所有 source 入队/重建")
    parser.add_argument("--wait", action="store_true", help="在脚本内同步执行重建")
    return parser


if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
    _build_arg_parser().print_help()
    raise SystemExit(0)


def _load_plugin_config() -> Dict[str, Any]:
    config_path = PLUGIN_ROOT / "config.toml"
    if tomlkit is None or not config_path.exists():
        return {}
    try:
        with open(config_path, "r", encoding="utf-8") as handle:
            parsed = tomlkit.load(handle)
            return dict(parsed) if isinstance(parsed, dict) else {}
    except Exception:
        return {}


def _resolve_sources(store: MetadataStore, *, source: str | None, rebuild_all: bool) -> List[str]:
    if rebuild_all:
        return list(store.list_episode_sources_for_rebuild())
    token = str(source or "").strip()
    if not token:
        raise ValueError("必须提供 --source 或 --all")
    return [token]


async def _run_rebuilds(store: MetadataStore, plugin_config: Dict[str, Any], sources: List[str]) -> int:
    service = EpisodeService(metadata_store=store, plugin_config=plugin_config)
    failures: List[str] = []
    for source in sources:
        started = store.mark_episode_source_running(source)
        if not started:
            failures.append(f"{source}: unable_to_mark_running")
            continue
        try:
            result = await service.rebuild_source(source)
            store.mark_episode_source_done(source)
            print(
                "rebuilt"
                f" source={source}"
                f" paragraphs={int(result.get('paragraph_count') or 0)}"
                f" groups={int(result.get('group_count') or 0)}"
                f" episodes={int(result.get('episode_count') or 0)}"
                f" fallback={int(result.get('fallback_count') or 0)}"
            )
        except Exception as exc:
            err = str(exc)[:500]
            store.mark_episode_source_failed(source, err)
            failures.append(f"{source}: {err}")
            print(f"failed source={source} error={err}")

    if failures:
        for item in failures:
            print(item)
        return 1
    return 0


def main() -> int:
    parser = _build_arg_parser()
    args = parser.parse_args()
    if bool(args.all) == bool(args.source):
        parser.error("必须且只能选择一个:--source 或 --all")

    store = MetadataStore(data_dir=Path(args.data_dir) / "metadata")
    store.connect()
    try:
        sources = _resolve_sources(store, source=args.source, rebuild_all=bool(args.all))
        if not sources:
            print("no sources to rebuild")
            return 0

        enqueued = 0
        reason = "script_rebuild_all" if args.all else "script_rebuild_source"
        for source in sources:
            enqueued += int(store.enqueue_episode_source_rebuild(source, reason=reason))
        print(f"enqueued={enqueued} sources={len(sources)}")

        if not args.wait:
            return 0

        plugin_config = _load_plugin_config()
        return asyncio.run(_run_rebuilds(store, plugin_config, sources))
    finally:
        store.close()


if __name__ == "__main__":
    raise SystemExit(main())
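As a usage sketch, the in-process equivalent of running this script with --source <name> --wait, composed only from the helpers defined above; the source name is a made-up placeholder, not a real identifier from the dataset.

# Illustrative sketch only -- not part of this commit. Equivalent to
# "--source <name> --wait", built from this script's own helpers.
store = MetadataStore(data_dir=PLUGIN_ROOT / "data" / "metadata")
store.connect()
try:
    sources = _resolve_sources(store, source="group_123456", rebuild_all=False)  # placeholder source
    for s in sources:
        store.enqueue_episode_source_rebuild(s, reason="script_rebuild_source")
    exit_code = asyncio.run(_run_rebuilds(store, _load_plugin_config(), sources))
finally:
    store.close()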
731
plugins/A_memorix/scripts/release_vnext_migrate.py
Normal file
@@ -0,0 +1,731 @@
#!/usr/bin/env python3
"""
vNext release migration entrypoint for A_Memorix.

Subcommands:
- preflight: detect legacy config/data/schema risks
- migrate: offline migrate config + vectors + metadata schema + graph edge hash map
- verify: strict post-migration consistency checks
"""

from __future__ import annotations

import argparse
import json
import pickle
import sqlite3
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple

import tomlkit


CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
PROJECT_ROOT = PLUGIN_ROOT.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PLUGIN_ROOT))


def _build_arg_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="A_Memorix vNext release migration tool")
    parser.add_argument(
        "--config",
        default=str(PLUGIN_ROOT / "config.toml"),
        help="config.toml path (default: plugins/A_memorix/config.toml)",
    )
    parser.add_argument(
        "--data-dir",
        default="",
        help="optional data dir override; default resolved from config.storage.data_dir",
    )
    parser.add_argument("--json-out", default="", help="optional JSON report output path")

    sub = parser.add_subparsers(dest="command", required=True)

    p_preflight = sub.add_parser("preflight", help="scan legacy risks")
    p_preflight.add_argument("--strict", action="store_true", help="return 1 if any error check exists")

    p_migrate = sub.add_parser("migrate", help="run offline migration")
    p_migrate.add_argument("--dry-run", action="store_true", help="only print planned changes")
    p_migrate.add_argument(
        "--verify-after",
        action="store_true",
        help="run verify automatically after migrate",
    )

    p_verify = sub.add_parser("verify", help="post-migration verification")
    p_verify.add_argument("--strict", action="store_true", help="return 1 if any error check exists")
    return parser


# --help/-h fast path: avoid heavy host/plugin bootstrap
if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
    _build_arg_parser().print_help()
    raise SystemExit(0)

try:
    from core.storage import GraphStore, KnowledgeType, MetadataStore, QuantizationType, VectorStore
    from core.storage.metadata_store import SCHEMA_VERSION
except Exception as e:  # pragma: no cover
    print(f"❌ failed to import storage modules: {e}")
    raise SystemExit(2)


@dataclass
class CheckItem:
    code: str
    level: str
    message: str
    details: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        out = {
            "code": self.code,
            "level": self.level,
            "message": self.message,
        }
        if self.details:
            out["details"] = self.details
        return out


def _read_toml(path: Path) -> Dict[str, Any]:
    text = path.read_text(encoding="utf-8")
    return tomlkit.parse(text)


def _write_toml(path: Path, data: Dict[str, Any]) -> None:
    path.write_text(tomlkit.dumps(data), encoding="utf-8")


def _get_nested(obj: Dict[str, Any], keys: Sequence[str], default: Any = None) -> Any:
    cur: Any = obj
    for k in keys:
        if not isinstance(cur, dict) or k not in cur:
            return default
        cur = cur[k]
    return cur


def _ensure_table(obj: Dict[str, Any], key: str) -> Dict[str, Any]:
    if key not in obj or not isinstance(obj[key], dict):
        obj[key] = tomlkit.table()
    return obj[key]


def _resolve_data_dir(config_doc: Dict[str, Any], explicit_data_dir: Optional[str]) -> Path:
    if explicit_data_dir:
        return Path(explicit_data_dir).expanduser().resolve()
    raw = str(_get_nested(config_doc, ("storage", "data_dir"), "./data") or "./data").strip()
    if raw.startswith("."):
        return (PLUGIN_ROOT / raw).resolve()
    return Path(raw).expanduser().resolve()


def _sqlite_table_exists(conn: sqlite3.Connection, table: str) -> bool:
    row = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1",
        (table,),
    ).fetchone()
    return row is not None


def _collect_hash_alias_conflicts(conn: sqlite3.Connection) -> Dict[str, List[str]]:
    hashes: List[str] = []
    if _sqlite_table_exists(conn, "relations"):
        rows = conn.execute("SELECT hash FROM relations").fetchall()
        hashes.extend(str(r[0]) for r in rows if r and r[0])
    if _sqlite_table_exists(conn, "deleted_relations"):
        rows = conn.execute("SELECT hash FROM deleted_relations").fetchall()
        hashes.extend(str(r[0]) for r in rows if r and r[0])

    alias_map: Dict[str, str] = {}
    conflicts: Dict[str, set[str]] = {}
    for h in hashes:
        if len(h) != 64:
            continue
        alias = h[:32]
        old = alias_map.get(alias)
        if old is None:
            alias_map[alias] = h
            continue
        if old != h:
            conflicts.setdefault(alias, set()).update({old, h})
    return {k: sorted(v) for k, v in conflicts.items()}


def _collect_invalid_knowledge_types(conn: sqlite3.Connection) -> List[str]:
    if not _sqlite_table_exists(conn, "paragraphs"):
        return []

    allowed = {item.value for item in KnowledgeType}
    rows = conn.execute("SELECT DISTINCT knowledge_type FROM paragraphs").fetchall()
    invalid: List[str] = []
    for row in rows:
        raw = row[0]
        value = str(raw).strip().lower() if raw is not None else ""
        if value not in allowed:
            invalid.append(str(raw) if raw is not None else "")
    return sorted(set(invalid))


def _guess_vector_dimension(config_doc: Dict[str, Any], vectors_dir: Path) -> int:
    meta_path = vectors_dir / "vectors_metadata.pkl"
    if meta_path.exists():
        try:
            with open(meta_path, "rb") as f:
                meta = pickle.load(f)
            dim = int(meta.get("dimension", 0))
            if dim > 0:
                return dim
        except Exception:
            pass
    try:
        dim_cfg = int(_get_nested(config_doc, ("embedding", "dimension"), 1024))
        if dim_cfg > 0:
            return dim_cfg
    except Exception:
        pass
    return 1024


def _preflight_impl(config_path: Path, data_dir: Path) -> Dict[str, Any]:
    checks: List[CheckItem] = []
    facts: Dict[str, Any] = {
        "config_path": str(config_path),
        "data_dir": str(data_dir),
    }

    if not config_path.exists():
        checks.append(CheckItem("CFG-00", "error", f"config not found: {config_path}"))
        return {"ok": False, "checks": [c.to_dict() for c in checks], "facts": facts}

    config_doc = _read_toml(config_path)
    tool_mode = str(_get_nested(config_doc, ("routing", "tool_search_mode"), "forward") or "").strip().lower()
    summary_model = _get_nested(config_doc, ("summarization", "model_name"), ["auto"])
    summary_knowledge_type = str(
        _get_nested(config_doc, ("summarization", "default_knowledge_type"), "narrative") or "narrative"
    ).strip().lower()
    quantization = str(_get_nested(config_doc, ("embedding", "quantization_type"), "int8") or "").strip().lower()

    facts["routing.tool_search_mode"] = tool_mode
    facts["summarization.model_name_type"] = type(summary_model).__name__
    facts["summarization.default_knowledge_type"] = summary_knowledge_type
    facts["embedding.quantization_type"] = quantization

    if tool_mode == "legacy":
        checks.append(
            CheckItem(
                "CP-04",
                "error",
                "routing.tool_search_mode=legacy is no longer accepted at runtime",
            )
        )
    elif tool_mode not in {"forward", "disabled"}:
        checks.append(
            CheckItem(
                "CP-04",
                "error",
                f"routing.tool_search_mode invalid value: {tool_mode}",
            )
        )

    if isinstance(summary_model, str):
        checks.append(
            CheckItem(
                "CP-11",
                "error",
                "summarization.model_name must be List[str], string legacy format detected",
            )
        )
    elif not isinstance(summary_model, list) or any(not isinstance(x, str) for x in summary_model):
        checks.append(
            CheckItem(
                "CP-11",
                "error",
                "summarization.model_name must be List[str]",
            )
        )

    if summary_knowledge_type not in {item.value for item in KnowledgeType}:
        checks.append(
            CheckItem(
                "CP-13",
                "error",
                f"invalid summarization.default_knowledge_type: {summary_knowledge_type}",
            )
        )

    if quantization != "int8":
        checks.append(
            CheckItem(
                "UG-07",
                "error",
                "embedding.quantization_type must be int8 in vNext",
            )
        )

    vectors_dir = data_dir / "vectors"
    npy_path = vectors_dir / "vectors.npy"
    bin_path = vectors_dir / "vectors.bin"
    ids_bin_path = vectors_dir / "vectors_ids.bin"
    facts["vectors.npy_exists"] = npy_path.exists()
    facts["vectors.bin_exists"] = bin_path.exists()
    facts["vectors_ids.bin_exists"] = ids_bin_path.exists()

    if npy_path.exists() and not (bin_path.exists() and ids_bin_path.exists()):
        checks.append(
            CheckItem(
                "CP-07",
                "error",
                "legacy vectors.npy detected; offline migrate required",
                {"npy_path": str(npy_path)},
            )
        )

    metadata_db = data_dir / "metadata" / "metadata.db"
    facts["metadata_db_exists"] = metadata_db.exists()
    relation_count = 0
    if metadata_db.exists():
        conn = sqlite3.connect(str(metadata_db))
        try:
            has_schema_table = _sqlite_table_exists(conn, "schema_migrations")
            facts["schema_migrations_exists"] = has_schema_table
            if not has_schema_table:
                checks.append(
                    CheckItem(
                        "CP-08",
                        "error",
                        "schema_migrations table missing (legacy metadata schema)",
                    )
                )
            else:
                row = conn.execute("SELECT MAX(version) FROM schema_migrations").fetchone()
                version = int(row[0]) if row and row[0] is not None else 0
                facts["schema_version"] = version
                if version != SCHEMA_VERSION:
                    checks.append(
                        CheckItem(
                            "CP-08",
                            "error",
                            f"schema version mismatch: current={version}, expected={SCHEMA_VERSION}",
                        )
                    )

            if _sqlite_table_exists(conn, "relations"):
                row = conn.execute("SELECT COUNT(*) FROM relations").fetchone()
                relation_count = int(row[0]) if row and row[0] is not None else 0
            facts["relations_count"] = relation_count

            conflicts = _collect_hash_alias_conflicts(conn)
            facts["alias_conflict_count"] = len(conflicts)
            if conflicts:
                checks.append(
                    CheckItem(
                        "CP-05",
                        "error",
                        "32-bit relation hash alias conflict detected",
                        {"aliases": sorted(conflicts.keys())[:20], "total": len(conflicts)},
                    )
                )

            invalid_knowledge_types = _collect_invalid_knowledge_types(conn)
            facts["invalid_knowledge_type_values"] = invalid_knowledge_types
            if invalid_knowledge_types:
                checks.append(
                    CheckItem(
                        "CP-12",
                        "error",
                        "invalid paragraph knowledge_type values detected",
                        {"values": invalid_knowledge_types[:20], "total": len(invalid_knowledge_types)},
                    )
                )
        finally:
            conn.close()
    else:
        checks.append(
            CheckItem(
                "META-00",
                "warning",
                "metadata.db not found, schema checks skipped",
            )
        )

    graph_meta_path = data_dir / "graph" / "graph_metadata.pkl"
    facts["graph_metadata_exists"] = graph_meta_path.exists()
    if relation_count > 0:
        if not graph_meta_path.exists():
            checks.append(
                CheckItem(
                    "CP-06",
                    "error",
                    "relations exist but graph metadata missing",
                )
            )
        else:
            try:
                with open(graph_meta_path, "rb") as f:
                    graph_meta = pickle.load(f)
                edge_hash_map = graph_meta.get("edge_hash_map", {})
                edge_hash_map_size = len(edge_hash_map) if isinstance(edge_hash_map, dict) else 0
                facts["edge_hash_map_size"] = edge_hash_map_size
                if edge_hash_map_size <= 0:
                    checks.append(
                        CheckItem(
                            "CP-06",
                            "error",
                            "edge_hash_map missing/empty while relations exist",
                        )
                    )
            except Exception as e:
                checks.append(
                    CheckItem(
                        "CP-06",
                        "error",
                        f"failed to read graph metadata: {e}",
                    )
                )

    has_error = any(c.level == "error" for c in checks)
    return {
        "ok": not has_error,
        "checks": [c.to_dict() for c in checks],
        "facts": facts,
    }


def _migrate_config(config_doc: Dict[str, Any]) -> Dict[str, Any]:
    changes: Dict[str, Any] = {}

    routing = _ensure_table(config_doc, "routing")
    mode_raw = str(routing.get("tool_search_mode", "forward") or "").strip().lower()
    mode_new = mode_raw
    if mode_raw == "legacy" or mode_raw not in {"forward", "disabled"}:
        mode_new = "forward"
    if mode_new != mode_raw:
        routing["tool_search_mode"] = mode_new
        changes["routing.tool_search_mode"] = {"old": mode_raw, "new": mode_new}

    summary = _ensure_table(config_doc, "summarization")
    summary_model = summary.get("model_name", ["auto"])
    if isinstance(summary_model, str):
        normalized = [summary_model.strip() or "auto"]
        summary["model_name"] = normalized
        changes["summarization.model_name"] = {"old": summary_model, "new": normalized}
    elif not isinstance(summary_model, list):
        normalized = ["auto"]
        summary["model_name"] = normalized
        changes["summarization.model_name"] = {"old": str(type(summary_model)), "new": normalized}
    elif any(not isinstance(x, str) for x in summary_model):
        normalized = [str(x).strip() for x in summary_model if str(x).strip()]
        if not normalized:
            normalized = ["auto"]
        summary["model_name"] = normalized
        changes["summarization.model_name"] = {"old": summary_model, "new": normalized}

    default_knowledge_type = str(summary.get("default_knowledge_type", "narrative") or "").strip().lower()
    allowed_knowledge_types = {item.value for item in KnowledgeType}
    if default_knowledge_type not in allowed_knowledge_types:
        summary["default_knowledge_type"] = "narrative"
        changes["summarization.default_knowledge_type"] = {
            "old": default_knowledge_type,
            "new": "narrative",
        }

    embedding = _ensure_table(config_doc, "embedding")
    quantization = str(embedding.get("quantization_type", "int8") or "").strip().lower()
    if quantization != "int8":
        embedding["quantization_type"] = "int8"
        changes["embedding.quantization_type"] = {"old": quantization, "new": "int8"}

    return changes


def _migrate_impl(config_path: Path, data_dir: Path, dry_run: bool) -> Dict[str, Any]:
    config_doc = _read_toml(config_path)
    result: Dict[str, Any] = {
        "config_path": str(config_path),
        "data_dir": str(data_dir),
        "dry_run": bool(dry_run),
        "steps": {},
    }

    config_changes = _migrate_config(config_doc)
    result["steps"]["config"] = {"changed": bool(config_changes), "changes": config_changes}
    if config_changes and not dry_run:
        _write_toml(config_path, config_doc)

    vectors_dir = data_dir / "vectors"
    vectors_dir.mkdir(parents=True, exist_ok=True)
    npy_path = vectors_dir / "vectors.npy"
    bin_path = vectors_dir / "vectors.bin"
    ids_bin_path = vectors_dir / "vectors_ids.bin"
    if npy_path.exists() and not (bin_path.exists() and ids_bin_path.exists()):
        if dry_run:
            result["steps"]["vector"] = {"migrated": False, "reason": "dry_run"}
        else:
            dim = _guess_vector_dimension(config_doc, vectors_dir)
            store = VectorStore(
                dimension=max(1, int(dim)),
                quantization_type=QuantizationType.INT8,
                data_dir=vectors_dir,
            )
            result["steps"]["vector"] = store.migrate_legacy_npy(vectors_dir)
    else:
        result["steps"]["vector"] = {"migrated": False, "reason": "not_required"}

    metadata_dir = data_dir / "metadata"
    metadata_dir.mkdir(parents=True, exist_ok=True)
    metadata_db = metadata_dir / "metadata.db"
    triples: List[Tuple[str, str, str, str]] = []
    relation_count = 0

    metadata_result: Dict[str, Any] = {"migrated": False, "reason": "not_required"}
    if metadata_db.exists():
        store = MetadataStore(data_dir=metadata_dir)
        store.connect(enforce_schema=False)
        try:
            if dry_run:
                metadata_result = {"migrated": False, "reason": "dry_run"}
            else:
                metadata_result = store.run_legacy_migration_for_vnext()
            relation_count = int(store.count_relations())
            if relation_count > 0:
                triples = [(str(s), str(p), str(o), str(h)) for s, p, o, h in store.get_all_triples()]
        finally:
            store.close()
    result["steps"]["metadata"] = metadata_result

    graph_dir = data_dir / "graph"
    graph_dir.mkdir(parents=True, exist_ok=True)
    graph_matrix_format = str(_get_nested(config_doc, ("graph", "sparse_matrix_format"), "csr") or "csr")
    graph_store = GraphStore(matrix_format=graph_matrix_format, data_dir=graph_dir)
    graph_step: Dict[str, Any] = {
        "rebuilt": False,
        "mapped_hashes": 0,
        "relation_count": relation_count,
        "topology_rebuilt_from_relations": False,
    }
    if relation_count > 0:
        if dry_run:
            graph_step["reason"] = "dry_run"
        else:
            if graph_store.has_data():
                graph_store.load()

            mapped = graph_store.rebuild_edge_hash_map(triples)

            # 兜底:历史数据里 graph 节点/边与 relations 脱节时,直接从 relations 重建图。
            if mapped <= 0 or not graph_store.has_edge_hash_map():
                nodes = sorted({s for s, _, o, _ in triples} | {o for _, _, o, _ in triples})
                edges = [(s, o) for s, _, o, _ in triples]
                hashes = [h for _, _, _, h in triples]

                graph_store.clear()
                if nodes:
                    graph_store.add_nodes(nodes)
                if edges:
                    mapped = graph_store.add_edges(edges, relation_hashes=hashes)
                else:
                    mapped = 0
                graph_step.update(
                    {
                        "topology_rebuilt_from_relations": True,
                        "rebuilt_nodes": len(nodes),
                        "rebuilt_edges": int(graph_store.num_edges),
                    }
                )

            graph_store.save()
            graph_step.update({"rebuilt": True, "mapped_hashes": int(mapped)})
    else:
        graph_step["reason"] = "no_relations"
    result["steps"]["graph"] = graph_step

    return result


def _verify_impl(config_path: Path, data_dir: Path) -> Dict[str, Any]:
    checks: List[CheckItem] = []
    facts: Dict[str, Any] = {
        "config_path": str(config_path),
        "data_dir": str(data_dir),
    }

    if not config_path.exists():
        checks.append(CheckItem("CFG-00", "error", f"config not found: {config_path}"))
        return {"ok": False, "checks": [c.to_dict() for c in checks], "facts": facts}

    config_doc = _read_toml(config_path)
    mode = str(_get_nested(config_doc, ("routing", "tool_search_mode"), "forward") or "").strip().lower()
    if mode not in {"forward", "disabled"}:
        checks.append(CheckItem("CP-04", "error", f"invalid routing.tool_search_mode: {mode}"))

    summary_model = _get_nested(config_doc, ("summarization", "model_name"), ["auto"])
    if not isinstance(summary_model, list) or any(not isinstance(x, str) for x in summary_model):
        checks.append(CheckItem("CP-11", "error", "summarization.model_name must be List[str]"))
    summary_knowledge_type = str(
        _get_nested(config_doc, ("summarization", "default_knowledge_type"), "narrative") or "narrative"
    ).strip().lower()
    if summary_knowledge_type not in {item.value for item in KnowledgeType}:
        checks.append(
            CheckItem("CP-13", "error", f"invalid summarization.default_knowledge_type: {summary_knowledge_type}")
        )

    quantization = str(_get_nested(config_doc, ("embedding", "quantization_type"), "int8") or "").strip().lower()
    if quantization != "int8":
        checks.append(CheckItem("UG-07", "error", "embedding.quantization_type must be int8"))

    vectors_dir = data_dir / "vectors"
    npy_path = vectors_dir / "vectors.npy"
    bin_path = vectors_dir / "vectors.bin"
    ids_bin_path = vectors_dir / "vectors_ids.bin"
    if npy_path.exists() and not (bin_path.exists() and ids_bin_path.exists()):
        checks.append(CheckItem("CP-07", "error", "legacy vectors.npy still exists without bin migration"))

    metadata_dir = data_dir / "metadata"
    store = MetadataStore(data_dir=metadata_dir)
    try:
        store.connect(enforce_schema=True)
        schema_version = store.get_schema_version()
        facts["schema_version"] = schema_version
        if schema_version != SCHEMA_VERSION:
            checks.append(CheckItem("CP-08", "error", f"schema version mismatch: {schema_version}"))

        relation_count = int(store.count_relations())
        facts["relations_count"] = relation_count

        conflicts = {}
        invalid_knowledge_types: List[str] = []
        db_path = metadata_dir / "metadata.db"
        if db_path.exists():
            conn = sqlite3.connect(str(db_path))
            try:
                conflicts = _collect_hash_alias_conflicts(conn)
                invalid_knowledge_types = _collect_invalid_knowledge_types(conn)
            finally:
                conn.close()
        if conflicts:
            checks.append(
                CheckItem(
                    "CP-05",
                    "error",
                    "alias conflicts still exist after migration",
                    {"aliases": sorted(conflicts.keys())[:20], "total": len(conflicts)},
                )
            )
        if invalid_knowledge_types:
            checks.append(
                CheckItem(
                    "CP-12",
                    "error",
                    "invalid paragraph knowledge_type values remain after migration",
                    {"values": invalid_knowledge_types[:20], "total": len(invalid_knowledge_types)},
                )
            )

        if relation_count > 0:
            graph_dir = data_dir / "graph"
            if not (graph_dir / "graph_metadata.pkl").exists():
                checks.append(CheckItem("CP-06", "error", "graph metadata missing while relations exist"))
            else:
                matrix_format = str(_get_nested(config_doc, ("graph", "sparse_matrix_format"), "csr") or "csr")
                graph_store = GraphStore(matrix_format=matrix_format, data_dir=graph_dir)
                graph_store.load()
                if not graph_store.has_edge_hash_map():
                    checks.append(CheckItem("CP-06", "error", "edge_hash_map is empty"))
    except Exception as e:
        checks.append(CheckItem("CP-08", "error", f"metadata strict connect failed: {e}"))
    finally:
        try:
            store.close()
        except Exception:
            pass

    has_error = any(c.level == "error" for c in checks)
    return {
        "ok": not has_error,
        "checks": [c.to_dict() for c in checks],
        "facts": facts,
    }


def _print_report(title: str, report: Dict[str, Any]) -> None:
    print(f"=== {title} ===")
    print(f"ok: {bool(report.get('ok', True))}")
    facts = report.get("facts", {})
    if facts:
        print("facts:")
        for k in sorted(facts.keys()):
            print(f"  - {k}: {facts[k]}")
    checks = report.get("checks", [])
    if checks:
        print("checks:")
        for item in checks:
            print(f"  - [{item.get('level')}] {item.get('code')}: {item.get('message')}")
    else:
        print("checks: none")


def _write_json_if_needed(path: str, payload: Dict[str, Any]) -> None:
    if not path:
        return
    out = Path(path).expanduser().resolve()
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"json_out: {out}")


def main() -> int:
    parser = _build_arg_parser()
    args = parser.parse_args()
    config_path = Path(args.config).expanduser().resolve()
    if not config_path.exists():
        print(f"❌ config not found: {config_path}")
        return 2
    config_doc = _read_toml(config_path)
    data_dir = _resolve_data_dir(config_doc, args.data_dir)

    if args.command == "preflight":
        report = _preflight_impl(config_path, data_dir)
        _print_report("vNext Preflight", report)
        _write_json_if_needed(args.json_out, report)
        has_error = any(item.get("level") == "error" for item in report.get("checks", []))
        if args.strict and has_error:
            return 1
        return 0

    if args.command == "migrate":
        payload = _migrate_impl(config_path, data_dir, dry_run=bool(args.dry_run))
        print("=== vNext Migrate ===")
        print(json.dumps(payload, ensure_ascii=False, indent=2))

        verify_report = None
        if args.verify_after and not args.dry_run:
            verify_report = _verify_impl(config_path, data_dir)
            _print_report("vNext Verify (after migrate)", verify_report)
            payload["verify_after"] = verify_report

        _write_json_if_needed(args.json_out, payload)
        if verify_report is not None:
            has_error = any(item.get("level") == "error" for item in verify_report.get("checks", []))
            if has_error:
                return 1
        return 0

    if args.command == "verify":
        report = _verify_impl(config_path, data_dir)
        _print_report("vNext Verify", report)
        _write_json_if_needed(args.json_out, report)
        has_error = any(item.get("level") == "error" for item in report.get("checks", []))
        if args.strict and has_error:
            return 1
        return 0

    return 2


if __name__ == "__main__":
    raise SystemExit(main())
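To make the preflight → migrate → verify sequence from the module docstring concrete, here is a small in-process sketch built only from the functions defined above; the config path resolution is illustrative, and in practice the same flow is normally driven through the CLI subcommands.

# Illustrative sketch only -- not part of this commit. Chains the three
# migration phases in-process using this script's own helpers.
config_path = (PLUGIN_ROOT / "config.toml").resolve()
config_doc = _read_toml(config_path)
data_dir = _resolve_data_dir(config_doc, None)

pre = _preflight_impl(config_path, data_dir)
_print_report("vNext Preflight", pre)
if not pre["ok"]:
    _migrate_impl(config_path, data_dir, dry_run=False)
    post = _verify_impl(config_path, data_dir)
    _print_report("vNext Verify", post)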
152
plugins/A_memorix/scripts/runtime_self_check.py
Normal file
@@ -0,0 +1,152 @@
#!/usr/bin/env python3
"""Run A_Memorix runtime self-check against real embedding/runtime configuration."""

from __future__ import annotations

import argparse
import asyncio
import json
import sys
import tempfile
from pathlib import Path
from typing import Any

import tomlkit


CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
PROJECT_ROOT = PLUGIN_ROOT.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PLUGIN_ROOT))


def _build_arg_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="A_Memorix runtime self-check")
    parser.add_argument(
        "--config",
        default=str(PLUGIN_ROOT / "config.toml"),
        help="config.toml path (default: plugins/A_memorix/config.toml)",
    )
    parser.add_argument(
        "--data-dir",
        default="",
        help="optional data dir override; default resolved from config.storage.data_dir",
    )
    parser.add_argument(
        "--use-config-data-dir",
        action="store_true",
        help="use config.storage.data_dir directly instead of an isolated temp dir",
    )
    parser.add_argument(
        "--sample-text",
        default="A_Memorix runtime self check",
        help="sample text used for real embedding probe",
    )
    parser.add_argument("--json", action="store_true", help="print JSON report")
    return parser


if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
    _build_arg_parser().print_help()
    raise SystemExit(0)

from core.runtime.lifecycle_orchestrator import initialize_storage_async
from core.utils.runtime_self_check import run_embedding_runtime_self_check


def _load_config(path: Path) -> dict[str, Any]:
    with open(path, "r", encoding="utf-8") as f:
        raw = tomlkit.load(f)
    return dict(raw) if isinstance(raw, dict) else {}


def _nested_get(config: dict[str, Any], key: str, default: Any = None) -> Any:
    current: Any = config
    for part in key.split("."):
        if isinstance(current, dict) and part in current:
            current = current[part]
        else:
            return default
    return current


class _PluginStub:
    def __init__(self, config: dict[str, Any]):
        self.config = config
        self.vector_store = None
        self.graph_store = None
        self.metadata_store = None
        self.embedding_manager = None
        self.sparse_index = None
        self.relation_write_service = None

    def get_config(self, key: str, default: Any = None) -> Any:
        return _nested_get(self.config, key, default)


async def _main_async(args: argparse.Namespace) -> int:
    config_path = Path(args.config).resolve()
    if not config_path.exists():
        print(f"❌ 配置文件不存在: {config_path}")
        return 2

    config = _load_config(config_path)
    temp_dir_ctx = None
    if args.data_dir:
        storage_dir = str(Path(args.data_dir).resolve())
    elif args.use_config_data_dir:
        raw_data_dir = str(_nested_get(config, "storage.data_dir", "./data") or "./data").strip()
        if raw_data_dir.startswith("."):
            storage_dir = str((config_path.parent / raw_data_dir).resolve())
        else:
            storage_dir = str(Path(raw_data_dir).resolve())
    else:
        temp_dir_ctx = tempfile.TemporaryDirectory(prefix="memorix-runtime-self-check-")
        storage_dir = temp_dir_ctx.name

    storage_cfg = config.setdefault("storage", {})
    storage_cfg["data_dir"] = storage_dir

    plugin = _PluginStub(config)
    try:
        await initialize_storage_async(plugin)
        report = await run_embedding_runtime_self_check(
            config=config,
            vector_store=plugin.vector_store,
            embedding_manager=plugin.embedding_manager,
            sample_text=str(args.sample_text or "A_Memorix runtime self check"),
        )
        report["data_dir"] = storage_dir
        report["isolated_data_dir"] = temp_dir_ctx is not None
        if args.json:
            print(json.dumps(report, ensure_ascii=False, indent=2))
        else:
            print("A_Memorix Runtime Self-Check")
            print(f"ok: {report.get('ok')}")
            print(f"code: {report.get('code')}")
            print(f"message: {report.get('message')}")
            print(f"configured_dimension: {report.get('configured_dimension')}")
            print(f"vector_store_dimension: {report.get('vector_store_dimension')}")
            print(f"detected_dimension: {report.get('detected_dimension')}")
            print(f"encoded_dimension: {report.get('encoded_dimension')}")
            print(f"elapsed_ms: {float(report.get('elapsed_ms', 0.0)):.2f}")
        return 0 if bool(report.get("ok")) else 1
    finally:
        if plugin.metadata_store is not None:
            try:
                plugin.metadata_store.close()
            except Exception:
                pass
        if temp_dir_ctx is not None:
            temp_dir_ctx.cleanup()


def main() -> int:
    parser = _build_arg_parser()
    args = parser.parse_args()
    return asyncio.run(_main_async(args))


if __name__ == "__main__":
    raise SystemExit(main())
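For orientation, the report printed with --json has roughly the following shape; the keys are the ones read back by the plain-text printer above plus the data_dir/isolated_data_dir fields this script sets, while every value below is invented purely for illustration.

# Illustrative report shape only -- values are made up, not real output.
example_report = {
    "ok": True,
    "code": "ok",  # assumption: actual codes come from run_embedding_runtime_self_check
    "message": "embedding runtime reachable",
    "configured_dimension": 1024,
    "vector_store_dimension": 1024,
    "detected_dimension": 1024,
    "encoded_dimension": 1024,
    "elapsed_ms": 142.37,
    "data_dir": "/tmp/memorix-runtime-self-check-abc123",
    "isolated_data_dir": True,
}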