Files
mai-bot/plugins/A_memorix/scripts/migrate_chat_history.py
DawnARC 999e7246e2 feat:新增 A_Memorix 记忆插件
引入 A_Memorix 插件(v2.0.0)——一个轻量级的长期记忆提供器。新增插件清单(manifest)和入口(AMemorixPlugin),并提供完整的核心能力:嵌入(基于哈希的 EmbeddingAPIAdapter、EmbeddingManager、预设)、检索(双路径检索器、PageRank、图关系召回、BM25 稀疏索引、阈值与融合配置)、存储与元数据层,以及大量实用工具和迁移/转换脚本。同时更新 .gitignore 以允许 /plugins/A_memorix。该变更为在宿主应用中实现统一的记忆摄取、检索、分析与维护奠定了基础。
2026-03-18 21:33:15 +08:00

111 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import asyncio
import json
import sqlite3
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict
# Directory layout, derived from the location of this script file.
CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
WORKSPACE_ROOT = PLUGIN_ROOT.parent
MAIBOT_ROOT = WORKSPACE_ROOT.joinpath("MaiBot")
DEFAULT_DB_PATH = MAIBOT_ROOT.joinpath("data", "MaiBot.db")

# Put both roots at the front of the import search path (workspace first,
# then MaiBot, so MaiBot ends up ahead of it) unless they are already listed.
for _import_root in map(str, (WORKSPACE_ROOT, MAIBOT_ROOT)):
    if _import_root not in sys.path:
        sys.path.insert(0, _import_root)
from A_memorix.core.runtime.sdk_memory_kernel import SDKMemoryKernel # noqa: E402
def _parse_args() -> argparse.Namespace:
    """Parse the command-line options of the migration script.

    Returns:
        The namespace produced by ``argparse`` with the attributes
        ``db_path``, ``data_dir``, ``limit`` and ``dry_run``.
    """
    cli = argparse.ArgumentParser(description="迁移 MaiBot chat_history 到 A_Memorix")
    # Each entry is (flag, keyword arguments for ``add_argument``).
    option_specs = (
        ("--db-path", {"default": str(DEFAULT_DB_PATH), "help": "MaiBot SQLite 路径"}),
        ("--data-dir", {"default": "./data", "help": "A_Memorix 数据目录"}),
        ("--limit", {"type": int, "default": 0, "help": "限制迁移条数0 表示全部"}),
        ("--dry-run", {"action": "store_true", "help": "仅预览,不写入"}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
def _to_timestamp(value: Any) -> float | None:
if value is None:
return None
if isinstance(value, (int, float)):
return float(value)
text = str(value).strip()
if not text:
return None
try:
return datetime.fromisoformat(text).timestamp()
except ValueError:
return None
def _fetch_rows(db_path: Path, limit: int) -> list[sqlite3.Row]:
    """Read ``chat_history`` rows ordered by id, capped at *limit* when it is positive."""
    sql = """
    SELECT id, session_id, start_timestamp, end_timestamp, participants, theme, keywords, summary
    FROM chat_history
    ORDER BY id ASC
    """
    params: tuple[int, ...] = ()
    if limit > 0:
        sql += " LIMIT ?"
        params = (limit,)

    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        return conn.execute(sql, params).fetchall()
    finally:
        # Close even when the query fails (e.g. the table is missing).
        conn.close()


async def _main() -> int:
    """Migrate MaiBot ``chat_history`` rows into the A_Memorix kernel.

    Reads every row (or the first ``--limit`` rows) from the SQLite database,
    then either previews the first five rows (``--dry-run``) or ingests each
    row through ``SDKMemoryKernel.ingest_summary``. A row counts as migrated
    when the ingest result contains a non-empty ``stored_ids`` entry, and as
    skipped otherwise.

    Returns:
        ``1`` when the database file does not exist, ``0`` otherwise.
    """
    args = _parse_args()
    db_path = Path(args.db_path).resolve()
    if not db_path.exists():
        print(f"数据库不存在: {db_path}")
        return 1

    rows = _fetch_rows(db_path, int(args.limit or 0))
    print(f"chat_history 待处理: {len(rows)}")

    if args.dry_run:
        for row in rows[:5]:
            print(f"- id={row['id']} session={row['session_id']} theme={row['theme']}")
        return 0

    kernel = SDKMemoryKernel(plugin_root=PLUGIN_ROOT, config={"storage": {"data_dir": args.data_dir}})
    await kernel.initialize()
    try:
        migrated = 0
        skipped = 0
        for row in rows:
            # participants / keywords are stored as JSON text; empty cells mean "none".
            participants = json.loads(row["participants"]) if row["participants"] else []
            keywords = json.loads(row["keywords"]) if row["keywords"] else []
            theme = str(row["theme"] or "").strip()
            summary = str(row["summary"] or "").strip()
            text = f"主题:{theme}\n概括:{summary}".strip()
            result: Dict[str, Any] = await kernel.ingest_summary(
                external_id=f"chat_history:{row['id']}",
                chat_id=str(row["session_id"] or ""),
                text=text,
                participants=participants,
                time_start=_to_timestamp(row["start_timestamp"]),
                time_end=_to_timestamp(row["end_timestamp"]),
                tags=keywords,
                metadata={"theme": theme, "source_row_id": int(row["id"])},
            )
            if result.get("stored_ids"):
                migrated += 1
            else:
                skipped += 1
        print(f"迁移完成: migrated={migrated} skipped={skipped}")
        print(json.dumps(kernel.memory_stats(), ensure_ascii=False))
    finally:
        # Always release the kernel, even if a row fails part-way through.
        # NOTE(review): presumably close() flushes already-ingested data — confirm.
        kernel.close()
    return 0
# Script entry point: run the async migration and use its return value as the
# process exit status.
if __name__ == "__main__":
    exit_code = asyncio.run(_main())
    raise SystemExit(exit_code)