引入 A_Memorix 插件(v2.0.0)——一个轻量级的长期记忆提供器。新增插件清单(manifest)和入口(AMemorixPlugin),并提供完整的核心能力:嵌入(基于哈希的 EmbeddingAPIAdapter、EmbeddingManager、预设)、检索(双路径检索器、PageRank、图关系召回、BM25 稀疏索引、阈值与融合配置)、存储与元数据层,以及大量实用工具和迁移/转换脚本。同时更新 .gitignore 以允许 /plugins/A_memorix。该变更为在宿主应用中实现统一的记忆摄取、检索、分析与维护奠定了基础。
111 lines
3.6 KiB
Python
111 lines
3.6 KiB
Python
#!/usr/bin/env python3
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import asyncio
|
||
import json
|
||
import sqlite3
|
||
import sys
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Any, Dict
|
||
|
||
|
||
# Filesystem layout, derived from the location of this file.
CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
WORKSPACE_ROOT = PLUGIN_ROOT.parent
MAIBOT_ROOT = WORKSPACE_ROOT.joinpath("MaiBot")
DEFAULT_DB_PATH = MAIBOT_ROOT.joinpath("data", "MaiBot.db")

# Prepend both roots to the import search path if they are not already there.
# Order matters: the workspace root is inserted first, so the MaiBot root ends
# up ahead of it. Presumably this lets the project imports that follow resolve
# when the file is run directly as a script.
for _import_root in (str(WORKSPACE_ROOT), str(MAIBOT_ROOT)):
    if _import_root not in sys.path:
        sys.path.insert(0, _import_root)
del _import_root  # keep the module namespace free of the loop variable
|
||
|
||
from A_memorix.core.runtime.sdk_memory_kernel import SDKMemoryKernel # noqa: E402
|
||
|
||
|
||
def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the migration script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is the behaviour existing callers get.

    Returns:
        A namespace with ``db_path`` (str), ``data_dir`` (str), ``limit``
        (int, ``0`` meaning "no limit") and ``dry_run`` (bool).
    """
    parser = argparse.ArgumentParser(description="迁移 MaiBot chat_history 到 A_Memorix")
    parser.add_argument("--db-path", default=str(DEFAULT_DB_PATH), help="MaiBot SQLite 路径")
    parser.add_argument("--data-dir", default="./data", help="A_Memorix 数据目录")
    parser.add_argument("--limit", type=int, default=0, help="限制迁移条数,0 表示全部")
    parser.add_argument("--dry-run", action="store_true", help="仅预览,不写入")
    return parser.parse_args(argv)
|
||
|
||
|
||
def _to_timestamp(value: Any) -> float | None:
    """Convert a stored time value into a Unix timestamp in seconds.

    Args:
        value: ``None``, a number, or any object whose ``str()`` form is either
            an ISO-8601 date/time or a decimal number.

    Returns:
        The timestamp as a float, or ``None`` when the value is missing, blank,
        or cannot be interpreted as a point in time.
    """
    import math  # local import: only the numeric-text fallback needs it

    if value is None:
        return None
    if isinstance(value, (int, float)):
        return float(value)

    text = str(value).strip()
    if not text:
        return None

    try:
        # A datetime without tzinfo is interpreted as local time by timestamp().
        return datetime.fromisoformat(text).timestamp()
    except (ValueError, OverflowError, OSError):
        # ValueError: not ISO formatted. OverflowError/OSError: the date is
        # outside the range the platform can convert. Either way, fall through.
        pass

    # Timestamps are sometimes persisted as numeric text (e.g. "1700000000.5");
    # accept those instead of discarding them.
    try:
        parsed = float(text)
    except ValueError:
        return None
    # "nan" / "inf" parse as floats but are not usable timestamps.
    return parsed if math.isfinite(parsed) else None
|
||
|
||
|
||
async def _main() -> int:
    """Migrate MaiBot ``chat_history`` rows into an A_Memorix data directory.

    Reads the rows from the SQLite database named by ``--db-path``, then either
    previews the first five (``--dry-run``) or passes each row to
    ``SDKMemoryKernel.ingest_summary``. A row counts as migrated when the
    returned mapping has a truthy ``"stored_ids"`` entry, otherwise as skipped.

    Returns:
        Process exit code: ``1`` if the database file does not exist,
        ``0`` otherwise.
    """
    args = _parse_args()
    db_path = Path(args.db_path).resolve()
    if not db_path.exists():
        print(f"数据库不存在: {db_path}")
        return 1

    rows = _load_chat_history_rows(db_path, int(args.limit or 0))
    print(f"chat_history 待处理: {len(rows)}")

    if args.dry_run:
        # Preview only: show at most five rows and write nothing.
        for row in rows[:5]:
            print(f"- id={row['id']} session={row['session_id']} theme={row['theme']}")
        return 0

    kernel = SDKMemoryKernel(plugin_root=PLUGIN_ROOT, config={"storage": {"data_dir": args.data_dir}})
    # Initialise outside the try block: whether close() is safe on a kernel
    # that failed to initialise cannot be determined from this file.
    await kernel.initialize()
    try:
        migrated = 0
        skipped = 0
        for row in rows:
            # participants / keywords are stored as JSON text; NULL or empty
            # values become an empty list.
            participants = json.loads(row["participants"]) if row["participants"] else []
            keywords = json.loads(row["keywords"]) if row["keywords"] else []
            theme = str(row["theme"] or "").strip()
            summary = str(row["summary"] or "").strip()
            text = f"主题:{theme}\n概括:{summary}".strip()
            result: Dict[str, Any] = await kernel.ingest_summary(
                external_id=f"chat_history:{row['id']}",
                chat_id=str(row["session_id"] or ""),
                text=text,
                participants=participants,
                time_start=_to_timestamp(row["start_timestamp"]),
                time_end=_to_timestamp(row["end_timestamp"]),
                tags=keywords,
                metadata={"theme": theme, "source_row_id": int(row["id"])},
            )
            if result.get("stored_ids"):
                migrated += 1
            else:
                skipped += 1

        print(f"迁移完成: migrated={migrated} skipped={skipped}")
        print(json.dumps(kernel.memory_stats(), ensure_ascii=False))
    finally:
        # Release the kernel even if a row fails to parse or ingest.
        kernel.close()
    return 0


def _load_chat_history_rows(db_path: Path, limit: int) -> list[sqlite3.Row]:
    """Fetch ``chat_history`` rows ordered by id, always closing the connection.

    Args:
        db_path: Path of the SQLite database file.
        limit: Maximum number of rows to return; values ``<= 0`` mean no limit.

    Returns:
        The fetched rows as ``sqlite3.Row`` objects (addressable by column name).
    """
    sql = """
        SELECT id, session_id, start_timestamp, end_timestamp, participants, theme, keywords, summary
        FROM chat_history
        ORDER BY id ASC
    """
    params: tuple[int, ...] = ()
    if limit > 0:
        sql += " LIMIT ?"
        params = (limit,)

    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        return conn.execute(sql, params).fetchall()
    finally:
        # Close the connection even when the query raises (e.g. missing table).
        conn.close()
|
||
|
||
|
||
# Script entry point: run the async migration and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(asyncio.run(_main()))
|