添加 A_Memorix 插件 v2.0.0(包含运行时与文档)
引入 A_Memorix 插件 v2.0.0:新增大量运行时组件、存储/模式更新、检索能力提升、管理工具、导入/调优工作流以及相关文档。关键新增内容包括:lifecycle_orchestrator、SDKMemoryKernel/运行时初始化器、新的存储层与 metadata_store 变更(SCHEMA_VERSION v8)、检索增强(双路径检索、图关系召回、稀疏 BM25),以及多种工具服务(episode/person_profile/relation/segmentation/tuning/search execution)。同时新增 Web 导入/摘要导入器及大量维护脚本。还更新了插件清单、embedding API 适配器、plugin.py、requirements/pyproject,以及主入口文件,使新插件接入项目。该变更为 2.0.0 版本发布做好准备,实现统一的 SDK Tool 接口并扩展整体运行能力。
This commit is contained in:
172
plugins/A_memorix/scripts/import_lpmm_json.py
Normal file
172
plugins/A_memorix/scripts/import_lpmm_json.py
Normal file
@@ -0,0 +1,172 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
LPMM OpenIE JSON 导入工具。
|
||||
|
||||
功能:
|
||||
1. 读取符合 LPMM 规范的 OpenIE JSON 文件
|
||||
2. 转换为 A_Memorix 的统一导入格式
|
||||
3. 复用 `process_knowledge.py` 中的 `AutoImporter` 直接入库
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from rich.console import Console
|
||||
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
|
||||
|
||||
console = Console()
|
||||
|
||||
CURRENT_DIR = Path(__file__).resolve().parent
|
||||
PLUGIN_ROOT = CURRENT_DIR.parent
|
||||
WORKSPACE_ROOT = PLUGIN_ROOT.parent
|
||||
MAIBOT_ROOT = WORKSPACE_ROOT / "MaiBot"
|
||||
for path in (CURRENT_DIR, WORKSPACE_ROOT, MAIBOT_ROOT, PLUGIN_ROOT):
|
||||
path_str = str(path)
|
||||
if path_str not in sys.path:
|
||||
sys.path.insert(0, path_str)
|
||||
|
||||
|
||||
def _build_arg_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(description="将 LPMM OpenIE JSON 导入 A_Memorix")
|
||||
parser.add_argument("path", help="LPMM JSON 文件路径或目录")
|
||||
parser.add_argument("--force", action="store_true", help="强制重新导入")
|
||||
parser.add_argument("--concurrency", "-c", type=int, default=5, help="并发数")
|
||||
return parser
|
||||
|
||||
|
||||
if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
|
||||
_build_arg_parser().print_help()
|
||||
raise SystemExit(0)
|
||||
|
||||
|
||||
try:
|
||||
from process_knowledge import AutoImporter
|
||||
from A_memorix.core.utils.hash import compute_paragraph_hash
|
||||
from src.common.logger import get_logger
|
||||
except ImportError as exc: # pragma: no cover - script bootstrap
|
||||
print(f"导入模块失败,请确认 PYTHONPATH 与工作区结构: {exc}")
|
||||
raise SystemExit(1)
|
||||
|
||||
|
||||
logger = get_logger("A_Memorix.LPMMImport")
|
||||
|
||||
|
||||
class LPMMConverter:
|
||||
def convert_lpmm_to_memorix(self, lpmm_data: Dict[str, Any], filename: str) -> Dict[str, Any]:
|
||||
memorix_data = {"paragraphs": [], "entities": []}
|
||||
docs = lpmm_data.get("docs", []) or []
|
||||
if not docs:
|
||||
logger.warning(f"文件中未找到 docs 字段: {filename}")
|
||||
return memorix_data
|
||||
|
||||
all_entities = set()
|
||||
for doc in docs:
|
||||
content = str(doc.get("passage", "") or "").strip()
|
||||
if not content:
|
||||
continue
|
||||
|
||||
relations: List[Dict[str, str]] = []
|
||||
for triple in doc.get("extracted_triples", []) or []:
|
||||
if isinstance(triple, list) and len(triple) == 3:
|
||||
relations.append(
|
||||
{
|
||||
"subject": str(triple[0] or "").strip(),
|
||||
"predicate": str(triple[1] or "").strip(),
|
||||
"object": str(triple[2] or "").strip(),
|
||||
}
|
||||
)
|
||||
|
||||
entities = [str(item or "").strip() for item in doc.get("extracted_entities", []) or [] if str(item or "").strip()]
|
||||
all_entities.update(entities)
|
||||
for relation in relations:
|
||||
if relation["subject"]:
|
||||
all_entities.add(relation["subject"])
|
||||
if relation["object"]:
|
||||
all_entities.add(relation["object"])
|
||||
|
||||
memorix_data["paragraphs"].append(
|
||||
{
|
||||
"hash": compute_paragraph_hash(content),
|
||||
"content": content,
|
||||
"source": filename,
|
||||
"entities": entities,
|
||||
"relations": relations,
|
||||
}
|
||||
)
|
||||
|
||||
memorix_data["entities"] = sorted(all_entities)
|
||||
return memorix_data
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
parser = _build_arg_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
target_path = Path(args.path)
|
||||
if not target_path.exists():
|
||||
logger.error(f"路径不存在: {target_path}")
|
||||
return
|
||||
|
||||
if target_path.is_dir():
|
||||
files_to_process = list(target_path.glob("*-openie.json")) or list(target_path.glob("*.json"))
|
||||
else:
|
||||
files_to_process = [target_path]
|
||||
|
||||
if not files_to_process:
|
||||
logger.error("未找到可处理的 JSON 文件")
|
||||
return
|
||||
|
||||
importer = AutoImporter(force=bool(args.force), concurrency=int(args.concurrency))
|
||||
if not await importer.initialize():
|
||||
logger.error("初始化存储失败")
|
||||
return
|
||||
|
||||
converter = LPMMConverter()
|
||||
with Progress(
|
||||
SpinnerColumn(),
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
||||
TimeElapsedColumn(),
|
||||
console=console,
|
||||
transient=False,
|
||||
) as progress:
|
||||
for json_file in files_to_process:
|
||||
logger.info(f"正在转换并导入: {json_file.name}")
|
||||
try:
|
||||
with open(json_file, "r", encoding="utf-8") as handle:
|
||||
lpmm_data = json.load(handle)
|
||||
memorix_data = converter.convert_lpmm_to_memorix(lpmm_data, json_file.name)
|
||||
total_items = len(memorix_data.get("paragraphs", []))
|
||||
if total_items <= 0:
|
||||
logger.warning(f"转换结果为空: {json_file.name}")
|
||||
continue
|
||||
|
||||
task_id = progress.add_task(f"Importing {json_file.name}", total=total_items)
|
||||
|
||||
def update_progress(step: int = 1) -> None:
|
||||
progress.advance(task_id, advance=step)
|
||||
|
||||
await importer.import_json_data(
|
||||
memorix_data,
|
||||
filename=f"lpmm_{json_file.name}",
|
||||
progress_callback=update_progress,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error(f"处理文件 {json_file.name} 失败: {exc}\n{traceback.format_exc()}")
|
||||
|
||||
await importer.close()
|
||||
logger.info("全部处理完成")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if sys.platform == "win32": # pragma: no cover
|
||||
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
||||
asyncio.run(main())
|
||||
Reference in New Issue
Block a user