Add A_Memorix plugin v2.0.0 (runtime and documentation)

Introduce the A_Memorix plugin v2.0.0: a large set of new runtime components, storage/schema updates, retrieval improvements, administration tools, import/tuning workflows, and accompanying documentation. Key additions include the lifecycle_orchestrator, the SDKMemoryKernel/runtime initializer, a new storage layer with metadata_store changes (SCHEMA_VERSION v8), retrieval enhancements (dual-path retrieval, graph relation recall, sparse BM25), and several tool services (episode/person_profile/relation/segmentation/tuning/search execution). Also adds web import/summary importers and numerous maintenance scripts. Updates the plugin manifest, the embedding API adapter, plugin.py, requirements/pyproject, and the main entry point to wire the new plugin into the project. This prepares the 2.0.0 release, providing a unified SDK Tool interface and extending overall runtime capability.
DawnARC
2026-03-19 00:09:04 +08:00
parent eb257345dd
commit 71b3a828c6
44 changed files with 18193 additions and 405 deletions


@@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""
A_Memorix consistency audit script.
Reports:
1. paragraph/entity/relation vector coverage
2. relation vector_state distribution
3. orphan vector count (a vector exists but its metadata does not)
4. mismatches between recorded state and the vector files
"""
from __future__ import annotations
import argparse
import json
import pickle
import sys
from pathlib import Path
from typing import Any, Dict, Set
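# Make the host project root and the plugin root importable when running this script directly.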
CURRENT_DIR = Path(__file__).resolve().parent
PLUGIN_ROOT = CURRENT_DIR.parent
PROJECT_ROOT = PLUGIN_ROOT.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PLUGIN_ROOT))
def _build_arg_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="Audit A_Memorix vector consistency")
parser.add_argument(
"--data-dir",
default=str(PLUGIN_ROOT / "data"),
help="A_Memorix 数据目录(默认: plugins/A_memorix/data",
)
parser.add_argument("--json-out", default="", help="可选:输出 JSON 文件路径")
parser.add_argument(
"--strict",
action="store_true",
help="若发现一致性异常则返回非 0 退出码",
)
return parser
# --help/-h fast path: avoid heavy host/plugin bootstrap
if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
_build_arg_parser().print_help()
sys.exit(0)
try:
from core.storage.vector_store import VectorStore
from core.storage.metadata_store import MetadataStore
from core.storage import QuantizationType
except Exception as e: # pragma: no cover
print(f"❌ 导入核心模块失败: {e}")
sys.exit(1)
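# Coverage helper: returns 0.0 for an empty denominator instead of raising.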
def _safe_ratio(numerator: int, denominator: int) -> float:
if denominator <= 0:
return 0.0
return float(numerator) / float(denominator)
def _load_vector_store(data_dir: Path) -> VectorStore:
meta_path = data_dir / "vectors" / "vectors_metadata.pkl"
if not meta_path.exists():
raise FileNotFoundError(f"未找到向量元数据文件: {meta_path}")
with open(meta_path, "rb") as f:
meta = pickle.load(f)
dimension = int(meta.get("dimension", 1024))
store = VectorStore(
dimension=max(1, dimension),
quantization_type=QuantizationType.INT8,
data_dir=data_dir / "vectors",
)
if store.has_data():
store.load()
return store
def _load_metadata_store(data_dir: Path) -> MetadataStore:
store = MetadataStore(data_dir=data_dir / "metadata")
store.connect()
return store
def _hash_set(metadata_store: MetadataStore, table: str) -> Set[str]:
return {str(h) for h in metadata_store.list_hashes(table)}
def _relation_state_stats(metadata_store: MetadataStore) -> Dict[str, int]:
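    # Thin wrapper over the metadata store; the counts are expected to include "total" and "ready" keys.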
return metadata_store.count_relations_by_vector_state()
def run_audit(data_dir: Path) -> Dict[str, Any]:
vector_store = _load_vector_store(data_dir)
metadata_store = _load_metadata_store(data_dir)
try:
paragraph_hashes = _hash_set(metadata_store, "paragraphs")
entity_hashes = _hash_set(metadata_store, "entities")
relation_hashes = _hash_set(metadata_store, "relations")
known_hashes = set(getattr(vector_store, "_known_hashes", set()))
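        # _known_hashes may retain deleted entries, so keep only hashes still present in the index.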
live_vector_hashes = {h for h in known_hashes if h in vector_store}
para_vector_hits = len(paragraph_hashes & live_vector_hashes)
ent_vector_hits = len(entity_hashes & live_vector_hashes)
rel_vector_hits = len(relation_hashes & live_vector_hashes)
orphan_vector_hashes = sorted(
live_vector_hashes - paragraph_hashes - entity_hashes - relation_hashes
)
relation_rows = metadata_store.get_relations()
ready_but_missing = 0
not_ready_but_present = 0
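        # Cross-check each relation's recorded vector_state against actual membership in the vector index.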
for row in relation_rows:
h = str(row.get("hash") or "")
state = str(row.get("vector_state") or "none").lower()
in_vector = h in live_vector_hashes
if state == "ready" and not in_vector:
ready_but_missing += 1
if state != "ready" and in_vector:
not_ready_but_present += 1
relation_states = _relation_state_stats(metadata_store)
rel_total = max(0, int(relation_states.get("total", len(relation_hashes))))
ready_count = max(0, int(relation_states.get("ready", 0)))
result = {
"counts": {
"paragraphs": len(paragraph_hashes),
"entities": len(entity_hashes),
"relations": len(relation_hashes),
"vectors_live": len(live_vector_hashes),
},
"coverage": {
"paragraph_vector_coverage": _safe_ratio(para_vector_hits, len(paragraph_hashes)),
"entity_vector_coverage": _safe_ratio(ent_vector_hits, len(entity_hashes)),
"relation_vector_coverage": _safe_ratio(rel_vector_hits, len(relation_hashes)),
"relation_ready_coverage": _safe_ratio(ready_count, rel_total),
},
"relation_states": relation_states,
"orphans": {
"vector_only_count": len(orphan_vector_hashes),
"vector_only_sample": orphan_vector_hashes[:30],
},
"consistency_checks": {
"ready_but_missing_vector": ready_but_missing,
"not_ready_but_vector_present": not_ready_but_present,
},
}
return result
finally:
metadata_store.close()
def main() -> int:
parser = _build_arg_parser()
args = parser.parse_args()
data_dir = Path(args.data_dir).resolve()
if not data_dir.exists():
print(f"❌ 数据目录不存在: {data_dir}")
return 2
try:
result = run_audit(data_dir)
except Exception as e:
print(f"❌ 审计失败: {e}")
return 2
print("=== A_Memorix Vector Consistency Audit ===")
print(f"data_dir: {data_dir}")
print(f"paragraphs: {result['counts']['paragraphs']}")
print(f"entities: {result['counts']['entities']}")
print(f"relations: {result['counts']['relations']}")
print(f"vectors_live: {result['counts']['vectors_live']}")
print(
"coverage: "
f"paragraph={result['coverage']['paragraph_vector_coverage']:.3f}, "
f"entity={result['coverage']['entity_vector_coverage']:.3f}, "
f"relation={result['coverage']['relation_vector_coverage']:.3f}, "
f"relation_ready={result['coverage']['relation_ready_coverage']:.3f}"
)
print(f"relation_states: {result['relation_states']}")
print(
"consistency_checks: "
f"ready_but_missing_vector={result['consistency_checks']['ready_but_missing_vector']}, "
f"not_ready_but_vector_present={result['consistency_checks']['not_ready_but_vector_present']}"
)
print(f"orphan_vectors: {result['orphans']['vector_only_count']}")
if args.json_out:
out_path = Path(args.json_out).resolve()
out_path.parent.mkdir(parents=True, exist_ok=True)
with open(out_path, "w", encoding="utf-8") as f:
json.dump(result, f, ensure_ascii=False, indent=2)
print(f"json_out: {out_path}")
has_anomaly = (
result["orphans"]["vector_only_count"] > 0
or result["consistency_checks"]["ready_but_missing_vector"] > 0
)
if args.strict and has_anomaly:
return 1
return 0
if __name__ == "__main__":
raise SystemExit(main())
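
Usage sketch for the audit script above (the path under plugins/A_memorix/scripts/ is an assumption; the flags come from the argument parser in the file):

python plugins/A_memorix/scripts/audit_vector_consistency.py \
    --data-dir plugins/A_memorix/data \
    --json-out audit_report.json \
    --strict

The script prints counts, coverage ratios, the relation_states distribution, and the consistency-check totals. Exit codes: 0 on a clean run, 1 when --strict is set and orphan vectors or ready-but-missing relations are found, 2 when the data directory is missing or the audit itself fails.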