Files
mai-bot/scripts/test_lpmm_retrieval.py
陈曦 1383caf249 LPMM 知识库删除能力与自检脚本增强(附关键健壮性修复)
为 LPMM 新增安全可控的删除能力:
KGManager.delete_paragraphs 支持按段落/实体哈希删除图节点及关联边,可选清理孤立实体,并从图中重建元数据
统一删除脚本 scripts/delete_lpmm_items.py,支持按批次(OpenIE 文件)、哈希文件、原始文本段落、关键字搜索进行删除,内置 dry-run 和最大节点数保护
新增自检与回归脚本:
scripts/inspect_lpmm_batch.py / scripts/inspect_lpmm_global.py 用于批次级和全局状态检查
scripts/test_lpmm_retrieval.py 一键初始化 LPMM 并用固定问题测试检索效果。
健壮性与性能保护:
在 KGManager.kg_search 中对 ent_appear_cnt 缺失增加兜底,避免实体权重计算时的 KeyError。
增加同义实体数量限制与 PPR 节点/关系阈值,必要时自动退回纯向量检索
文档补充:
docs-src/lpmm_user_guide.md:面向零基础用户的导入 / 删除 / 自检脚本使用指南
docs-src/lpmm_parameters_guide.md:[lpmm_knowledge] 关键参数说明与简单调参建议
2025-11-27 13:20:12 +08:00

94 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
import os
import sys
from typing import List, Dict, Any
# Force UTF-8 on the console streams so that a console encoding error
# cannot interfere with loading the embeddings. This is best-effort: any
# failure is ignored and the script continues with the current encoding.
try:
    for _stream in (sys.stdout, sys.stderr):
        # Not every stream object offers reconfigure(), so probe for it first.
        if hasattr(_stream, "reconfigure"):
            _stream.reconfigure(encoding="utf-8")
except Exception:
    pass
# Make the `src.*` packages importable: append the parent directory of this
# script's directory to sys.path. This must run before the `src` imports below.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from src.common.logger import get_logger
from src.config.config import global_config
from src.chat.knowledge import lpmm_start_up
from src.memory_system.retrieval_tools.query_lpmm_knowledge import query_lpmm_knowledge
# Logger used by this script for its warning/info messages.
logger = get_logger("test_lpmm_retrieval")
# Fixed retrieval cases. Each entry provides:
#   "name"            - label printed in the report,
#   "query"           - text sent to the knowledge-base query,
#   "expect_keywords" - substrings looked for in the returned text; at least
#                       one hit is needed for the case to count as PASS.
TEST_CASES: List[Dict[str, Any]] = [
    # Case 1.
    # NOTE(review): the query text does not obviously relate to this case's
    # name / expected keywords - confirm this pairing is intended.
    {
        "name": "回滚一批知识",
        "query": "LPMM是什么?",
        "expect_keywords": [
            "哈希列表",
            "删除脚本",
            "OpenIE",
        ],
    },
    # Case 2.
    {
        "name": "调整 LPMM 检索参数",
        "query": "不同用词习惯带来的检索偏差该如何解决",
        "expect_keywords": [
            "bot_config.toml",
            "lpmm_knowledge",
            "qa_paragraph_search_top_k",
        ],
    },
]
async def run_tests() -> None:
    """Run every entry of TEST_CASES against the LPMM knowledge base and print a report.

    Steps:
      1. Log a warning when ``global_config.lpmm_knowledge.enable`` is falsy.
      2. Call ``lpmm_start_up()`` to initialise the knowledge base.
      3. For each case, await ``query_lpmm_knowledge(query, limit=3)``, print
         the raw result, and print a status derived from the returned text:

         * ``ERROR``   - the text contains one of the "not enabled" /
           "not initialised" / "query failed" marker substrings.
         * ``NO_HIT``  - the text contains the "not found" marker substring.
         * ``PASS``    - at least one expected keyword occurs in the text, or
           the case declares no expected keywords.
         * ``WARN``    - expected keywords were declared but none occurred.
         * ``UNKNOWN`` - the result is not a ``str``.

    Nothing is returned or asserted; the report is meant to be read by a human.
    """
    if not global_config.lpmm_knowledge.enable:
        logger.warning("当前配置中 lpmm_knowledge.enable 为 False检索测试可能直接返回“未启用”。")

    logger.info("开始初始化 LPMM 知识库...")
    lpmm_start_up()
    logger.info("LPMM 知识库初始化完成,开始执行测试用例。")

    separator = "=" * 60
    # Substrings of the returned text that are treated as a failed query.
    error_markers = ("未启用", "未初始化", "查询失败")

    for case in TEST_CASES:
        name = case["name"]
        query = case["query"]
        expect_keywords: List[str] = case.get("expect_keywords", [])

        print(f"\n{separator}")
        print(f"[TEST] {name}")
        print(f"[Q] {query}")

        result = await query_lpmm_knowledge(query, limit=3)

        print("\n[RAW RESULT]")
        print(result)

        # Classify the answer; anything that is not a string stays UNKNOWN.
        status = "UNKNOWN"
        hit_keywords: List[str] = []
        if isinstance(result, str):
            if any(marker in result for marker in error_markers):
                status = "ERROR"
            elif "未找到与" in result:
                status = "NO_HIT"
            elif not expect_keywords:
                # Nothing to look for: any regular answer counts as a pass.
                status = "PASS"
            else:
                hit_keywords = [kw for kw in expect_keywords if kw in result]
                status = "PASS" if hit_keywords else "WARN"

        print("\n[CHECK]")
        print(f"Status: {status}")
        if expect_keywords:
            print(f"Expected keywords: {expect_keywords}")
            print(f"Hit keywords: {hit_keywords}")

    print(f"\n{separator}")
    print("LPMM 检索测试完成。请根据每条用例的 Status 和命中关键词判断检索效果是否符合预期。")
def main() -> None:
    """Synchronous entry point: run the async ``run_tests`` coroutine to completion."""
    test_run = run_tests()
    asyncio.run(test_run)
# Only start the retrieval checks when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()