为 LPMM 新增安全可控的删除能力: KGManager.delete_paragraphs 支持按段落/实体哈希删除图节点及关联边,可选清理孤立实体,并从图中重建元数据;统一删除脚本 scripts/delete_lpmm_items.py,支持按批次(OpenIE 文件)、哈希文件、原始文本段落、关键字搜索进行删除,内置 dry-run 和最大节点数保护。 新增自检与回归脚本: scripts/inspect_lpmm_batch.py / scripts/inspect_lpmm_global.py 用于批次级和全局状态检查;scripts/test_lpmm_retrieval.py 一键初始化 LPMM 并用固定问题测试检索效果。 健壮性与性能保护: 在 KGManager.kg_search 中对 ent_appear_cnt 缺失增加兜底,避免实体权重计算时的 KeyError。 增加同义实体数量限制与 PPR 节点/关系阈值,必要时自动退回纯向量检索。 文档补充: docs-src/lpmm_user_guide.md:面向零基础用户的导入 / 删除 / 自检脚本使用指南;docs-src/lpmm_parameters_guide.md:[lpmm_knowledge] 关键参数说明与简单调参建议。
94 lines
2.9 KiB
Python
94 lines
2.9 KiB
Python
import asyncio
|
||
import os
|
||
import sys
|
||
from typing import List, Dict, Any
|
||
|
||
# Force UTF-8 on the standard streams so that a console-encoding error cannot
# interfere with the embedding load performed later in this script.
try:
    for _stream in (sys.stdout, sys.stderr):
        # Streams that have been replaced or wrapped may not offer
        # `reconfigure`; those are left untouched.
        if hasattr(_stream, "reconfigure"):
            _stream.reconfigure(encoding="utf-8")
except Exception:
    # Deliberately best effort: a stream that refuses to be reconfigured
    # must not prevent the script from running.
    pass
|
||
|
||
# Make `src.*` importable: append the parent of this script's directory to
# sys.path (presumably the repository root, since the script lives in a
# sub-directory of it).
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||
|
||
from src.common.logger import get_logger
|
||
from src.config.config import global_config
|
||
from src.chat.knowledge import lpmm_start_up
|
||
from src.memory_system.retrieval_tools.query_lpmm_knowledge import query_lpmm_knowledge
|
||
|
||
# Module-level logger for this script, obtained from the project's
# get_logger under the name "test_lpmm_retrieval".
logger = get_logger("test_lpmm_retrieval")
|
||
|
||
|
||
# Fixed retrieval test cases. Each entry provides:
#   name            - label printed in the report for the case
#   query           - question string sent to the retrieval tool
#   expect_keywords - substrings looked for in the returned text
TEST_CASES: List[Dict[str, Any]] = [
    {
        # NOTE(review): the name refers to rolling back a batch of knowledge
        # while the query asks what LPMM is - confirm the pairing is intended.
        "name": "回滚一批知识",
        "query": "LPMM是什么?",
        "expect_keywords": ["哈希列表", "删除脚本", "OpenIE"],
    },
    {
        "name": "调整 LPMM 检索参数",
        "query": "不同用词习惯带来的检索偏差该如何解决",
        "expect_keywords": ["bot_config.toml", "lpmm_knowledge", "qa_paragraph_search_top_k"],
    },
]
|
||
|
||
|
||
def _classify_result(result: Any, expect_keywords: List[str]) -> tuple:
    """Classify one retrieval result and collect the expected keywords it contains.

    Args:
        result: Value returned by the retrieval call for a single query.
        expect_keywords: Substrings to look for in ``result``.

    Returns:
        A ``(status, hit_keywords)`` pair. ``status`` is one of:

        * ``"UNKNOWN"`` - ``result`` is not a string.
        * ``"ERROR"`` - ``result`` contains "未启用", "未初始化" or "查询失败".
        * ``"NO_HIT"`` - ``result`` contains "未找到与".
        * ``"PASS"`` - at least one expected keyword is present, or no
          keywords were expected.
        * ``"WARN"`` - keywords were expected but none is present.

        ``hit_keywords`` holds the expected keywords found in ``result`` in
        their original order; it is empty unless keywords were actually
        searched for.
    """
    if not isinstance(result, str):
        return "UNKNOWN", []

    # Error markers are checked before the "nothing found" marker, so a text
    # containing both is reported as ERROR.
    if any(marker in result for marker in ("未启用", "未初始化", "查询失败")):
        return "ERROR", []
    if "未找到与" in result:
        return "NO_HIT", []

    if not expect_keywords:
        return "PASS", []

    hit_keywords = [kw for kw in expect_keywords if kw in result]
    return ("PASS" if hit_keywords else "WARN"), hit_keywords


async def run_tests() -> None:
    """Initialise LPMM and run every entry of ``TEST_CASES`` through retrieval.

    For each case the name, the query, the raw result, the derived status and
    (when keywords are expected) the expected and matched keywords are printed
    to stdout. Nothing is returned; the printed report is meant to be judged
    by the person running the script.
    """
    if not global_config.lpmm_knowledge.enable:
        # Only a warning: the test cases are still executed afterwards.
        logger.warning("当前配置中 lpmm_knowledge.enable 为 False,检索测试可能直接返回“未启用”。")

    logger.info("开始初始化 LPMM 知识库...")
    lpmm_start_up()
    logger.info("LPMM 知识库初始化完成,开始执行测试用例。")

    for case in TEST_CASES:
        name = case["name"]
        query = case["query"]
        expect_keywords: List[str] = case.get("expect_keywords", [])

        print("\n" + "=" * 60)
        print(f"[TEST] {name}")
        print(f"[Q] {query}")

        result = await query_lpmm_knowledge(query, limit=3)

        print("\n[RAW RESULT]")
        print(result)

        status, hit_keywords = _classify_result(result, expect_keywords)

        print("\n[CHECK]")
        print(f"Status: {status}")
        if expect_keywords:
            print(f"Expected keywords: {expect_keywords}")
            print(f"Hit keywords: {hit_keywords}")

    print("\n" + "=" * 60)
    print("LPMM 检索测试完成。请根据每条用例的 Status 和命中关键词判断检索效果是否符合预期。")
|
||
|
||
|
||
def main() -> None:
    """Synchronous entry point: run the async test routine to completion."""
    asyncio.run(run_tests())


# Run the tests only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|