diff --git a/bot.py b/bot.py
index 97bf15d4..1a13e46e 100644
--- a/bot.py
+++ b/bot.py
@@ -1,13 +1,11 @@
# raise RuntimeError("System Not Ready")
from pathlib import Path
-
from rich.traceback import install
import asyncio
import hashlib
import os
import platform
-
# import shutil
import subprocess
import sys
@@ -16,6 +14,7 @@ import traceback
from src.common.i18n import set_locale, t, tn
from src.common.logger import get_logger, initialize_logging, shutdown_logging
+from src.config.legacy_upgrade_confirmation import require_legacy_upgrade_confirmation
# 设置工作目录为脚本所在目录
script_dir = os.path.dirname(os.path.abspath(__file__))
@@ -91,6 +90,7 @@ def run_runner_process():
# 此时应该作为 Runner 运行。
if os.environ.get("MAIBOT_WORKER_PROCESS") != "1":
if __name__ == "__main__":
+ require_legacy_upgrade_confirmation(Path(script_dir))
run_runner_process()
# 如果作为模块导入,不执行 Runner 逻辑,但也不应该执行下面的 Worker 逻辑
sys.exit(0)
@@ -103,6 +103,8 @@ if os.environ.get("MAIBOT_WORKER_PROCESS") != "1":
# 这是正常的,但为了避免重复的初始化日志,我们在 initialize_logging() 中添加了防重复机制
# 不过由于是不同进程,每个进程仍会初始化一次,这是预期的行为
+require_legacy_upgrade_confirmation(Path(script_dir))
+
from src.main import MainSystem # noqa
from src.manager.async_task_manager import async_task_manager # noqa
diff --git a/changelogs/changelog.md b/changelogs/changelog.md
index 1321694f..b8d00f43 100644
--- a/changelogs/changelog.md
+++ b/changelogs/changelog.md
@@ -1,5 +1,120 @@
# Changelog
-## [0.12.2] - 2025-1-11
+
+
+## [1.0.0-pre.1] - 2026-4-19
+### 核心功能更新
+### MaiSaka系统
+原生支持多模态模型
+原生支持工具调用,多轮调用和mcp
+升级的replyer回复器,同样支持多模态
+统一群聊与私聊回复链路
+### 记忆系统革新
+引入 A_Memorix 长期记忆系统,替代旧记忆链路
+支持记忆检索、写回、迁移、反馈修正和管理界面
+### 全新插件系统
+提供独立的插件开发SDK
+重构插件系统为 plugin_runtime,提供 RPC、Hook、能力注册、运行时隔离、配置校验、批量重载与旧能力迁移。
+### 全面重构和修复
+新增 platform_io 消息平台抽象与消息中间层,统一消息路由、出站追踪和旧驱动兼容。
+新增统一 services 服务层,集中管理 LLM、生成器、发送、数据库、记忆、Embedding 与 HTML 渲染等能力。
+引入 MCP 与统一工具系统,插件工具和 MCP 工具统一调度,并优化工具展示、索引、重试和失败留档。
+WebUI 后端完成模块化重构,新增统一 WebSocket、插件管理、记忆管理、知识库、配置和监控相关 API。
+配置系统升级,支持旧配置自动迁移、字段类型安全校验、多模态模型配置和更细的工具/回复参数。
+优化表情包、图片、表达方式和黑话学习系统,提升识别、缓存、发送、学习与调用稳定性。
+清理旧插件系统、旧记忆系统、旧回复链路、旧工具系统、旧 WebUI 构建产物和多个废弃内置插件。
+!!预发布版本WebUI暂时不可用
+
+完整更新清单
+核心架构
+大规模重构核心运行结构,新增 src/services 服务层,包括 LLM、生成器、发送、消息、数据库、记忆、HTML 渲染、Embedding 等服务。
+新增统一的 platform_io 消息平台抽象,提供驱动、路由、去重、出站追踪、插件驱动和旧版驱动兼容。
+引入新的消息中间层和网关设计,为插件、适配器、主程序之间的消息流转建立统一基础。
+重构数据模型,新增聊天目标、规划动作、回复生成结果、LLM 服务请求等模型。
+新增数据库迁移管理器,支持迁移进度记录、表级/记录级追踪和旧数据兼容。
+统一机器人识别逻辑,支持多平台场景,包括 WebUI。
+
+MaiSaka / 回复系统
+新增并持续完善 maisaka 主回复链路,逐步接管群聊与私聊回复逻辑。
+新增 planner / replyer / timing / subagent 等运行结构,支持 wait 打断、防抖、重试和状态监控。
+新增 Maisaka 实时聊天流监控、阶段状态面板、控制台工具调用展示、prompt log HTML 预览。
+回复器支持多模态与非多模态统一行为,新增模型 visual 参数,避免非多模态模型误传图片。
+支持复杂消息、转发消息、图片原始数据解析、URL 图片浏览、表情包类消息标记。
+优化上下文压缩,显示实时上下文占用,压缩早期 assistant 信息。
+新增聊天特定额外 prompt、多语言 prompt、prompt 独立文件管理、用户自定义 prompt 与覆盖能力。
+新增工具索引展开方式,压缩工具描述,提高工具调用成功率,修复无参工具、孤儿工具、Gemini tool 等问题。
+新增回复后打分追踪器,用于记录和分析回复效果。
+优化回复频率控制、引用回复概率、打字时间、重复思考、wait 行为和 replyer 空回复处理。
+
+记忆系统 / A_Memorix
+新增并主线化 A_Memorix 长期记忆系统,包含运行时、检索、存储、管理界面和迁移脚本。
+新增记忆测试、检索工具、记忆服务、记忆自动化钩子与写回链路。
+支持将旧 LPMM/旧记忆数据迁移到新长期记忆系统。
+优化记忆检索速度、token 消耗、时间信息、上下文检索方式和人物事实提取。
+新增记忆反馈修正、知识库反馈详情、图存储持久化、总结导入、embedding 维度控制等回归测试。
+移除旧 memory_system 中的大量检索工具与聊天总结逻辑,改由新服务层和 A_Memorix 承担。
+
+插件系统 / Runtime
+大规模替换旧 plugin_system,新增 plugin_runtime。
+新增插件能力注册、组件注册、事件分发、Hook 分发、API 注册、Supervisor、Runner、RPC Server/Client。
+支持插件 manifest 校验、包式插件导入、临时 sys.path 管理、导入保护和模块访问控制。
+新增插件配置版本管理、配置归一化、运行时配置校验、批量插件重载。
+新增插件依赖流水线、HTML 渲染服务、插件 SDK 集成增强。
+新增旧数据库 peewee 兼容层,初步重构插件 database API。
+新增插件侧消息网关能力、出站追踪、会话 ID 计算和适配器回执消息 ID 更新。
+修复 Windows 平台插件运行时信号处理、DLL 导入隔离、包式导入、重载机制等问题。
+限制 maibot-plugin-sdk 版本范围,并升级到 2.3.0 相关适配。
+
+MCP / 工具系统
+新增独立 mcp_module,包含连接、管理、Provider、Host LLM Bridge、Hook 与数据模型。
+引入统一插件与 MCP 工具系统,移除旧工具系统和 tool_use 模型。
+工具支持索引检索、延迟展开、统一控制台展示、失败请求留档与重试分析。
+新增 host LLM bridge,使 MCP 工具和宿主模型调用链路更统一。
+
+WebUI / API
+WebUI 后端整体重构,拆分为 app、依赖、中间件、routers、schemas、services、utils 等结构。
+新增统一 WebSocket 连接管理与路由。
+新增聊天、配置、表情包、表达方式、黑话、插件、记忆、知识库、统计、系统等路由重构。
+新增规划器和回复器监控 API、日志搜索、日志上线数量配置、prompt log 预览。
+新增本地已安装插件 README 读取 API、插件安装/配置/运行时管理相关 API。
+新增静态资源包提示和错误处理,后续修复为仅使用包内 WebUI 静态资源。
+修复 knowledgebase 反馈详情类型问题、WebUI memory 路由、配置 schema 测试等问题。
+注意:历史中有大量 dashboard 前端提交和 WebUI dist 迁移/删除,但本次没有修改 dashboard。
+
+配置 / 模型 / 依赖
+配置系统引入 ConfigBase 测试与更严格校验,支持自动检测并升级旧版配置。
+支持 Union / Optional 字段转换,并禁止不安全的多类型 Union。
+新增配置版本到 8.4.0,加入工具筛选、回复器、多模态、Maim Message、日志颜色等配置。
+移除 Planner 问题配置项、无用配置、旧路径显示配置、模板配置文件等冗余项。
+模型配置移除无用模型、utils_small、弃用的 LLM_judge 类型和 tool_use 模型。
+新增模型随机选择策略、模型 visual 参数、OpenAI 兼容性增强。
+修复 Qwen 3.5 空回复、Gemini 请求思考签名、部分模型不支持 gif、OpenAI client 工具请求等问题。
+移除 uv.lock,更新 pyproject.toml / requirements.txt 依赖,最终 HEAD 又移除部分依赖。
+
+表情包 / 图片
+新增表情包系统重构,包含注册、识别、缓存、发送、选择、数据库迁移。
+表情包选择改为一次性选择全部,支持配置,并接入 subagent。
+移除旧内置 emoji 插件,改为 Maisaka 内置动作或新系统能力。
+修复表情包发送无记录、识别失败、缓存问题、图片存储问题、图片过大自动重试等。
+新增异步后台图片/表情处理、图片展示模式优化、复杂消息查看。
+
+表达方式 / 黑话 / 学习
+新增自动表达优化、表达方式检查脚本、表达方式最后修改来源字段。
+修复私聊表达风格随机、表达方式学习与使用、表达方式全局共享。
+新增 planner 黑话缓存,恢复表达学习、黑话学习、黑话使用和表达使用。
+修复黑话提取学习缓存和 Jargon 提取问题。
+新增表达方式快速版本,优化表达方式提取与 LLM 判断标记。
+
+文档 / 国际化 / 工程规范
+更新 README、徽章、快速导航、版本信息和主仓库地址。
+新增/更新 changelog、设计文档、todo、记忆契约文档、Caddy 反向代理与 TLS/SSL 文档。
+新增 AGENTS.md,并更新代码规范、导入顺序、注释规范、语言规范。
+新增 Crowdin 配置和多语言资源,包含中英日韩等 locale。
+新增 CodeRabbit 配置、PR 模板、测试计划和若干调试/迁移脚本。
+新增 agentlite 子项目/模块,包含 agent、tool、provider、skills、MCP、文件/网页/shell 工具和大量测试、示例、文档。
+测试与质量
+
+
+## [0.12.2] - 2026-1-11
### 功能更改
- 优化私聊wait逻辑
- 超时时强制引用回复
diff --git a/changelogs/changes.md b/changelogs/changes.md
deleted file mode 100644
index db41703c..00000000
--- a/changelogs/changes.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# 插件API与规范修改
-
-1. 现在`plugin_system`的`__init__.py`文件中包含了所有插件API的导入,用户可以直接使用`from src.plugin_system import *`来导入所有API。
-
-2. register_plugin函数现在转移到了`plugin_system.apis.plugin_register_api`模块中,用户可以通过`from src.plugin_system.apis.plugin_register_api import register_plugin`来导入。
- - 顺便一提,按照1中说法,你可以这么用:
- ```python
- from src.plugin_system import register_plugin
- ```
-
-3. 现在强制要求的property如下,即你必须覆盖的属性有:
- - `plugin_name`: 插件名称,必须是唯一的。(与文件夹相同)
- - `enable_plugin`: 是否启用插件,默认为`True`。
- - `dependencies`: 插件依赖的其他插件列表,默认为空。**现在并不检查(也许)**
- - `python_dependencies`: 插件依赖的Python包列表,默认为空。**现在并不检查**
- - `config_file_name`: 插件配置文件名,默认为`config.toml`。
- - `config_schema`: 插件配置文件的schema,用于自动生成配置文件。
-4. 部分API的参数类型和返回值进行了调整
- - `chat_api.py`中获取流的参数中可以使用一个特殊的枚举类型来获得所有平台的 ChatStream 了。
- - `config_api.py`中的`get_global_config`和`get_plugin_config`方法现在支持嵌套访问的配置键名。
- - `database_api.py`中的`db_query`方法调整了参数顺序以增强参数限制的同时,保证了typing正确;`db_get`方法增加了`single_result`参数,与`db_query`保持一致。
-5. 增加了`logging_api`,可以用`get_logger`来获取日志记录器。
-6. 增加了插件和组件管理的API。
-7. `BaseCommand`的`execute`方法现在返回一个三元组,包含是否执行成功、可选的回复消息和是否拦截消息。
- - 这意味着你终于可以动态控制是否继续后续消息的处理了。
-8. 移除了dependency_manager,但是依然保留了`python_dependencies`属性,等待后续重构。
- - 一并移除了文档有关manager的内容。
-9. 增加了工具的有关api
-
-# 插件系统修改
-1. 现在所有的匹配模式不再是关键字了,而是枚举类。**(可能有遗漏)**
-2. 修复了一下显示插件信息不显示的问题。同时精简了一下显示内容
-3. 修复了插件系统混用了`plugin_name`和`display_name`的问题。现在所有的插件信息都使用`display_name`来显示,而内部标识仍然使用`plugin_name`。
-4. 现在增加了参数类型检查,完善了对应注释
-5. 现在插件抽象出了总基类 `PluginBase`
- - 基于`Action`和`Command`的插件基类现在为`BasePlugin`。
- - 基于`Event`的插件基类现在为`BaseEventPlugin`。
- - 基于`Action`,`Command`和`Event`的插件基类现在为`BasePlugin`,所有插件都应该继承此基类。
- - `BasePlugin`继承自`PluginBase`。
- - 所有的插件类都由`register_plugin`装饰器注册。
-6. 现在我们终于可以让插件有自定义的名字了!
- - 真正实现了插件的`plugin_name`**不受文件夹名称限制**的功能。(吐槽:可乐你的某个小小细节导致我搞了好久……)
- - 通过在插件类中定义`plugin_name`属性来指定插件内部标识符。
- - 由于此更改一个文件中现在可以有多个插件类,但每个插件类必须有**唯一的**`plugin_name`。
- - 在某些插件加载失败时,现在会显示包名而不是插件内部标识符。
- - 例如:`MaiMBot.plugins.example_plugin`而不是`example_plugin`。
- - 仅在插件 import 失败时会如此,正常注册过程中失败的插件不会显示包名,而是显示插件内部标识符。(这是特性,但是基本上不可能出现这个情况)
-7. 现在不支持单文件插件了,加载方式已经完全删除。
-8. 把`BaseEventPlugin`合并到了`BasePlugin`中,所有插件都应该继承自`BasePlugin`。
-9. `BaseEventHandler`现在有了`get_config`方法了。
-10. 修正了`main.py`中的错误输出。
-11. 修正了`command`所编译的`Pattern`注册时的错误输出。
-12. `events_manager`有了task相关逻辑了。
-13. 现在有了插件卸载和重载功能了,也就是热插拔。
-14. 实现了组件的全局启用和禁用功能。
- - 通过`enable_component`和`disable_component`方法来启用或禁用组件。
- - 不过这个操作不会保存到配置文件~
-15. 实现了组件的局部禁用,也就是针对某一个聊天禁用的功能。
- - 通过`disable_specific_chat_action`,`enable_specific_chat_action`,`disable_specific_chat_command`,`enable_specific_chat_command`,`disable_specific_chat_event_handler`,`enable_specific_chat_event_handler`来操作
- - 同样不保存到配置文件~
-16. 把`BaseTool`一并合并进入了插件系统
-
-# 官方插件修改
-1. `HelloWorld`插件现在有一个样例的`EventHandler`。
-2. 内置插件增加了一个通过`Command`来管理插件的功能。具体是使用`/pm`命令唤起。(需要自行启用)
-3. `HelloWorld`插件现在有一个样例的`CompareNumbersTool`。
-
-### 执笔BGM
-塞壬唱片!
\ No newline at end of file
diff --git a/pytests/A_memorix_test/test_legacy_config_migration.py b/pytests/A_memorix_test/test_legacy_config_migration.py
index 8a110663..d4022f79 100644
--- a/pytests/A_memorix_test/test_legacy_config_migration.py
+++ b/pytests/A_memorix_test/test_legacy_config_migration.py
@@ -1,6 +1,16 @@
from src.config.legacy_migration import try_migrate_legacy_bot_config_dict
+def test_legacy_empty_qq_account_is_migrated_to_zero():
+ payload = {"bot": {"qq_account": ""}}
+
+ result = try_migrate_legacy_bot_config_dict(payload)
+
+ assert result.migrated is True
+ assert "bot.qq_account_empty" in result.reason
+ assert result.data["bot"]["qq_account"] == 0
+
+
def test_legacy_learning_list_with_numeric_fourth_column_is_migrated():
payload = {
"expression": {
@@ -35,6 +45,72 @@ def test_legacy_learning_list_with_numeric_fourth_column_is_migrated():
]
+def test_legacy_learning_list_with_string_bool_fourth_column_is_migrated():
+ payload = {"expression": {"learning_list": [["qq::group", "enable", "enable", "true"]]}}
+
+ result = try_migrate_legacy_bot_config_dict(payload)
+
+ assert result.migrated is True
+ assert "expression.learning_list" in result.reason
+ assert result.data["expression"]["learning_list"] == [
+ {
+ "platform": "qq",
+ "item_id": "",
+ "rule_type": "group",
+ "use_expression": True,
+ "enable_learning": True,
+ "enable_jargon_learning": True,
+ }
+ ]
+
+
+def test_legacy_expression_groups_empty_string_is_migrated():
+ payload = {"expression": {"expression_groups": ""}}
+
+ result = try_migrate_legacy_bot_config_dict(payload)
+
+ assert result.migrated is True
+ assert "expression.expression_groups" in result.reason
+ assert result.data["expression"]["expression_groups"] == []
+
+
+def test_legacy_expression_groups_global_marker_is_migrated():
+ payload = {"expression": {"expression_groups": [["*"]]}}
+
+ result = try_migrate_legacy_bot_config_dict(payload)
+
+ assert result.migrated is True
+ assert "expression.expression_groups" in result.reason
+ assert result.data["expression"]["expression_groups"] == [
+ {
+ "expression_groups": [
+ {
+ "platform": "*",
+ "item_id": "*",
+ "rule_type": "group",
+ }
+ ]
+ }
+ ]
+
+
+def test_empty_keyword_rules_are_dropped():
+ payload = {
+ "keyword_reaction": {
+ "keyword_rules": [
+ {"keywords": [], "reaction": ""},
+ {"keywords": ["test"], "reaction": "ok"},
+ ]
+ }
+ }
+
+ result = try_migrate_legacy_bot_config_dict(payload)
+
+ assert result.migrated is True
+ assert "keyword_reaction.keyword_rules_empty" in result.reason
+ assert result.data["keyword_reaction"]["keyword_rules"] == [{"keywords": ["test"], "reaction": "ok"}]
+
+
def test_visual_multimodal_planner_is_migrated_to_planner_mode():
payload = {"visual": {"multimodal_planner": True}}
diff --git a/scripts/analyze_evaluation_stats.py b/scripts/analyze_evaluation_stats.py
deleted file mode 100644
index e18243d1..00000000
--- a/scripts/analyze_evaluation_stats.py
+++ /dev/null
@@ -1,328 +0,0 @@
-"""
-评估结果统计脚本
-
-功能:
-1. 扫描temp目录下所有JSON文件
-2. 分析每个文件的统计信息
-3. 输出详细的统计报告
-"""
-
-import json
-import os
-import sys
-import glob
-from collections import Counter
-from datetime import datetime
-from typing import Dict, List, Set, Tuple
-
-# 添加项目根目录到路径
-project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-sys.path.insert(0, project_root)
-
-from src.common.logger import get_logger # noqa: E402
-
-logger = get_logger("evaluation_stats_analyzer")
-
-# 评估结果文件路径
-TEMP_DIR = os.path.join(os.path.dirname(__file__), "temp")
-
-
-def parse_datetime(dt_str: str) -> datetime | None:
- """解析ISO格式的日期时间字符串"""
- try:
- return datetime.fromisoformat(dt_str)
- except Exception:
- return None
-
-
-def analyze_single_file(file_path: str) -> Dict:
- """
- 分析单个JSON文件的统计信息
-
- Args:
- file_path: JSON文件路径
-
- Returns:
- 统计信息字典
- """
- file_name = os.path.basename(file_path)
- stats = {
- "file_name": file_name,
- "file_path": file_path,
- "file_size": os.path.getsize(file_path),
- "error": None,
- "last_updated": None,
- "total_count": 0,
- "actual_count": 0,
- "suitable_count": 0,
- "unsuitable_count": 0,
- "suitable_rate": 0.0,
- "unique_pairs": 0,
- "evaluators": Counter(),
- "evaluation_dates": [],
- "date_range": None,
- "has_expression_id": False,
- "has_reason": False,
- "reason_count": 0,
- }
-
- try:
- with open(file_path, "r", encoding="utf-8") as f:
- data = json.load(f)
-
- # 基本信息
- stats["last_updated"] = data.get("last_updated")
- stats["total_count"] = data.get("total_count", 0)
-
- results = data.get("manual_results", [])
- stats["actual_count"] = len(results)
-
- if not results:
- return stats
-
- # 统计通过/不通过
- suitable_count = sum(1 for r in results if r.get("suitable") is True)
- unsuitable_count = sum(1 for r in results if r.get("suitable") is False)
- stats["suitable_count"] = suitable_count
- stats["unsuitable_count"] = unsuitable_count
- stats["suitable_rate"] = (suitable_count / len(results) * 100) if results else 0.0
-
- # 统计唯一的(situation, style)对
- pairs: Set[Tuple[str, str]] = set()
- for r in results:
- if "situation" in r and "style" in r:
- pairs.add((r["situation"], r["style"]))
- stats["unique_pairs"] = len(pairs)
-
- # 统计评估者
- for r in results:
- evaluator = r.get("evaluator", "unknown")
- stats["evaluators"][evaluator] += 1
-
- # 统计评估时间
- evaluation_dates = []
- for r in results:
- evaluated_at = r.get("evaluated_at")
- if evaluated_at:
- dt = parse_datetime(evaluated_at)
- if dt:
- evaluation_dates.append(dt)
-
- stats["evaluation_dates"] = evaluation_dates
- if evaluation_dates:
- min_date = min(evaluation_dates)
- max_date = max(evaluation_dates)
- stats["date_range"] = {
- "start": min_date.isoformat(),
- "end": max_date.isoformat(),
- "duration_days": (max_date - min_date).days + 1,
- }
-
- # 检查字段存在性
- stats["has_expression_id"] = any("expression_id" in r for r in results)
- stats["has_reason"] = any(r.get("reason") for r in results)
- stats["reason_count"] = sum(1 for r in results if r.get("reason"))
-
- except Exception as e:
- stats["error"] = str(e)
- logger.error(f"分析文件 {file_name} 时出错: {e}")
-
- return stats
-
-
-def print_file_stats(stats: Dict, index: int = None):
- """打印单个文件的统计信息"""
- prefix = f"[{index}] " if index is not None else ""
- print(f"\n{'=' * 80}")
- print(f"{prefix}文件: {stats['file_name']}")
- print(f"{'=' * 80}")
-
- if stats["error"]:
- print(f"✗ 错误: {stats['error']}")
- return
-
- print(f"文件路径: {stats['file_path']}")
- print(f"文件大小: {stats['file_size']:,} 字节 ({stats['file_size'] / 1024:.2f} KB)")
-
- if stats["last_updated"]:
- print(f"最后更新: {stats['last_updated']}")
-
- print("\n【记录统计】")
- print(f" 文件中的 total_count: {stats['total_count']}")
- print(f" 实际记录数: {stats['actual_count']}")
-
- if stats["total_count"] != stats["actual_count"]:
- diff = stats["total_count"] - stats["actual_count"]
- print(f" ⚠️ 数量不一致,差值: {diff:+d}")
-
- print("\n【评估结果统计】")
- print(f" 通过 (suitable=True): {stats['suitable_count']} 条 ({stats['suitable_rate']:.2f}%)")
- print(f" 不通过 (suitable=False): {stats['unsuitable_count']} 条 ({100 - stats['suitable_rate']:.2f}%)")
-
- print("\n【唯一性统计】")
- print(f" 唯一 (situation, style) 对: {stats['unique_pairs']} 条")
- if stats["actual_count"] > 0:
- duplicate_count = stats["actual_count"] - stats["unique_pairs"]
- duplicate_rate = (duplicate_count / stats["actual_count"] * 100) if stats["actual_count"] > 0 else 0
- print(f" 重复记录: {duplicate_count} 条 ({duplicate_rate:.2f}%)")
-
- print("\n【评估者统计】")
- if stats["evaluators"]:
- for evaluator, count in stats["evaluators"].most_common():
- rate = (count / stats["actual_count"] * 100) if stats["actual_count"] > 0 else 0
- print(f" {evaluator}: {count} 条 ({rate:.2f}%)")
- else:
- print(" 无评估者信息")
-
- print("\n【时间统计】")
- if stats["date_range"]:
- print(f" 最早评估时间: {stats['date_range']['start']}")
- print(f" 最晚评估时间: {stats['date_range']['end']}")
- print(f" 评估时间跨度: {stats['date_range']['duration_days']} 天")
- else:
- print(" 无时间信息")
-
- print("\n【字段统计】")
- print(f" 包含 expression_id: {'是' if stats['has_expression_id'] else '否'}")
- print(f" 包含 reason: {'是' if stats['has_reason'] else '否'}")
- if stats["has_reason"]:
- rate = (stats["reason_count"] / stats["actual_count"] * 100) if stats["actual_count"] > 0 else 0
- print(f" 有理由的记录: {stats['reason_count']} 条 ({rate:.2f}%)")
-
-
-def print_summary(all_stats: List[Dict]):
- """打印汇总统计信息"""
- print(f"\n{'=' * 80}")
- print("汇总统计")
- print(f"{'=' * 80}")
-
- total_files = len(all_stats)
- valid_files = [s for s in all_stats if not s.get("error")]
- error_files = [s for s in all_stats if s.get("error")]
-
- print("\n【文件统计】")
- print(f" 总文件数: {total_files}")
- print(f" 成功解析: {len(valid_files)}")
- print(f" 解析失败: {len(error_files)}")
-
- if error_files:
- print("\n 失败文件列表:")
- for stats in error_files:
- print(f" - {stats['file_name']}: {stats['error']}")
-
- if not valid_files:
- print("\n没有成功解析的文件")
- return
-
- # 汇总记录统计
- total_records = sum(s["actual_count"] for s in valid_files)
- total_suitable = sum(s["suitable_count"] for s in valid_files)
- total_unsuitable = sum(s["unsuitable_count"] for s in valid_files)
- total_unique_pairs = set()
-
- # 收集所有唯一的(situation, style)对
- for stats in valid_files:
- try:
- with open(stats["file_path"], "r", encoding="utf-8") as f:
- data = json.load(f)
- results = data.get("manual_results", [])
- for r in results:
- if "situation" in r and "style" in r:
- total_unique_pairs.add((r["situation"], r["style"]))
- except Exception:
- pass
-
- print("\n【记录汇总】")
- print(f" 总记录数: {total_records:,} 条")
- print(
- f" 通过: {total_suitable:,} 条 ({total_suitable / total_records * 100:.2f}%)"
- if total_records > 0
- else " 通过: 0 条"
- )
- print(
- f" 不通过: {total_unsuitable:,} 条 ({total_unsuitable / total_records * 100:.2f}%)"
- if total_records > 0
- else " 不通过: 0 条"
- )
- print(f" 唯一 (situation, style) 对: {len(total_unique_pairs):,} 条")
-
- if total_records > 0:
- duplicate_count = total_records - len(total_unique_pairs)
- duplicate_rate = (duplicate_count / total_records * 100) if total_records > 0 else 0
- print(f" 重复记录: {duplicate_count:,} 条 ({duplicate_rate:.2f}%)")
-
- # 汇总评估者统计
- all_evaluators = Counter()
- for stats in valid_files:
- all_evaluators.update(stats["evaluators"])
-
- print("\n【评估者汇总】")
- if all_evaluators:
- for evaluator, count in all_evaluators.most_common():
- rate = (count / total_records * 100) if total_records > 0 else 0
- print(f" {evaluator}: {count:,} 条 ({rate:.2f}%)")
- else:
- print(" 无评估者信息")
-
- # 汇总时间范围
- all_dates = []
- for stats in valid_files:
- all_dates.extend(stats["evaluation_dates"])
-
- if all_dates:
- min_date = min(all_dates)
- max_date = max(all_dates)
- print("\n【时间汇总】")
- print(f" 最早评估时间: {min_date.isoformat()}")
- print(f" 最晚评估时间: {max_date.isoformat()}")
- print(f" 总时间跨度: {(max_date - min_date).days + 1} 天")
-
- # 文件大小汇总
- total_size = sum(s["file_size"] for s in valid_files)
- avg_size = total_size / len(valid_files) if valid_files else 0
- print("\n【文件大小汇总】")
- print(f" 总大小: {total_size:,} 字节 ({total_size / 1024 / 1024:.2f} MB)")
- print(f" 平均大小: {avg_size:,.0f} 字节 ({avg_size / 1024:.2f} KB)")
-
-
-def main():
- """主函数"""
- logger.info("=" * 80)
- logger.info("开始分析评估结果统计信息")
- logger.info("=" * 80)
-
- if not os.path.exists(TEMP_DIR):
- print(f"\n✗ 错误:未找到temp目录: {TEMP_DIR}")
- logger.error(f"未找到temp目录: {TEMP_DIR}")
- return
-
- # 查找所有JSON文件
- json_files = glob.glob(os.path.join(TEMP_DIR, "*.json"))
-
- if not json_files:
- print(f"\n✗ 错误:temp目录下未找到JSON文件: {TEMP_DIR}")
- logger.error(f"temp目录下未找到JSON文件: {TEMP_DIR}")
- return
-
- json_files.sort() # 按文件名排序
-
- print(f"\n找到 {len(json_files)} 个JSON文件")
- print("=" * 80)
-
- # 分析每个文件
- all_stats = []
- for i, json_file in enumerate(json_files, 1):
- stats = analyze_single_file(json_file)
- all_stats.append(stats)
- print_file_stats(stats, index=i)
-
- # 打印汇总统计
- print_summary(all_stats)
-
- print(f"\n{'=' * 80}")
- print("分析完成")
- print(f"{'=' * 80}")
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/delete_lpmm_items.py b/scripts/delete_lpmm_items.py
deleted file mode 100644
index e6e40fea..00000000
--- a/scripts/delete_lpmm_items.py
+++ /dev/null
@@ -1,388 +0,0 @@
-import argparse
-import sys
-from pathlib import Path
-from typing import List, Tuple, Dict, Any
-import json
-import os
-
-# 强制使用 utf-8,避免控制台编码报错
-try:
- if hasattr(sys.stdout, "reconfigure"):
- sys.stdout.reconfigure(encoding="utf-8")
- if hasattr(sys.stderr, "reconfigure"):
- sys.stderr.reconfigure(encoding="utf-8")
-except Exception:
- pass
-
-# 确保能找到 src 包
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from src.chat.knowledge.embedding_store import EmbeddingManager
-from src.chat.knowledge.kg_manager import KGManager
-from src.common.logger import get_logger
-from src.chat.knowledge.utils.hash import get_sha256
-
-logger = get_logger("delete_lpmm_items")
-
-
-def read_hashes(file_path: Path) -> List[str]:
- """读取哈希列表,跳过空行"""
- hashes: List[str] = []
- for line in file_path.read_text(encoding="utf-8").splitlines():
- val = line.strip()
- if not val:
- continue
- hashes.append(val)
- return hashes
-
-
-def read_openie_hashes(file_path: Path) -> List[str]:
- """从 OpenIE JSON 中提取 idx 作为段落哈希"""
- data: Dict[str, Any] = json.loads(file_path.read_text(encoding="utf-8"))
- docs = data.get("docs", []) if isinstance(data, dict) else []
- hashes: List[str] = []
- for doc in docs:
- idx = doc.get("idx") if isinstance(doc, dict) else None
- if isinstance(idx, str) and idx.strip():
- hashes.append(idx.strip())
- return hashes
-
-
-def normalize_paragraph_keys(raw_hashes: List[str]) -> Tuple[List[str], List[str]]:
- """将输入规范为完整键和纯哈希两份列表"""
- keys: List[str] = []
- hashes: List[str] = []
- for h in raw_hashes:
- if h.startswith("paragraph-"):
- keys.append(h)
- hashes.append(h.replace("paragraph-", "", 1))
- else:
- keys.append(f"paragraph-{h}")
- hashes.append(h)
- return keys, hashes
-
-
-def main():
- parser = argparse.ArgumentParser(description="Delete paragraphs from LPMM knowledge base (vectors + graph).")
- parser.add_argument("--hash-file", help="文本文件路径,每行一个 paragraph 哈希或带前缀键")
- parser.add_argument("--openie-file", help="OpenIE 输出文件(JSON),将其 docs.idx 作为待删段落哈希")
- parser.add_argument("--raw-file", help="原始 txt 语料文件(按空行分段),可结合 --raw-index 使用")
- parser.add_argument(
- "--raw-index",
- help="在 --raw-file 中要删除的段落索引,1 基,支持逗号分隔,例如 1,3",
- )
- parser.add_argument("--search-text", help="在当前段落库中按子串搜索匹配段落并交互选择删除")
- parser.add_argument(
- "--search-limit",
- type=int,
- default=10,
- help="--search-text 模式下最多展示的候选段落数量",
- )
- parser.add_argument("--delete-entities", action="store_true", help="同时删除 OpenIE 文件中的实体节点/嵌入")
- parser.add_argument("--delete-relations", action="store_true", help="同时删除 OpenIE 文件中的关系嵌入")
- parser.add_argument("--remove-orphan-entities", action="store_true", help="删除删除后孤立的实体节点")
- parser.add_argument("--dry-run", action="store_true", help="仅预览将删除的项,不实际修改")
- parser.add_argument("--yes", action="store_true", help="跳过交互确认,直接执行删除(谨慎使用)")
- parser.add_argument(
- "--max-delete-nodes",
- type=int,
- default=2000,
- help="单次最大允许删除的节点数量(段落+实体),超过则需要显式确认或调整该参数",
- )
- parser.add_argument(
- "--non-interactive",
- action="store_true",
- help=(
- "非交互模式:不再通过 input() 询问任何信息;"
- "在该模式下,如果需要交互(例如 --search-text 未指定具体条目、未提供 --yes),"
- "会直接报错退出。"
- ),
- )
- args = parser.parse_args()
-
- # 至少需要一种来源
- if not (args.hash_file or args.openie_file or args.raw_file or args.search_text):
- logger.error("必须指定 --hash-file / --openie-file / --raw-file / --search-text 之一")
- sys.exit(1)
-
- raw_hashes: List[str] = []
- raw_entities: List[str] = []
- raw_relations: List[str] = []
-
- if args.hash_file:
- hash_file = Path(args.hash_file)
- if not hash_file.exists():
- logger.error(f"哈希文件不存在: {hash_file}")
- sys.exit(1)
- raw_hashes.extend(read_hashes(hash_file))
-
- if args.openie_file:
- openie_path = Path(args.openie_file)
- if not openie_path.exists():
- logger.error(f"OpenIE 文件不存在: {openie_path}")
- sys.exit(1)
- # 段落
- raw_hashes.extend(read_openie_hashes(openie_path))
- # 实体/关系(实体同时包含 extracted_entities 与三元组主语/宾语,以匹配 KG 构图逻辑)
- try:
- data = json.loads(openie_path.read_text(encoding="utf-8"))
- docs = data.get("docs", []) if isinstance(data, dict) else []
- for doc in docs:
- if not isinstance(doc, dict):
- continue
- ents = doc.get("extracted_entities", [])
- if isinstance(ents, list):
- raw_entities.extend([e for e in ents if isinstance(e, str)])
- triples = doc.get("extracted_triples", [])
- if isinstance(triples, list):
- for t in triples:
- if isinstance(t, list) and len(t) == 3:
- subj, _, obj = t
- if isinstance(subj, str):
- raw_entities.append(subj)
- if isinstance(obj, str):
- raw_entities.append(obj)
- raw_relations.append(str(tuple(t)))
- except Exception as e:
- logger.error(f"读取 OpenIE 文件失败: {e}")
- sys.exit(1)
-
- # 从原始 txt 语料按段落索引选择删除
- if args.raw_file:
- raw_path = Path(args.raw_file)
- if not raw_path.exists():
- logger.error(f"原始语料文件不存在: {raw_path}")
- sys.exit(1)
- text = raw_path.read_text(encoding="utf-8")
- paragraphs: List[str] = []
- buf = []
- for line in text.splitlines():
- if line.strip() == "":
- if buf:
- paragraphs.append("\n".join(buf).strip())
- buf = []
- else:
- buf.append(line)
- if buf:
- paragraphs.append("\n".join(buf).strip())
-
- if not paragraphs:
- logger.error(f"原始语料文件 {raw_path} 中没有解析到任何段落")
- sys.exit(1)
-
- if not args.raw_index:
- logger.info(
- f"{raw_path} 共解析出 {len(paragraphs)} 个段落,请通过 --raw-index 指定要删除的段落,例如 --raw-index 1,3"
- )
- sys.exit(1)
-
- # 解析索引列表(1-based)
- try:
- idx_list = [int(x.strip()) for x in str(args.raw_index).split(",") if x.strip()]
- except ValueError:
- logger.error(f"--raw-index 解析失败: {args.raw_index}")
- sys.exit(1)
-
- for idx in idx_list:
- if idx < 1 or idx > len(paragraphs):
- logger.error(f"--raw-index 包含无效索引 {idx}(有效范围 1~{len(paragraphs)})")
- sys.exit(1)
-
- logger.info("根据原始语料选择段落:")
- for idx in idx_list:
- para = paragraphs[idx - 1]
- h = get_sha256(para)
- logger.info(f"- 第 {idx} 段,hash={h},内容预览:{para[:80]}")
- raw_hashes.append(h)
-
- # 在现有库中按子串搜索候选段落并交互选择
- if args.search_text:
- search_text = args.search_text.strip()
- if not search_text:
- logger.error("--search-text 不能为空")
- sys.exit(1)
- logger.info(f"正在根据关键字在现有段落库中搜索:{search_text!r}")
- em_search = EmbeddingManager()
- try:
- em_search.load_from_file()
- except Exception as e:
- logger.error(f"加载嵌入库失败,无法使用 --search-text 功能: {e}")
- sys.exit(1)
-
- candidates = []
- for key, item in em_search.paragraphs_embedding_store.store.items():
- if search_text in item.str:
- candidates.append((key, item.str))
- if len(candidates) >= args.search_limit:
- break
-
- if not candidates:
- logger.info("未在现有段落库中找到包含该关键字的段落")
- else:
- logger.info("找到以下候选段落(输入序号选择要删除的条目,可用逗号分隔,多选):")
- for i, (key, text) in enumerate(candidates, start=1):
- logger.info(f"{i}. {key} | {text[:80]}")
- if args.non_interactive:
- logger.error(
- "当前处于非交互模式,无法通过输入序号选择要删除的候选段落;"
- "如需脚本化删除,请改用 --hash-file / --openie-file / --raw-file 等方式。"
- )
- sys.exit(1)
- choice = input("请输入要删除的序号列表(如 1,3),或直接回车取消:").strip()
- if choice:
- try:
- idxs = [int(x.strip()) for x in choice.split(",") if x.strip()]
- except ValueError:
- logger.error("输入的序号列表无法解析,已取消 --search-text 删除")
- else:
- for i in idxs:
- if 1 <= i <= len(candidates):
- key, _ = candidates[i - 1]
- # key 已是完整的 paragraph-xxx
- if key.startswith("paragraph-"):
- raw_hashes.append(key.split("paragraph-", 1)[1])
- else:
- logger.warning(f"忽略无效序号: {i}")
-
- # 去重但保持顺序
- seen = set()
- raw_hashes = [h for h in raw_hashes if not (h in seen or seen.add(h))]
-
- if not raw_hashes:
- logger.error("未读取到任何待删哈希,无操作")
- sys.exit(1)
-
- keys, pg_hashes = normalize_paragraph_keys(raw_hashes)
-
- ent_hashes: List[str] = []
- rel_hashes: List[str] = []
- if args.delete_entities and raw_entities:
- ent_hashes = [get_sha256(e) for e in raw_entities]
- if args.delete_relations and raw_relations:
- rel_hashes = [get_sha256(r) for r in raw_relations]
-
- logger.info("=== 删除操作预备 ===")
- logger.info("请确保已备份 data/embedding 与 data/rag,必要时可使用 --dry-run 预览")
- logger.info(f"待删除段落数量: {len(keys)}")
- logger.info(f"示例: {keys[:5]}")
- if ent_hashes:
- logger.info(f"待删除实体数量: {len(ent_hashes)}")
- if rel_hashes:
- logger.info(f"待删除关系数量: {len(rel_hashes)}")
-
- total_nodes_to_delete = len(pg_hashes) + (len(ent_hashes) if args.delete_entities else 0)
- logger.info(f"本次预计删除节点总数(段落+实体): {total_nodes_to_delete}")
-
- if args.dry_run:
- logger.info("dry-run 模式,未执行删除")
- return
-
- # 大批次删除保护
- if total_nodes_to_delete > args.max_delete_nodes and not args.yes:
- logger.error(
- f"本次预计删除节点 {total_nodes_to_delete} 个,超过阈值 {args.max_delete_nodes}。"
- " 为避免误删,请降低批次规模或使用 --max-delete-nodes 调整阈值,并加上 --yes 明确确认。"
- )
- sys.exit(1)
-
- # 交互确认
- if not args.yes:
- if args.non_interactive:
- logger.error(
- "当前处于非交互模式且未指定 --yes,出于安全考虑,删除操作已被拒绝。\n"
- "如确认需要在非交互模式下执行删除,请显式添加 --yes 参数。"
- )
- sys.exit(1)
- confirm = input("确认删除上述数据?输入大写 YES 以继续,其他任意键取消: ").strip()
- if confirm != "YES":
- logger.info("用户取消删除操作")
- return
-
- # 加载嵌入与图
- embed_manager = EmbeddingManager()
- kg_manager = KGManager()
-
- try:
- embed_manager.load_from_file()
- kg_manager.load_from_file()
- except Exception as e:
- logger.error(f"加载现有知识库失败: {e}")
- sys.exit(1)
-
- # 记录删除前全局统计,便于对比
- before_para_vec = len(embed_manager.paragraphs_embedding_store.store)
- before_ent_vec = len(embed_manager.entities_embedding_store.store)
- before_rel_vec = len(embed_manager.relation_embedding_store.store)
- before_nodes = len(kg_manager.graph.get_node_list())
- before_edges = len(kg_manager.graph.get_edge_list())
- logger.info(
- f"删除前统计: 段落向量={before_para_vec}, 实体向量={before_ent_vec}, 关系向量={before_rel_vec}, "
- f"KG节点={before_nodes}, KG边={before_edges}"
- )
-
- # 删除向量
- deleted, skipped = embed_manager.paragraphs_embedding_store.delete_items(keys)
- embed_manager.stored_pg_hashes = set(embed_manager.paragraphs_embedding_store.store.keys())
- logger.info(f"段落向量删除完成,删除: {deleted}, 跳过: {skipped}")
- ent_deleted = ent_skipped = rel_deleted = rel_skipped = 0
- if ent_hashes:
- ent_keys = [f"entity-{h}" for h in ent_hashes]
- ent_deleted, ent_skipped = embed_manager.entities_embedding_store.delete_items(ent_keys)
- logger.info(f"实体向量删除完成,删除: {ent_deleted}, 跳过: {ent_skipped}")
- if rel_hashes:
- rel_keys = [f"relation-{h}" for h in rel_hashes]
- rel_deleted, rel_skipped = embed_manager.relation_embedding_store.delete_items(rel_keys)
- logger.info(f"关系向量删除完成,删除: {rel_deleted}, 跳过: {rel_skipped}")
-
- # 删除图节点/边
- kg_result = kg_manager.delete_paragraphs(
- pg_hashes,
- ent_hashes=ent_hashes if args.delete_entities else None,
- remove_orphan_entities=args.remove_orphan_entities,
- )
- logger.info(
- f"KG 删除完成,删除: {kg_result.get('deleted', 0)}, 跳过: {kg_result.get('skipped', 0)}, "
- f"孤立实体清理: {kg_result.get('orphan_removed', 0)}"
- )
-
- # 重建索引并保存
- logger.info("重建 Faiss 索引并保存嵌入文件...")
- embed_manager.rebuild_faiss_index()
- embed_manager.save_to_file()
-
- logger.info("保存 KG 数据...")
- kg_manager.save_to_file()
-
- # 删除后统计
- after_para_vec = len(embed_manager.paragraphs_embedding_store.store)
- after_ent_vec = len(embed_manager.entities_embedding_store.store)
- after_rel_vec = len(embed_manager.relation_embedding_store.store)
- after_nodes = len(kg_manager.graph.get_node_list())
- after_edges = len(kg_manager.graph.get_edge_list())
-
- logger.info(
- "删除后统计: 段落向量=%d(%+d), 实体向量=%d(%+d), 关系向量=%d(%+d), KG节点=%d(%+d), KG边=%d(%+d)"
- % (
- after_para_vec,
- after_para_vec - before_para_vec,
- after_ent_vec,
- after_ent_vec - before_ent_vec,
- after_rel_vec,
- after_rel_vec - before_rel_vec,
- after_nodes,
- after_nodes - before_nodes,
- after_edges,
- after_edges - before_edges,
- )
- )
-
- logger.info("删除流程完成")
- print(
- "\n[NOTICE] 删除脚本执行完毕。如主程序(聊天 / WebUI)已在运行,"
- "请重启主程序,或在主程序内部调用一次 lpmm_start_up() 以应用最新 LPMM 知识库。"
- )
- print("[NOTICE] 如果不清楚 lpmm_start_up 是什么,直接重启主程序即可。")
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/import_openie.py b/scripts/import_openie.py
deleted file mode 100644
index b7ec7442..00000000
--- a/scripts/import_openie.py
+++ /dev/null
@@ -1,301 +0,0 @@
-# try:
-# import src.plugins.knowledge.lib.quick_algo
-# except ImportError:
-# print("未找到quick_algo库,无法使用quick_algo算法")
-# print("请安装quick_algo库 - 在lib.quick_algo中,执行命令:python setup.py build_ext --inplace")
-
-import argparse
-import sys
-import os
-import asyncio
-from time import sleep
-from typing import Optional
-
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-from src.chat.knowledge.embedding_store import EmbeddingManager
-from src.chat.knowledge.open_ie import OpenIE
-from src.chat.knowledge.kg_manager import KGManager
-from src.common.logger import get_logger
-from src.chat.knowledge.utils.hash import get_sha256
-
-
-# 添加项目根目录到 sys.path
-ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-OPENIE_DIR = os.path.join(ROOT_PATH, "data", "openie")
-
-logger = get_logger("OpenIE导入")
-
-
-def ensure_openie_dir():
- """确保OpenIE数据目录存在"""
- if not os.path.exists(OPENIE_DIR):
- os.makedirs(OPENIE_DIR)
- logger.info(f"创建OpenIE数据目录:{OPENIE_DIR}")
- else:
- logger.info(f"OpenIE数据目录已存在:{OPENIE_DIR}")
-
-
-def hash_deduplicate(
- raw_paragraphs: dict[str, str],
- triple_list_data: dict[str, list[list[str]]],
- stored_pg_hashes: set,
- stored_paragraph_hashes: set,
-):
- """Hash去重
-
- Args:
- raw_paragraphs: 索引的段落原文
- triple_list_data: 索引的三元组列表
- stored_pg_hashes: 已存储的段落hash集合
- stored_paragraph_hashes: 已存储的段落hash集合
-
- Returns:
- new_raw_paragraphs: 去重后的段落
- new_triple_list_data: 去重后的三元组
- """
- # 保存去重后的段落
- new_raw_paragraphs = {}
- # 保存去重后的三元组
- new_triple_list_data = {}
-
- for _, (raw_paragraph, triple_list) in enumerate(
- zip(raw_paragraphs.values(), triple_list_data.values(), strict=False)
- ):
- # 段落hash
- paragraph_hash = get_sha256(raw_paragraph)
- # 使用与EmbeddingStore中一致的命名空间格式:namespace-hash
- paragraph_key = f"paragraph-{paragraph_hash}"
- if paragraph_key in stored_pg_hashes and paragraph_hash in stored_paragraph_hashes:
- continue
- new_raw_paragraphs[paragraph_hash] = raw_paragraph
- new_triple_list_data[paragraph_hash] = triple_list
-
- return new_raw_paragraphs, new_triple_list_data
-
-
-def handle_import_openie(
- openie_data: OpenIE,
- embed_manager: EmbeddingManager,
- kg_manager: KGManager,
- non_interactive: bool = False,
-) -> bool:
- # sourcery skip: extract-method
- # 从OpenIE数据中提取段落原文与三元组列表
- # 索引的段落原文
- raw_paragraphs = openie_data.extract_raw_paragraph_dict()
- # 索引的实体列表
- entity_list_data = openie_data.extract_entity_dict()
- # 索引的三元组列表
- triple_list_data = openie_data.extract_triple_dict()
- # print(openie_data.docs)
- if len(raw_paragraphs) != len(entity_list_data) or len(raw_paragraphs) != len(triple_list_data):
- logger.error("OpenIE数据存在异常")
- logger.error(f"原始段落数量:{len(raw_paragraphs)}")
- logger.error(f"实体列表数量:{len(entity_list_data)}")
- logger.error(f"三元组列表数量:{len(triple_list_data)}")
- logger.error("OpenIE数据段落数量与实体列表数量或三元组列表数量不一致")
- logger.error("请保证你的原始数据分段良好,不要有类似于 “.....” 单独成一段的情况")
- logger.error("或者一段中只有符号的情况")
- # 新增:检查docs中每条数据的完整性
- logger.error("系统将于2秒后开始检查数据完整性")
- sleep(2)
- found_missing = False
- missing_idxs = []
- for doc in getattr(openie_data, "docs", []):
- idx = doc.get("idx", "<无idx>")
- passage = doc.get("passage", "<无passage>")
- missing = []
- # 检查字段是否存在且非空
- if "passage" not in doc or not doc.get("passage"):
- missing.append("passage")
- if "extracted_entities" not in doc or not isinstance(doc.get("extracted_entities"), list):
- missing.append("名词列表缺失")
- elif len(doc.get("extracted_entities", [])) == 0:
- missing.append("名词列表为空")
- if "extracted_triples" not in doc or not isinstance(doc.get("extracted_triples"), list):
- missing.append("主谓宾三元组缺失")
- elif len(doc.get("extracted_triples", [])) == 0:
- missing.append("主谓宾三元组为空")
- # 输出所有doc的idx
- # print(f"检查: idx={idx}")
- if missing:
- found_missing = True
- missing_idxs.append(idx)
- logger.error("\n")
- logger.error("数据缺失:")
- logger.error(f"对应哈希值:{idx}")
- logger.error(f"对应文段内容内容:{passage}")
- logger.error(f"非法原因:{', '.join(missing)}")
- # 确保提示在所有非法数据输出后再输出
- if not found_missing:
- logger.info("所有数据均完整,没有发现缺失字段。")
- return False
- # 新增:提示用户是否删除非法文段继续导入
- # 在非交互模式下,不再询问用户,而是直接报错终止
- logger.info(f"\n检测到非法文段,共{len(missing_idxs)}条。")
- if non_interactive:
- logger.error("检测到非法文段且当前处于非交互模式,无法询问是否删除非法文段,导入终止。")
- sys.exit(1)
- logger.info("\n是否删除所有非法文段后继续导入?(y/n): ", end="")
- user_choice = input().strip().lower()
- if user_choice != "y":
- logger.info("用户选择不删除非法文段,程序终止。")
- sys.exit(1)
- # 删除非法文段
- logger.info("正在删除非法文段并继续导入...")
- # 过滤掉非法文段
- openie_data.docs = [
- doc for doc in getattr(openie_data, "docs", []) if doc.get("idx", "<无idx>") not in missing_idxs
- ]
- # 重新提取数据
- raw_paragraphs = openie_data.extract_raw_paragraph_dict()
- entity_list_data = openie_data.extract_entity_dict()
- triple_list_data = openie_data.extract_triple_dict()
- # 再次校验
- if len(raw_paragraphs) != len(entity_list_data) or len(raw_paragraphs) != len(triple_list_data):
- logger.error("删除非法文段后,数据仍不一致,程序终止。")
- sys.exit(1)
- # 将索引换为对应段落的hash值
- logger.info("正在进行段落去重与重索引")
- raw_paragraphs, triple_list_data = hash_deduplicate(
- raw_paragraphs,
- triple_list_data,
- embed_manager.stored_pg_hashes,
- kg_manager.stored_paragraph_hashes,
- )
- if len(raw_paragraphs) != 0:
- # 获取嵌入并保存
- logger.info(f"段落去重完成,剩余待处理的段落数量:{len(raw_paragraphs)}")
- logger.info("开始Embedding")
- embed_manager.store_new_data_set(raw_paragraphs, triple_list_data)
- # Embedding-Faiss重索引
- logger.info("正在重新构建向量索引")
- embed_manager.rebuild_faiss_index()
- logger.info("向量索引构建完成")
- embed_manager.save_to_file()
- logger.info("Embedding完成")
- # 构建新段落的RAG
- logger.info("开始构建RAG")
- kg_manager.build_kg(triple_list_data, embed_manager)
- kg_manager.save_to_file()
- logger.info("RAG构建完成")
- else:
- logger.info("无新段落需要处理")
- return True
-
-
-async def main_async(non_interactive: bool = False) -> bool: # sourcery skip: dict-comprehension
- # 新增确认提示
- if non_interactive:
- logger.warning("当前处于非交互模式,将跳过导入开销确认提示,直接开始执行 OpenIE 导入。")
- else:
- print("=== 重要操作确认 ===")
- print("OpenIE导入时会大量发送请求,可能会撞到请求速度上限,请注意选用的模型")
- print("同之前样例:在本地模型下,在70分钟内我们发送了约8万条请求,在网络允许下,速度会更快")
- print("推荐使用硅基流动的Pro/BAAI/bge-m3")
- print("每百万Token费用为0.7元")
- print("知识导入时,会消耗大量系统资源,建议在较好配置电脑上运行")
- print("同上样例,导入时10700K几乎跑满,14900HX占用80%,峰值内存占用约3G")
- confirm = input("确认继续执行?(y/n): ").strip().lower()
- if confirm != "y":
- logger.info("用户取消操作")
- print("操作已取消")
- sys.exit(1)
- print("\n" + "=" * 40 + "\n")
- ensure_openie_dir() # 确保OpenIE目录存在
- logger.info("----开始导入openie数据----\n")
-
- logger.info("创建LLM客户端")
-
- # 初始化Embedding库
- embed_manager = EmbeddingManager()
- logger.info("正在从文件加载Embedding库")
- try:
- embed_manager.load_from_file()
- except Exception as e:
- logger.error(f"从文件加载Embedding库时发生错误:{e}")
- if "嵌入模型与本地存储不一致" in str(e):
- logger.error("检测到嵌入模型与本地存储不一致,已终止导入。请检查模型设置或清空嵌入库后重试。")
- logger.error("请保证你的嵌入模型从未更改,并且在导入时使用相同的模型")
- # print("检测到嵌入模型与本地存储不一致,已终止导入。请检查模型设置或清空嵌入库后重试。")
- sys.exit(1)
- if "不存在" in str(e):
- logger.error("如果你是第一次导入知识,请忽略此错误")
- logger.info("Embedding库加载完成")
- # 初始化KG
- kg_manager = KGManager()
- logger.info("正在从文件加载KG")
- try:
- kg_manager.load_from_file()
- except Exception as e:
- logger.error(f"从文件加载KG时发生错误:{e}")
- logger.error("如果你是第一次导入知识,请忽略此错误")
- logger.info("KG加载完成")
-
- logger.info(f"KG节点数量:{len(kg_manager.graph.get_node_list())}")
- logger.info(f"KG边数量:{len(kg_manager.graph.get_edge_list())}")
-
- # 数据比对:Embedding库与KG的段落hash集合
- for pg_hash in kg_manager.stored_paragraph_hashes:
- # 使用与EmbeddingStore中一致的命名空间格式:namespace-hash
- key = f"paragraph-{pg_hash}"
- if key not in embed_manager.stored_pg_hashes:
- logger.warning(f"KG中存在Embedding库中不存在的段落:{key}")
-
- logger.info("正在导入OpenIE数据文件")
- try:
- openie_data = OpenIE.load()
- except Exception as e:
- logger.error(f"导入OpenIE数据文件时发生错误:{e}")
- return False
- if handle_import_openie(openie_data, embed_manager, kg_manager, non_interactive=non_interactive) is False:
- logger.error("处理OpenIE数据时发生错误")
- return False
- return True
-
-
-def main(argv: Optional[list[str]] = None) -> None:
- """主函数 - 解析参数并运行异步主流程。"""
- parser = argparse.ArgumentParser(
- description=("OpenIE 导入脚本:读取 data/openie 中的 OpenIE JSON 批次,将其导入到 LPMM 的向量库与知识图中。")
- )
- parser.add_argument(
- "--non-interactive",
- action="store_true",
- help="非交互模式:跳过导入确认提示以及非法文段删除询问,遇到非法文段时直接报错退出。",
- )
- args = parser.parse_args(argv)
-
- # 检查是否有现有的事件循环
- try:
- loop = asyncio.get_running_loop()
- if loop.is_closed():
- # 如果事件循环已关闭,创建新的
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- except RuntimeError:
- # 没有运行的事件循环,创建新的
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
-
- ok: bool = False
- try:
- # 在新的事件循环中运行异步主函数
- ok = loop.run_until_complete(main_async(non_interactive=args.non_interactive))
- print(
- "\n[NOTICE] OpenIE 导入脚本执行完毕。如主程序(聊天 / WebUI)已在运行,"
- "请重启主程序,或在主程序内部调用一次 lpmm_start_up() 以应用最新 LPMM 知识库。"
- )
- print("[NOTICE] 如果不清楚 lpmm_start_up 是什么,直接重启主程序即可。")
- finally:
- # 确保事件循环被正确关闭
- if not loop.is_closed():
- loop.close()
- if not ok:
- # 统一错误码,方便在非交互场景下检测失败
- sys.exit(1)
-
-
-if __name__ == "__main__":
- # logger.info(f"111111111111111111111111{ROOT_PATH}")
- main()
diff --git a/scripts/info_extraction.py b/scripts/info_extraction.py
deleted file mode 100644
index ab9d295b..00000000
--- a/scripts/info_extraction.py
+++ /dev/null
@@ -1,248 +0,0 @@
-import argparse
-import json
-import os
-import signal
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from threading import Lock, Event
-import sys
-import datetime
-from typing import Optional
-
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-# 添加项目根目录到 sys.path
-
-from rich.progress import Progress # 替换为 rich 进度条
-
-from src.common.logger import get_logger
-
-# from src.chat.knowledge.lpmmconfig import global_config
-from src.chat.knowledge.ie_process import info_extract_from_str
-from src.chat.knowledge.open_ie import OpenIE
-from rich.progress import (
- BarColumn,
- TimeElapsedColumn,
- TimeRemainingColumn,
- TaskProgressColumn,
- MofNCompleteColumn,
- SpinnerColumn,
- TextColumn,
-)
-from raw_data_preprocessor import RAW_DATA_PATH, load_raw_data
-from src.config.config import global_config, model_config
-from src.llm_models.utils_model import LLMRequest
-
-logger = get_logger("LPMM知识库-信息提取")
-
-
-ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-TEMP_DIR = os.path.join(ROOT_PATH, "temp")
-# IMPORTED_DATA_PATH = os.path.join(ROOT_PATH, "data", "imported_lpmm_data")
-OPENIE_OUTPUT_DIR = os.path.join(ROOT_PATH, "data", "openie")
-
-
-def ensure_dirs():
- """确保临时目录和输出目录存在"""
- if not os.path.exists(TEMP_DIR):
- os.makedirs(TEMP_DIR)
- logger.info(f"已创建临时目录: {TEMP_DIR}")
- if not os.path.exists(OPENIE_OUTPUT_DIR):
- os.makedirs(OPENIE_OUTPUT_DIR)
- logger.info(f"已创建输出目录: {OPENIE_OUTPUT_DIR}")
- if not os.path.exists(RAW_DATA_PATH):
- os.makedirs(RAW_DATA_PATH)
- logger.info(f"已创建原始数据目录: {RAW_DATA_PATH}")
-
-
-# 创建一个线程安全的锁,用于保护文件操作和共享数据
-file_lock = Lock()
-open_ie_doc_lock = Lock()
-
-# 创建一个事件标志,用于控制程序终止
-shutdown_event = Event()
-
-lpmm_entity_extract_llm = LLMRequest(
- model_set=model_config.model_task_config.lpmm_entity_extract, request_type="lpmm.entity_extract"
-)
-lpmm_rdf_build_llm = LLMRequest(model_set=model_config.model_task_config.lpmm_rdf_build, request_type="lpmm.rdf_build")
-
-
-def process_single_text(pg_hash, raw_data):
- """处理单个文本的函数,用于线程池"""
- temp_file_path = f"{TEMP_DIR}/{pg_hash}.json"
-
- # 使用文件锁检查和读取缓存文件
- with file_lock:
- if os.path.exists(temp_file_path):
- try:
- # 存在对应的提取结果
- logger.info(f"找到缓存的提取结果:{pg_hash}")
- with open(temp_file_path, "r", encoding="utf-8") as f:
- return json.load(f), None
- except json.JSONDecodeError:
- # 如果JSON文件损坏,删除它并重新处理
- logger.warning(f"缓存文件损坏,重新处理:{pg_hash}")
- os.remove(temp_file_path)
-
- entity_list, rdf_triple_list = info_extract_from_str(
- lpmm_entity_extract_llm,
- lpmm_rdf_build_llm,
- raw_data,
- )
- if entity_list is None or rdf_triple_list is None:
- return None, pg_hash
- doc_item = {
- "idx": pg_hash,
- "passage": raw_data,
- "extracted_entities": entity_list,
- "extracted_triples": rdf_triple_list,
- }
- # 保存临时提取结果
- with file_lock:
- try:
- with open(temp_file_path, "w", encoding="utf-8") as f:
- json.dump(doc_item, f, ensure_ascii=False, indent=4)
- except Exception as e:
- logger.error(f"保存缓存文件失败:{pg_hash}, 错误:{e}")
- # 如果保存失败,确保不会留下损坏的文件
- if os.path.exists(temp_file_path):
- os.remove(temp_file_path)
- sys.exit(0)
- return None, pg_hash
- return doc_item, None
-
-
-def signal_handler(_signum, _frame):
- """处理Ctrl+C信号"""
- logger.info("\n接收到中断信号,正在优雅地关闭程序...")
- sys.exit(0)
-
-
-def _run(non_interactive: bool = False) -> None: # sourcery skip: comprehension-to-generator, extract-method
- # 设置信号处理器
- signal.signal(signal.SIGINT, signal_handler)
- ensure_dirs() # 确保目录存在
- # 新增用户确认提示
- if non_interactive:
- logger.warning("当前处于非交互模式,将跳过费用与时长确认提示,直接开始进行实体提取操作。")
- else:
- print("=== 重要操作确认,请认真阅读以下内容哦 ===")
- print("实体提取操作将会花费较多api余额和时间,建议在空闲时段执行。")
- print("举例:600万字全剧情,提取选用deepseek v3 0324,消耗约40元,约3小时。")
- print("建议使用硅基流动的非Pro模型")
- print("或者使用可以用赠金抵扣的Pro模型")
- print("请确保账户余额充足,并且在执行前确认无误。")
- confirm = input("确认继续执行?(y/n): ").strip().lower()
- if confirm != "y":
- logger.info("用户取消操作")
- print("操作已取消")
- sys.exit(1)
-
- # 友好提示:说明“网络错误(可重试)”日志属于正常自动重试行为,避免用户误以为任务失败
- print(
- "\n提示:在提取过程中,如果看到模型出现“网络错误(可重试)”等日志,"
- "表示系统正在自动重试请求,一般不会影响整体导入结果,请耐心等待即可。\n"
- )
-
- print("\n" + "=" * 40 + "\n")
- ensure_dirs() # 确保目录存在
- logger.info("--------进行信息提取--------\n")
-
- # 加载原始数据
- logger.info("正在加载原始数据")
- all_sha256_list, all_raw_datas = load_raw_data()
-
- failed_sha256 = []
- open_ie_doc = []
-
- workers = global_config.lpmm_knowledge.info_extraction_workers
- with ThreadPoolExecutor(max_workers=workers) as executor:
- future_to_hash = {
- executor.submit(process_single_text, pg_hash, raw_data): pg_hash
- for pg_hash, raw_data in zip(all_sha256_list, all_raw_datas, strict=False)
- }
-
- with Progress(
- SpinnerColumn(),
- TextColumn("[progress.description]{task.description}"),
- BarColumn(),
- TaskProgressColumn(),
- MofNCompleteColumn(),
- "•",
- TimeElapsedColumn(),
- "<",
- TimeRemainingColumn(),
- transient=False,
- ) as progress:
- task = progress.add_task("正在进行提取:", total=len(future_to_hash))
- try:
- for future in as_completed(future_to_hash):
- if shutdown_event.is_set():
- for f in future_to_hash:
- if not f.done():
- f.cancel()
- break
-
- doc_item, failed_hash = future.result()
- if failed_hash:
- failed_sha256.append(failed_hash)
- logger.error(f"提取失败:{failed_hash}")
- elif doc_item:
- with open_ie_doc_lock:
- open_ie_doc.append(doc_item)
- logger.info(f'已处理"{doc_item.get("passage", "")}"')
- progress.update(task, advance=1)
- except KeyboardInterrupt:
- logger.info("\n接收到中断信号,正在优雅地关闭程序...")
- shutdown_event.set()
- for f in future_to_hash:
- if not f.done():
- f.cancel()
-
- # 合并所有文件的提取结果并保存
- if open_ie_doc:
- sum_phrase_chars = sum([len(e) for chunk in open_ie_doc for e in chunk["extracted_entities"]])
- sum_phrase_words = sum([len(e.split()) for chunk in open_ie_doc for e in chunk["extracted_entities"]])
- num_phrases = sum([len(chunk["extracted_entities"]) for chunk in open_ie_doc])
- openie_obj = OpenIE(
- open_ie_doc,
- round(sum_phrase_chars / num_phrases, 4) if num_phrases else 0,
- round(sum_phrase_words / num_phrases, 4) if num_phrases else 0,
- )
- # 输出文件名格式:MM-DD-HH-ss-openie.json
- now = datetime.datetime.now()
- filename = now.strftime("%m-%d-%H-%S-openie.json")
- output_path = os.path.join(OPENIE_OUTPUT_DIR, filename)
- with open(output_path, "w", encoding="utf-8") as f:
- json.dump(
- openie_obj.to_dict() if hasattr(openie_obj, "to_dict") else openie_obj.__dict__,
- f,
- ensure_ascii=False,
- indent=4,
- )
- logger.info(f"信息提取结果已保存到: {output_path}")
- else:
- logger.warning("没有可保存的信息提取结果")
-
- logger.info("--------信息提取完成--------")
- logger.info(f"提取失败的文段SHA256:{failed_sha256}")
-
-
-def main(argv: Optional[list[str]] = None) -> None:
- parser = argparse.ArgumentParser(
- description=(
- "LPMM 信息提取脚本:从 data/lpmm_raw_data/*.txt 中读取原始段落,"
- "调用 LLM 提取实体和三元组,并生成 OpenIE JSON 批次文件。"
- )
- )
- parser.add_argument(
- "--non-interactive",
- action="store_true",
- help="非交互模式:跳过费用确认提示,直接开始执行;适用于 CI / 定时任务等场景。",
- )
- args = parser.parse_args(argv)
-
- _run(non_interactive=args.non_interactive)
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/inspect_lpmm_batch.py b/scripts/inspect_lpmm_batch.py
deleted file mode 100644
index 2ed719cf..00000000
--- a/scripts/inspect_lpmm_batch.py
+++ /dev/null
@@ -1,132 +0,0 @@
-import argparse
-import json
-import os
-import sys
-from pathlib import Path
-from typing import List, Tuple
-
-# 确保能导入 src.*
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from src.chat.knowledge.utils.hash import get_sha256
-from src.chat.knowledge.embedding_store import EmbeddingManager
-from src.chat.knowledge.kg_manager import KGManager
-from src.common.logger import get_logger
-
-logger = get_logger("inspect_lpmm_batch")
-
-
-def load_openie_hashes(path: Path) -> Tuple[List[str], List[str], List[str]]:
- """从 OpenIE JSON 中提取段落 / 实体 / 关系的哈希
-
- 注意:实体既包括 extracted_entities 中的条目,也包括三元组中的主语/宾语,
- 以与 KG 构图逻辑保持一致。
- """
- with path.open("r", encoding="utf-8") as f:
- data = json.load(f)
-
- pg_hashes: List[str] = []
- ent_hashes: List[str] = []
- rel_hashes: List[str] = []
-
- for doc in data.get("docs", []):
- if not isinstance(doc, dict):
- continue
- idx = doc.get("idx")
- if isinstance(idx, str) and idx.strip():
- pg_hashes.append(idx.strip())
-
- ents = doc.get("extracted_entities", [])
- if isinstance(ents, list):
- for e in ents:
- if isinstance(e, str):
- ent_hashes.append(get_sha256(e))
-
- triples = doc.get("extracted_triples", [])
- if isinstance(triples, list):
- for t in triples:
- if isinstance(t, list) and len(t) == 3:
- # 主语/宾语作为实体参与构图
- subj, _, obj = t
- if isinstance(subj, str):
- ent_hashes.append(get_sha256(subj))
- if isinstance(obj, str):
- ent_hashes.append(get_sha256(obj))
- rel_hashes.append(get_sha256(str(tuple(t))))
-
- # 去重但保留顺序
- def unique(seq: List[str]) -> List[str]:
- seen = set()
- return [x for x in seq if not (x in seen or seen.add(x))]
-
- return unique(pg_hashes), unique(ent_hashes), unique(rel_hashes)
-
-
-def main() -> None:
- parser = argparse.ArgumentParser(
- description="检查指定 OpenIE 文件对应批次在当前向量库与 KG 中的存在情况(用于验证删除效果)。"
- )
- parser.add_argument("--openie-file", required=True, help="OpenIE 输出 JSON 文件路径")
- args = parser.parse_args()
-
- openie_path = Path(args.openie_file)
- if not openie_path.exists():
- logger.error(f"OpenIE 文件不存在: {openie_path}")
- sys.exit(1)
-
- pg_hashes, ent_hashes, rel_hashes = load_openie_hashes(openie_path)
- logger.info(
- f"从 {openie_path.name} 解析到 段落 {len(pg_hashes)} 条,实体 {len(ent_hashes)} 个,关系 {len(rel_hashes)} 条"
- )
-
- # 加载当前嵌入与 KG
- em = EmbeddingManager()
- kg = KGManager()
- try:
- em.load_from_file()
- kg.load_from_file()
- except Exception as e:
- logger.error(f"加载当前知识库失败: {e}")
- sys.exit(1)
-
- graph_nodes = set(kg.graph.get_node_list())
-
- # 检查段落
- pg_keys = [f"paragraph-{h}" for h in pg_hashes]
- pg_in_vec = sum(1 for k in pg_keys if k in em.paragraphs_embedding_store.store)
- pg_in_kg = sum(1 for k in pg_keys if k in graph_nodes)
-
- # 检查实体
- ent_keys = [f"entity-{h}" for h in ent_hashes]
- ent_in_vec = sum(1 for k in ent_keys if k in em.entities_embedding_store.store)
- ent_in_kg = sum(1 for k in ent_keys if k in graph_nodes)
-
- # 检查关系(只针对向量库)
- rel_keys = [f"relation-{h}" for h in rel_hashes]
- rel_in_vec = sum(1 for k in rel_keys if k in em.relation_embedding_store.store)
-
- print("==== 批次存在情况(删除前/后对比用) ====")
- print(f"段落: 总计 {len(pg_keys)}, 向量库剩余 {pg_in_vec}, KG 中剩余 {pg_in_kg}")
- print(f"实体: 总计 {len(ent_keys)}, 向量库剩余 {ent_in_vec}, KG 中剩余 {ent_in_kg}")
- print(f"关系: 总计 {len(rel_keys)}, 向量库剩余 {rel_in_vec}")
-
- # 打印少量仍存在的样例,便于检查内容是否正常
- sample_pg = [k for k in pg_keys if k in graph_nodes][:3]
- if sample_pg:
- print("\n仍在 KG 中的段落节点示例:")
- for k in sample_pg:
- nd = kg.graph[k]
- content = nd["content"] if "content" in nd else k
- print(f"- {k}: {content[:80]}")
-
- sample_ent = [k for k in ent_keys if k in graph_nodes][:3]
- if sample_ent:
- print("\n仍在 KG 中的实体节点示例:")
- for k in sample_ent:
- nd = kg.graph[k]
- content = nd["content"] if "content" in nd else k
- print(f"- {k}: {content[:80]}")
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/inspect_lpmm_global.py b/scripts/inspect_lpmm_global.py
deleted file mode 100644
index eb53259f..00000000
--- a/scripts/inspect_lpmm_global.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import os
-import sys
-
-# 保证可以导入 src.*
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from src.chat.knowledge.embedding_store import EmbeddingManager
-from src.chat.knowledge.kg_manager import KGManager
-from src.common.logger import get_logger
-
-logger = get_logger("inspect_lpmm_global")
-
-
-def main() -> None:
- """检查当前整库(所有批次)的向量与 KG 状态,用于观察删除对剩余数据的影响。"""
- em = EmbeddingManager()
- kg = KGManager()
-
- try:
- em.load_from_file()
- kg.load_from_file()
- except Exception as e:
- logger.error(f"加载当前知识库失败: {e}")
- sys.exit(1)
-
- # 向量库统计
- para_cnt = len(em.paragraphs_embedding_store.store)
- ent_cnt_vec = len(em.entities_embedding_store.store)
- rel_cnt_vec = len(em.relation_embedding_store.store)
-
- # KG 统计
- nodes = kg.graph.get_node_list()
- edges = kg.graph.get_edge_list()
-
- para_nodes = [n for n in nodes if n.startswith("paragraph-")]
- ent_nodes = [n for n in nodes if n.startswith("entity-")]
-
- print("==== 向量库统计 ====")
- print(f"段落向量条数: {para_cnt}")
- print(f"实体向量条数: {ent_cnt_vec}")
- print(f"关系向量条数: {rel_cnt_vec}")
-
- print("\n==== KG 图统计 ====")
- print(f"节点总数: {len(nodes)}")
- print(f"边总数: {len(edges)}")
- print(f"段落节点数: {len(para_nodes)}")
- print(f"实体节点数: {len(ent_nodes)}")
-
- # ent_appear_cnt 状态
- ent_cnt_meta = len(kg.ent_appear_cnt)
- print(f"\n实体计数表条目数: {ent_cnt_meta}")
-
- # 抽样查看剩余段落/实体内容
- print("\n==== 剩余段落示例(最多 3 条) ====")
- for nid in para_nodes[:3]:
- nd = kg.graph[nid]
- content = nd["content"] if "content" in nd else nid
- print(f"- {nid}: {content[:80]}")
-
- print("\n==== 剩余实体示例(最多 5 条) ====")
- for nid in ent_nodes[:5]:
- nd = kg.graph[nid]
- content = nd["content"] if "content" in nd else nid
- print(f"- {nid}: {content[:80]}")
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/lpmm_interactive_manager.py b/scripts/lpmm_interactive_manager.py
deleted file mode 100644
index 1dac5a3e..00000000
--- a/scripts/lpmm_interactive_manager.py
+++ /dev/null
@@ -1,278 +0,0 @@
-import asyncio
-import os
-import sys
-
-# 尽量统一控制台编码为 utf-8,避免中文输出报错
-try:
- if hasattr(sys.stdout, "reconfigure"):
- sys.stdout.reconfigure(encoding="utf-8")
- if hasattr(sys.stderr, "reconfigure"):
- sys.stderr.reconfigure(encoding="utf-8")
-except Exception:
- pass
-
-# 确保项目根目录在 sys.path 中,以便导入 src.*
-PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-if PROJECT_ROOT not in sys.path:
- sys.path.append(PROJECT_ROOT)
-
-try:
- # 显式从 src.chat.knowledge.lpmm_ops 导入单例对象
- from src.chat.knowledge.lpmm_ops import lpmm_ops
- from src.common.logger import get_logger
- from src.memory_system.retrieval_tools.query_lpmm_knowledge import query_lpmm_knowledge
- from src.chat.knowledge import lpmm_start_up
- from src.config.config import global_config
-except ImportError as e:
- print(f"导入失败,请确保在项目根目录下运行脚本: {e}")
- sys.exit(1)
-
-logger = get_logger("lpmm_interactive_manager")
-
-
-async def interactive_add():
- """交互式导入知识"""
- print("\n" + "=" * 40)
- print(" --- 📥 导入知识 (Add) ---")
- print("=" * 40)
- print("说明:请输入要导入的文本内容。")
- print(" - 支持多段落,段落间请保留空行。")
- print(" - 输入完成后,在新起的一行输入 'EOF' 并回车结束输入。")
- print("-" * 40)
-
- lines = []
- while True:
- try:
- line = input()
- if line.strip().upper() == "EOF":
- break
- lines.append(line)
- except EOFError:
- break
-
- text = "\n".join(lines).strip()
- if not text:
- print("\n[!] 内容为空,操作已取消。")
- return
-
- print("\n[进度] 正在调用 LPMM 接口进行信息抽取与向量化,请稍候...")
- try:
- # 使用 lpmm_ops.py 中的接口
- result = await lpmm_ops.add_content(text)
-
- if result["status"] == "success":
- print(f"\n[√] 成功:{result['message']}")
- print(f" 实际新增段落数: {result.get('count', 0)}")
- else:
- print(f"\n[×] 失败:{result['message']}")
- except Exception as e:
- print(f"\n[×] 发生异常: {e}")
- logger.error(f"add_content 异常: {e}", exc_info=True)
-
-
-async def interactive_delete():
- """交互式删除知识"""
- print("\n" + "=" * 40)
- print(" --- 🗑️ 删除知识 (Delete) ---")
- print("=" * 40)
- print("删除模式:")
- print(" 1. 关键词模糊匹配(删除包含关键词的所有段落)")
- print(" 2. 完整文段匹配(删除完全匹配的段落)")
- print("-" * 40)
-
- mode = input("请选择删除模式 (1/2): ").strip()
- exact_match = False
-
- if mode == "2":
- exact_match = True
- print("\n[完整文段匹配模式]")
- print("说明:请输入要删除的完整文段内容(必须完全一致)。")
- print(" - 支持多行输入,输入完成后在新起的一行输入 'EOF' 并回车。")
- print("-" * 40)
- lines = []
- while True:
- try:
- line = input()
- if line.strip().upper() == "EOF":
- break
- lines.append(line)
- except EOFError:
- break
- keyword = "\n".join(lines).strip()
- else:
- if mode != "1":
- print("\n[!] 无效选择,默认使用关键词模糊匹配模式。")
- print("\n[关键词模糊匹配模式]")
- keyword = input("请输入匹配关键词: ").strip()
-
- if not keyword:
- print("\n[!] 输入为空,操作已取消。")
- return
-
- print("-" * 40)
- confirm = (
- input(f"危险确认:确定要删除所有匹配 '{keyword[:50]}{'...' if len(keyword) > 50 else ''}' 的知识吗?(y/N): ")
- .strip()
- .lower()
- )
- if confirm != "y":
- print("\n[!] 已取消删除操作。")
- return
-
- print("\n[进度] 正在执行删除并更新索引...")
- try:
- # 使用 lpmm_ops.py 中的接口
- result = await lpmm_ops.delete(keyword, exact_match=exact_match)
-
- if result["status"] == "success":
- print(f"\n[√] 成功:{result['message']}")
- print(f" 删除条数: {result.get('deleted_count', 0)}")
- elif result["status"] == "info":
- print(f"\n[i] 提示:{result['message']}")
- else:
- print(f"\n[×] 失败:{result['message']}")
- except Exception as e:
- print(f"\n[×] 发生异常: {e}")
- logger.error(f"delete 异常: {e}", exc_info=True)
-
-
-async def interactive_clear():
- """交互式清空知识库"""
- print("\n" + "=" * 40)
- print(" --- ⚠️ 清空知识库 (Clear All) ---")
- print("=" * 40)
- print("警告:此操作将删除LPMM知识库中的所有内容!")
- print(" - 所有段落向量")
- print(" - 所有实体向量")
- print(" - 所有关系向量")
- print(" - 整个知识图谱")
- print(" - 此操作不可恢复!")
- print("-" * 40)
-
- # 双重确认
- confirm1 = input("⚠️ 第一次确认:确定要清空整个知识库吗?(输入 'YES' 继续): ").strip()
- if confirm1 != "YES":
- print("\n[!] 已取消清空操作。")
- return
-
- print("\n" + "=" * 40)
- confirm2 = input("⚠️ 第二次确认:此操作不可恢复,请再次输入 'CLEAR' 确认: ").strip()
- if confirm2 != "CLEAR":
- print("\n[!] 已取消清空操作。")
- return
-
- print("\n[进度] 正在清空知识库...")
- try:
- # 使用 lpmm_ops.py 中的接口
- result = await lpmm_ops.clear_all()
-
- if result["status"] == "success":
- print(f"\n[√] 成功:{result['message']}")
- stats = result.get("stats", {})
- before = stats.get("before", {})
- after = stats.get("after", {})
- print("\n[统计信息]")
- print(
- f" 清空前: 段落={before.get('paragraphs', 0)}, 实体={before.get('entities', 0)}, "
- f"关系={before.get('relations', 0)}, KG节点={before.get('kg_nodes', 0)}, KG边={before.get('kg_edges', 0)}"
- )
- print(
- f" 清空后: 段落={after.get('paragraphs', 0)}, 实体={after.get('entities', 0)}, "
- f"关系={after.get('relations', 0)}, KG节点={after.get('kg_nodes', 0)}, KG边={after.get('kg_edges', 0)}"
- )
- else:
- print(f"\n[×] 失败:{result['message']}")
- except Exception as e:
- print(f"\n[×] 发生异常: {e}")
- logger.error(f"clear_all 异常: {e}", exc_info=True)
-
-
-async def interactive_search():
- """交互式查询知识"""
- print("\n" + "=" * 40)
- print(" --- 🔍 查询知识 (Search) ---")
- print("=" * 40)
- print("说明:输入查询问题或关键词,系统会返回相关的知识段落。")
- print("-" * 40)
-
- # 确保 LPMM 已初始化
- if not global_config.lpmm_knowledge.enable:
- print("\n[!] 警告:LPMM 知识库在配置中未启用。")
- return
-
- try:
- lpmm_start_up()
- except Exception as e:
- print(f"\n[!] LPMM 初始化失败: {e}")
- logger.error(f"LPMM 初始化失败: {e}", exc_info=True)
- return
-
- query = input("请输入查询问题或关键词: ").strip()
-
- if not query:
- print("\n[!] 查询内容为空,操作已取消。")
- return
-
- # 询问返回条数
- print("-" * 40)
- limit_str = input("希望返回的相关知识条数(默认3,直接回车使用默认值): ").strip()
- try:
- limit = int(limit_str) if limit_str else 3
- limit = max(1, min(limit, 20)) # 限制在1-20之间
- except ValueError:
- limit = 3
- print("[!] 输入无效,使用默认值 3。")
-
- print("\n[进度] 正在查询知识库...")
- try:
- result = await query_lpmm_knowledge(query, limit=limit)
-
- print("\n" + "=" * 60)
- print("[查询结果]")
- print("=" * 60)
- print(result)
- print("=" * 60)
- except Exception as e:
- print(f"\n[×] 查询失败: {e}")
- logger.error(f"查询异常: {e}", exc_info=True)
-
-
-async def main():
- """主循环"""
- while True:
- print("\n" + "╔" + "═" * 38 + "╗")
- print("║ LPMM 知识库交互管理工具 ║")
- print("╠" + "═" * 38 + "╣")
- print("║ 1. 导入知识 (Add Content) ║")
- print("║ 2. 删除知识 (Delete Content) ║")
- print("║ 3. 查询知识 (Search Content) ║")
- print("║ 4. 清空知识库 (Clear All) ⚠️ ║")
- print("║ 0. 退出 (Exit) ║")
- print("╚" + "═" * 38 + "╝")
-
- choice = input("请选择操作编号: ").strip()
-
- if choice == "1":
- await interactive_add()
- elif choice == "2":
- await interactive_delete()
- elif choice == "3":
- await interactive_search()
- elif choice == "4":
- await interactive_clear()
- elif choice in ("0", "q", "Q", "quit", "exit"):
- print("\n已退出工具。")
- break
- else:
- print("\n[!] 无效的选择,请输入 0, 1, 2, 3 或 4。")
-
-
-if __name__ == "__main__":
- try:
- # 运行主循环
- asyncio.run(main())
- except KeyboardInterrupt:
- print("\n\n[!] 用户中断程序 (Ctrl+C)。")
- except Exception as e:
- print(f"\n[!] 程序运行出错: {e}")
- logger.error(f"Main loop 异常: {e}", exc_info=True)
diff --git a/scripts/lpmm_manager.py b/scripts/lpmm_manager.py
deleted file mode 100644
index 868d4b14..00000000
--- a/scripts/lpmm_manager.py
+++ /dev/null
@@ -1,512 +0,0 @@
-import argparse
-import os
-import re
-import sys
-from datetime import datetime
-from pathlib import Path
-from typing import Optional, List
-
-# 尽量统一控制台编码为 utf-8,避免中文输出报错
-try:
- if hasattr(sys.stdout, "reconfigure"):
- sys.stdout.reconfigure(encoding="utf-8")
- if hasattr(sys.stderr, "reconfigure"):
- sys.stderr.reconfigure(encoding="utf-8")
-except Exception:
- pass
-
-# 确保能导入 src.* 以及同目录脚本
-CURRENT_DIR = os.path.dirname(__file__)
-PROJECT_ROOT = os.path.abspath(os.path.join(CURRENT_DIR, ".."))
-if PROJECT_ROOT not in sys.path:
- sys.path.append(PROJECT_ROOT)
-
-from src.common.logger import get_logger # type: ignore # noqa: E402
-from src.config.config import global_config, model_config # type: ignore # noqa: E402
-
-# 引入各功能脚本的入口函数
-from import_openie import main as import_openie_main # type: ignore # noqa: E402
-from info_extraction import main as info_extraction_main # type: ignore # noqa: E402
-from delete_lpmm_items import main as delete_lpmm_items_main # type: ignore # noqa: E402
-from inspect_lpmm_batch import main as inspect_lpmm_batch_main # type: ignore # noqa: E402
-from inspect_lpmm_global import main as inspect_lpmm_global_main # type: ignore # noqa: E402
-from refresh_lpmm_knowledge import main as refresh_lpmm_knowledge_main # type: ignore # noqa: E402
-from test_lpmm_retrieval import main as test_lpmm_retrieval_main # type: ignore # noqa: E402
-from raw_data_preprocessor import load_raw_data # type: ignore # noqa: E402
-
-
-logger = get_logger("lpmm_manager")
-
-
-ACTION_INFO = {
- "prepare_raw": "预处理 data/lpmm_raw_data/*.txt,按空行切分为段落并做去重统计",
- "info_extract": "原始 txt -> OpenIE 信息抽取(调用 info_extraction.py)",
- "import_openie": "导入 OpenIE 批次到向量库与知识图(调用 import_openie.py)",
- "delete": "删除/回滚知识(调用 delete_lpmm_items.py)",
- "batch_inspect": "检查指定 OpenIE 批次在当前库中的存在情况(调用 inspect_lpmm_batch.py)",
- "global_inspect": "查看当前整库向量与 KG 状态(调用 inspect_lpmm_global.py)",
- "refresh": "刷新 LPMM 磁盘数据到内存(调用 refresh_lpmm_knowledge.py)",
- "test": "运行 LPMM 检索效果回归测试(调用 test_lpmm_retrieval.py)",
- "embedding_helper": "嵌入模型迁移辅助:查看当前嵌入模型/维度并归档 embedding_model_test.json",
- "full_import": "一键执行:信息抽取 -> 导入 OpenIE -> 刷新",
-}
-
-
-def _with_overridden_argv(extra_args: List[str], target_main) -> None:
- """在不修改子脚本的前提下,临时覆盖 sys.argv 以透传参数。"""
- old_argv = list(sys.argv)
- try:
- # 第 0 个元素为“程序名”,后续元素为实际参数
- # 这里不再插入类似 delete_lpmm_items.py 的占位,避免被 argparse 误识别为位置参数
- sys.argv = [old_argv[0]] + extra_args
- target_main()
- finally:
- sys.argv = old_argv
-
-
-def _check_before_info_extract(non_interactive: bool = False) -> bool:
- """信息抽取前的轻量级检查。"""
- raw_dir = Path(PROJECT_ROOT) / "data" / "lpmm_raw_data"
- txt_files = list(raw_dir.glob("*.txt"))
- if not txt_files:
- msg = f"[WARN] 未在 {raw_dir} 下找到任何 .txt 原始语料文件,info_extraction 可能立即退出或无数据可处理。"
- print(msg)
- if non_interactive:
- logger.error("非交互模式下要求原始语料目录中已存在可用的 .txt 文件,请先准备好数据再重试。")
- return False
- cont = input("仍然继续执行信息提取吗?(y/n): ").strip().lower()
- return cont == "y"
- return True
-
-
-def _check_before_import_openie(non_interactive: bool = False) -> bool:
- """导入 OpenIE 前的轻量级检查。"""
- openie_dir = Path(PROJECT_ROOT) / "data" / "openie"
- json_files = list(openie_dir.glob("*.json"))
- if not json_files:
- msg = f"[WARN] 未在 {openie_dir} 下找到任何 OpenIE JSON 文件,import_openie 可能会因为找不到批次而失败。"
- print(msg)
- if non_interactive:
- logger.error("非交互模式下要求 data/openie 目录中已存在可用的 OpenIE JSON 文件,请先执行信息提取脚本。")
- return False
- cont = input("仍然继续执行导入吗?(y/n): ").strip().lower()
- return cont == "y"
- return True
-
-
-def _warn_if_lpmm_disabled() -> None:
- """在部分操作前提醒 lpmm_knowledge.enable 状态。"""
- try:
- if not getattr(global_config.lpmm_knowledge, "enable", False):
- print("[WARN] 当前配置 lpmm_knowledge.enable = false,刷新或检索测试可能无法在聊天侧真正启用 LPMM。")
- except Exception:
- # 配置异常时不阻断主流程,仅忽略提示
- pass
-
-
-def run_action(action: str, extra_args: Optional[List[str]] = None) -> None:
- """根据动作名称调度到对应脚本。
-
- 这里不重复解析子参数,而是直接调用各脚本的 main(),
- 让子脚本保留原有的交互/参数行为。
- """
- logger.info(f"开始执行操作: {action}")
-
- extra_args = extra_args or []
-
- try:
- if action == "prepare_raw":
- logger.info("开始预处理原始语料 (data/lpmm_raw_data/*.txt)...")
- sha_list, raw_data = load_raw_data()
- print(f"\n[PREPARE_RAW] 完成原始语料预处理:共 {len(raw_data)} 条段落,去重后哈希数 {len(sha_list)}。")
- elif action == "info_extract":
- if not _check_before_info_extract("--non-interactive" in extra_args):
- print("已根据用户选择,取消执行信息提取。")
- return
- _with_overridden_argv(extra_args, info_extraction_main)
- elif action == "import_openie":
- if not _check_before_import_openie("--non-interactive" in extra_args):
- print("已根据用户选择,取消执行导入。")
- return
- _with_overridden_argv(extra_args, import_openie_main)
- elif action == "delete":
- _with_overridden_argv(extra_args, delete_lpmm_items_main)
- elif action == "batch_inspect":
- _with_overridden_argv(extra_args, inspect_lpmm_batch_main)
- elif action == "global_inspect":
- _with_overridden_argv(extra_args, inspect_lpmm_global_main)
- elif action == "refresh":
- _warn_if_lpmm_disabled()
- _with_overridden_argv(extra_args, refresh_lpmm_knowledge_main)
- elif action == "test":
- _warn_if_lpmm_disabled()
- _with_overridden_argv(extra_args, test_lpmm_retrieval_main)
- elif action == "embedding_helper":
- # 嵌入模型迁移辅助:查看当前嵌入模型/维度并归档 embedding_model_test.json
- _run_embedding_helper()
- elif action == "full_import":
- # 一键流水线:预处理原始语料 -> 信息抽取 -> 导入 -> 刷新
- logger.info("开始 full_import:预处理原始语料 -> 信息抽取 -> 导入 -> 刷新")
- sha_list, raw_data = load_raw_data()
- print(f"\n[FULL_IMPORT] 原始语料预处理完成:共 {len(raw_data)} 条段落,去重后哈希数 {len(sha_list)}。")
- non_interactive = "--non-interactive" in extra_args
- if not _check_before_info_extract(non_interactive):
- print("已根据用户选择,取消 full_import(信息提取阶段被取消)。")
- return
- # 使用与单步 info_extract 相同的参数透传机制,确保 --non-interactive 等生效
- _with_overridden_argv(extra_args, info_extraction_main)
- if not _check_before_import_openie(non_interactive):
- print("已根据用户选择,取消 full_import(导入阶段被取消)。")
- return
- _with_overridden_argv(extra_args, import_openie_main)
- _warn_if_lpmm_disabled()
- _with_overridden_argv(extra_args, refresh_lpmm_knowledge_main)
- else:
- logger.error(f"未知操作: {action}")
- except KeyboardInterrupt:
- logger.info("用户中断当前操作(Ctrl+C)")
- except SystemExit:
- # 子脚本里大量使用 sys.exit,直接透传即可
- raise
- except Exception as exc: # pragma: no cover - 防御性兜底
- logger.error(f"执行操作 {action} 时发生未捕获异常: {exc}")
- raise
-
-
-def print_menu() -> None:
- print("\n===== LPMM 管理菜单 =====")
- for idx, key in enumerate(
- [
- "prepare_raw",
- "info_extract",
- "import_openie",
- "delete",
- "batch_inspect",
- "global_inspect",
- "refresh",
- "test",
- "embedding_helper",
- "full_import",
- ],
- start=1,
- ):
- desc = ACTION_INFO.get(key, "")
- print(f"{idx}. {key:14s} - {desc}")
- print("0. 退出")
- print("=========================")
-
-
-def interactive_loop() -> None:
- """交互式选择模式。"""
- key_order = [
- "prepare_raw",
- "info_extract",
- "import_openie",
- "delete",
- "batch_inspect",
- "global_inspect",
- "refresh",
- "test",
- "embedding_helper",
- "full_import",
- ]
-
- while True:
- print_menu()
- choice = input("请输入选项编号(0-10):").strip()
-
- if choice in ("0", "q", "Q", "quit", "exit"):
- print("已退出 LPMM 管理器。")
- return
-
- try:
- idx = int(choice)
- except ValueError:
- print("输入无效,请输入 0-10 之间的数字。")
- continue
-
- if not (1 <= idx <= len(key_order)):
- print("输入编号超出范围,请重新输入。")
- continue
-
- action = key_order[idx - 1]
- print(f"\n你选择了: {action} - {ACTION_INFO.get(action, '')}")
- confirm = input("确认执行该操作?(y/n): ").strip().lower()
- if confirm != "y":
- print("已取消当前操作。\n")
- continue
-
- # 通过交互式问题,尽量帮用户补全对应脚本的常用参数
- extra_args: List[str] = []
- if action == "delete":
- extra_args = _interactive_build_delete_args()
- elif action == "batch_inspect":
- extra_args = _interactive_build_batch_inspect_args()
- elif action == "test":
- extra_args = _interactive_build_test_args()
- else:
- extra_args = []
-
- run_action(action, extra_args=extra_args)
- print("\n当前操作已结束,回到主菜单。\n")
-
-
-def _interactive_choose_openie_file(prompt: str) -> Optional[str]:
- """在 data/openie 下列出可选 JSON 文件,并返回用户选择的路径。"""
- openie_dir = Path(PROJECT_ROOT) / "data" / "openie"
- files = sorted(openie_dir.glob("*.json"))
- if not files:
- print(f"[WARN] 在 {openie_dir} 下没有找到任何 OpenIE JSON 文件。")
- return input(prompt).strip() or None
-
- print("\n可选的 OpenIE 批次文件:")
- for i, f in enumerate(files, start=1):
- print(f"{i}. {f.name}")
- print("0. 手动输入完整路径")
-
- while True:
- choice = input("请选择文件编号:").strip()
- if choice == "0":
- manual = input(prompt).strip()
- return manual or None
- try:
- idx = int(choice)
- except ValueError:
- print("请输入合法的编号。")
- continue
- if 1 <= idx <= len(files):
- return str(files[idx - 1])
- print("编号超出范围,请重试。")
-
-
-def _interactive_build_delete_args() -> List[str]:
- """为 delete_lpmm_items 构造常见参数,减少二次交互。"""
- print(
- "\n[DELETE] 请选择删除方式:\n"
- "1. 按哈希文件删除 (--hash-file)\n"
- "2. 按 OpenIE 批次删除 (--openie-file)\n"
- "3. 按原始语料文件 + 段落索引删除 (--raw-file + --raw-index)\n"
- "4. 按关键字搜索现有段落 (--search-text)\n"
- "回车跳过,由子脚本自行交互。"
- )
- mode = input("输入选项编号(1-4,或回车跳过):").strip()
- args: List[str] = []
-
- if mode == "1":
- path = input("请输入哈希文件路径(每行一个 hash):").strip()
- if path:
- args += ["--hash-file", path]
- elif mode == "2":
- path = _interactive_choose_openie_file("请输入 OpenIE JSON 文件路径:")
- if path:
- args += ["--openie-file", path]
- elif mode == "3":
- raw_file = input("请输入原始语料 txt 文件路径:").strip()
- raw_index = input("请输入要删除的段落索引(如 1,3):").strip()
- if raw_file and raw_index:
- args += ["--raw-file", raw_file, "--raw-index", raw_index]
- elif mode == "4":
- text = input("请输入用于搜索的关键字(出现在段落原文中):").strip()
- if text:
- args += ["--search-text", text]
- else:
- # 留空则完全交给子脚本交互
- return []
-
- # 进一步询问与安全相关的布尔选项
- print(
- "\n[DELETE] 接下来是一些安全相关选项的说明:\n"
- "- 删除实体向量/节点:会一并清理与这些段落关联的实体节点及其向量;\n"
- "- 删除关系向量:在上面的基础上,额外清理关系向量(一般与删除实体一同使用);\n"
- "- 删除孤立实体节点:删除后若实体不再连接任何段落,将其从图中移除,避免残留孤点;\n"
- "- dry-run:只预览将要删除的内容,不真正修改任何数据;\n"
- "- 跳过交互确认(--yes):直接执行删除操作,适合脚本化或已充分确认的场景;\n"
- "- 单次最大删除节点数上限:防止一次性删除规模过大,起到误操作保护作用;\n"
- "- 一般情况下建议同时删除实体向量/节点/关系向量/节点,以确保知识图谱的完整性。"
- )
-
- # 快速选项:按推荐方式清理所有相关实体/关系
- quick_all = (
- input("是否使用推荐策略:同时删除关联的实体向量/节点、关系向量,并清理孤立实体?(Y/n): ").strip().lower()
- )
- if quick_all in ("", "y", "yes"):
- args.extend(["--delete-entities", "--delete-relations", "--remove-orphan-entities"])
- else:
- # 仅当未使用快速方案时,再逐项询问
- if input("是否同时删除实体向量/节点?(y/N): ").strip().lower() == "y":
- args.append("--delete-entities")
- if input("是否同时删除关系向量?(y/N): ").strip().lower() == "y":
- args.append("--delete-relations")
-
- if input("是否删除孤立实体节点?(y/N): ").strip().lower() == "y":
- args.append("--remove-orphan-entities")
-
- if input("是否以 dry-run 预览而不真正删除?(y/N): ").strip().lower() == "y":
- args.append("--dry-run")
- else:
- if input("是否跳过交互确认直接删除?(默认否,请谨慎) (y/N): ").strip().lower() == "y":
- args.append("--yes")
-
- max_nodes = input("单次最大删除节点数上限(回车使用默认 2000):").strip()
- if max_nodes:
- args += ["--max-delete-nodes", max_nodes]
-
- return args
-
-
-def _interactive_build_batch_inspect_args() -> List[str]:
- """为 inspect_lpmm_batch 构造 --openie-file 参数。"""
- path = _interactive_choose_openie_file("请输入要检查的 OpenIE JSON 文件路径(回车跳过,由子脚本自行交互):")
- if not path:
- return []
- return ["--openie-file", path]
-
-
-def _interactive_build_test_args() -> List[str]:
- """为 test_lpmm_retrieval 构造自定义测试用例参数。"""
- print("\n[TEST] 你可以:\n- 直接回车使用内置的默认测试用例;\n- 或者输入一条自定义问题,并指定期望命中的关键字。")
- query = input("请输入自定义测试问题(回车则使用默认用例):").strip()
- if not query:
- return []
-
- expect = input("请输入期望命中的关键字(可选,多项用逗号分隔):").strip()
- args: List[str] = ["--query", query]
- if expect:
- for kw in expect.split(","):
- kw = kw.strip()
- if kw:
- args.extend(["--expect-keyword", kw])
- return args
-
-
-def _run_embedding_helper() -> None:
- """嵌入模型迁移辅助:展示当前配置,并安全归档 embedding_model_test.json。"""
- from src.chat.knowledge.embedding_store import EMBEDDING_TEST_FILE # type: ignore
-
- # 1. 读取当前配置中的嵌入维度与模型信息
- current_dim = getattr(getattr(global_config, "lpmm_knowledge", None), "embedding_dimension", None)
- embed_task = getattr(model_config.model_task_config, "embedding", None)
- model_ids: List[str] = []
- if embed_task is not None:
- model_ids = getattr(embed_task, "model_list", []) or []
- primary_model = model_ids[0] if model_ids else "unknown"
- safe_model_name = re.sub(r"[^0-9A-Za-z_.-]+", "_", primary_model) or "unknown"
-
- print("\n===== 嵌入模型迁移辅助 (embedding_helper) =====")
- print(f"- 当前嵌入模型标识(model_task_config.embedding.model_list[0]): {primary_model}")
- print(f"- 当前配置中的嵌入维度 (lpmm_knowledge.embedding_dimension): {current_dim}")
- print(f"- 测试文件路径: {EMBEDDING_TEST_FILE}")
-
- new_dim = input("\n如果你计划更换嵌入模型,请在此输入“新的嵌入维度”(仅用于记录与提示,回车则跳过):").strip()
- if new_dim and not new_dim.isdigit():
- print("输入的维度不是纯数字,已取消操作。")
- return
-
- print(
- "\n[重要提示]\n"
- "- 修改嵌入模型或维度会导致当前磁盘中的旧知识库(data/embedding 下的向量)与新模型不兼容;\n"
- "- 这通常意味着你需要清空旧的向量/图数据,并重新执行 LPMM 导入流水线;\n"
- "- 请仅在你**确定要切换嵌入模型/维度**时再继续。\n"
- )
- confirm = input("是否已充分评估风险,并准备切换嵌入模型/维度?(y/N): ").strip().lower()
- if confirm != "y":
- print("已根据你的选择取消嵌入模型迁移辅助操作。")
- return
-
- print(
- "\n接下来请手动完成以下操作(脚本不会自动修改配置或删除知识库):\n"
- f"1. 在配置文件中,将 lpmm_knowledge.embedding_dimension 从 {current_dim} 修改为你计划使用的新维度"
- + (f"(例如 {new_dim})" if new_dim else "") # 仅作为示例
- + ";\n"
- "2. 根据需要,清空 data/embedding 与相关 KG 数据(data/rag 等),然后重新执行导入流水线;\n"
- "3. 本脚本将帮助你归档当前的 embedding_model_test.json,避免旧测试文件干扰新模型的校验。\n"
- )
-
- # 2. 归档 embedding_model_test.json
- test_path = Path(EMBEDDING_TEST_FILE)
- if not test_path.exists():
- print(f"\n[INFO] 未在 {test_path} 发现 embedding_model_test.json,无需归档。")
- return
-
- ts = datetime.now().strftime("%Y%m%d-%H%M%S")
- archive_name = f"embedding_model_test-{safe_model_name}-{ts}.json"
- archive_path = test_path.with_name(archive_name)
-
- # 若不巧重名,简单追加后缀避免覆盖
- suffix_id = 1
- while archive_path.exists():
- archive_name = f"embedding_model_test-{safe_model_name}-{ts}-{suffix_id}.json"
- archive_path = test_path.with_name(archive_name)
- suffix_id += 1
-
- try:
- test_path.rename(archive_path)
- except Exception as exc: # pragma: no cover - 防御性兜底
- logger.error(f"归档 embedding_model_test.json 失败: {exc}")
- print("[ERROR] 归档 embedding_model_test.json 失败,请检查文件权限与路径。错误详情已写入日志。")
- return
-
- print(
- f"\n[OK] 已将 {test_path.name} 重命名为 {archive_path.name}。\n"
- f"- 归档位置: {archive_path}\n"
- "- 之后再次运行涉及嵌入模型的一致性校验时,将会基于当前配置与新模型生成新的测试文件。\n"
- "- 在完成配置修改与知识库重导入前,请不要手动再创建名为 embedding_model_test.json 的文件。"
- )
-
-
-def parse_args(argv: Optional[list[str]] = None) -> tuple[argparse.Namespace, List[str]]:
- parser = argparse.ArgumentParser(
- description=(
- "LPMM 管理脚本:集中入口管理 LPMM 的导入 / 删除 / 自检 / 刷新 / 测试等功能。\n"
- "可以通过 --interactive 进入菜单模式,也可以使用 --action 直接执行单个操作。"
- )
- )
- parser.add_argument(
- "-i",
- "--interactive",
- action="store_true",
- help="进入交互式菜单模式(推荐给手动运维使用)",
- )
- parser.add_argument(
- "-a",
- "--action",
- choices=list(ACTION_INFO.keys()),
- help="直接执行指定操作(非交互模式)",
- )
- parser.add_argument(
- "--non-interactive",
- action="store_true",
- help=(
- "启用非交互模式:lpmm_manager 自身不会再通过 input() 询问是否继续前置检查;"
- "并会将 --non-interactive 透传给子脚本,以避免子脚本中的交互式确认。"
- ),
- )
- # 允许在管理脚本之后继续跟随子脚本参数,例如:
- # python lpmm_manager.py -a delete -- --hash-file xxx --yes
- args, unknown = parser.parse_known_args(argv)
- return args, unknown
-
-
-def main(argv: Optional[list[str]] = None) -> None:
- args, extra_args = parse_args(argv)
-
- # 如果指定了 non-interactive,则不能进入交互式菜单
- if args.non_interactive and args.interactive:
- logger.error("不能同时指定 --interactive 与 --non-interactive,请二选一。")
- sys.exit(1)
-
- # 没有指定 action 或显式要求交互 -> 进入菜单
- if args.interactive or not args.action:
- interactive_loop()
- return
-
- # 在非交互模式下,将 --non-interactive 透传给子脚本,避免其内部出现 input() 交互
- if args.non_interactive:
- extra_args = ["--non-interactive"] + extra_args
-
- # 非交互模式:直接执行指定操作
- run_action(args.action, extra_args=extra_args)
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/manual_evaluation_results.json b/scripts/manual_evaluation_results.json
deleted file mode 100644
index 9e7f1cf3..00000000
--- a/scripts/manual_evaluation_results.json
+++ /dev/null
@@ -1,3236 +0,0 @@
-{
- "last_updated": "2025-12-26T16:33:12.430516",
- "total_count": 360,
- "manual_results": [
- {
- "expression_id": 3169,
- "situation": "调侃式回应他人疑问",
- "style": "hhh",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:14.505956"
- },
- {
- "expression_id": 2488,
- "situation": "建议被忽视,问题依旧存在",
- "style": "简单回应 yes 后继续说失败",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:17.306742"
- },
- {
- "expression_id": 3535,
- "situation": "提出反对意见时",
- "style": "使用'我拒绝xxx'句式",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:18.516562"
- },
- {
- "expression_id": 5287,
- "situation": "针对技术方案提出建议时",
- "style": "采用“...一下”或“试试”句式",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:20.937777"
- },
- {
- "expression_id": 5136,
- "situation": "对他人发言进行同步复读以强化语气",
- "style": "复读对方原话并加@",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:38.770362"
- },
- {
- "expression_id": 3178,
- "situation": "认可他人观点时表达认同",
- "style": "使用 你别说...句式",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:42.465183"
- },
- {
- "expression_id": 5344,
- "situation": "对突然变化的规则表示荒诞感",
- "style": "今天这个是自动减的",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:45.801959"
- },
- {
- "expression_id": 2379,
- "situation": "讽刺技术荒诞的文学化表达",
- "style": "用诗意语言描述技术崩溃",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:49.097711"
- },
- {
- "expression_id": 2508,
- "situation": "被要求不回复时仍执意回应",
- "style": "用“不要回复我”直接拒绝",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:51.913429"
- },
- {
- "expression_id": 2573,
- "situation": "聊天中热议动漫角色",
- "style": "使用'萌音'指代角色",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:17:54.017132"
- },
- {
- "expression_id": 2051,
- "situation": "对比抽象概念时的对话特征",
- "style": "使用 vs 连接两个对立概念",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:03.928730"
- },
- {
- "expression_id": 4327,
- "situation": "对他人建议提出反问以质疑合理性",
- "style": "要XX干嘛",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:05.944953"
- },
- {
- "expression_id": 3432,
- "situation": "讨论插件功能时强调专注性",
- "style": "使用 专注于某动作",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:08.425325"
- },
- {
- "expression_id": 3591,
- "situation": "对他人建议表示认可并补充",
- "style": "嗯,也是好选择",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:11.202821"
- },
- {
- "expression_id": 602,
- "situation": "简短确认他人引用内容",
- "style": "回 复 中",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:13.296995"
- },
- {
- "expression_id": 2552,
- "situation": "系统状态描述均简洁明确",
- "style": "使用 重启就会变",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:21.089313"
- },
- {
- "expression_id": 1566,
- "situation": "讨论服务器资源占用情况",
- "style": "使用 吃了太多电费",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:22.585119"
- },
- {
- "expression_id": 839,
- "situation": "对某事持怀疑或否定态度",
- "style": "我觉得肯定受不了",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:24.680676"
- },
- {
- "expression_id": 3673,
- "situation": "建议用无关方式解决矛盾",
- "style": "删掉XX即可",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:28.842462"
- },
- {
- "expression_id": 1723,
- "situation": "反复强调同一观点",
- "style": "重复使用相同短句",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:30.728958"
- },
- {
- "expression_id": 2732,
- "situation": "用否定句式表达无力回应建议",
- "style": "无法起飞",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:35.472037"
- },
- {
- "expression_id": 2965,
- "situation": "确认为其他账户",
- "style": "对",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:37.170307"
- },
- {
- "expression_id": 4436,
- "situation": "对他人操作流程表示质疑",
- "style": "你虚拟环境呢",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:39.192119"
- },
- {
- "expression_id": 995,
- "situation": "模仿他人发言",
- "style": "直接重复他人原话",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:40.703669"
- },
- {
- "expression_id": 495,
- "situation": "对技术建议常持否定态度",
- "style": "补充说明细节或注意事项",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:44.968474"
- },
- {
- "expression_id": 4443,
- "situation": "警告他人不要随意操作",
- "style": "重复 '别乱动乱敲'",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:48.256356"
- },
- {
- "expression_id": 3059,
- "situation": "质疑数据真实性",
- "style": "结果不真啊,麦的xxx呢",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:51.080153"
- },
- {
- "expression_id": 4038,
- "situation": "回应无厘头或荒诞内容时假装严肃",
- "style": "你尔朵笼还是盐津虾",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:18:54.202207"
- },
- {
- "expression_id": 1121,
- "situation": "发现技术细节时的对话特征",
- "style": "连续使用woc表达惊讶",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:19:04.408294"
- },
- {
- "expression_id": 1328,
- "situation": "AI输出异常或过度",
- "style": "用“发力”形容模型生成内容,带调侃",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:19:06.559956"
- },
- {
- "expression_id": 5506,
- "situation": "渴望得到他人反馈或急于求证时",
- "style": "使用祈使短语“求回答”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:08.938962"
- },
- {
- "expression_id": 662,
- "situation": "AI输出不稳定时的聊天情境",
- "style": "使用“不收敛”形容失控",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:12.483859"
- },
- {
- "expression_id": 3944,
- "situation": "对他人疑问进行引导性回应",
- "style": "你问问课代表?",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:14.507348"
- },
- {
- "expression_id": 4220,
- "situation": "半开玩笑地讨论学校政策",
- "style": "使用‘确实爽’和‘也就’表达半开玩笑",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:16.395121"
- },
- {
- "expression_id": 1530,
- "situation": "用无厘头回应化解尴尬",
- "style": "还有上门服务",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:29.306839"
- },
- {
- "expression_id": 1236,
- "situation": "系统故障时用户寻求帮助",
- "style": "直接陈述故障原因",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:32.467370"
- },
- {
- "expression_id": 1635,
- "situation": "表达惊讶或困惑",
- "style": "使用 这是什么东西",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:35.506595"
- },
- {
- "expression_id": 4145,
- "situation": "对他人行为表示接受并轻描淡写回应",
- "style": "搜嘎",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:38.307144"
- },
- {
- "expression_id": 2385,
- "situation": "聊天中突兀插入无关话题",
- "style": "用无意义数字或感叹词填充",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:40.594576"
- },
- {
- "expression_id": 1179,
- "situation": "游戏比较中常带主观评价",
- "style": "使用“感觉不如...”句式进行对比",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:43.075143"
- },
- {
- "expression_id": 343,
- "situation": "认可时表达赞同",
- "style": "使用“中”等单字肯定",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:47.058887"
- },
- {
- "expression_id": 2303,
- "situation": "用反常识鼓励回应消极情绪",
- "style": "上药",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:49.586496"
- },
- {
- "expression_id": 4246,
- "situation": "讨论技术问题时提出解决方案",
- "style": "直接给出技术术语建议",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:53.050459"
- },
- {
- "expression_id": 3955,
- "situation": "表达对问题的不满",
- "style": "使用'有点慢'结构",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:56.305860"
- },
- {
- "expression_id": 1435,
- "situation": "邀请聚餐时的社交互动",
- "style": "使用 有没有一起去吃火锅的",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:23:58.762824"
- },
- {
- "expression_id": 4800,
- "situation": "对游戏机制表示不解",
- "style": "问“怎么自动扣除了”",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:07.842563"
- },
- {
- "expression_id": 2632,
- "situation": "犹豫不决,难以抉择",
- "style": "强调 都试试",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:09.730461"
- },
- {
- "expression_id": 1872,
- "situation": "确认他人建议",
- "style": "对喵",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:10.866521"
- },
- {
- "expression_id": 1118,
- "situation": "请求未获直接同意",
- "style": "使用“烦死了!(自动回复)”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:14.370537"
- },
- {
- "expression_id": 1709,
- "situation": "神秘化描述事物",
- "style": "使用 神秘+名词",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:16.929684"
- },
- {
- "expression_id": 5010,
- "situation": "质疑插件指令被误判为对话",
- "style": "为啥插件触发的指令会被bot当成对话",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:22.425584"
- },
- {
- "expression_id": 3706,
- "situation": "对他人建议的轻蔑态度",
- "style": "使用'直接杀死'",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:26.681089"
- },
- {
- "expression_id": 4490,
- "situation": "面对复杂问题时表达头大或压力大",
- "style": "头大",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:28.010419"
- },
- {
- "expression_id": 3408,
- "situation": "回应荒诞承诺时提出更荒诞条件",
- "style": "只要...就可以了喵",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:31.538269"
- },
- {
- "expression_id": 1293,
- "situation": "多人在线游戏组队求助",
- "style": "使用 数字 表示确认参与",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:33.305892"
- },
- {
- "expression_id": 4322,
- "situation": "回应他人提议或信息时表示认同",
- "style": "说 还真是",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:53.640715"
- },
- {
- "expression_id": 4782,
- "situation": "请求操作指导",
- "style": "怎么做枫枫",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:24:55.329241"
- },
- {
- "expression_id": 4249,
- "situation": "讨论工具优劣",
- "style": "使用比较句式",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:25:16.202072"
- },
- {
- "expression_id": 357,
- "situation": "讨论游戏机制时的交流",
- "style": "使用 模块化框架",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:25:18.071625"
- },
- {
- "expression_id": 5503,
- "situation": "表达无奈、疲惫或轻微叹息时",
- "style": "使用单字语气助词“哎”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:25:19.896008"
- },
- {
- "expression_id": 1212,
- "situation": "震惊于荒谬猎奇内容",
- "style": "使用‘我艹’等感叹词表达强烈反应",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:42.274011"
- },
- {
- "expression_id": 1983,
- "situation": "技术讨论中使用专业术语",
- "style": "提及‘API’‘插件’",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:43.878236"
- },
- {
- "expression_id": 4106,
- "situation": "对投票结果表示意外",
- "style": "结合表情符号表达震惊",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:45.653868"
- },
- {
- "expression_id": 2450,
- "situation": "提出模糊建议或方向",
- "style": "看看",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:47.667071"
- },
- {
- "expression_id": 3993,
- "situation": "对不明内容表示困惑",
- "style": "直接说'不理解'",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:49.230999"
- },
- {
- "expression_id": 5291,
- "situation": "追究责任或寻找原因时",
- "style": "使用“...的锅”进行定性",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:50.621311"
- },
- {
- "expression_id": 2242,
- "situation": "对技术问题感到困惑",
- "style": "说命令和主分支一样,文档没写",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:52.797922"
- },
- {
- "expression_id": 4203,
- "situation": "暗示等待经济到账后行动",
- "style": "发米了就去找人玩",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:54.809907"
- },
- {
- "expression_id": 2342,
- "situation": "用地域调侃化解严肃话题",
- "style": "昌平县城",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:57.269840"
- },
- {
- "expression_id": 4797,
- "situation": "对他人观点提出反问以示质疑",
- "style": "为什么不要凭证(?)",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:51:59.469990"
- },
- {
- "expression_id": 1703,
- "situation": "确认方案是否可行",
- "style": "以'就可以'作结",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:11.254043"
- },
- {
- "expression_id": 2381,
- "situation": "表达失望时的负面情绪反应",
- "style": "使用粗俗比喻描述期待与现实的落差",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:12.918439"
- },
- {
- "expression_id": 2860,
- "situation": "用夸张比喻描述抽象概念",
- "style": "说 到不了会有阴兵的",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:15.326344"
- },
- {
- "expression_id": 199,
- "situation": "讨论技术实现方案",
- "style": "使用项目化表述",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:18.646503"
- },
- {
- "expression_id": 3174,
- "situation": "建议时总被忽略或反驳",
- "style": "采用 直接建议句式",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:22.445332"
- },
- {
- "expression_id": 5195,
- "situation": "对某种方案或条件表示极度认可",
- "style": "使用“无敌”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:24.621702"
- },
- {
- "expression_id": 1012,
- "situation": "反复威胁,情绪升级",
- "style": "刻意模仿客服话术",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:26.349202"
- },
- {
- "expression_id": 3012,
- "situation": "对术语困惑不解",
- "style": "啊?那是啥平台的代金券",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:27.668688"
- },
- {
- "expression_id": 2620,
- "situation": "表达赞同或认可时的回应",
- "style": "使用“好看”、“666”等简短夸赞",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:28.765022"
- },
- {
- "expression_id": 2214,
- "situation": "反差语气掩饰内心崩溃",
- "style": "纯搞来的",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T01:52:31.452526"
- },
- {
- "expression_id": 3847,
- "situation": "对不想做的事表达抗拒",
- "style": "但是我一直不想",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:23:50.924439"
- },
- {
- "expression_id": 5534,
- "situation": "被说像某物时否认并强调不同",
- "style": "说 不是xxx",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:23:56.427990"
- },
- {
- "expression_id": 4648,
- "situation": "被质疑行为逻辑时强行合理化",
- "style": "说‘逻辑上就是这样的’",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:24:02.995928"
- },
- {
- "expression_id": 2213,
- "situation": "轻描淡写回应,单字默认认同",
- "style": "6",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:24:04.060748"
- },
- {
- "expression_id": 374,
- "situation": "质疑对方隐瞒信息",
- "style": "使用 是不是在xxx里存xxx",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:24:09.139523"
- },
- {
- "expression_id": 1441,
- "situation": "任务压力大,沟通紧张",
- "style": "用 无开始搞 表达",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:03.202611"
- },
- {
- "expression_id": 2931,
- "situation": "困惑或无法理解时寻求帮助",
- "style": "使用“发啥了”询问具体情况",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:06.154571"
- },
- {
- "expression_id": 2431,
- "situation": "询问接口性能时关注响应速度",
- "style": "用“有没有几百token/s的”量化提问",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:08.186667"
- },
- {
- "expression_id": 3329,
- "situation": "对他人行为表示惊讶或讽刺",
- "style": "妈的气笑了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:09.673876"
- },
- {
- "expression_id": 3716,
- "situation": "表达对系统改进的期待",
- "style": "使用'再次伟大'类比",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:18.306136"
- },
- {
- "expression_id": 505,
- "situation": "看到离谱内容时的震惊反应",
- "style": "使用“逆天”表示惊叹",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:21.410431"
- },
- {
- "expression_id": 4796,
- "situation": "对他人技术问题表示不解",
- "style": "?你在说什么啊",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:22.954275"
- },
- {
- "expression_id": 2889,
- "situation": "增进感情的亲密表达",
- "style": "使用“贴贴”、“抱抱”等叠词",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:24.106546"
- },
- {
- "expression_id": 3698,
- "situation": "对他人建议表示附和但无实质回应",
- "style": "虽然我也不知道该hook啥",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:27.298723"
- },
- {
- "expression_id": 552,
- "situation": "测试新功能时",
- "style": "表达试用意愿",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:29.066189"
- },
- {
- "expression_id": 5362,
- "situation": "催促对方开启加速器",
- "style": "加速器开了吧",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:30.881502"
- },
- {
- "expression_id": 762,
- "situation": "无厘头接话搞怪互动",
- "style": "那就发人模狗样",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:34.914448"
- },
- {
- "expression_id": 3868,
- "situation": "对他人观点进行轻蔑式贬低",
- "style": "这有点阿姨了",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:36.777608"
- },
- {
- "expression_id": 1690,
- "situation": "对复杂功能感到惊讶或抗拒",
- "style": "那有点过于...了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:40.578147"
- },
- {
- "expression_id": 389,
- "situation": "问题解决后致谢",
- "style": "说 爱你 + 表情",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:25:41.729439"
- },
- {
- "expression_id": 567,
- "situation": "表达个人偏好时的主观陈述",
- "style": "使用 肯定是取我喜欢的",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:13.295939"
- },
- {
- "expression_id": 3068,
- "situation": "被反复重复相同句式",
- "style": "模仿并加重句式重复",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:16.903331"
- },
- {
- "expression_id": 3108,
- "situation": "计划时模糊指代资源",
- "style": "我先用...还打算从...",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:22.205836"
- },
- {
- "expression_id": 3725,
- "situation": "对回复质量表示认可",
- "style": "感觉还挺高的",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:23.678708"
- },
- {
- "expression_id": 3280,
- "situation": "谐音梗成聊天幽默利器",
- "style": "使用 孔喵莲",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:25.717663"
- },
- {
- "expression_id": 3767,
- "situation": "否定某种过度配置",
- "style": "没必要开extra high",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:31.309638"
- },
- {
- "expression_id": 5507,
- "situation": "对他人行为表示调侃式认同",
- "style": "这是一种无声的反抗",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:35.772597"
- },
- {
- "expression_id": 3885,
- "situation": "对明显跑题表示无奈",
- "style": "串了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:36.972325"
- },
- {
- "expression_id": 4080,
- "situation": "对某事表示认同并补充细节",
- "style": "还真是",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:39.123904"
- },
- {
- "expression_id": 1000,
- "situation": "用反讽接受他人调侃",
- "style": "玩的还挺花的你这",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:42.140283"
- },
- {
- "expression_id": 4976,
- "situation": "认为旧话题应该结束时",
- "style": "使用 翻篇",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:45.379005"
- },
- {
- "expression_id": 3358,
- "situation": "对他人建议表示轻微反驳或无奈",
- "style": "不至于吧,这还没出几天",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:48.050954"
- },
- {
- "expression_id": 5267,
- "situation": "对技术问题表示无奈或调侃",
- "style": "用括号补充自嘲式备注",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:50.914664"
- },
- {
- "expression_id": 4063,
- "situation": "对复杂问题表示难以判断",
- "style": "说实话...真不好判断",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:54.594218"
- },
- {
- "expression_id": 855,
- "situation": "系统崩溃引发多场景对话",
- "style": "使用网络流行语",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:56.281794"
- },
- {
- "expression_id": 3075,
- "situation": "认可不当建议",
- "style": "使用 数字6 或 666 表示赞同",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:36:57.994884"
- },
- {
- "expression_id": 3861,
- "situation": "质疑换方案的动机",
- "style": "换他干嘛",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:37:01.857283"
- },
- {
- "expression_id": 1393,
- "situation": "无奈接受技术限制",
- "style": "这个模型最高不就是10mb么",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:37:04.168960"
- },
- {
- "expression_id": 4552,
- "situation": "对他人关系发展进行猜测或八卦",
- "style": "感觉要成了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:37:05.785418"
- },
- {
- "expression_id": 676,
- "situation": "用重复关键词回应荒谬言论",
- "style": "赛尔赛尔~",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:37:07.208807"
- },
- {
- "expression_id": 2072,
- "situation": "讨论衣物搭配方案",
- "style": "使用“xxx套xxx”描述叠穿",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:53:38.798708"
- },
- {
- "expression_id": 4683,
- "situation": "对选择模型时强调性价比",
- "style": "哪个上的快我先上哪个",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:53:42.150304"
- },
- {
- "expression_id": 1361,
- "situation": "厌倦工作学习,想躺平休息",
- "style": "用 几个月都不想碰 来描述状态",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:53:43.838510"
- },
- {
- "expression_id": 4577,
- "situation": "描述某种负面现象的持续影响",
- "style": "...后遗症",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:53:46.573887"
- },
- {
- "expression_id": 5103,
- "situation": "对他人发言表示附和但无实质内容",
- "style": "哦,知道了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:53:48.062051"
- },
- {
- "expression_id": 4010,
- "situation": "对荒诞现象表示无语",
- "style": "再无话说",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:53:49.781501"
- },
- {
- "expression_id": 2103,
- "situation": "惊讶或无奈的反应",
- "style": "使用单字“草”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:53:50.989742"
- },
- {
- "expression_id": 958,
- "situation": "对他人发言感到震惊",
- "style": "用‘我草’‘吓哭了’‘能说吗我真吓到了’",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:53:52.117589"
- },
- {
- "expression_id": 1864,
- "situation": "调侃他人行为时的幽默互动",
- "style": "用‘量子叠加态’等科学术语进行荒诞比喻",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:53:54.797521"
- },
- {
- "expression_id": 4726,
- "situation": "对他人建议表示反对或讽刺",
- "style": "用别人还是比用自己的爽",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:04.268461"
- },
- {
- "expression_id": 3071,
- "situation": "聊天中常以亲昵方式打招呼",
- "style": "使用 麦麦你好/麦麦你好香",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:08.882885"
- },
- {
- "expression_id": 4738,
- "situation": "对复杂现象进行简化归因并带自嘲",
- "style": "规划器全过了",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:10.835122"
- },
- {
- "expression_id": 3884,
- "situation": "突然引入无关信息转移话题",
- "style": "其实叔叔的生日是12月3日 [图片2]",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:14.163726"
- },
- {
- "expression_id": 3838,
- "situation": "讨论网络交易陷阱",
- "style": "用'白嫖一个号'描述欺诈行为",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:17.066433"
- },
- {
- "expression_id": 2994,
- "situation": "越解释越糊涂",
- "style": "是这么说",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:20.706261"
- },
- {
- "expression_id": 2930,
- "situation": "转发或观点引发讨论",
- "style": "用“是这样的”表示认同或总结",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:22.378464"
- },
- {
- "expression_id": 4480,
- "situation": "对模糊概念试图澄清",
- "style": "就是那个",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:24.290415"
- },
- {
- "expression_id": 1229,
- "situation": "认同他人,却无奈叹息",
- "style": "现在知道了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:26.410167"
- },
- {
- "expression_id": 2830,
- "situation": "用无厘头称呼调侃转移话题",
- "style": "喊我妈妈",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:30.378153"
- },
- {
- "expression_id": 4893,
- "situation": "对他人发言表示质疑并要求澄清",
- "style": "追问 不是什么不是",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:34.264776"
- },
- {
- "expression_id": 4792,
- "situation": "对他人发言进行重复引用并追问",
- "style": "[回复 X:Y],说:@X Z",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:54:37.200473"
- },
- {
- "expression_id": 3764,
- "situation": "表达对某事物不信任或不安",
- "style": "感觉不太踏实",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:55:04.670326"
- },
- {
- "expression_id": 1452,
- "situation": "技术建议常被忽视或质疑",
- "style": "表示赞同并称呼对方昵称",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:55:08.054725"
- },
- {
- "expression_id": 559,
- "situation": "面对模糊指令时表达困惑",
- "style": "连续发送 ?",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:55:09.717423"
- },
- {
- "expression_id": 5559,
- "situation": "用伪文言文语气调侃时",
- "style": "使用 汝...之...也乎 句式",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:55:14.565000"
- },
- {
- "expression_id": 1077,
- "situation": "模型回避身份提问",
- "style": "使用 你是...吗 直接提问",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:55:16.844791"
- },
- {
- "expression_id": 2634,
- "situation": "玩家常以夸张或幽默方式描述游戏行为",
- "style": "使用 开大车",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:55:18.652901"
- },
- {
- "expression_id": 5113,
- "situation": "请求帮助时情绪焦急",
- "style": "使用 救一救孩子",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:55:20.468956"
- },
- {
- "expression_id": 2211,
- "situation": "调侃他人突然自曝身份",
- "style": "我搭的,看我看我",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:55:22.364349"
- },
- {
- "expression_id": 4358,
- "situation": "提及特定模型",
- "style": "使用 大香蕉",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T10:55:26.196360"
- },
- {
- "expression_id": 3525,
- "situation": "拉取失败时表达无奈",
- "style": "拉不下来了草",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:17.912143"
- },
- {
- "expression_id": 1035,
- "situation": "任务无法及时完成",
- "style": "说明时间安排",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:19.288011"
- },
- {
- "expression_id": 3327,
- "situation": "对技术实现失败归因于提示词",
- "style": "什么都好,但死在了怎么写提示词了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:23.344335"
- },
- {
- "expression_id": 4832,
- "situation": "回应他人调侃时反向调侃",
- "style": "叫妈妈",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:24.559084"
- },
- {
- "expression_id": 812,
- "situation": "赞同他人发言",
- "style": "使用单个词语如“豪德”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:27.127242"
- },
- {
- "expression_id": 3139,
- "situation": "图片质量讨论中的常见争议",
- "style": "远看有艺术感,近看糊",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:32.199112"
- },
- {
- "expression_id": 4113,
- "situation": "对他人发言表示共鸣式附和",
- "style": "3.2喜欢用...",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:33.742935"
- },
- {
- "expression_id": 1764,
- "situation": "发现配置错误引发问题",
- "style": "使用“事实证明”引出结论",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:37.238756"
- },
- {
- "expression_id": 1673,
- "situation": "评价他人发言时态度谨慎",
- "style": "先肯定再转折提出缺点",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:48.790587"
- },
- {
- "expression_id": 5089,
- "situation": "描述对某事物投入情感的演变",
- "style": "使用 第一章...第N章...的叙事体",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:53.574395"
- },
- {
- "expression_id": 756,
- "situation": "回避他人隐藏动态的回应行为",
- "style": "233",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:57.806483"
- },
- {
- "expression_id": 510,
- "situation": "反驳他人时轻描淡写",
- "style": "没",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:28:59.957683"
- },
- {
- "expression_id": 1655,
- "situation": "质疑模型是否具备思考能力",
- "style": "不思考的可以吗",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:02.485807"
- },
- {
- "expression_id": 3923,
- "situation": "对不合理或荒诞观点表示无奈认同",
- "style": "说 没事,我也是...",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:06.998147"
- },
- {
- "expression_id": 3721,
- "situation": "对他人质疑进行反问式挑衅",
- "style": "懂什么辣",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:09.238207"
- },
- {
- "expression_id": 742,
- "situation": "对技术方案敷衍认可合规",
- "style": "很擦边",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:12.613919"
- },
- {
- "expression_id": 5701,
- "situation": "对他人提问表示认同",
- "style": "true",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:13.733950"
- },
- {
- "expression_id": 3137,
- "situation": "震惊或困惑的反应",
- "style": "使用 我草了老铁",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:14.949674"
- },
- {
- "expression_id": 4237,
- "situation": "对他人创作内容表示崇拜",
- "style": "我写那种不乖的小孩被奇怪app拐走的文章",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:17.029639"
- },
- {
- "expression_id": 1941,
- "situation": "回应建议时提出修正",
- "style": "指出具体点并否定原方案",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:20.702203"
- },
- {
- "expression_id": 3078,
- "situation": "遇责推诿,不愿担责",
- "style": "说 你自己的锅啊 / 你自己发的",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:27.717281"
- },
- {
- "expression_id": 5294,
- "situation": "表达不确定或反问语气时",
- "style": "句末添加空括号“()”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:30.253213"
- },
- {
- "expression_id": 4892,
- "situation": "对他人突然文言文表示无奈",
- "style": "吐槽 同一个毛病",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:36.428512"
- },
- {
- "expression_id": 5724,
- "situation": "回应模糊或无效反馈时的无奈",
- "style": "人机",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:38.380509"
- },
- {
- "expression_id": 2366,
- "situation": "随和回应他人提议",
- "style": "那我也试试",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:43.020770"
- },
- {
- "expression_id": 2201,
- "situation": "无奈或荒谬时的无奈回应",
- "style": "使用‘难蚌’、‘难评’等谐音梗",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:50.148141"
- },
- {
- "expression_id": 1620,
- "situation": "敏感或违规发言需处理",
- "style": "声称已向国安举报,带威胁语气",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:52.771962"
- },
- {
- "expression_id": 3214,
- "situation": "资源受限下的讨论",
- "style": "使用'没内存'",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:29:58.091857"
- },
- {
- "expression_id": 2988,
- "situation": "用幽默回避敏感话题",
- "style": "用‘课上也能发’‘刺激’等词淡化场合禁忌",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:00.628771"
- },
- {
- "expression_id": 3875,
- "situation": "模糊指代某群体",
- "style": "使用 你们形成",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:01.988036"
- },
- {
- "expression_id": 3808,
- "situation": "对他人观点表示质疑或否定",
- "style": "感觉还是智商欠缺",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:11.635279"
- },
- {
- "expression_id": 2114,
- "situation": "对他人言行表示惊讶或不解",
- "style": "?",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:12.619580"
- },
- {
- "expression_id": 3308,
- "situation": "用括号补充调侃性备注",
- "style": "(...)",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:14.219061"
- },
- {
- "expression_id": 2728,
- "situation": "压力下用极端语气表达崩溃",
- "style": "杀了我吧",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:16.523568"
- },
- {
- "expression_id": 5051,
- "situation": "故意曲解词语制造谐音梗",
- "style": "将'淫秽信息'替换为'银灰信息'",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:19.555559"
- },
- {
- "expression_id": 5233,
- "situation": "评价他人行为不合规矩时",
- "style": "使用 不合乎周礼",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:23.826708"
- },
- {
- "expression_id": 2096,
- "situation": "对复杂问题的负面反应",
- "style": "草",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:25.123482"
- },
- {
- "expression_id": 2924,
- "situation": "强烈质疑某事",
- "style": "使用单个“?”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:25.994508"
- },
- {
- "expression_id": 2763,
- "situation": "轻度反驳或调侃他人建议",
- "style": "肯定是你正在用的才要分享啊",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:30.123092"
- },
- {
- "expression_id": 2476,
- "situation": "技术原理与原因的解释",
- "style": "使用 因为...用了...",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:31.995015"
- },
- {
- "expression_id": 4210,
- "situation": "对他人转发的荒诞内容表示否定",
- "style": "见鬼",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:35.203748"
- },
- {
- "expression_id": 3991,
- "situation": "对某人能力表示贬低但带幽默",
- "style": "说'好菜'并集体跟风",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:38.818700"
- },
- {
- "expression_id": 3223,
- "situation": "接受无奈并自嘲",
- "style": "使用 不知道就不知道吧ⁿ",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:45.169767"
- },
- {
- "expression_id": 3892,
- "situation": "提醒注意健康时",
- "style": "使用 请停止麦麦开发",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:46.833699"
- },
- {
- "expression_id": 3359,
- "situation": "表达自己资源充裕且不需使用",
- "style": "我都用不完",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:52.386001"
- },
- {
- "expression_id": 4192,
- "situation": "对不明款项提出疑问",
- "style": "这是什么钱",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:30:54.210075"
- },
- {
- "expression_id": 4627,
- "situation": "对版本差异感到困惑时",
- "style": "用‘怎么一会儿...一会...’表达疑惑",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:31:02.825611"
- },
- {
- "expression_id": 2221,
- "situation": "对他人行为感到无语",
- "style": "使用“很唐了”评价",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:31:04.513956"
- },
- {
- "expression_id": 4999,
- "situation": "质疑他人发言的合理性",
- "style": "为什么会说出这种话",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:31:05.737494"
- },
- {
- "expression_id": 3774,
- "situation": "对触发机制感到荒谬",
- "style": "群友发色图也触发麦麦哈气",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:31:07.193435"
- },
- {
- "expression_id": 3395,
- "situation": "对某现象进行归因并简化表达",
- "style": "因为...特别容易被提取到",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:04.528007"
- },
- {
- "expression_id": 3333,
- "situation": "回应荒谬言论时进行反讽式认同",
- "style": "使用‘是吧’‘说是’结尾",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:06.574729"
- },
- {
- "expression_id": 4752,
- "situation": "催促对方检查状态",
- "style": "你看看",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:07.830497"
- },
- {
- "expression_id": 5673,
- "situation": "评价某个事物或领域时",
- "style": "使用“XX这一块”作为结尾",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:09.614544"
- },
- {
- "expression_id": 5269,
- "situation": "重复他人话语以示调侃",
- "style": "重复前句并加语气词",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:11.294108"
- },
- {
- "expression_id": 3912,
- "situation": "表达个人目标时",
- "style": "自嘲式‘妄想’表述",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:13.917935"
- },
- {
- "expression_id": 3728,
- "situation": "表达体验后正面评价",
- "style": "感觉不错",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:15.278186"
- },
- {
- "expression_id": 2802,
- "situation": "纠正术语错误",
- "style": "不对,ssl",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:16.270299"
- },
- {
- "expression_id": 432,
- "situation": "重复回应时坚持自己正确",
- "style": "是刚刚的我没说错",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:18.133442"
- },
- {
- "expression_id": 5524,
- "situation": "对他人方案表示认可但无热情",
- "style": "随便吧",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:20.166496"
- },
- {
- "expression_id": 792,
- "situation": "事情顺利恢复",
- "style": "使用 又回来了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:23.773650"
- },
- {
- "expression_id": 3465,
- "situation": "对他人言论进行粗俗辱骂",
- "style": "nmsl啊米诺斯",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:25.101676"
- },
- {
- "expression_id": 2604,
- "situation": "故作深奥地炫耀技术",
- "style": "说“我见过更残酷的公式”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:26.942360"
- },
- {
- "expression_id": 961,
- "situation": "模仿程序异常反应",
- "style": "复读+喵喵模式",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:29.406044"
- },
- {
- "expression_id": 929,
- "situation": "表达惊讶或意外的反应",
- "style": "使用“好神奇”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:30.717473"
- },
- {
- "expression_id": 4349,
- "situation": "对技术问题归因于环境",
- "style": "env有冲突",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:33.236816"
- },
- {
- "expression_id": 3222,
- "situation": "聊天中融入奇幻元素",
- "style": "使用 神秘梦境",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:34.813846"
- },
- {
- "expression_id": 1609,
- "situation": "调侃中化解技术难题",
- "style": "要不要fork...(雾)",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:36.197532"
- },
- {
- "expression_id": 5545,
- "situation": "对他人操作表示认可",
- "style": "OK",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:37.092972"
- },
- {
- "expression_id": 4678,
- "situation": "表达对新版本的期待",
- "style": "用 '快点端上来罢' 带有催促的口语化请求",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:38.677816"
- },
- {
- "expression_id": 561,
- "situation": "无奈应对突发任务",
- "style": "使用 哎mcp",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:47.158634"
- },
- {
- "expression_id": 5695,
- "situation": "吐槽对方不提供日志瞎猜时",
- "style": "戏称对方 让我算命",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:50.357614"
- },
- {
- "expression_id": 842,
- "situation": "确认事物归属或来源",
- "style": "应该是的",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:56.220459"
- },
- {
- "expression_id": 317,
- "situation": "对不合理现象震惊崩溃",
- "style": "心累啊……",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:57.687150"
- },
- {
- "expression_id": 2023,
- "situation": "用户表达对技术问题的困惑",
- "style": "使用 分不清哪一块是废弃代码",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:44:59.900852"
- },
- {
- "expression_id": 4407,
- "situation": "对消失事物进行荒诞推测",
- "style": "编造神秘场景描述",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:45:08.219787"
- },
- {
- "expression_id": 5438,
- "situation": "被指责时反讽回应",
- "style": "禁言你试试",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:45:09.860292"
- },
- {
- "expression_id": 1312,
- "situation": "反复呼唤被忽视的对方",
- "style": "老婆",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:45:11.396310"
- },
- {
- "expression_id": 4138,
- "situation": "对他人困境表示夸张的共情",
- "style": "冻傻了走着走着被创飞的可能性更大一些",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:45:13.155944"
- },
- {
- "expression_id": 5077,
- "situation": "重复他人话语以强化戏谑",
- "style": "就赖你喵",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:45:14.716376"
- },
- {
- "expression_id": 3206,
- "situation": "资源使用讨论中常见争议",
- "style": "强调免费资源价值",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:46:54.961181"
- },
- {
- "expression_id": 249,
- "situation": "夸大技术参数描述",
- "style": "228000W闪充",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:46:56.568483"
- },
- {
- "expression_id": 5487,
- "situation": "对技术实现表示怀疑",
- "style": "思路有,能实现吗",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:46:58.617160"
- },
- {
- "expression_id": 2348,
- "situation": "指出他人明显错误的价格信息",
- "style": "使用反问句质疑,如“没这么便宜吧”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:00.416830"
- },
- {
- "expression_id": 693,
- "situation": "无奈面对网络或服务器问题",
- "style": "使用“妈的真服了”等口语化抱怨",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:01.776104"
- },
- {
- "expression_id": 2275,
- "situation": "认同他人建议的回应",
- "style": "使用 6 表示认可",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:03.328723"
- },
- {
- "expression_id": 2652,
- "situation": "对话中常表达疑问或不确定",
- "style": "使用 似乎...根本...",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:06.184957"
- },
- {
- "expression_id": 2944,
- "situation": "游戏配置讨论中常出现性能对比",
- "style": "使用'样板/全套'术语",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:08.448459"
- },
- {
- "expression_id": 2989,
- "situation": "认可社区管理变化",
- "style": "还行吧",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:10.544008"
- },
- {
- "expression_id": 2080,
- "situation": "对计划略感惊讶。",
- "style": "使用 这么强",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:11.873988"
- },
- {
- "expression_id": 708,
- "situation": "强调功能或架构的独立性",
- "style": "使用“完全分离”“独立运行”",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:15.584648"
- },
- {
- "expression_id": 1003,
- "situation": "对技术细节难以置信",
- "style": "这么牛",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:17.160705"
- },
- {
- "expression_id": 2200,
- "situation": "行为幼稚被指出",
- "style": "使用‘小孩哥’等戏谑称呼",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:18.575640"
- },
- {
- "expression_id": 3650,
- "situation": "对他人操作表示恍然大悟",
- "style": "原来如此",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:19.784532"
- },
- {
- "expression_id": 2445,
- "situation": "质疑异常现象,提出反问",
- "style": "是不是选的...",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:21.479720"
- },
- {
- "expression_id": 1360,
- "situation": "自嘲懒惰或拖延时的幽默表达",
- "style": "使用 懒狗 自称,并描述短暂行动",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:23.568435"
- },
- {
- "expression_id": 3084,
- "situation": "冷淡附和无厘头发言",
- "style": "说 有道理 / ?",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:24.719714"
- },
- {
- "expression_id": 2806,
- "situation": "对未知事物常持质疑态度",
- "style": "有啥好...",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:26.992984"
- },
- {
- "expression_id": 1925,
- "situation": "技术讨论中直接给出操作建议",
- "style": "使用 爆一下",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:28.119852"
- },
- {
- "expression_id": 5222,
- "situation": "回应技术话题时轻描淡写",
- "style": "说还不去教人写插件",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T12:47:29.935147"
- },
- {
- "expression_id": 1721,
- "situation": "回应挑衅时",
- "style": "逗狗玩/你也当真",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:19.039617"
- },
- {
- "expression_id": 2156,
- "situation": "提及技术使用条件",
- "style": "有魔法才行",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:21.348062"
- },
- {
- "expression_id": 1973,
- "situation": "要求SELF执行程序指令时",
- "style": "写一个关机程序并立即执行",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:22.991510"
- },
- {
- "expression_id": 5469,
- "situation": "功能使用不便被多次提及",
- "style": "不得不吐槽",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:26.655334"
- },
- {
- "expression_id": 467,
- "situation": "表达认同",
- "style": "应该是的",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:27.671871"
- },
- {
- "expression_id": 2062,
- "situation": "强调SELF的群体归属时",
- "style": "安卓AI",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:28.702077"
- },
- {
- "expression_id": 5067,
- "situation": "关注AI模型的版本差异与更新特性。",
- "style": "Claude4.5吧",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:30.351386"
- },
- {
- "expression_id": 3634,
- "situation": "购买新装备的动机和考虑因素。",
- "style": "不然哪有钱换新xxx",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:33.575785"
- },
- {
- "expression_id": 5268,
- "situation": "自嘲自身状态不佳",
- "style": "我烂完了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:34.768094"
- },
- {
- "expression_id": 3890,
- "situation": "揣测他人后期操作",
- "style": "你这个是...了吧",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:38.615499"
- },
- {
- "expression_id": 2838,
- "situation": "聊天情境表达强烈赞叹或惊讶。",
- "style": "太吊了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:45.118985"
- },
- {
- "expression_id": 4144,
- "situation": "软件出现异常问题时的评价描述。",
- "style": "神秘化表述+缩写简称",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:46.439213"
- },
- {
- "expression_id": 5800,
- "situation": "对他人言论感到震惊或不适",
- "style": "我去这我都不敢看",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:49.119702"
- },
- {
- "expression_id": 2835,
- "situation": "发现新功能支持",
- "style": "哦原来支持了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:52.662444"
- },
- {
- "expression_id": 70,
- "situation": "对他人给出的答案感到疑惑需要进一步了解",
- "style": "有多低",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:03:58.647406"
- },
- {
- "expression_id": 590,
- "situation": "想嘲讽对话缺乏人性",
- "style": "说话和个bot一样",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:04:00.014505"
- },
- {
- "expression_id": 3130,
- "situation": "反驳他人关于内容推荐算法的观点",
- "style": "不是",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:04:02.727223"
- },
- {
- "expression_id": 904,
- "situation": "想转移矛盾时",
- "style": "你家孩子欺负我家宝宝",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:04:04.543511"
- },
- {
- "expression_id": 4362,
- "situation": "误入群聊后解释原因",
- "style": "随便点了一个",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:04:06.750398"
- },
- {
- "expression_id": 2829,
- "situation": "对现状表达负面评价。",
- "style": "要完蛋/完了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:04:10.054870"
- },
- {
- "expression_id": 7047,
- "situation": "对离谱内容无奈回应",
- "style": "确实挺让人无语的",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:26:54.967027"
- },
- {
- "expression_id": 14104,
- "situation": "自嘲式道歉",
- "style": "对不起我的好兄弟啊",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:26:57.071384"
- },
- {
- "expression_id": 14331,
- "situation": "用帧数对比硬件性能",
- "style": "鸡血就能稳110帧",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:26:58.607418"
- },
- {
- "expression_id": 407,
- "situation": "技能缺失时沟通受阻",
- "style": "不会游泳",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:00.358280"
- },
- {
- "expression_id": 1598,
- "situation": "极端对比化解硬件焦虑",
- "style": "e5e3感觉完全没有性价比了",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:05.374562"
- },
- {
- "expression_id": 1439,
- "situation": "经济压力下消费无奈",
- "style": "本地赚钱本地花",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:07.391032"
- },
- {
- "expression_id": 4270,
- "situation": "质疑配置实际可用性",
- "style": "咋不能用插?",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:09.934813"
- },
- {
- "expression_id": 1807,
- "situation": "AMD历史策略被反复提及",
- "style": "快死了就卖XX",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:12.998093"
- },
- {
- "expression_id": 3134,
- "situation": "讨论内存频率限制问题",
- "style": "可惜我这个XX上不了XXX",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:15.286033"
- },
- {
- "expression_id": 12694,
- "situation": "确认对方提议",
- "style": "语气词加简短肯定",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:20.630558"
- },
- {
- "expression_id": 12751,
- "situation": "讨论产品来源时信息模糊",
- "style": "提及'OEM盘'等专业概念",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:24.510050"
- },
- {
- "expression_id": 13259,
- "situation": "轻蔑否定他人观点",
- "style": "单字回复如 hh",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:26.294114"
- },
- {
- "expression_id": 8063,
- "situation": "引导他人前往指定地点",
- "style": "在哪儿呢?让我也过去体验一下",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:27.950256"
- },
- {
- "expression_id": 10884,
- "situation": "羡慕他人成绩",
- "style": "羡慕欧神",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:30.301667"
- },
- {
- "expression_id": 6751,
- "situation": "试图用幽默或玩梗活跃气氛",
- "style": "残樱早安",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:31.926564"
- },
- {
- "expression_id": 5382,
- "situation": "彻底崩溃,不想玩了",
- "style": "这游戏我玩不动了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:33.653993"
- },
- {
- "expression_id": 5520,
- "situation": "回应硬件兼容性问题",
- "style": "都能带",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:35.142178"
- },
- {
- "expression_id": 12260,
- "situation": "回应模糊提问时态度含糊",
- "style": "答非所问+诗意回应",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:36.493653"
- },
- {
- "expression_id": 11226,
- "situation": "聊天中表达愤怒与不满",
- "style": "使用日语骂人词汇加表情符号",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:38.550163"
- },
- {
- "expression_id": 13120,
- "situation": "被指出错误时礼貌回应",
- "style": "指正",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:44.261709"
- },
- {
- "expression_id": 8441,
- "situation": "建议方案常被忽视或拒绝",
- "style": "使用\"我建议是\"开头",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:52.766043"
- },
- {
- "expression_id": 13458,
- "situation": "简单归因操作原因",
- "style": "因为包满了",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:54.157555"
- },
- {
- "expression_id": 656,
- "situation": "犹豫不决选品牌或型号",
- "style": "我想买XXX",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:27:58.125073"
- },
- {
- "expression_id": 4452,
- "situation": "回应他人提及的设备",
- "style": "你那个我当时有听过吗",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:00.742094"
- },
- {
- "expression_id": 13255,
- "situation": "认同非主流系统使用",
- "style": "还是...好玩",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:03.229237"
- },
- {
- "expression_id": 13441,
- "situation": "表达惊讶或不满",
- "style": "使用粗俗表达",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:04.860968"
- },
- {
- "expression_id": 5957,
- "situation": "价格信息实时更新",
- "style": "w3-2w4",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:06.237835"
- },
- {
- "expression_id": 9732,
- "situation": "羡慕他人经历",
- "style": "使用慕等简短表达",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:08.108808"
- },
- {
- "expression_id": 9115,
- "situation": "询问性别时回应含糊或回避",
- "style": "使用'你猜'来回应",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:09.612616"
- },
- {
- "expression_id": 13910,
- "situation": "游戏更新慢,玩家期待提升",
- "style": "使用md等语气词表达不满",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:10.957259"
- },
- {
- "expression_id": 7319,
- "situation": "用自嘲反差回应夸赞",
- "style": "除了哈气什么都不会",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:15.565359"
- },
- {
- "expression_id": 702,
- "situation": "略带嘲讽,暗藏得意",
- "style": "[表情:偷笑]",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:16.565295"
- },
- {
- "expression_id": 4922,
- "situation": "用游戏术语包装主观判断",
- "style": "红温王之力起手",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:18.605203"
- },
- {
- "expression_id": 11947,
- "situation": "转发消息以共鸣或调侃",
- "style": "转发并保留原消息格式",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:20.765222"
- },
- {
- "expression_id": 13209,
- "situation": "回忆过往经历时的对话",
- "style": "用'上次这样子'开头",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:22.837351"
- },
- {
- "expression_id": 4431,
- "situation": "提醒注意消费风险",
- "style": "真得小心点才行对得起这价格",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:24.077508"
- },
- {
- "expression_id": 10154,
- "situation": "惊讶于物品价格极低",
- "style": "快和我XXX一样的价了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:27.940540"
- },
- {
- "expression_id": 12377,
- "situation": "群内颁奖互动温馨有趣",
- "style": "使用 @人名 + 最xx 的句式",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:30.621031"
- },
- {
- "expression_id": 10267,
- "situation": "冷淡回应他人疑问",
- "style": "我刚好出",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:34.724283"
- },
- {
- "expression_id": 11121,
- "situation": "调侃或惊讶回应他人发言",
- "style": "说:我靠,你很久之前发的",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:37.468246"
- },
- {
- "expression_id": 13390,
- "situation": "对他人发言表现出不屑态度",
- "style": "你傻了吧",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:40.956306"
- },
- {
- "expression_id": 12585,
- "situation": "自豪使用老旧设备",
- "style": "还在服役,健康度还是100%",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:43.244243"
- },
- {
- "expression_id": 12445,
- "situation": "用短词收尾或转移话题",
- "style": "粥",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:44.772466"
- },
- {
- "expression_id": 9420,
- "situation": "拒绝亲密互动请求",
- "style": "不要",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:46.940296"
- },
- {
- "expression_id": 5451,
- "situation": "澄清概念歧义",
- "style": "否定性等式表达,如‘萌新≠🦐’",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:49.484077"
- },
- {
- "expression_id": 13988,
- "situation": "用简短感叹回应他人",
- "style": "回复‘哈哈哈’",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:54.757175"
- },
- {
- "expression_id": 6927,
- "situation": "突出套餐高性价比",
- "style": "送XXX",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:28:57.891739"
- },
- {
- "expression_id": 6982,
- "situation": "以地域或机构为由限制创作自由",
- "style": "要是饺子去美国拍,拍出来也是这种东西",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:02.108265"
- },
- {
- "expression_id": 1220,
- "situation": "互动中需确认事实",
- "style": "简短疑问+状态确认",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:06.852099"
- },
- {
- "expression_id": 13238,
- "situation": "比较产品性价比时的对话",
- "style": "使用 这不比...香 的对比句式",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:08.747487"
- },
- {
- "expression_id": 4367,
- "situation": "嘲讽不合常理的言论",
- "style": "油亮那种",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:12.556811"
- },
- {
- "expression_id": 5223,
- "situation": "偏好隐晦调侃,回避直白否定",
- "style": "不喜欢吃生的",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:17.195185"
- },
- {
- "expression_id": 9939,
- "situation": "吐槽电脑折腾人",
- "style": "使用反问句表达观点",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:18.843517"
- },
- {
- "expression_id": 2545,
- "situation": "回应复杂硬件配置",
- "style": "双路2698b v3+-30鸡血+双铜管",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:20.451810"
- },
- {
- "expression_id": 14530,
- "situation": "硬件损坏时用户常表达无奈与焦急",
- "style": "使用'炸了'形容故障",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:23.699650"
- },
- {
- "expression_id": 13447,
- "situation": "调侃式解读模糊信息",
- "style": "用 画饼 替代 空谈或虚假承诺",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:25.251309"
- },
- {
- "expression_id": 2882,
- "situation": "否定物品的实用价值",
- "style": "买了之后纯摆设",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:29.116293"
- },
- {
- "expression_id": 1103,
- "situation": "以冷感附和应对荒诞对话",
- "style": "()",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:30.435544"
- },
- {
- "expression_id": 10018,
- "situation": "强调理论上的可能性",
- "style": "使用'理论上都会'加强语气",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:32.363156"
- },
- {
- "expression_id": 9095,
- "situation": "暗示替代方案更优",
- "style": "加XX可以买XX",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:34.163820"
- },
- {
- "expression_id": 1797,
- "situation": "调侃对方有异常生理反应",
- "style": "喘的老厉害了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:46.723560"
- },
- {
- "expression_id": 13804,
- "situation": "对二手主板持谨慎态度",
- "style": "使用'不敢碰了,别问为什么'的隐晦表达",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:49.578844"
- },
- {
- "expression_id": 5877,
- "situation": "调侃荒诞,无奈以对",
- "style": "有点逆天",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:53.114753"
- },
- {
- "expression_id": 7112,
- "situation": "讨论硬件配置优劣",
- "style": "两条8比一条8一条16强吗",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:54.683008"
- },
- {
- "expression_id": 2951,
- "situation": "聊敏感尴尬经历",
- "style": "皮燕子有点疼,不能让他进来",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:29:58.474757"
- },
- {
- "expression_id": 5029,
- "situation": "借模糊权威支撑观点",
- "style": "都还有直播的",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:01.386679"
- },
- {
- "expression_id": 13354,
- "situation": "被骗时描述经历",
- "style": "使用'被坑了'表达上当",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:02.995623"
- },
- {
- "expression_id": 6538,
- "situation": "否认参与相关活动",
- "style": "不太[游戏/行为]",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:04.874409"
- },
- {
- "expression_id": 7418,
- "situation": "反对简化方案",
- "style": "这多没意思啊",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:06.458290"
- },
- {
- "expression_id": 9182,
- "situation": "游戏内事件多场景互动描述",
- "style": "使用重复强调严重性",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:09.066521"
- },
- {
- "expression_id": 6136,
- "situation": "告知商品缺货情况",
- "style": "现在没货了",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:13.161961"
- },
- {
- "expression_id": 225,
- "situation": "解释无法分享内容时",
- "style": "只有一帧可以发",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:15.785816"
- },
- {
- "expression_id": 3948,
- "situation": "抱怨快递运费太高",
- "style": "起X斤了",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:17.242756"
- },
- {
- "expression_id": 8024,
- "situation": "随意提出替代建议",
- "style": "用海晶灯",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:18.970183"
- },
- {
- "expression_id": 7911,
- "situation": "贬低国产竞品以抬高自身产品",
- "style": "豆包快速响应都比ds深度思考有质量",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:20.618513"
- },
- {
- "expression_id": 2997,
- "situation": "对他人自述反应异常",
- "style": "怪",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:23.498212"
- },
- {
- "expression_id": 8918,
- "situation": "装傻回应他人调侃",
- "style": "天冷了穿袜子有问题吗",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:25.234428"
- },
- {
- "expression_id": 10834,
- "situation": "偏好特定角色表达",
- "style": "使用明确比较级表达",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:29.434354"
- },
- {
- "expression_id": 3385,
- "situation": "文化现象常被误解或过度解读",
- "style": "吹雪果然出了国外就被扒衣服了",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:33.233914"
- },
- {
- "expression_id": 11395,
- "situation": "讨论硬件性价比时注重实用与成本平衡",
- "style": "用核数对比表达优势",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T15:30:35.065827"
- },
- {
- "situation": "承认自己能力有限但仍愿尝试",
- "style": "那我尽力23333",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:32:52.388737"
- },
- {
- "situation": "游戏运行卡顿影响体验",
- "style": "用夸张比喻形容帧率低下",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:32:56.114331"
- },
- {
- "situation": "回应模糊问题,彰显冷门话题存在感",
- "style": "有的",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:32:58.278949"
- },
- {
- "situation": "用户询问产品真实体验对比",
- "style": "使用疑问句加表情符号",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:33:00.620736"
- },
- {
- "situation": "对某话题感到厌倦或无奈",
- "style": "...",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:33:01.669811"
- },
- {
- "situation": "无奈应对硬件圈的荒诞日常",
- "style": "难怪这么熟练",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:33:03.797481"
- },
- {
- "situation": "回应困境时轻蔑否定对方能力",
- "style": "fw",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:33:06.650547"
- },
- {
- "situation": "担心硬件安装过程",
- "style": "使用 好吓人 表达不安",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:33:08.252688"
- },
- {
- "situation": "低成本方案引发共鸣",
- "style": "用 只有...这条路了 表示别无选择",
- "suitable": true,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:33:09.988369"
- },
- {
- "situation": "试图解释词源或文化梗",
- "style": "这个是日语的空耳",
- "suitable": false,
- "reason": null,
- "evaluator": "manual",
- "evaluated_at": "2025-12-26T16:33:12.429537"
- }
- ]
-}
\ No newline at end of file
diff --git a/scripts/preview_reply_effect_scores.py b/scripts/preview_reply_effect_scores.py
index 86648393..4b43b17e 100644
--- a/scripts/preview_reply_effect_scores.py
+++ b/scripts/preview_reply_effect_scores.py
@@ -14,6 +14,7 @@ DEFAULT_LOG_DIR = Path("logs") / "maisaka_reply_effect"
DEFAULT_MANUAL_DIR = Path("logs") / "maisaka_reply_effect_manual"
DEFAULT_HOST = "127.0.0.1"
DEFAULT_PORT = 8765
+DEFAULT_RECORD_LIMIT = 20
def normalize_name(value: str) -> str:
@@ -49,21 +50,14 @@ class ReplyEffectRepository:
for chat_dir in sorted(path for path in self.log_dir.iterdir() if path.is_dir()):
records = list(chat_dir.glob("*.json"))
- annotated_count = sum(1 for record_file in records if self._annotation_path(chat_dir.name, record_file).exists())
- finalized_count = 0
- pending_count = 0
- for record_file in records:
- payload = load_json_file(record_file)
- if payload.get("status") == "finalized":
- finalized_count += 1
- else:
- pending_count += 1
+ annotation_dir = self.manual_dir / normalize_name(chat_dir.name)
+ annotated_count = len(list(annotation_dir.glob("*.json"))) if annotation_dir.exists() else 0
chats.append(
{
"chat_id": chat_dir.name,
"record_count": len(records),
- "finalized_count": finalized_count,
- "pending_count": pending_count,
+ "finalized_count": None,
+ "pending_count": None,
"annotated_count": annotated_count,
}
)
@@ -75,9 +69,15 @@ class ReplyEffectRepository:
chat_id: str | None = None,
status: str = "",
annotated: str = "",
- ) -> list[dict[str, Any]]:
+ limit: int = DEFAULT_RECORD_LIMIT,
+ offset: int = 0,
+ ) -> dict[str, Any]:
records: list[dict[str, Any]] = []
- for record_file in self._iter_record_files(chat_id):
+ normalized_limit = max(1, min(1000, int(limit or DEFAULT_RECORD_LIMIT)))
+ normalized_offset = max(0, int(offset or 0))
+ matched_count = 0
+ has_more = False
+ for record_file in self._iter_record_files(chat_id, newest_first=True):
payload = load_json_file(record_file)
if not payload:
continue
@@ -88,16 +88,30 @@ class ReplyEffectRepository:
continue
if annotated == "no" and summary["manual"] is not None:
continue
+ matched_count += 1
+ if matched_count <= normalized_offset:
+ continue
+ if len(records) >= normalized_limit:
+ has_more = True
+ break
records.append(summary)
- return sorted(records, key=lambda item: str(item.get("created_at") or ""), reverse=True)
+ return {
+ "records": records,
+ "has_more": has_more,
+ "limit": normalized_limit,
+ "offset": normalized_offset,
+ }
- def get_record(self, chat_id: str, effect_id: str) -> dict[str, Any]:
+ def get_record(self, chat_id: str, effect_id: str, *, compact: bool = False) -> dict[str, Any]:
record_file = self._find_record_file(chat_id, effect_id)
if record_file is None:
return {}
payload = load_json_file(record_file)
if not payload:
return {}
+ self._strip_heavy_reply_metadata(payload)
+ if compact:
+ payload["context_snapshot"] = []
payload["_manual"] = self.get_annotation(chat_id, effect_id)
payload["_record_path"] = str(record_file)
return payload
@@ -114,7 +128,7 @@ class ReplyEffectRepository:
effect_id = normalize_name(str(payload.get("effect_id") or ""))
if not chat_id or chat_id == "unknown" or not effect_id or effect_id == "unknown":
raise ValueError("缺少 chat_id 或 effect_id")
- if self._find_record_file(chat_id, effect_id) is None:
+ if not self._record_exists(chat_id, effect_id):
raise ValueError("找不到对应的回复效果记录")
manual_score = payload.get("manual_score")
@@ -151,29 +165,56 @@ class ReplyEffectRepository:
write_json_file(self._annotation_path(chat_id, effect_id), annotation)
return annotation
- def _iter_record_files(self, chat_id: str | None = None) -> list[Path]:
+ def _iter_record_files(self, chat_id: str | None = None, *, newest_first: bool = False) -> list[Path]:
if not self.log_dir.exists():
return []
if chat_id:
chat_dir = self.log_dir / normalize_name(chat_id)
if not chat_dir.exists() or not chat_dir.is_dir():
return []
- return sorted(chat_dir.glob("*.json"))
+ return self._sort_record_files(chat_dir.glob("*.json"), newest_first=newest_first)
record_files: list[Path] = []
for chat_dir in self.log_dir.iterdir():
if chat_dir.is_dir():
record_files.extend(chat_dir.glob("*.json"))
- return record_files
+ return self._sort_record_files(record_files, newest_first=newest_first)
+
+ @staticmethod
+ def _sort_record_files(record_files: Any, *, newest_first: bool) -> list[Path]:
+ return sorted(
+ record_files,
+ key=lambda path: (path.stat().st_mtime, path.name),
+ reverse=newest_first,
+ )
def _find_record_file(self, chat_id: str, effect_id: str) -> Path | None:
normalized_effect_id = normalize_name(effect_id)
+ direct_match = self._direct_record_file(chat_id, effect_id)
+ if direct_match is not None:
+ return direct_match
for record_file in self._iter_record_files(chat_id):
payload = load_json_file(record_file)
if normalize_name(str(payload.get("effect_id") or "")) == normalized_effect_id:
return record_file
return None
+ def _record_exists(self, chat_id: str, effect_id: str) -> bool:
+ if self._direct_record_file(chat_id, effect_id) is not None:
+ return True
+ return self._find_record_file(chat_id, effect_id) is not None
+
+ def _direct_record_file(self, chat_id: str, effect_id: str) -> Path | None:
+ chat_dir = self.log_dir / normalize_name(chat_id)
+ if not chat_dir.exists() or not chat_dir.is_dir():
+ return None
+ normalized_effect_id = normalize_name(effect_id)
+ matches = sorted(chat_dir.glob(f"*_{normalized_effect_id}.json"))
+ if not matches:
+ exact_path = chat_dir / f"{normalized_effect_id}.json"
+ return exact_path if exact_path.exists() else None
+ return matches[-1]
+
def _annotation_path(self, chat_id: str, record_file_or_effect_id: Path | str) -> Path:
if isinstance(record_file_or_effect_id, Path):
payload = load_json_file(record_file_or_effect_id)
@@ -214,6 +255,24 @@ class ReplyEffectRepository:
return normalized_text
return f"{normalized_text[: limit - 1]}…"
+ @staticmethod
+ def _strip_heavy_reply_metadata(payload: dict[str, Any]) -> None:
+ reply = payload.get("reply")
+ if not isinstance(reply, dict):
+ return
+ metadata = reply.get("reply_metadata")
+ if not isinstance(metadata, dict):
+ return
+ monitor_detail = metadata.get("monitor_detail")
+ if not isinstance(monitor_detail, dict):
+ return
+ compact_detail: dict[str, Any] = {}
+ for key in ("metrics", "output_text", "extra_sections"):
+ value = monitor_detail.get(key)
+ if value:
+ compact_detail[key] = value
+ metadata["monitor_detail"] = compact_detail
+
class ReplyEffectPreviewHandler(BaseHTTPRequestHandler):
repository: ReplyEffectRepository
@@ -232,14 +291,17 @@ class ReplyEffectPreviewHandler(BaseHTTPRequestHandler):
chat_id=self._first(query, "chat_id"),
status=self._first(query, "status"),
annotated=self._first(query, "annotated"),
+ limit=self._int(query, "limit", DEFAULT_RECORD_LIMIT),
+ offset=self._int(query, "offset", 0),
)
- self._send_json({"records": records})
+ self._send_json(records)
return
if parsed.path == "/api/record":
query = parse_qs(parsed.query)
record = self.repository.get_record(
normalize_name(self._first(query, "chat_id")),
normalize_name(self._first(query, "effect_id")),
+ compact=self._first(query, "compact") in {"1", "true", "yes"},
)
if not record:
self._send_json({"error": "record not found"}, status=404)
@@ -360,6 +422,13 @@ class ReplyEffectPreviewHandler(BaseHTTPRequestHandler):
values = query.get(key) or [""]
return values[0]
+ @classmethod
+ def _int(cls, query: dict[str, list[str]], key: str, default: int) -> int:
+ try:
+ return int(cls._first(query, key) or default)
+ except ValueError:
+ return default
+
INDEX_HTML = r"""
@@ -563,7 +632,8 @@ INDEX_HTML = r"""
document.getElementById("detailPane").innerHTML = "选择一条记录查看详情";
}
- async function loadRecords() {
+ async function loadRecords(offset = recordOffset) {
+ recordOffset = Math.max(0, offset);
const params = new URLSearchParams();
if (selectedChat) params.set("chat_id", selectedChat);
const status = document.getElementById("statusFilter").value;
@@ -601,7 +671,7 @@ INDEX_HTML = r"""
selectedEffect = effectId;
renderChats();
renderRecords();
- const data = await api(`/api/record?chat_id=${encodeURIComponent(chatId)}&effect_id=${encodeURIComponent(effectId)}`);
+ const data = await api(`/api/record?chat_id=${encodeURIComponent(chatId)}&effect_id=${encodeURIComponent(effectId)}&compact=1`);
renderDetail(data.record);
}
@@ -748,6 +818,12 @@ INDEX_HTML_V2 = r"""
flex-wrap: wrap;
}
.header-tools { margin-bottom: 0; justify-content: flex-end; }
+ .global-evaluator {
+ width: 150px;
+ }
+ .global-evaluator {
+ width: 150px;
+ }
input, select, textarea, button {
font: inherit;
border: 1px solid var(--line);
@@ -868,6 +944,8 @@ INDEX_HTML_V2 = r"""
Maisaka 回复效果评分预览
@@ -1087,6 +1167,7 @@ INDEX_HTML_V2 = r"""
if (selectedChat) params.set("chat_id", selectedChat);
params.set("status", "finalized");
params.set("annotated", "no");
+ params.set("limit", String(RATING_QUEUE_LIMIT));
const data = await api(`/api/records?${params.toString()}`);
ratingQueue = data.records || [];
ratingIndex = 0;
@@ -1527,6 +1608,33 @@ INDEX_HTML_V3 = r"""
}
.message-name { font-weight: 650; color: var(--text); }
.message-text { white-space: pre-wrap; word-break: break-word; line-height: 1.45; }
+ .quote-card {
+ border-left: 3px solid var(--accent);
+ background: var(--accent-soft);
+ border-radius: 6px;
+ padding: 6px 8px;
+ margin: 0 0 6px;
+ font-size: 12px;
+ color: var(--muted);
+ }
+ .quote-card.missing {
+ border-left-color: var(--warn);
+ background: #fff7ed;
+ }
+ .quote-title {
+ display: flex;
+ justify-content: space-between;
+ gap: 8px;
+ margin-bottom: 3px;
+ font-weight: 650;
+ color: var(--text);
+ }
+ .quote-text {
+ white-space: nowrap;
+ overflow: hidden;
+ text-overflow: ellipsis;
+ line-height: 1.35;
+ }
.message-attachments {
display: flex;
gap: 6px;
@@ -1578,6 +1686,8 @@ INDEX_HTML_V3 = r"""
Maisaka 回复效果评分预览
@@ -2104,11 +2373,32 @@ INDEX_HTML_V3 = r"""
return "";
}
- function cleanMessageText(text) {
- return String(text || "")
- .replace(/\[图片\]/g, "")
- .replace(/\[表情包?\]/g, "")
- .trim();
+ function cleanMessageText(text, attachments = []) {
+ let normalized = stripVisibleMessagePrefix(String(text || "")).replace(/\[引用回复\]\([^)]+\)/g, "");
+ const shownAttachments = (attachments || []).filter(item => attachmentUrl(item));
+ if (shownAttachments.length) {
+ normalized = normalized
+ .replace(/\[图片\]/g, "")
+ .replace(/\[表情包?\]/g, "");
+ for (const attachment of shownAttachments) {
+ const content = String(attachment.content || "").trim();
+ if (!content) continue;
+ normalized = normalized.split(content).join("");
+ }
+ }
+ return normalized.trim();
+ }
+
+ function stripVisibleMessagePrefix(text) {
+ const parsed = parseVisibleText(text);
+ if (parsed.content && parsed.content !== text) return parsed.content;
+ return String(text || "");
+ }
+
+ function attachmentSummary(attachments) {
+ const count = Array.isArray(attachments) ? attachments.length : 0;
+ if (!count) return "";
+ return count === 1 ? "[图片]" : `[${count} 张图片]`;
}
function isBotContextMessage(message) {
@@ -2152,6 +2442,28 @@ INDEX_HTML_V3 = r"""
return element ? element.value : "";
}
+ function currentEvaluator() {
+ return valueOf("globalEvaluator").trim() || "manual";
+ }
+
+ function saveGlobalEvaluator() {
+ try {
+ localStorage.setItem("replyEffectEvaluator", currentEvaluator());
+ } catch (_err) {
+ return;
+ }
+ }
+
+ function restoreGlobalEvaluator() {
+ const input = document.getElementById("globalEvaluator");
+ if (!input) return;
+ try {
+ input.value = localStorage.getItem("replyEffectEvaluator") || "manual";
+ } catch (_err) {
+ input.value = "manual";
+ }
+ }
+
function scoreText(v) {
return v === null || v === undefined || v === "" ? "N/A" : Number(v).toFixed(1);
}
@@ -2174,6 +2486,7 @@ INDEX_HTML_V3 = r"""
return escapeHtml(value).replace(/`/g, "`");
}
+ restoreGlobalEvaluator();
reloadAll();
diff --git a/scripts/raw_data_preprocessor.py b/scripts/raw_data_preprocessor.py
deleted file mode 100644
index 6cca59d3..00000000
--- a/scripts/raw_data_preprocessor.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import os
-from pathlib import Path
-import sys # 新增系统模块导入
-
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-from src.chat.knowledge.utils.hash import get_sha256
-from src.common.logger import get_logger
-
-logger = get_logger("lpmm")
-ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-RAW_DATA_PATH = os.path.join(ROOT_PATH, "data/lpmm_raw_data")
-# IMPORTED_DATA_PATH = os.path.join(ROOT_PATH, "data/imported_lpmm_data")
-
-
-def _process_text_file(file_path):
- """处理单个文本文件,返回段落列表"""
- with open(file_path, "r", encoding="utf-8") as f:
- raw = f.read()
-
- paragraphs = []
- paragraph = ""
- for line in raw.split("\n"):
- if line.strip() == "":
- if paragraph != "":
- paragraphs.append(paragraph.strip())
- paragraph = ""
- else:
- paragraph += line + "\n"
-
- if paragraph != "":
- paragraphs.append(paragraph.strip())
-
- return paragraphs
-
-
-def _process_multi_files() -> list:
- raw_files = list(Path(RAW_DATA_PATH).glob("*.txt"))
- if not raw_files:
- logger.warning("警告: data/lpmm_raw_data 中没有找到任何 .txt 文件")
- sys.exit(1)
- # 处理所有文件
- all_paragraphs = []
- for file in raw_files:
- logger.info(f"正在处理文件: {file.name}")
- paragraphs = _process_text_file(file)
- all_paragraphs.extend(paragraphs)
- return all_paragraphs
-
-
-def load_raw_data() -> tuple[list[str], list[str]]:
- """加载原始数据文件
-
- 读取原始数据文件,将原始数据加载到内存中
-
- Args:
- path: 可选,指定要读取的json文件绝对路径
-
- Returns:
- - raw_data: 原始数据列表
- - sha256_list: 原始数据的SHA256集合
- """
- raw_paragraphs = _process_multi_files()
- sha256_list = []
- sha256_set = set()
- raw_data: list[str] = []
- for item in raw_paragraphs:
- if not isinstance(item, str):
- logger.warning(f"数据类型错误:{item}")
- continue
- pg_hash = get_sha256(item)
- if pg_hash in sha256_set:
- logger.warning(f"重复数据:{item}")
- continue
- sha256_set.add(pg_hash)
- sha256_list.append(pg_hash)
- raw_data.append(item)
- logger.info(f"共读取到{len(raw_data)}条数据")
-
- return sha256_list, raw_data
diff --git a/scripts/refresh_lpmm_knowledge.py b/scripts/refresh_lpmm_knowledge.py
deleted file mode 100644
index e70093a8..00000000
--- a/scripts/refresh_lpmm_knowledge.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import os
-import sys
-
-try:
- if hasattr(sys.stdout, "reconfigure"):
- sys.stdout.reconfigure(encoding="utf-8")
- if hasattr(sys.stderr, "reconfigure"):
- sys.stderr.reconfigure(encoding="utf-8")
-except Exception:
- pass
-
-# 确保能导入 src.*
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from src.common.logger import get_logger
-from src.config.config import global_config
-from src.chat.knowledge import lpmm_start_up, get_qa_manager
-
-logger = get_logger("refresh_lpmm_knowledge")
-
-
-def main() -> None:
- logger.info("开始刷新 LPMM 知识库(重新加载向量库与 KG)...")
-
- if not global_config.lpmm_knowledge.enable:
- logger.warning(
- "当前配置中 lpmm_knowledge.enable = false,本次仅刷新磁盘数据与内存结构,"
- "但聊天侧如未启用 LPMM 仍不会在问答中使用知识库。"
- )
-
- # 调用标准启动逻辑,内部会加载 data/embedding 与 data/rag
- lpmm_start_up()
-
- qa_manager = get_qa_manager()
- if qa_manager is None:
- logger.error("刷新后 qa_manager 仍为 None,请检查是否已经成功导入过 LPMM 知识库。")
- return
-
- # 简要输出当前知识库规模,方便人工确认
- embed_manager = qa_manager.embed_manager
- kg_manager = qa_manager.kg_manager
-
- para_vec = len(embed_manager.paragraphs_embedding_store.store)
- ent_vec = len(embed_manager.entities_embedding_store.store)
- rel_vec = len(embed_manager.relation_embedding_store.store)
- nodes = len(kg_manager.graph.get_node_list())
- edges = len(kg_manager.graph.get_edge_list())
-
- logger.info("LPMM 知识库刷新完成,当前规模:")
- logger.info(
- "段落向量=%d, 实体向量=%d, 关系向量=%d, KG节点=%d, KG边=%d",
- para_vec,
- ent_vec,
- rel_vec,
- nodes,
- edges,
- )
-
- print("\n[REFRESH] 刷新完成,请注意:")
- print("- 本脚本是在独立进程内执行的,用于验证磁盘数据可以正常加载。")
- print("- 若主程序已在运行且未在内部调用 lpmm_start_up() 重新初始化,仍需重启或新增管理入口来热刷新。")
- print("- 如果不清楚 lpmm_start_up 是什么,只需要重启主程序即可。")
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/replay_llm_request.py b/scripts/replay_llm_request.py
deleted file mode 100644
index d17ed79d..00000000
--- a/scripts/replay_llm_request.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# ruff: noqa: E402
-
-import argparse
-import asyncio
-import json
-import sys
-from pathlib import Path
-from typing import Any
-
-PROJECT_ROOT = Path(__file__).resolve().parent.parent
-SRC_ROOT = PROJECT_ROOT / "src"
-if str(SRC_ROOT) not in sys.path:
- sys.path.insert(0, str(SRC_ROOT))
-if str(PROJECT_ROOT) not in sys.path:
- sys.path.insert(1, str(PROJECT_ROOT))
-
-from src.config.config import config_manager
-from src.llm_models.model_client.base_client import AudioTranscriptionRequest, ResponseRequest, client_registry
-from src.llm_models.model_client.base_client import EmbeddingRequest
-from src.llm_models.request_snapshot import (
- deserialize_messages_snapshot,
- deserialize_model_info_snapshot,
- deserialize_response_format_snapshot,
- deserialize_tool_options_snapshot,
-)
-
-
-def _load_snapshot(snapshot_path: Path) -> dict[str, Any]:
- """加载请求快照。"""
- return json.loads(snapshot_path.read_text(encoding="utf-8"))
-
-
-def _resolve_api_provider(provider_name: str):
- """根据名称解析当前配置中的 API Provider。"""
- model_config = config_manager.get_model_config()
- for api_provider in model_config.api_providers:
- if api_provider.name == provider_name:
- return api_provider
- raise ValueError(f"当前配置中不存在名为 {provider_name!r} 的 API Provider")
-
-
-def _build_response_request(snapshot: dict[str, Any]) -> ResponseRequest:
- """从快照构建响应请求对象。"""
- return ResponseRequest(
- extra_params=dict(snapshot.get("extra_params") or {}),
- max_tokens=snapshot.get("max_tokens"),
- message_list=deserialize_messages_snapshot(snapshot.get("message_list") or []),
- model_info=deserialize_model_info_snapshot(snapshot.get("model_info") or {}),
- response_format=deserialize_response_format_snapshot(snapshot.get("response_format")),
- temperature=snapshot.get("temperature"),
- tool_options=deserialize_tool_options_snapshot(snapshot.get("tool_options")),
- )
-
-
-def _build_embedding_request(snapshot: dict[str, Any]) -> EmbeddingRequest:
- """从快照构建嵌入请求对象。"""
- return EmbeddingRequest(
- embedding_input=str(snapshot.get("embedding_input") or ""),
- extra_params=dict(snapshot.get("extra_params") or {}),
- model_info=deserialize_model_info_snapshot(snapshot.get("model_info") or {}),
- )
-
-
-def _build_audio_request(snapshot: dict[str, Any]) -> AudioTranscriptionRequest:
- """从快照构建音频转写请求对象。"""
- return AudioTranscriptionRequest(
- audio_base64=str(snapshot.get("audio_base64") or ""),
- extra_params=dict(snapshot.get("extra_params") or {}),
- max_tokens=snapshot.get("max_tokens"),
- model_info=deserialize_model_info_snapshot(snapshot.get("model_info") or {}),
- )
-
-
-async def _replay(snapshot_path: Path) -> int:
- """回放一条失败请求快照。"""
- config_manager.initialize()
- snapshot = _load_snapshot(snapshot_path)
-
- internal_request = snapshot.get("internal_request")
- if not isinstance(internal_request, dict):
- raise ValueError("快照缺少 internal_request 字段")
-
- provider_snapshot = snapshot.get("api_provider")
- if not isinstance(provider_snapshot, dict):
- raise ValueError("快照缺少 api_provider 字段")
-
- provider_name = str(provider_snapshot.get("name") or "")
- if not provider_name:
- raise ValueError("快照中的 api_provider.name 不能为空")
-
- api_provider = _resolve_api_provider(provider_name)
- client = client_registry.get_client_class_instance(api_provider, force_new=True)
-
- request_kind = str(internal_request.get("request_kind") or "").strip()
- if request_kind == "response":
- response = await client.get_response(_build_response_request(internal_request))
- elif request_kind == "embedding":
- response = await client.get_embedding(_build_embedding_request(internal_request))
- elif request_kind == "audio_transcription":
- response = await client.get_audio_transcriptions(_build_audio_request(internal_request))
- else:
- raise ValueError(f"不支持的 request_kind: {request_kind!r}")
-
- output_payload = {
- "content": response.content,
- "embedding_length": len(response.embedding or []),
- "has_embedding": response.embedding is not None,
- "model_name": response.usage.model_name if response.usage is not None else None,
- "provider_name": response.usage.provider_name if response.usage is not None else None,
- "raw_data_type": type(response.raw_data).__name__ if response.raw_data is not None else None,
- "reasoning_content": response.reasoning_content,
- "tool_calls": [
- {
- "args": tool_call.args,
- "call_id": tool_call.call_id,
- "func_name": tool_call.func_name,
- }
- for tool_call in (response.tool_calls or [])
- ],
- "usage": {
- "completion_tokens": response.usage.completion_tokens,
- "prompt_tokens": response.usage.prompt_tokens,
- "total_tokens": response.usage.total_tokens,
- }
- if response.usage is not None
- else None,
- }
- print(json.dumps(output_payload, ensure_ascii=False, indent=2))
- return 0
-
-
-def main() -> int:
- """脚本入口。"""
- parser = argparse.ArgumentParser(description="回放失败的 LLM 请求快照。")
- parser.add_argument("snapshot_path", help="请求快照 JSON 文件路径")
- args = parser.parse_args()
-
- snapshot_path = Path(args.snapshot_path).expanduser().resolve()
- if not snapshot_path.exists():
- raise FileNotFoundError(f"快照文件不存在: {snapshot_path}")
-
- return asyncio.run(_replay(snapshot_path))
-
-
-if __name__ == "__main__":
- raise SystemExit(main())
diff --git a/scripts/replyer_action_stats.py b/scripts/replyer_action_stats.py
deleted file mode 100644
index 0974076d..00000000
--- a/scripts/replyer_action_stats.py
+++ /dev/null
@@ -1,303 +0,0 @@
-"""
-统计和展示 replyer 动作选择记录
-
-用法:
- python scripts/replyer_action_stats.py
-"""
-
-import json
-import os
-import sys
-from collections import Counter, defaultdict
-from datetime import datetime
-from typing import Dict, List, Any
-from pathlib import Path
-
-# Add project root to Python path
-project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.insert(0, project_root)
-
-try:
- from src.common.database.database_model import ChatStreams
- from src.chat.message_receive.chat_manager import chat_manager as _script_chat_manager
-except ImportError:
- ChatStreams = None
- _script_chat_manager = None
-
-
-def get_chat_name(chat_id: str) -> str:
- """根据 chat_id 获取聊天名称"""
- try:
- if ChatStreams:
- chat_stream = ChatStreams.get_or_none(ChatStreams.stream_id == chat_id)
- if chat_stream:
- if chat_stream.group_name:
- return f"{chat_stream.group_name}"
- elif chat_stream.user_nickname:
- return f"{chat_stream.user_nickname}的私聊"
-
- if _script_chat_manager:
- chat_manager = _script_chat_manager
- stream_name = chat_manager.get_stream_name(chat_id)
- if stream_name:
- return stream_name
-
- return f"未知聊天 ({chat_id[:8]}...)"
- except Exception:
- return f"查询失败 ({chat_id[:8]}...)"
-
-
-def load_records(temp_dir: str = "data/temp") -> List[Dict[str, Any]]:
- """加载所有 replyer 动作记录"""
- records = []
- temp_path = Path(temp_dir)
-
- if not temp_path.exists():
- print(f"目录不存在: {temp_dir}")
- return records
-
- # 查找所有 replyer_action_*.json 文件
- pattern = "replyer_action_*.json"
- for file_path in temp_path.glob(pattern):
- try:
- with open(file_path, "r", encoding="utf-8") as f:
- data = json.load(f)
- records.append(data)
- except Exception as e:
- print(f"读取文件失败 {file_path}: {e}")
-
- # 按时间戳排序
- records.sort(key=lambda x: x.get("timestamp", ""))
- return records
-
-
-def format_timestamp(ts: str) -> str:
- """格式化时间戳"""
- try:
- dt = datetime.fromisoformat(ts)
- return dt.strftime("%Y-%m-%d %H:%M:%S")
- except Exception:
- return ts
-
-
-def calculate_time_distribution(records: List[Dict[str, Any]]) -> Dict[str, int]:
- """计算时间分布"""
- now = datetime.now()
- distribution = {
- "今天": 0,
- "昨天": 0,
- "3天内": 0,
- "7天内": 0,
- "30天内": 0,
- "更早": 0,
- }
-
- for record in records:
- try:
- ts = record.get("timestamp", "")
- if not ts:
- continue
- dt = datetime.fromisoformat(ts)
- diff = (now - dt).days
-
- if diff == 0:
- distribution["今天"] += 1
- elif diff == 1:
- distribution["昨天"] += 1
- elif diff < 3:
- distribution["3天内"] += 1
- elif diff < 7:
- distribution["7天内"] += 1
- elif diff < 30:
- distribution["30天内"] += 1
- else:
- distribution["更早"] += 1
- except Exception:
- pass
-
- return distribution
-
-
-def print_statistics(records: List[Dict[str, Any]]):
- """打印统计信息"""
- if not records:
- print("没有找到任何记录")
- return
-
- print("=" * 80)
- print("Replyer 动作选择记录统计")
- print("=" * 80)
- print()
-
- # 总记录数
- total_count = len(records)
- print(f"📊 总记录数: {total_count}")
- print()
-
- # 时间范围
- timestamps = [r.get("timestamp", "") for r in records if r.get("timestamp")]
- if timestamps:
- first_time = format_timestamp(min(timestamps))
- last_time = format_timestamp(max(timestamps))
- print(f"📅 时间范围: {first_time} ~ {last_time}")
- print()
-
- # 按 think_level 统计
- think_levels = [r.get("think_level", 0) for r in records]
- think_level_counter = Counter(think_levels)
- print("🧠 思考深度分布:")
- for level in sorted(think_level_counter.keys()):
- count = think_level_counter[level]
- percentage = (count / total_count) * 100
- level_name = {0: "不需要思考", 1: "简单思考", 2: "深度思考"}.get(level, f"未知({level})")
- print(f" Level {level} ({level_name}): {count} 次 ({percentage:.1f}%)")
- print()
-
- # 按 chat_id 统计(总体)
- chat_counter = Counter([r.get("chat_id", "未知") for r in records])
- print(f"💬 聊天分布 (共 {len(chat_counter)} 个聊天):")
- # 只显示前10个
- for chat_id, count in chat_counter.most_common(10):
- chat_name = get_chat_name(chat_id)
- percentage = (count / total_count) * 100
- print(f" {chat_name}: {count} 次 ({percentage:.1f}%)")
- if len(chat_counter) > 10:
- print(f" ... 还有 {len(chat_counter) - 10} 个聊天")
- print()
-
- # 每个 chat_id 的详细统计
- print("=" * 80)
- print("每个聊天的详细统计")
- print("=" * 80)
- print()
-
- # 按 chat_id 分组记录
- records_by_chat = defaultdict(list)
- for record in records:
- chat_id = record.get("chat_id", "未知")
- records_by_chat[chat_id].append(record)
-
- # 按记录数排序
- sorted_chats = sorted(records_by_chat.items(), key=lambda x: len(x[1]), reverse=True)
-
- for chat_id, chat_records in sorted_chats:
- chat_name = get_chat_name(chat_id)
- chat_count = len(chat_records)
- chat_percentage = (chat_count / total_count) * 100
-
- print(f"📱 {chat_name} ({chat_id[:8]}...)")
- print(f" 总记录数: {chat_count} ({chat_percentage:.1f}%)")
-
- # 该聊天的 think_level 分布
- chat_think_levels = [r.get("think_level", 0) for r in chat_records]
- chat_think_counter = Counter(chat_think_levels)
- print(" 思考深度分布:")
- for level in sorted(chat_think_counter.keys()):
- level_count = chat_think_counter[level]
- level_percentage = (level_count / chat_count) * 100
- level_name = {0: "不需要思考", 1: "简单思考", 2: "深度思考"}.get(level, f"未知({level})")
- print(f" Level {level} ({level_name}): {level_count} 次 ({level_percentage:.1f}%)")
-
- # 该聊天的时间范围
- chat_timestamps = [r.get("timestamp", "") for r in chat_records if r.get("timestamp")]
- if chat_timestamps:
- first_time = format_timestamp(min(chat_timestamps))
- last_time = format_timestamp(max(chat_timestamps))
- print(f" 时间范围: {first_time} ~ {last_time}")
-
- # 该聊天的时间分布
- chat_time_dist = calculate_time_distribution(chat_records)
- print(" 时间分布:")
- for period, count in chat_time_dist.items():
- if count > 0:
- period_percentage = (count / chat_count) * 100
- print(f" {period}: {count} 次 ({period_percentage:.1f}%)")
-
- # 显示该聊天最近的一条理由示例
- if chat_records:
- latest_record = chat_records[-1]
- reason = latest_record.get("reason", "无理由")
- if len(reason) > 120:
- reason = reason[:120] + "..."
- timestamp = format_timestamp(latest_record.get("timestamp", ""))
- think_level = latest_record.get("think_level", 0)
- print(f" 最新记录 [{timestamp}] (Level {think_level}): {reason}")
-
- print()
-
- # 时间分布
- time_dist = calculate_time_distribution(records)
- print("⏰ 时间分布:")
- for period, count in time_dist.items():
- if count > 0:
- percentage = (count / total_count) * 100
- print(f" {period}: {count} 次 ({percentage:.1f}%)")
- print()
-
- # 显示一些示例理由
- print("📝 示例理由 (最近5条):")
- recent_records = records[-5:]
- for i, record in enumerate(recent_records, 1):
- reason = record.get("reason", "无理由")
- think_level = record.get("think_level", 0)
- timestamp = format_timestamp(record.get("timestamp", ""))
- chat_id = record.get("chat_id", "未知")
- chat_name = get_chat_name(chat_id)
-
- # 截断过长的理由
- if len(reason) > 100:
- reason = reason[:100] + "..."
-
- print(f" {i}. [{timestamp}] {chat_name} (Level {think_level})")
- print(f" {reason}")
- print()
-
- # 按 think_level 分组显示理由示例
- print("=" * 80)
- print("按思考深度分类的示例理由")
- print("=" * 80)
- print()
-
- for level in [0, 1, 2]:
- level_records = [r for r in records if r.get("think_level") == level]
- if not level_records:
- continue
-
- level_name = {0: "不需要思考", 1: "简单思考", 2: "深度思考"}.get(level, f"未知({level})")
- print(f"Level {level} ({level_name}) - 共 {len(level_records)} 条:")
-
- # 显示3个示例(选择最近的)
- examples = level_records[-3:] if len(level_records) >= 3 else level_records
- for i, record in enumerate(examples, 1):
- reason = record.get("reason", "无理由")
- if len(reason) > 150:
- reason = reason[:150] + "..."
- timestamp = format_timestamp(record.get("timestamp", ""))
- chat_id = record.get("chat_id", "未知")
- chat_name = get_chat_name(chat_id)
- print(f" {i}. [{timestamp}] {chat_name}")
- print(f" {reason}")
- print()
-
- # 统计信息汇总
- print("=" * 80)
- print("统计汇总")
- print("=" * 80)
- print(f"总记录数: {total_count}")
- print(f"涉及聊天数: {len(chat_counter)}")
- if chat_counter:
- avg_count = total_count / len(chat_counter)
- print(f"平均每个聊天记录数: {avg_count:.1f}")
- else:
- print("平均每个聊天记录数: N/A")
- print()
-
-
-def main():
- """主函数"""
- records = load_records()
- print_statistics(records)
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/test_lpmm_retrieval.py b/scripts/test_lpmm_retrieval.py
deleted file mode 100644
index c6aeccda..00000000
--- a/scripts/test_lpmm_retrieval.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import argparse
-import asyncio
-import os
-import sys
-from typing import List, Dict, Any, Optional
-
-# 强制使用 utf-8,避免控制台编码报错影响 Embedding 加载
-try:
- if hasattr(sys.stdout, "reconfigure"):
- sys.stdout.reconfigure(encoding="utf-8")
- if hasattr(sys.stderr, "reconfigure"):
- sys.stderr.reconfigure(encoding="utf-8")
-except Exception:
- pass
-
-# 确保能导入 src.*
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from src.common.logger import get_logger
-from src.config.config import global_config
-from src.chat.knowledge import lpmm_start_up
-from src.memory_system.retrieval_tools.query_lpmm_knowledge import query_lpmm_knowledge
-
-logger = get_logger("test_lpmm_retrieval")
-
-
-DEFAULT_TEST_CASES: List[Dict[str, Any]] = [
- {
- "name": "回滚一批知识",
- "query": "LPMM是什么?",
- "expect_keywords": ["哈希列表", "删除脚本", "OpenIE"],
- },
- {
- "name": "调整 LPMM 检索参数",
- "query": "不同用词习惯带来的检索偏差该如何解决",
- "expect_keywords": ["bot_config.toml", "lpmm_knowledge", "qa_paragraph_search_top_k"],
- },
-]
-
-
-async def run_tests(test_cases: Optional[List[Dict[str, Any]]] = None) -> None:
- """简单测试 LPMM 知识库检索能力"""
- if not global_config.lpmm_knowledge.enable:
- logger.warning("当前配置中 lpmm_knowledge.enable 为 False,检索测试可能直接返回“未启用”。")
-
- logger.info("开始初始化 LPMM 知识库...")
- lpmm_start_up()
- logger.info("LPMM 知识库初始化完成,开始执行测试用例。")
-
- cases = test_cases if test_cases is not None else DEFAULT_TEST_CASES
-
- for case in cases:
- name = case["name"]
- query = case["query"]
- expect_keywords: List[str] = case.get("expect_keywords", [])
-
- print("\n" + "=" * 60)
- print(f"[TEST] {name}")
- print(f"[Q] {query}")
-
- result = await query_lpmm_knowledge(query, limit=3)
-
- print("\n[RAW RESULT]")
- print(result)
-
- status = "UNKNOWN"
- hit_keywords: List[str] = []
-
- if isinstance(result, str):
- if "未启用" in result or "未初始化" in result or "查询失败" in result:
- status = "ERROR"
- elif "未找到与" in result:
- status = "NO_HIT"
- else:
- if expect_keywords:
- hit_keywords = [kw for kw in expect_keywords if kw in result]
- status = "PASS" if hit_keywords else "WARN"
- else:
- status = "PASS"
-
- print("\n[CHECK]")
- print(f"Status: {status}")
- if expect_keywords:
- print(f"Expected keywords: {expect_keywords}")
- print(f"Hit keywords: {hit_keywords}")
-
- print("\n" + "=" * 60)
- print("LPMM 检索测试完成。请根据每条用例的 Status 和命中关键词判断检索效果是否符合预期。")
-
-
-def main() -> None:
- parser = argparse.ArgumentParser(
- description=(
- "测试 LPMM 知识库检索能力。\n"
- "如不提供参数,则执行内置的默认用例;\n"
- "也可以通过 --query 与 --expect-keyword 自定义一条测试用例。"
- )
- )
- parser.add_argument(
- "--query",
- help="自定义测试问题(单条)。提供该参数时,将仅运行这一条用例。",
- )
- parser.add_argument(
- "--expect-keyword",
- action="append",
- help="期望在检索结果中出现的关键字,可重复多次指定;仅在提供 --query 时生效。",
- )
- args = parser.parse_args()
-
- if args.query:
- custom_case = {
- "name": "custom",
- "query": args.query,
- "expect_keywords": args.expect_keyword or [],
- }
- asyncio.run(run_tests([custom_case]))
- else:
- asyncio.run(run_tests())
-
-
-if __name__ == "__main__":
- main()
diff --git a/src/common/message_server/server.py b/src/common/message_server/server.py
index c4c58669..58708bf9 100644
--- a/src/common/message_server/server.py
+++ b/src/common/message_server/server.py
@@ -19,7 +19,7 @@ class Server:
def __init__(self, host: Optional[str] = None, port: Optional[int] = None, app_name: str = "MaiMCore"):
self.app = FastAPI(title=app_name)
self._host: str = "127.0.0.1"
- self._port: int = 8080
+ self._port: int = 8000
self._server: Optional[UvicornServer] = None
self.set_address(host, port)
diff --git a/src/config/config.py b/src/config/config.py
index 9a82449c..006c8ae1 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -55,7 +55,7 @@ BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute()
MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute()
LEGACY_ENV_PATH: Path = (PROJECT_ROOT / ".env").resolve().absolute()
MMC_VERSION: str = "1.0.0"
-CONFIG_VERSION: str = "8.9.3"
+CONFIG_VERSION: str = "8.9.4"
MODEL_CONFIG_VERSION: str = "1.14.1"
logger = get_logger("config")
diff --git a/src/config/legacy_migration.py b/src/config/legacy_migration.py
index f62e7eca..e7d9f85c 100644
--- a/src/config/legacy_migration.py
+++ b/src/config/legacy_migration.py
@@ -84,6 +84,20 @@ def _parse_triplet_target(s: str) -> Optional[dict[str, str]]:
return {"platform": platform, "item_id": item_id, "rule_type": rule_type}
+def _parse_expression_group_target(s: str) -> Optional[dict[str, str]]:
+ """
+ 解析表达互通组目标,兼容旧版 "*" 全局共享标记。
+ """
+ if not isinstance(s, str):
+ return None
+
+ normalized_value = s.strip()
+ if normalized_value == "*":
+ return {"platform": "*", "item_id": "*", "rule_type": "group"}
+
+ return _parse_triplet_target(normalized_value)
+
+
def _parse_enable_disable(v: Any) -> Optional[bool]:
"""
兼容旧值 "enable"/"disable" 以及 bool。
@@ -93,9 +107,9 @@ def _parse_enable_disable(v: Any) -> Optional[bool]:
if isinstance(v, str):
normalized_value = v.strip().lower()
- if normalized_value == "enable":
+ if normalized_value in {"enable", "true"}:
return True
- if normalized_value == "disable":
+ if normalized_value in {"disable", "false"}:
return False
return None
@@ -160,7 +174,21 @@ def _migrate_expression_groups(expr: dict[str, Any]) -> bool:
"""
将旧版 expression.expression_groups 转成当前结构。
"""
- expression_groups = _as_list(expr.get("expression_groups"))
+ raw_expression_groups = expr.get("expression_groups")
+ if isinstance(raw_expression_groups, str):
+ normalized_value = raw_expression_groups.strip()
+ if not normalized_value:
+ expr["expression_groups"] = []
+ return True
+
+ parsed = _parse_expression_group_target(normalized_value)
+ if parsed is None:
+ return False
+
+ expr["expression_groups"] = [{"expression_groups": [parsed]}]
+ return True
+
+ expression_groups = _as_list(raw_expression_groups)
if expression_groups is None:
return False
if expression_groups and all(isinstance(item, dict) for item in expression_groups):
@@ -174,7 +202,7 @@ def _migrate_expression_groups(expr: dict[str, Any]) -> bool:
targets: list[dict[str, str]] = []
for item in group_items:
- parsed = _parse_triplet_target(str(item))
+ parsed = _parse_expression_group_target(str(item))
if parsed is None:
return False
targets.append(parsed)
@@ -206,6 +234,35 @@ def _migrate_target_item_list(parent: dict[str, Any], key: str) -> bool:
return True
+def _drop_empty_keyword_rules(keyword_reaction: dict[str, Any], key: str) -> bool:
+ raw = _as_list(keyword_reaction.get(key))
+ if raw is None:
+ return False
+
+ cleaned_rules: list[Any] = []
+ dropped_any = False
+ for item in raw:
+ item_dict = _as_dict(item)
+ if item_dict is None:
+ cleaned_rules.append(item)
+ continue
+
+ keywords = _as_list(item_dict.get("keywords")) or []
+ regex = _as_list(item_dict.get("regex")) or []
+ reaction = item_dict.get("reaction")
+ if not keywords and not regex and (reaction is None or str(reaction).strip() == ""):
+ dropped_any = True
+ continue
+
+ cleaned_rules.append(item)
+
+ if not dropped_any:
+ return False
+
+ keyword_reaction[key] = cleaned_rules
+ return True
+
+
def migrate_legacy_bind_env_to_bot_config_dict(data: dict[str, Any]) -> MigrationResult:
"""将旧版 `.env` 中的绑定地址迁移到主配置结构。"""
@@ -222,7 +279,7 @@ def migrate_legacy_bind_env_to_bot_config_dict(data: dict[str, Any]) -> Migratio
if maim_message is not None and _migrate_env_value(maim_message, "ws_server_host", main_host_env, "127.0.0.1"):
migrated_any = True
reasons.append("HOST->maim_message.ws_server_host")
- if maim_message is not None and _migrate_env_value(maim_message, "ws_server_port", main_port_env, 8080):
+ if maim_message is not None and _migrate_env_value(maim_message, "ws_server_port", main_port_env, 8000):
migrated_any = True
reasons.append("PORT->maim_message.ws_server_port")
@@ -253,6 +310,12 @@ def try_migrate_legacy_bot_config_dict(data: dict[str, Any]) -> MigrationResult:
migrated_any = False
reasons: list[str] = []
+ bot = _as_dict(data.get("bot"))
+ if bot is not None and isinstance(bot.get("qq_account"), str) and not bot["qq_account"].strip():
+ bot["qq_account"] = 0
+ migrated_any = True
+ reasons.append("bot.qq_account_empty")
+
expr = _as_dict(data.get("expression"))
if expr is not None:
if _migrate_expression_learning_list(expr):
@@ -298,5 +361,14 @@ def try_migrate_legacy_bot_config_dict(data: dict[str, Any]) -> MigrationResult:
migrated_any = True
reasons.append("memory.global_memory_blacklist")
+ keyword_reaction = _as_dict(data.get("keyword_reaction"))
+ if keyword_reaction is not None:
+ if _drop_empty_keyword_rules(keyword_reaction, "keyword_rules"):
+ migrated_any = True
+ reasons.append("keyword_reaction.keyword_rules_empty")
+ if _drop_empty_keyword_rules(keyword_reaction, "regex_rules"):
+ migrated_any = True
+ reasons.append("keyword_reaction.regex_rules_empty")
+
reason = ",".join(reasons)
return MigrationResult(data=data, migrated=migrated_any, reason=reason)
diff --git a/src/config/legacy_upgrade_confirmation.py b/src/config/legacy_upgrade_confirmation.py
new file mode 100644
index 00000000..469dd0b6
--- /dev/null
+++ b/src/config/legacy_upgrade_confirmation.py
@@ -0,0 +1,223 @@
+from pathlib import Path
+
+import os
+import re
+import sqlite3
+import sys
+
+LEGACY_UPGRADE_CONFIRM_ENV = "MAIBOT_LEGACY_0X_UPGRADE_CONFIRMED"
+LEGACY_0X_BOT_CONFIG_BOUNDARY = "8.9.4"
+LEGACY_0X_MODEL_CONFIG_BOUNDARY = "1.14.1"
+
+
+def _parse_semver(version: str) -> tuple[int, int, int] | None:
+ """解析三段式版本号。"""
+ parts = version.strip().split(".")
+ if len(parts) != 3:
+ return None
+ try:
+ return tuple(int(part) for part in parts) # type: ignore[return-value]
+ except ValueError:
+ return None
+
+
+def _read_config_constant(project_root: Path, name: str) -> str | None:
+ """从配置模块源码读取版本常量,避免提前触发配置加载和自动迁移。"""
+ config_source_path = project_root / "src" / "config" / "config.py"
+ try:
+ config_source = config_source_path.read_text(encoding="utf-8")
+ except OSError:
+ return None
+
+ pattern = rf'^{re.escape(name)}:\s*str\s*=\s*"([^"]+)"'
+ match = re.search(pattern, config_source, flags=re.MULTILINE)
+ if match is None:
+ return None
+ return match.group(1)
+
+
+def _read_inner_config_version(config_path: Path) -> str | None:
+ """读取配置文件 [inner].version,失败时返回 None。"""
+ if not config_path.exists():
+ return None
+
+ try:
+ config_text = config_path.read_text(encoding="utf-8")
+ except OSError:
+ return None
+
+ in_inner_table = False
+ for raw_line in config_text.splitlines():
+ line = raw_line.strip()
+ if not line or line.startswith("#"):
+ continue
+ if line.startswith("[") and line.endswith("]"):
+ in_inner_table = line == "[inner]"
+ continue
+ if not in_inner_table:
+ continue
+ match = re.match(r'^version\s*=\s*"([^"]+)"', line)
+ if match is not None:
+ return match.group(1)
+ return None
+
+
+def _is_version_lower(current_version: str | None, target_version: str | None) -> bool:
+ current_parts = _parse_semver(current_version or "")
+ target_parts = _parse_semver(target_version or "")
+ if current_parts is None or target_parts is None:
+ return False
+ return current_parts < target_parts
+
+
+def _needs_legacy_config_confirmation(
+ current_version: str | None,
+ target_version: str | None,
+ legacy_boundary_version: str,
+) -> bool:
+ """判断配置更新是否属于 0.x -> 1.0.0 边界升级。"""
+ return _is_version_lower(current_version, legacy_boundary_version) and _is_version_lower(
+ current_version,
+ target_version,
+ )
+
+
+def _load_sqlite_schema(db_path: Path) -> tuple[int, dict[str, set[str]]] | None:
+ """读取 SQLite user_version 与用户表列名,不创建新数据库文件。"""
+ if not db_path.exists():
+ return None
+
+ database_uri = f"file:{db_path.as_posix()}?mode=ro"
+ try:
+ with sqlite3.connect(database_uri, uri=True) as connection:
+ user_version_row = connection.execute("PRAGMA user_version").fetchone()
+ if user_version_row is None:
+ return None
+
+ table_rows = connection.execute(
+ """
+ SELECT name
+ FROM sqlite_master
+ WHERE type = 'table'
+ AND name NOT LIKE 'sqlite_%'
+ """
+ ).fetchall()
+ tables: dict[str, set[str]] = {}
+ for (table_name,) in table_rows:
+ table_name = str(table_name)
+ escaped_table_name = table_name.replace('"', '""')
+ column_rows = connection.execute(f'PRAGMA table_info("{escaped_table_name}")').fetchall()
+ tables[table_name] = {str(row[1]) for row in column_rows}
+ except sqlite3.Error:
+ return None
+
+ return int(user_version_row[0]), tables
+
+
+def _detect_legacy_0x_database(db_path: Path) -> bool:
+ """检测旧版 0.x 数据库结构。"""
+ schema = _load_sqlite_schema(db_path)
+ if schema is None:
+ return False
+
+ user_version, tables = schema
+ if user_version == 1:
+ return True
+ if user_version > 1 or not tables:
+ return False
+
+ legacy_exclusive_tables = {
+ "chat_streams",
+ "emoji",
+ "emoji_description_cache",
+ "expression",
+ "group_info",
+ "image_descriptions",
+ "jargon",
+ "messages",
+ "thinking_back",
+ }
+ if legacy_exclusive_tables.intersection(tables):
+ return True
+
+ legacy_shared_markers = (
+ ("action_records", ("chat_id", "time")),
+ ("chat_history", ("chat_id", "original_text")),
+ ("images", ("emoji_hash", "path", "type")),
+ ("llm_usage", ("model_api_provider", "status")),
+ ("online_time", ("duration",)),
+ ("person_info", ("nickname", "group_nick_name")),
+ )
+ for table_name, required_columns in legacy_shared_markers:
+ table_columns = tables.get(table_name)
+ if table_columns is not None and all(column_name in table_columns for column_name in required_columns):
+ return True
+ return False
+
+
+def collect_legacy_upgrade_reasons(project_root: Path) -> list[str]:
+ """收集启动前需要用户确认的 0.x 升级风险项。"""
+ reasons: list[str] = []
+
+ db_path = project_root / "data" / "MaiBot.db"
+ if _detect_legacy_0x_database(db_path):
+ reasons.append(f"检测到旧版 0.x 数据库结构,将更新数据库:{db_path}")
+
+ bot_config_path = project_root / "config" / "bot_config.toml"
+ bot_config_version = _read_inner_config_version(bot_config_path)
+ target_bot_config_version = _read_config_constant(project_root, "CONFIG_VERSION")
+ if _needs_legacy_config_confirmation(
+ bot_config_version,
+ target_bot_config_version,
+ LEGACY_0X_BOT_CONFIG_BOUNDARY,
+ ):
+ reasons.append(
+ "检测到主配置文件版本较旧,将更新配置文件:"
+ f"{bot_config_path} ({bot_config_version} -> {target_bot_config_version})"
+ )
+
+ model_config_path = project_root / "config" / "model_config.toml"
+ model_config_version = _read_inner_config_version(model_config_path)
+ target_model_config_version = _read_config_constant(project_root, "MODEL_CONFIG_VERSION")
+ if _needs_legacy_config_confirmation(
+ model_config_version,
+ target_model_config_version,
+ LEGACY_0X_MODEL_CONFIG_BOUNDARY,
+ ):
+ reasons.append(
+ "检测到模型配置文件版本较旧,将更新配置文件:"
+ f"{model_config_path} ({model_config_version} -> {target_model_config_version})"
+ )
+
+ return reasons
+
+
+def require_legacy_upgrade_confirmation(project_root: Path) -> None:
+ """在执行 0.x 升级迁移前要求用户显式确认。"""
+ if os.getenv(LEGACY_UPGRADE_CONFIRM_ENV) == "1":
+ return
+
+ reasons = collect_legacy_upgrade_reasons(project_root)
+ if not reasons:
+ return
+
+ print()
+ print("=" * 72)
+ print("MaiBot 升级提示")
+ print("检测到当前实例可能是从 1.0.0 以前的 0.x.x 版本升级而来。")
+ print("继续启动将会执行自动升级,可能包括数据库结构更新和配置文件更新。")
+ print("建议在继续前备份 data/ 与 config/ 目录。")
+ print()
+ for reason in reasons:
+ print(f"- {reason}")
+ print("=" * 72)
+
+ try:
+ user_input = input("确认继续升级并启动吗?请输入 y 后回车:").strip().lower()
+ except EOFError:
+ user_input = ""
+ if user_input != "y":
+ print("未确认升级,启动已取消。")
+ sys.exit(1)
+
+ os.environ[LEGACY_UPGRADE_CONFIRM_ENV] = "1"
diff --git a/src/config/official_configs.py b/src/config/official_configs.py
index e8f364ae..d68e707d 100644
--- a/src/config/official_configs.py
+++ b/src/config/official_configs.py
@@ -1090,14 +1090,14 @@ class DebugConfig(ConfigBase):
__ui_label__ = "其他"
__ui_icon__ = "more-horizontal"
- show_prompt: bool = Field(
- default=False,
+ enable_maisaka_stage_board: bool = Field(
+ default=True,
json_schema_extra={
"x-widget": "switch",
- "x-icon": "eye",
+ "x-icon": "layout-dashboard",
},
)
- """是否显示prompt"""
+ """是否启用 Maisaka 阶段看板"""
show_maisaka_thinking: bool = Field(
default=True,
@@ -1205,7 +1205,7 @@ class MaimMessageConfig(ConfigBase):
"""旧版基于WS的服务器主机地址"""
ws_server_port: int = Field(
- default=8080,
+ default=8000,
json_schema_extra={
"x-widget": "input",
"x-icon": "hash",
diff --git a/src/main.py b/src/main.py
index 8b5eb9a1..1e515fe9 100644
--- a/src/main.py
+++ b/src/main.py
@@ -66,7 +66,8 @@ class MainSystem:
async def initialize(self) -> None:
"""初始化系统组件"""
- enable_stage_status_board()
+ if global_config.debug.enable_maisaka_stage_board:
+ enable_stage_status_board()
logger.info(t("startup.waking_up", nickname=global_config.bot.nickname))
# 其他初始化任务
diff --git a/src/maisaka/builtin_tool/reply.py b/src/maisaka/builtin_tool/reply.py
index 8f66a8ca..c3d3bf4a 100644
--- a/src/maisaka/builtin_tool/reply.py
+++ b/src/maisaka/builtin_tool/reply.py
@@ -160,6 +160,7 @@ async def handle_tool(
reply_segments = tool_ctx.post_process_reply_text(reply_text)
combined_reply_text = "".join(reply_segments)
+ sent_message_ids: list[str] = []
try:
sent = False
if tool_ctx.runtime.chat_stream.platform == CLI_PLATFORM_NAME:
@@ -181,6 +182,9 @@ async def handle_tool(
sent = sent_message is not None
if not sent:
break
+ sent_message_id = str(getattr(sent_message, "message_id", "") or "").strip()
+ if sent_message_id:
+ sent_message_ids.append(sent_message_id)
except Exception:
logger.exception(
f"{tool_ctx.runtime.log_prefix} 发送文字消息时发生异常,目标消息编号={target_message_id}"
@@ -209,6 +213,7 @@ async def handle_tool(
if tool_ctx.runtime.chat_stream.platform == CLI_PLATFORM_NAME:
tool_ctx.append_guided_reply_to_chat_history(combined_reply_text)
tool_ctx.runtime._record_reply_sent()
+ reply_metadata["sent_message_ids"] = sent_message_ids
await tool_ctx.runtime.track_reply_effect(
tool_call_id=invocation.call_id,
target_message=target_message,
diff --git a/src/maisaka/reply_effect/models.py b/src/maisaka/reply_effect/models.py
index 26d06fb0..d7a32f84 100644
--- a/src/maisaka/reply_effect/models.py
+++ b/src/maisaka/reply_effect/models.py
@@ -66,6 +66,7 @@ class FollowupMessageSnapshot:
plain_text: str
latency_seconds: float
is_target_user: bool
+ quote_target_ids: List[str] = field(default_factory=list)
attachments: List[Dict[str, Any]] = field(default_factory=list)
diff --git a/src/maisaka/reply_effect/quote_utils.py b/src/maisaka/reply_effect/quote_utils.py
new file mode 100644
index 00000000..531ac423
--- /dev/null
+++ b/src/maisaka/reply_effect/quote_utils.py
@@ -0,0 +1,32 @@
+"""回复效果记录中的引用消息辅助工具。"""
+
+from typing import Any
+
+from src.common.data_models.message_component_data_model import MessageSequence, ReplyComponent
+
+
+def extract_quote_target_ids(message_sequence: MessageSequence | None) -> list[str]:
+ """从消息片段中提取引用回复目标消息 ID。"""
+
+ if message_sequence is None:
+ return []
+
+ target_ids: list[str] = []
+ for component in getattr(message_sequence, "components", []):
+ if not isinstance(component, ReplyComponent):
+ continue
+ target_message_id = str(component.target_message_id or "").strip()
+ if target_message_id:
+ target_ids.append(target_message_id)
+ return target_ids
+
+
+def message_id_from_context_message(message: Any) -> str:
+ """尽量从 Maisaka 上下文消息中取真实消息 ID。"""
+
+ message_id = str(getattr(message, "message_id", "") or "").strip()
+ if message_id:
+ return message_id
+
+ original_message = getattr(message, "original_message", None)
+ return str(getattr(original_message, "message_id", "") or "").strip()
diff --git a/src/maisaka/reply_effect/tracker.py b/src/maisaka/reply_effect/tracker.py
index 39895403..c2e7c8ef 100644
--- a/src/maisaka/reply_effect/tracker.py
+++ b/src/maisaka/reply_effect/tracker.py
@@ -23,6 +23,7 @@ from .models import (
UserSnapshot,
now_iso,
)
+from .quote_utils import extract_quote_target_ids
from .path_utils import build_reply_effect_chat_dir_name
from .scoring import (
has_explicit_negative_feedback,
@@ -190,6 +191,7 @@ class ReplyEffectTracker:
plain_text=plain_text,
latency_seconds=round(latency_seconds, 3),
is_target_user=bool(record.target_user.user_id and user_id == record.target_user.user_id),
+ quote_target_ids=extract_quote_target_ids(message.raw_message),
attachments=extract_visual_attachments_from_sequence(message.raw_message),
)
diff --git a/src/maisaka/runtime.py b/src/maisaka/runtime.py
index c91b972c..d3e6b41b 100644
--- a/src/maisaka/runtime.py
+++ b/src/maisaka/runtime.py
@@ -41,6 +41,7 @@ from .display.stage_status_board import remove_stage_status, update_stage_status
from .reasoning_engine import MaisakaReasoningEngine
from .reply_effect import ReplyEffectTracker
from .reply_effect.image_utils import extract_visual_attachments_from_sequence
+from .reply_effect.quote_utils import extract_quote_target_ids, message_id_from_context_message
from .tool_provider import MaisakaBuiltinToolProvider
logger = get_logger("maisaka_runtime")
@@ -349,10 +350,12 @@ class MaisakaHeartFlowChatting:
continue
snapshot.append(
{
+ "message_id": message_id_from_context_message(message),
"source": message.source,
"role": message.role,
"timestamp": message.timestamp.isoformat(timespec="seconds"),
"text": text,
+ "quote_target_ids": extract_quote_target_ids(getattr(message, "raw_message", None)),
"attachments": extract_visual_attachments_from_sequence(getattr(message, "raw_message", None)),
}
)