diff --git a/.dockerignore b/.dockerignore index 654a03fd..61a88dff 100644 --- a/.dockerignore +++ b/.dockerignore @@ -21,3 +21,5 @@ temp/ tmp/ mai_knowledge/ depends-data/ +!depends-data/ +!depends-data/char_frequency.json diff --git a/dashboard/src/routes/logs.tsx b/dashboard/src/routes/logs.tsx index c68dda40..56e241f9 100644 --- a/dashboard/src/routes/logs.tsx +++ b/dashboard/src/routes/logs.tsx @@ -22,16 +22,26 @@ import { zhCN } from 'date-fns/locale' // 字号配置 type FontSize = 'xs' | 'sm' | 'base' +type LogLevelFilter = LogEntry['level'] | 'all' + const fontSizeConfig: Record = { xs: { label: '小', rowHeight: 28, class: 'text-[10px] sm:text-xs' }, sm: { label: '中', rowHeight: 36, class: 'text-xs sm:text-sm' }, base: { label: '大', rowHeight: 44, class: 'text-sm sm:text-base' }, } +const levelPriority: Record = { + DEBUG: 10, + INFO: 20, + WARNING: 30, + ERROR: 40, + CRITICAL: 50, +} + export function LogViewerPage() { const [logs, setLogs] = useState([]) const [searchQuery, setSearchQuery] = useState('') - const [levelFilter, setLevelFilter] = useState('all') + const [levelFilter, setLevelFilter] = useState('INFO') const [moduleFilter, setModuleFilter] = useState('all') const [dateFrom, setDateFrom] = useState(undefined) const [dateTo, setDateTo] = useState(undefined) @@ -154,8 +164,10 @@ export function LogViewerPage() { log.message.toLowerCase().includes(searchQuery.toLowerCase()) || log.module.toLowerCase().includes(searchQuery.toLowerCase()) - // 级别过滤 - const matchesLevel = levelFilter === 'all' || log.level === levelFilter + // 级别过滤:选择某个级别时显示该级别及以上的日志 + const matchesLevel = + levelFilter === 'all' || + levelPriority[log.level] >= levelPriority[levelFilter] // 模块过滤 const matchesModule = moduleFilter === 'all' || log.module === moduleFilter @@ -355,17 +367,17 @@ export function LogViewerPage() { {/* 级别和模块筛选 */}
- setLevelFilter(value as LogLevelFilter)}> - + 全部级别 - DEBUG - INFO - WARNING - ERROR + DEBUG 及以上 + INFO 及以上 + WARNING 及以上 + ERROR 及以上 CRITICAL diff --git a/docker-compose.yml b/docker-compose.yml index 853990b1..f6dcded1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,6 +25,7 @@ services: - ./data/MaiMBot/emoji:/data/emoji # 持久化表情包 - ./data/MaiMBot/plugins:/MaiMBot/plugins # 插件目录 - ./data/MaiMBot/logs:/MaiMBot/logs # 日志目录 + - ./depends-data:/MaiMBot/depends-data:ro # 运行时资源文件 # - site-packages:/usr/local/lib/python3.13/site-packages # 持久化Python包,需要时启用 restart: always networks: diff --git a/pytests/A_memorix_test/test_chat_summary_writeback_integration.py b/pytests/A_memorix_test/test_chat_summary_writeback_integration.py index 0a0eccb3..24c7ff4f 100644 --- a/pytests/A_memorix_test/test_chat_summary_writeback_integration.py +++ b/pytests/A_memorix_test/test_chat_summary_writeback_integration.py @@ -193,7 +193,6 @@ def _build_incoming_message( message.is_command = False message.is_notify = False message.processed_plain_text = text - message.display_message = text message.initialized = True return message diff --git a/pytests/common_test/test_database_migration_foundation.py b/pytests/common_test/test_database_migration_foundation.py index ffec2b6a..3e4955de 100644 --- a/pytests/common_test/test_database_migration_foundation.py +++ b/pytests/common_test/test_database_migration_foundation.py @@ -754,15 +754,6 @@ def test_default_bootstrapper_can_migrate_legacy_v1_database(tmp_path: Path) -> """ ) ).mappings().one() - action_row = connection.execute( - text( - """ - SELECT session_id, action_name, action_display_prompt - FROM action_records - WHERE action_id = 'action-1' - """ - ) - ).mappings().one() tool_row = connection.execute( text( """ @@ -796,6 +787,8 @@ def test_default_bootstrapper_can_migrate_legacy_v1_database(tmp_path: Path) -> assert snapshot.has_table("chat_sessions") assert snapshot.has_table("mai_messages") assert snapshot.has_table("tool_records") + assert not snapshot.has_table("action_records") + assert not snapshot.has_column("mai_messages", "display_message") unpacked_raw_content = msgpack.unpackb(message_row["raw_content"], raw=False) additional_config = json.loads(message_row["additional_config"]) @@ -807,9 +800,6 @@ def test_default_bootstrapper_can_migrate_legacy_v1_database(tmp_path: Path) -> assert message_row["processed_plain_text"] == "你好" assert unpacked_raw_content == [{"type": "text", "data": "你好呀"}] assert additional_config == {"priority_mode": "high", "source": "legacy"} - assert action_row["session_id"] == "session-1" - assert action_row["action_name"] == "search" - assert action_row["action_display_prompt"] == "执行搜索" assert tool_row["session_id"] == "session-1" assert tool_row["tool_name"] == "search" assert tool_row["tool_display_prompt"] == "执行搜索" @@ -848,8 +838,8 @@ def test_legacy_v1_migration_reports_table_progress(tmp_path: Path) -> None: migration_plan = manager.migrate(target_version=LATEST_SCHEMA_VERSION) - assert migration_plan.step_count() == 1 - assert len(reporter_instances) == 1 + assert migration_plan.step_count() == 3 + assert len(reporter_instances) == 3 reporter_events = reporter_instances[0].events assert reporter_events[0] == ("open", None, None, None) @@ -894,10 +884,6 @@ def test_initialize_database_calls_bootstrapper_before_create_all( del bind call_order.append("create_all") - def _fake_migrate_action_records() -> None: - """记录轻量补迁移调用。""" - call_order.append("migrate_action_records") - def _fake_finalize_database(migration_state: DatabaseMigrationState) -> None: """记录迁移收尾调用。 @@ -912,13 +898,11 @@ def test_initialize_database_calls_bootstrapper_before_create_all( monkeypatch.setattr(database_module._migration_bootstrapper, "prepare_database", _fake_prepare_database) monkeypatch.setattr(database_module._migration_bootstrapper, "finalize_database", _fake_finalize_database) monkeypatch.setattr(database_module.SQLModel.metadata, "create_all", _fake_create_all) - monkeypatch.setattr(database_module, "_migrate_action_records_to_tool_records", _fake_migrate_action_records) database_module.initialize_database() assert call_order == [ "prepare_database", "create_all", - "migrate_action_records", "finalize_database", ] diff --git a/pytests/test_maisaka_message_adapter.py b/pytests/test_maisaka_message_adapter.py index d872253c..de9130b1 100644 --- a/pytests/test_maisaka_message_adapter.py +++ b/pytests/test_maisaka_message_adapter.py @@ -41,7 +41,6 @@ def test_build_message_returns_session_message_with_maisaka_metadata() -> None: assert message.message_id == "maisaka-msg-1" assert message.timestamp == timestamp assert message.processed_plain_text == "展示消息内容" - assert message.display_message == "展示消息内容" assert message.raw_message is raw_message assert get_message_role(message) == "assistant" diff --git a/pytests/test_napcat_adapter_sdk.py b/pytests/test_napcat_adapter_sdk.py index c207531f..40c9aa4b 100644 --- a/pytests/test_napcat_adapter_sdk.py +++ b/pytests/test_napcat_adapter_sdk.py @@ -554,7 +554,6 @@ async def test_inbound_codec_resolves_at_to_group_cardname() -> None: ) assert message_dict["processed_plain_text"] == "@群昵称" - assert message_dict["display_message"] == "@群昵称" assert message_dict["raw_message"] == [ { "type": "at", @@ -599,7 +598,6 @@ async def test_inbound_codec_falls_back_to_qq_nickname_when_group_cardname_is_em ) assert message_dict["processed_plain_text"] == "@QQ昵称" - assert message_dict["display_message"] == "@QQ昵称" assert message_dict["raw_message"] == [ { "type": "at", @@ -640,7 +638,6 @@ async def test_inbound_codec_falls_back_to_stranger_nickname_when_group_profile_ ) assert message_dict["processed_plain_text"] == "@QQ昵称" - assert message_dict["display_message"] == "@QQ昵称" assert message_dict["raw_message"] == [ { "type": "at", diff --git a/pytests/test_plugin_message_utils_runtime.py b/pytests/test_plugin_message_utils_runtime.py index cb4b5341..82e63db2 100644 --- a/pytests/test_plugin_message_utils_runtime.py +++ b/pytests/test_plugin_message_utils_runtime.py @@ -31,7 +31,6 @@ def test_plugin_message_utils_preserves_binary_components_and_reply_metadata() - ) message.session_id = "qq:20001:10001" message.processed_plain_text = "binary payload" - message.display_message = "binary payload" message.raw_message = MessageSequence( components=[ TextComponent("hello"), diff --git a/pytests/test_send_service.py b/pytests/test_send_service.py index 5af18d1f..afad8dd6 100644 --- a/pytests/test_send_service.py +++ b/pytests/test_send_service.py @@ -298,7 +298,7 @@ async def test_private_outbound_message_preserves_bot_sender_and_receiver_user( outbound_message = send_service._build_outbound_session_message( message_sequence=MessageSequence(components=[TextComponent(text="你好")]), stream_id="test-session", - display_message="你好", + processed_plain_text="你好", ) assert outbound_message is not None @@ -329,7 +329,7 @@ async def test_group_outbound_message_preserves_bot_sender_and_target_group( outbound_message = send_service._build_outbound_session_message( message_sequence=MessageSequence(components=[TextComponent(text="大家好")]), stream_id="group-session", - display_message="大家好", + processed_plain_text="大家好", ) assert outbound_message is not None diff --git a/scripts/build_io_pairs.py b/scripts/build_io_pairs.py index 944d7671..69698ec4 100644 --- a/scripts/build_io_pairs.py +++ b/scripts/build_io_pairs.py @@ -114,14 +114,10 @@ def _merge_bucket_to_message(bucket: List[DatabaseMessages]) -> DatabaseMessages time=latest.time, chat_id=latest.chat_id, reply_to=latest.reply_to, - interest_value=latest.interest_value, - key_words=latest.key_words, - key_words_lite=latest.key_words_lite, is_mentioned=latest.is_mentioned, is_at=latest.is_at, reply_probability_boost=latest.reply_probability_boost, processed_plain_text="\n".join(merged_texts) if merged_texts else latest.processed_plain_text, - display_message=latest.display_message, priority_mode=latest.priority_mode, priority_info=latest.priority_info, additional_config=latest.additional_config, diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py index 2d5f3f3a..b9b3a2b4 100644 --- a/src/chat/utils/utils.py +++ b/src/chat/utils/utils.py @@ -8,8 +8,6 @@ import random import re import time -import jieba - from src.chat.message_receive.chat_manager import chat_manager as _chat_manager from src.chat.message_receive.message import SessionMessage from src.common.logger import get_logger @@ -912,110 +910,3 @@ def parse_keywords_string(keywords_input) -> list[str]: return [keywords_str] if keywords_str else [] -def cut_key_words(concept_name: str) -> list[str]: - """对概念名称进行jieba分词,并过滤掉关键词列表中的关键词""" - concept_name_tokens = list(jieba.cut(concept_name)) - - # 定义常见连词、停用词与标点 - conjunctions = {"和", "与", "及", "跟", "以及", "并且", "而且", "或", "或者", "并"} - stop_words = { - "的", - "了", - "呢", - "吗", - "吧", - "啊", - "哦", - "恩", - "嗯", - "呀", - "嘛", - "哇", - "在", - "是", - "很", - "也", - "又", - "就", - "都", - "还", - "更", - "最", - "被", - "把", - "给", - "对", - "和", - "与", - "及", - "跟", - "并", - "而且", - "或者", - "或", - "以及", - } - chinese_punctuations = set(",。!?、;:()【】《》“”‘’—…·-——,.!?;:()[]<>'\"/\\") - - # 清理空白并初步过滤纯标点 - cleaned_tokens = [] - for tok in concept_name_tokens: - t = tok.strip() - if not t: - continue - # 去除纯标点 - if all(ch in chinese_punctuations for ch in t): - continue - cleaned_tokens.append(t) - - # 合并连词两侧的词(仅当两侧都存在且不是标点/停用词时) - merged_tokens = [] - i = 0 - n = len(cleaned_tokens) - while i < n: - tok = cleaned_tokens[i] - if tok in conjunctions and merged_tokens and i + 1 < n: - left = merged_tokens[-1] - right = cleaned_tokens[i + 1] - # 左右都需要是有效词 - if ( - left - and right - and left not in conjunctions - and right not in conjunctions - and left not in stop_words - and right not in stop_words - and not all(ch in chinese_punctuations for ch in left) - and not all(ch in chinese_punctuations for ch in right) - ): - # 合并为一个新词,并替换掉左侧与跳过右侧 - combined = f"{left}{tok}{right}" - merged_tokens[-1] = combined - i += 2 - continue - # 常规推进 - merged_tokens.append(tok) - i += 1 - - # 二次过滤:去除停用词、单字符纯标点与无意义项 - result_tokens = [] - seen = set() - # ban_words = set(getattr(global_config.memory, "memory_ban_words", []) or []) - for tok in merged_tokens: - if tok in conjunctions: - # 独立连词丢弃 - continue - if tok in stop_words: - continue - # if tok in ban_words: - # continue - if all(ch in chinese_punctuations for ch in tok): - continue - if tok.strip() == "": - continue - if tok not in seen: - seen.add(tok) - result_tokens.append(tok) - - filtered_concept_name_tokens = result_tokens - return filtered_concept_name_tokens diff --git a/src/cli/maisaka_cli.py b/src/cli/maisaka_cli.py index 6206bfec..931eda97 100644 --- a/src/cli/maisaka_cli.py +++ b/src/cli/maisaka_cli.py @@ -79,7 +79,6 @@ class BufferCLI: ) message.raw_message = MessageSequence([TextComponent(text=user_text)]) message.processed_plain_text = user_text - message.display_message = user_text message.initialized = True return message diff --git a/src/common/data_models/mai_message_data_model.py b/src/common/data_models/mai_message_data_model.py index 814f642b..8ae9de9e 100644 --- a/src/common/data_models/mai_message_data_model.py +++ b/src/common/data_models/mai_message_data_model.py @@ -59,7 +59,6 @@ class MaiMessage(BaseDatabaseDataModel[Messages]): self.reply_to: Optional[str] = None self.processed_plain_text: Optional[str] = None - self.display_message: Optional[str] = None self.raw_message: MessageSequence @classmethod @@ -86,7 +85,6 @@ class MaiMessage(BaseDatabaseDataModel[Messages]): obj.reply_to = db_record.reply_to obj.session_id = db_record.session_id obj.processed_plain_text = db_record.processed_plain_text - obj.display_message = db_record.display_message obj.raw_message = MessageUtils.from_db_record_msg_to_MaiSeq(db_record.raw_content) return obj @@ -113,7 +111,6 @@ class MaiMessage(BaseDatabaseDataModel[Messages]): is_notify=self.is_notify, raw_content=MessageUtils.from_MaiSeq_to_db_record_msg(self.raw_message), processed_plain_text=self.processed_plain_text, - display_message=self.display_message, additional_config=additional_config, ) diff --git a/src/common/database/database_model.py b/src/common/database/database_model.py index 2a35ab0c..766567a0 100644 --- a/src/common/database/database_model.py +++ b/src/common/database/database_model.py @@ -51,7 +51,6 @@ class Messages(SQLModel, table=True): # 消息内容 raw_content: bytes = Field(sa_column=Column(LargeBinary)) # msgpack后的原始消息内容 processed_plain_text: Optional[str] = Field(default=None) # 平面化处理后的纯文本消息 - display_message: Optional[str] = Field(default=None) # 显示的消息内容(被放入Prompt) # 其他配置 additional_config: Optional[str] = Field(default=None) # 额外配置,JSON格式存储 diff --git a/src/common/database/migrations/builtin.py b/src/common/database/migrations/builtin.py index 501b3e3c..aa74e019 100644 --- a/src/common/database/migrations/builtin.py +++ b/src/common/database/migrations/builtin.py @@ -8,12 +8,14 @@ from .registry import MigrationRegistry from .resolver import BaseSchemaVersionDetector, SchemaVersionResolver from .schema import SQLiteSchemaInspector from .v2_to_v3 import migrate_v2_to_v3 +from .v3_to_v4 import migrate_v3_to_v4 from .version_store import SQLiteUserVersionStore EMPTY_SCHEMA_VERSION = 0 LEGACY_V1_SCHEMA_VERSION = 1 V2_SCHEMA_VERSION = 2 -LATEST_SCHEMA_VERSION = 3 +V3_SCHEMA_VERSION = 3 +LATEST_SCHEMA_VERSION = 4 _LEGACY_V1_EXCLUSIVE_TABLES = ( "chat_streams", @@ -78,9 +80,46 @@ class LatestSchemaVersionDetector(BaseSchemaVersionDetector): return None if not snapshot.has_column("person_info", "user_nickname"): return None + if snapshot.has_column("mai_messages", "display_message"): + return None return LATEST_SCHEMA_VERSION +class V3SchemaVersionDetector(BaseSchemaVersionDetector): + """v3 schema 结构探测器。""" + + @property + def name(self) -> str: + return "v3_schema_detector" + + def detect_version(self, snapshot: DatabaseSchemaSnapshot) -> Optional[int]: + """检测数据库是否为 v3 结构。""" + + if any(snapshot.has_table(table_name) for table_name in _LEGACY_V1_EXCLUSIVE_TABLES): + return None + if not all(snapshot.has_table(table_name) for table_name in _COMMON_MARKER_TABLES): + return None + if snapshot.has_table("action_records"): + return None + if snapshot.has_table("thinking_questions"): + return None + if snapshot.has_column("images", "emotion"): + return None + if not snapshot.has_column("images", "image_hash"): + return None + if not snapshot.has_column("images", "full_path"): + return None + if not snapshot.has_column("images", "image_type"): + return None + if not snapshot.has_column("chat_history", "session_id"): + return None + if not snapshot.has_column("person_info", "user_nickname"): + return None + if not snapshot.has_column("mai_messages", "display_message"): + return None + return V3_SCHEMA_VERSION + + class V2SchemaVersionDetector(BaseSchemaVersionDetector): """v2 schema 结构探测器。""" @@ -174,6 +213,7 @@ def build_default_schema_version_detectors() -> List[BaseSchemaVersionDetector]: return [ LatestSchemaVersionDetector(), + V3SchemaVersionDetector(), V2SchemaVersionDetector(), LegacyV1SchemaDetector(), ] @@ -211,10 +251,17 @@ def build_default_migration_registry() -> MigrationRegistry: ), MigrationStep( version_from=V2_SCHEMA_VERSION, - version_to=LATEST_SCHEMA_VERSION, + version_to=V3_SCHEMA_VERSION, name="v2_to_v3", description="移除废弃表,并将 emoji 标签统一收敛到 description 字段。", handler=migrate_v2_to_v3, ), + MigrationStep( + version_from=V3_SCHEMA_VERSION, + version_to=LATEST_SCHEMA_VERSION, + name="v3_to_v4", + description="移除 mai_messages.display_message 弃用列。", + handler=migrate_v3_to_v4, + ), ] ) diff --git a/src/common/database/migrations/legacy_v1_to_v2.py b/src/common/database/migrations/legacy_v1_to_v2.py index c7cc9cb8..1ce74941 100644 --- a/src/common/database/migrations/legacy_v1_to_v2.py +++ b/src/common/database/migrations/legacy_v1_to_v2.py @@ -489,9 +489,6 @@ def _build_legacy_message_additional_config(row: Mapping[str, Any]) -> Optional[ legacy_fields = { "intercept_message_level": row.get("intercept_message_level"), - "interest_value": row.get("interest_value"), - "key_words": row.get("key_words"), - "key_words_lite": row.get("key_words_lite"), "priority_info": row.get("priority_info"), "priority_mode": row.get("priority_mode"), "selected_expressions": row.get("selected_expressions"), diff --git a/src/common/database/migrations/v3_to_v4.py b/src/common/database/migrations/v3_to_v4.py new file mode 100644 index 00000000..24267349 --- /dev/null +++ b/src/common/database/migrations/v3_to_v4.py @@ -0,0 +1,155 @@ +"""v3 schema 升级到 v4 的迁移逻辑。""" + +from sqlalchemy import text +from sqlalchemy.engine import Connection + +from src.common.logger import get_logger + +from .exceptions import DatabaseMigrationExecutionError +from .models import MigrationExecutionContext +from .schema import SQLiteSchemaInspector + +logger = get_logger("database_migration") + +_V3_MESSAGES_BACKUP_TABLE = "__v3_mai_messages_backup" +_V4_MESSAGES_CREATE_SQL = """ +CREATE TABLE mai_messages ( + id INTEGER NOT NULL, + message_id VARCHAR(255) NOT NULL, + timestamp DATETIME, + platform VARCHAR(100) NOT NULL, + user_id VARCHAR(255) NOT NULL, + user_nickname VARCHAR(255) NOT NULL, + user_cardname VARCHAR(255), + group_id VARCHAR(255), + group_name VARCHAR(255), + is_mentioned BOOLEAN NOT NULL, + is_at BOOLEAN NOT NULL, + session_id VARCHAR(255) NOT NULL, + reply_to VARCHAR(255), + is_emoji BOOLEAN NOT NULL, + is_picture BOOLEAN NOT NULL, + is_command BOOLEAN NOT NULL, + is_notify BOOLEAN NOT NULL, + raw_content BLOB, + processed_plain_text VARCHAR, + additional_config VARCHAR, + PRIMARY KEY (id) +) +""" +_V4_MESSAGES_INDEX_STATEMENTS = ( + "CREATE INDEX ix_mai_messages_group_id ON mai_messages (group_id)", + "CREATE INDEX ix_mai_messages_message_id ON mai_messages (message_id)", + "CREATE INDEX ix_mai_messages_platform ON mai_messages (platform)", + "CREATE INDEX ix_mai_messages_session_id ON mai_messages (session_id)", + "CREATE INDEX ix_mai_messages_user_id ON mai_messages (user_id)", + "CREATE INDEX ix_mai_messages_user_nickname ON mai_messages (user_nickname)", +) + + +def migrate_v3_to_v4(context: MigrationExecutionContext) -> None: + """执行 v3 到 v4 的 schema 迁移。""" + + connection = context.connection + total_records = _count_table_rows(connection, "mai_messages") + context.start_progress( + total_tables=1, + total_records=total_records, + description="v3 -> v4 迁移进度", + table_unit_name="表", + record_unit_name="记录", + ) + + migrated_message_rows = _migrate_messages_table_to_v4(connection) + context.advance_progress( + records=migrated_message_rows, + completed_tables=1, + item_name="mai_messages", + ) + + logger.info(f"v3 -> v4 数据库迁移完成: mai_messages重建={migrated_message_rows}") + + +def _count_table_rows(connection: Connection, table_name: str) -> int: + """统计表记录数,不存在时返回 0。""" + + schema_inspector = SQLiteSchemaInspector() + if not schema_inspector.table_exists(connection, table_name): + return 0 + row = connection.execute(text(f'SELECT COUNT(*) FROM "{table_name}"')).first() + return int(row[0]) if row else 0 + + +def _migrate_messages_table_to_v4(connection: Connection) -> int: + """重建 ``mai_messages`` 表并移除弃用的 ``display_message`` 列。""" + + schema_inspector = SQLiteSchemaInspector() + if not schema_inspector.table_exists(connection, "mai_messages"): + return 0 + if not schema_inspector.get_table_schema(connection, "mai_messages").has_column("display_message"): + return _count_table_rows(connection, "mai_messages") + if schema_inspector.table_exists(connection, _V3_MESSAGES_BACKUP_TABLE): + raise DatabaseMigrationExecutionError( + f"检测到残留备份表 {_V3_MESSAGES_BACKUP_TABLE},无法安全执行 v3 -> v4 mai_messages 迁移。" + ) + + connection.exec_driver_sql(f'ALTER TABLE "mai_messages" RENAME TO "{_V3_MESSAGES_BACKUP_TABLE}"') + connection.exec_driver_sql(_V4_MESSAGES_CREATE_SQL) + + connection.execute( + text( + f""" + INSERT INTO mai_messages ( + id, + message_id, + timestamp, + platform, + user_id, + user_nickname, + user_cardname, + group_id, + group_name, + is_mentioned, + is_at, + session_id, + reply_to, + is_emoji, + is_picture, + is_command, + is_notify, + raw_content, + processed_plain_text, + additional_config + ) + SELECT + id, + message_id, + timestamp, + platform, + user_id, + user_nickname, + user_cardname, + group_id, + group_name, + is_mentioned, + is_at, + session_id, + reply_to, + is_emoji, + is_picture, + is_command, + is_notify, + raw_content, + COALESCE(NULLIF(processed_plain_text, ''), display_message), + additional_config + FROM "{_V3_MESSAGES_BACKUP_TABLE}" + ORDER BY id + """ + ) + ) + + migrated_rows = _count_table_rows(connection, "mai_messages") + connection.exec_driver_sql(f'DROP TABLE "{_V3_MESSAGES_BACKUP_TABLE}"') + for statement in _V4_MESSAGES_INDEX_STATEMENTS: + connection.exec_driver_sql(statement) + return migrated_rows diff --git a/src/maisaka/builtin_tool/reply.py b/src/maisaka/builtin_tool/reply.py index e53f8adf..5d247b34 100644 --- a/src/maisaka/builtin_tool/reply.py +++ b/src/maisaka/builtin_tool/reply.py @@ -207,7 +207,7 @@ async def handle_tool( sent_message = await send_service._send_to_target_with_message( message_sequence=reply_sequence, stream_id=tool_ctx.runtime.session_id, - display_message=segment, + processed_plain_text=segment, set_reply=segment_set_quote, reply_message=target_message if segment_set_quote else None, selected_expressions=reply_result.selected_expression_ids or None, diff --git a/src/maisaka/reasoning_engine.py b/src/maisaka/reasoning_engine.py index e2282326..117673ea 100644 --- a/src/maisaka/reasoning_engine.py +++ b/src/maisaka/reasoning_engine.py @@ -53,7 +53,6 @@ if TYPE_CHECKING: logger = get_logger("maisaka_reasoning_engine") TIMING_GATE_CONTEXT_DROP_HEAD_RATIO = 0.7 -TIMING_GATE_MAX_TOKENS = 384 TIMING_GATE_MAX_ATTEMPTS = 3 TIMING_GATE_TOOL_NAMES = {"continue", "no_reply", "wait"} HISTORY_SILENT_TOOL_NAMES = {"finish"} @@ -140,7 +139,6 @@ class MaisakaReasoningEngine: system_prompt=system_prompt, request_kind="timing_gate", interrupt_flag=None, - max_tokens=TIMING_GATE_MAX_TOKENS, tool_definitions=tool_definitions, ) diff --git a/src/plugin_runtime/capabilities/core.py b/src/plugin_runtime/capabilities/core.py index 25f2a6b9..f445b4fc 100644 --- a/src/plugin_runtime/capabilities/core.py +++ b/src/plugin_runtime/capabilities/core.py @@ -190,7 +190,7 @@ class RuntimeCoreCapabilityMixin: content=command, stream_id=stream_id, storage_message=bool(args.get("storage_message", True)), - display_message=str(args.get("display_message", "")), + processed_plain_text=str(args.get("processed_plain_text", "")), sync_to_maisaka_history=sync_to_maisaka_history, maisaka_source_kind=maisaka_source_kind, ) @@ -228,7 +228,7 @@ class RuntimeCoreCapabilityMixin: message_type=message_type, content=content, stream_id=stream_id, - display_message=str(args.get("display_message", "")), + processed_plain_text=str(args.get("processed_plain_text", "")), typing=bool(args.get("typing", False)), storage_message=bool(args.get("storage_message", True)), sync_to_maisaka_history=sync_to_maisaka_history, diff --git a/src/plugin_runtime/capabilities/data.py b/src/plugin_runtime/capabilities/data.py index 3736230a..616dc9b3 100644 --- a/src/plugin_runtime/capabilities/data.py +++ b/src/plugin_runtime/capabilities/data.py @@ -296,11 +296,13 @@ class RuntimeDataCapabilityMixin: return {"success": False, "error": str(e)} @staticmethod - def _serialize_messages(messages: list) -> List[Any]: + def _serialize_messages(messages: list, include_binary_data: bool = True) -> List[Any]: result: List[Any] = [] for msg in messages: if all(hasattr(msg, attr) for attr in ("message_id", "timestamp", "platform", "message_info", "raw_message")): - result.append(dict(PluginMessageUtils._session_message_to_dict(msg))) + result.append( + dict(PluginMessageUtils._session_message_to_dict(msg, include_binary_data=include_binary_data)) + ) elif hasattr(msg, "model_dump"): result.append(msg.model_dump()) elif hasattr(msg, "__dict__"): @@ -321,7 +323,12 @@ class RuntimeDataCapabilityMixin: message_id=message_id, chat_id=str(args.get("chat_id") or args.get("stream_id") or "").strip() or None, ) - serialized_message = self._serialize_messages([message])[0] if message is not None else None + include_binary_data = bool(args.get("include_binary_data", False)) + serialized_message = ( + self._serialize_messages([message], include_binary_data=include_binary_data)[0] + if message is not None + else None + ) return {"success": True, "message": serialized_message} except Exception as e: logger.error(f"[cap.message.get_by_id] 执行失败: {e}", exc_info=True) @@ -338,7 +345,13 @@ class RuntimeDataCapabilityMixin: limit_mode=args.get("limit_mode", "latest"), filter_mai=args.get("filter_mai", False), ) - return {"success": True, "messages": self._serialize_messages(messages)} + return { + "success": True, + "messages": self._serialize_messages( + messages, + include_binary_data=bool(args.get("include_binary_data", False)), + ), + } except Exception as e: logger.error(f"[cap.message.get_by_time] 执行失败: {e}", exc_info=True) return {"success": False, "error": str(e)} @@ -360,7 +373,13 @@ class RuntimeDataCapabilityMixin: filter_mai=args.get("filter_mai", False), filter_command=args.get("filter_command", False), ) - return {"success": True, "messages": self._serialize_messages(messages)} + return { + "success": True, + "messages": self._serialize_messages( + messages, + include_binary_data=bool(args.get("include_binary_data", False)), + ), + } except Exception as e: logger.error(f"[cap.message.get_by_time_in_chat] 执行失败: {e}", exc_info=True) return {"success": False, "error": str(e)} @@ -385,7 +404,13 @@ class RuntimeDataCapabilityMixin: limit_mode=args.get("limit_mode", "latest"), filter_mai=args.get("filter_mai", False), ) - return {"success": True, "messages": self._serialize_messages(messages)} + return { + "success": True, + "messages": self._serialize_messages( + messages, + include_binary_data=bool(args.get("include_binary_data", False)), + ), + } except Exception as e: logger.error(f"[cap.message.get_recent] 执行失败: {e}", exc_info=True) return {"success": False, "error": str(e)} diff --git a/src/plugin_runtime/host/message_utils.py b/src/plugin_runtime/host/message_utils.py index cc31824b..7f72fff3 100644 --- a/src/plugin_runtime/host/message_utils.py +++ b/src/plugin_runtime/host/message_utils.py @@ -56,12 +56,14 @@ class MessageDict(TypedDict, total=False): session_id: str reply_to: Optional[str] processed_plain_text: Optional[str] - display_message: Optional[str] class PluginMessageUtils: @staticmethod - def _message_sequence_to_dict(message_sequence: MessageSequence) -> List[Dict[str, Any]]: + def _message_sequence_to_dict( + message_sequence: MessageSequence, + include_binary_data: bool = True, + ) -> List[Dict[str, Any]]: """将消息组件序列转换为插件运行时使用的字典结构。 Args: @@ -70,10 +72,16 @@ class PluginMessageUtils: Returns: List[Dict[str, Any]]: 供插件运行时协议使用的消息段字典列表。 """ - return [PluginMessageUtils._component_to_dict(component) for component in message_sequence.components] + return [ + PluginMessageUtils._component_to_dict(component, include_binary_data=include_binary_data) + for component in message_sequence.components + ] @staticmethod - def _component_to_dict(component: StandardMessageComponents) -> Dict[str, Any]: + def _component_to_dict( + component: StandardMessageComponents, + include_binary_data: bool = True, + ) -> Dict[str, Any]: """将单个消息组件转换为插件运行时字典结构。 Args: @@ -91,8 +99,10 @@ class PluginMessageUtils: "data": component.content, "hash": component.binary_hash, } - if component.binary_data: - serialized["binary_data_base64"] = base64.b64encode(component.binary_data).decode("utf-8") + if include_binary_data and ( + binary_data_base64 := PluginMessageUtils._binary_component_to_base64(component, "image") + ): + serialized["binary_data_base64"] = binary_data_base64 return serialized if isinstance(component, EmojiComponent): @@ -101,8 +111,10 @@ class PluginMessageUtils: "data": component.content, "hash": component.binary_hash, } - if component.binary_data: - serialized["binary_data_base64"] = base64.b64encode(component.binary_data).decode("utf-8") + if include_binary_data and ( + binary_data_base64 := PluginMessageUtils._binary_component_to_base64(component, "emoji") + ): + serialized["binary_data_base64"] = binary_data_base64 return serialized if isinstance(component, VoiceComponent): @@ -111,7 +123,7 @@ class PluginMessageUtils: "data": component.content, "hash": component.binary_hash, } - if component.binary_data: + if include_binary_data and component.binary_data: serialized["binary_data_base64"] = base64.b64encode(component.binary_data).decode("utf-8") return serialized @@ -140,13 +152,53 @@ class PluginMessageUtils: if isinstance(component, ForwardNodeComponent): return { "type": "forward", - "data": [PluginMessageUtils._forward_component_to_dict(item) for item in component.forward_components], + "data": [ + PluginMessageUtils._forward_component_to_dict(item, include_binary_data=include_binary_data) + for item in component.forward_components + ], } return {"type": "dict", "data": component.data} @staticmethod - def _forward_component_to_dict(component: ForwardComponent) -> Dict[str, Any]: + def _binary_component_to_base64(component: Any, image_type: str) -> str: + """将图片或表情组件转换为 Base64,必要时通过 hash 从图片库加载文件。""" + + if component.binary_data: + return base64.b64encode(component.binary_data).decode("utf-8") + + binary_hash = str(component.binary_hash or "").strip() + if not binary_hash: + return "" + + try: + from pathlib import Path + + from sqlmodel import select + + from src.common.database.database import get_db_session + from src.common.database.database_model import Images, ImageType + + target_image_type = ImageType.IMAGE if image_type == "image" else ImageType.EMOJI + with get_db_session(auto_commit=False) as db: + statement = select(Images).filter_by(image_hash=binary_hash, image_type=target_image_type).limit(1) + image_record = db.exec(statement).first() + if image_record is None or image_record.no_file_flag: + return "" + + image_path = Path(image_record.full_path) + if not image_path.is_file(): + return "" + return base64.b64encode(image_path.read_bytes()).decode("utf-8") + except Exception as exc: + logger.debug("通过 hash 加载历史媒体失败: type=%s hash=%s error=%s", image_type, binary_hash, exc) + return "" + + @staticmethod + def _forward_component_to_dict( + component: ForwardComponent, + include_binary_data: bool = True, + ) -> Dict[str, Any]: """将单个转发节点组件转换为字典结构。 Args: @@ -160,7 +212,10 @@ class PluginMessageUtils: "user_nickname": component.user_nickname, "user_cardname": component.user_cardname, "message_id": component.message_id, - "content": [PluginMessageUtils._component_to_dict(item) for item in component.content], + "content": [ + PluginMessageUtils._component_to_dict(item, include_binary_data=include_binary_data) + for item in component.content + ], } @staticmethod @@ -341,7 +396,10 @@ class PluginMessageUtils: ) @staticmethod - def _session_message_to_dict(session_message: SessionMessage) -> MessageDict: + def _session_message_to_dict( + session_message: SessionMessage, + include_binary_data: bool = True, + ) -> MessageDict: """ 将 SessionMessage 对象转换为字典格式(复用 MessageSequence.to_dict 方法) @@ -357,7 +415,10 @@ class PluginMessageUtils: timestamp=str(session_message.timestamp.timestamp()), # 转换为时间戳字符串 platform=session_message.platform, message_info=PluginMessageUtils._message_info_to_dict(session_message.message_info), - raw_message=PluginMessageUtils._message_sequence_to_dict(session_message.raw_message), + raw_message=PluginMessageUtils._message_sequence_to_dict( + session_message.raw_message, + include_binary_data=include_binary_data, + ), is_mentioned=session_message.is_mentioned, is_at=session_message.is_at, is_emoji=session_message.is_emoji, @@ -372,8 +433,6 @@ class PluginMessageUtils: message_dict["reply_to"] = session_message.reply_to if session_message.processed_plain_text is not None: message_dict["processed_plain_text"] = session_message.processed_plain_text - if session_message.display_message is not None: - message_dict["display_message"] = session_message.display_message return message_dict @@ -485,8 +544,5 @@ class PluginMessageUtils: session_message.processed_plain_text, str ): session_message.processed_plain_text = None - session_message.display_message = message_dict.get("display_message") - if session_message.display_message is not None and not isinstance(session_message.display_message, str): - session_message.display_message = None return session_message diff --git a/src/services/message_service.py b/src/services/message_service.py index 4a0d2b6f..257ff72f 100644 --- a/src/services/message_service.py +++ b/src/services/message_service.py @@ -43,8 +43,6 @@ def _build_readable_line( def _normalize_messages(messages: List[SessionMessage]) -> List[SessionMessage]: normalized: List[SessionMessage] = [] for message in messages: - if not message.processed_plain_text: - message.processed_plain_text = message.display_message or "" normalized.append(message) return normalized diff --git a/src/services/send_service.py b/src/services/send_service.py index c6eedffa..bd2e986d 100644 --- a/src/services/send_service.py +++ b/src/services/send_service.py @@ -73,9 +73,9 @@ def register_send_service_hook_specs(registry: HookSpecRegistry) -> List[HookSpe "type": "string", "description": "目标会话 ID。", }, - "display_message": { + "processed_plain_text": { "type": "string", - "description": "展示层文本。", + "description": "可选的预处理纯文本内容。", }, "typing": { "type": "boolean", @@ -97,7 +97,7 @@ def register_send_service_hook_specs(registry: HookSpecRegistry) -> List[HookSpe required=[ "message", "stream_id", - "display_message", + "processed_plain_text", "typing", "set_reply", "storage_message", @@ -494,7 +494,7 @@ def _build_outbound_log_preview(message: SessionMessage, max_length: int = 160) Returns: str: 适用于日志展示的消息摘要。 """ - preview_text = (message.processed_plain_text or message.display_message or "").strip() + preview_text = (message.processed_plain_text or "").strip() if not preview_text: preview_text = f"[{_describe_message_sequence(message.raw_message)}]" @@ -507,7 +507,7 @@ def _build_outbound_log_preview(message: SessionMessage, max_length: int = 160) def _build_outbound_session_message( message_sequence: MessageSequence, stream_id: str, - display_message: str = "", + processed_plain_text: str = "", reply_message: Optional[MaiMessage] = None, selected_expressions: Optional[List[int]] = None, ) -> Optional[SessionMessage]: @@ -516,7 +516,7 @@ def _build_outbound_session_message( Args: message_sequence: 待发送的消息组件序列。 stream_id: 目标会话 ID。 - display_message: 用于界面展示的文本内容。 + processed_plain_text: 可选的预处理纯文本内容。 reply_message: 被回复的锚点消息。 selected_expressions: 可选的表情候选索引列表。 @@ -571,7 +571,7 @@ def _build_outbound_session_message( ) outbound_message.raw_message = _clone_message_sequence(message_sequence) outbound_message.session_id = target_stream.session_id - outbound_message.display_message = display_message + outbound_message.processed_plain_text = processed_plain_text.strip() or _build_processed_plain_text(outbound_message) outbound_message.reply_to = anchor_message.message_id if anchor_message is not None else None message_flags = _detect_outbound_message_flags(outbound_message.raw_message) outbound_message.is_emoji = message_flags["is_emoji"] @@ -619,7 +619,8 @@ async def _prepare_message_for_platform_io( raise ValueError("set_reply=True 时必须提供 reply_message_id") _ensure_reply_component(message, reply_message_id) - message.processed_plain_text = _build_processed_plain_text(message) + if set_reply or not message.processed_plain_text: + message.processed_plain_text = _build_processed_plain_text(message) if typing: typing_time = calculate_typing_time( input_string=message.processed_plain_text or "", @@ -935,7 +936,7 @@ async def send_session_message( async def _send_to_target( message_sequence: MessageSequence, stream_id: str, - display_message: str = "", + processed_plain_text: str = "", typing: bool = False, set_reply: bool = False, reply_message: Optional[MaiMessage] = None, @@ -950,7 +951,7 @@ async def _send_to_target( await _send_to_target_with_message( message_sequence=message_sequence, stream_id=stream_id, - display_message=display_message, + processed_plain_text=processed_plain_text, typing=typing, set_reply=set_reply, reply_message=reply_message, @@ -967,7 +968,7 @@ async def _send_to_target( async def _send_to_target_with_message( message_sequence: MessageSequence, stream_id: str, - display_message: str = "", + processed_plain_text: str = "", typing: bool = False, set_reply: bool = False, reply_message: Optional[MaiMessage] = None, @@ -982,7 +983,7 @@ async def _send_to_target_with_message( Args: message_sequence: 待发送的消息组件序列。 stream_id: 目标会话 ID。 - display_message: 用于界面展示的文本内容。 + processed_plain_text: 可选的预处理纯文本内容。 typing: 是否显示输入中状态。 set_reply: 是否在发送时附带引用回复。 reply_message: 被回复的消息对象。 @@ -1004,7 +1005,7 @@ async def _send_to_target_with_message( outbound_message = _build_outbound_session_message( message_sequence=message_sequence, stream_id=stream_id, - display_message=display_message, + processed_plain_text=processed_plain_text, reply_message=reply_message, selected_expressions=selected_expressions, ) @@ -1015,7 +1016,7 @@ async def _send_to_target_with_message( "send_service.after_build_message", outbound_message, stream_id=stream_id, - display_message=display_message, + processed_plain_text=processed_plain_text, typing=typing, set_reply=set_reply, storage_message=storage_message, @@ -1068,7 +1069,6 @@ async def text_to_stream_with_message( return await _send_to_target_with_message( message_sequence=MessageSequence(components=[TextComponent(text=text)]), stream_id=stream_id, - display_message="", typing=typing, set_reply=set_reply, reply_message=reply_message, @@ -1133,7 +1133,6 @@ async def emoji_to_stream_with_message( return await _send_to_target_with_message( message_sequence=_build_message_sequence_from_custom_message("emoji", emoji_base64), stream_id=stream_id, - display_message="", typing=False, storage_message=storage_message, set_reply=set_reply, @@ -1202,7 +1201,6 @@ async def image_to_stream( return await _send_to_target( message_sequence=_build_message_sequence_from_custom_message("image", image_base64), stream_id=stream_id, - display_message="", typing=False, storage_message=storage_message, set_reply=set_reply, @@ -1216,7 +1214,7 @@ async def custom_to_stream( message_type: str, content: str | Dict[str, Any], stream_id: str, - display_message: str = "", + processed_plain_text: str = "", typing: bool = False, reply_message: Optional[MaiMessage] = None, set_reply: bool = False, @@ -1231,7 +1229,7 @@ async def custom_to_stream( message_type: 自定义消息类型。 content: 自定义消息内容。 stream_id: 目标会话 ID。 - display_message: 用于展示的文本内容。 + processed_plain_text: 可选的预处理纯文本内容。 typing: 是否显示输入中状态。 reply_message: 被回复的消息对象。 set_reply: 是否附带引用回复。 @@ -1244,7 +1242,7 @@ async def custom_to_stream( return await _send_to_target( message_sequence=_build_message_sequence_from_custom_message(message_type, content), stream_id=stream_id, - display_message=display_message, + processed_plain_text=processed_plain_text, typing=typing, reply_message=reply_message, set_reply=set_reply, @@ -1258,7 +1256,7 @@ async def custom_to_stream( async def custom_reply_set_to_stream( reply_set: MessageSequence, stream_id: str, - display_message: str = "", + processed_plain_text: str = "", typing: bool = False, reply_message: Optional[MaiMessage] = None, set_reply: bool = False, @@ -1272,7 +1270,7 @@ async def custom_reply_set_to_stream( Args: reply_set: 待发送的消息组件序列。 stream_id: 目标会话 ID。 - display_message: 用于展示的文本内容。 + processed_plain_text: 可选的预处理纯文本内容。 typing: 是否显示输入中状态。 reply_message: 被回复的消息对象。 set_reply: 是否附带引用回复。 @@ -1285,7 +1283,7 @@ async def custom_reply_set_to_stream( return await _send_to_target( message_sequence=reply_set, stream_id=stream_id, - display_message=display_message, + processed_plain_text=processed_plain_text, typing=typing, reply_message=reply_message, set_reply=set_reply, diff --git a/src/webui/routers/chat/service.py b/src/webui/routers/chat/service.py index 168d3190..7a35a8c3 100644 --- a/src/webui/routers/chat/service.py +++ b/src/webui/routers/chat/service.py @@ -112,7 +112,7 @@ class ChatHistoryManager: return { "id": msg.message_id, "type": "bot" if is_bot else "user", - "content": msg.processed_plain_text or msg.display_message or "", + "content": msg.processed_plain_text or "", "timestamp": msg.timestamp.timestamp(), "sender_name": user_info.user_nickname or (global_config.bot.nickname if is_bot else "未知用户"), "sender_id": "bot" if is_bot else user_id, @@ -175,11 +175,7 @@ class ChatHistoryManager: user_info = target_msg.message_info.user_info if not has_content: - content_text = ( - target_msg.processed_plain_text - or target_msg.display_message - or "" - ) + content_text = target_msg.processed_plain_text or "" data["target_message_content"] = content_text if not has_sender: data["target_message_sender_id"] = user_info.user_id or ""