diff --git a/pytests/A_memorix_test/test_legacy_config_migration.py b/pytests/A_memorix_test/test_legacy_config_migration.py
index c382e4f3..15599241 100644
--- a/pytests/A_memorix_test/test_legacy_config_migration.py
+++ b/pytests/A_memorix_test/test_legacy_config_migration.py
@@ -33,3 +33,34 @@ def test_legacy_learning_list_with_numeric_fourth_column_is_migrated():
             "enable_jargon_learning": False,
         },
     ]
+
+
+def test_visual_multimodal_replyer_is_migrated_to_replyer_mode() -> None:
+    payload = {
+        "visual": {
+            "multimodal_replyer": True,
+        }
+    }
+
+    result = try_migrate_legacy_bot_config_dict(payload)
+
+    assert result.migrated is True
+    assert "visual.multimodal_replyer_moved_to_visual.replyer_mode" in result.reason
+    assert result.data["visual"]["replyer_mode"] == "multimodal"
+    assert "multimodal_replyer" not in result.data["visual"]
+
+
+def test_chat_replyer_generator_type_is_migrated_to_replyer_mode() -> None:
+    payload = {
+        "chat": {
+            "replyer_generator_type": "legacy",
+        },
+        "visual": {},
+    }
+
+    result = try_migrate_legacy_bot_config_dict(payload)
+
+    assert result.migrated is True
+    assert "chat.replyer_generator_type_moved_to_visual.replyer_mode" in result.reason
+    assert result.data["visual"]["replyer_mode"] == "text"
+    assert "replyer_generator_type" not in result.data["chat"]
diff --git a/src/chat/replyer/maisaka_generator.py b/src/chat/replyer/maisaka_generator.py
index 166b03c6..45fd722a 100644
--- a/src/chat/replyer/maisaka_generator.py
+++ b/src/chat/replyer/maisaka_generator.py
@@ -1,10 +1,8 @@
-from datetime import datetime
 from typing import Any, Callable, Optional
 
 from src.chat.message_receive.chat_manager import BotChatSession
 from src.common.prompt_i18n import load_prompt
 from src.config.config import global_config
-from src.maisaka.context_messages import SessionBackedMessage
 from src.services.llm_service import LLMServiceClient
 
 from .maisaka_generator_base import BaseMaisakaReplyGenerator
@@ -26,9 +24,6 @@ class MaisakaReplyGenerator(BaseMaisakaReplyGenerator):
             request_type=request_type,
             llm_client_cls=llm_client_cls or LLMServiceClient,
             load_prompt_func=load_prompt_func or load_prompt,
-            enable_visual_message=(
-                global_config.visual.multimodal_replyer
-                if enable_visual_message is None
-                else enable_visual_message
-            ),
+            enable_visual_message=enable_visual_message,
+            replyer_mode=global_config.visual.replyer_mode,
         )
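The two new tests pin down the legacy-value mapping: a boolean `visual.multimodal_replyer` and a string `chat.replyer_generator_type` both normalize to the new `visual.replyer_mode`. A standalone sketch of that mapping (illustrative only; it mirrors `_parse_replyer_mode` in `src/config/legacy_migration.py` further down):

```python
# Illustrative sketch, not project code: the normalization the tests assert.
from typing import Any, Optional

def parse_replyer_mode(value: Any) -> Optional[str]:
    if isinstance(value, bool):        # old visual.multimodal_replyer switch
        return "multimodal" if value else "text"
    if not isinstance(value, str):
        return None
    normalized = value.strip().lower()
    if normalized in {"text", "multimodal", "auto"}:
        return normalized              # already a valid mode string
    if normalized == "legacy":
        return "text"                  # the old "legacy" generator was text-only
    return None                        # unknown values are left unmigrated

assert parse_replyer_mode(True) == "multimodal"
assert parse_replyer_mode("legacy") == "text"
assert parse_replyer_mode("AUTO") == "auto"
assert parse_replyer_mode(42) is None
```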
replyer_mode: Literal["text", "multimodal", "auto"], ) -> None: self.chat_stream = chat_stream self.request_type = request_type self._llm_client_cls = llm_client_cls self._load_prompt = load_prompt_func self._enable_visual_message = enable_visual_message + self._replyer_mode = replyer_mode self.express_model = llm_client_cls( task_name="replyer", request_type=request_type, @@ -265,8 +268,9 @@ class BaseMaisakaReplyGenerator: def _build_visual_user_message( self, message: SessionBackedMessage, + enable_visual_message: bool, ) -> Optional[Message]: - if not self._enable_visual_message: + if not enable_visual_message: return None raw_message = clone_message_sequence(message.raw_message) @@ -283,7 +287,11 @@ class BaseMaisakaReplyGenerator: ) return visual_message.to_llm_message() - def _build_history_messages(self, chat_history: List[LLMContextMessage]) -> List[Message]: + def _build_history_messages( + self, + chat_history: List[LLMContextMessage], + enable_visual_message: bool, + ) -> List[Message]: bot_nickname = global_config.bot.nickname.strip() or "Bot" default_user_name = global_config.maisaka.cli_user_name.strip() or "User" messages: List[Message] = [] @@ -300,7 +308,7 @@ class BaseMaisakaReplyGenerator: ) continue - visual_message = self._build_visual_user_message(message) + visual_message = self._build_visual_user_message(message, enable_visual_message) if visual_message is not None: messages.append(visual_message) continue @@ -337,6 +345,7 @@ class BaseMaisakaReplyGenerator: reply_reason: str, expression_habits: str = "", stream_id: Optional[str] = None, + enable_visual_message: bool = False, ) -> List[Message]: messages: List[Message] = [] system_prompt = self._build_system_prompt( @@ -348,10 +357,21 @@ class BaseMaisakaReplyGenerator: instruction = self._build_reply_instruction() messages.append(MessageBuilder().set_role(RoleType.System).add_text_content(system_prompt).build()) - messages.extend(self._build_history_messages(chat_history)) + messages.extend(self._build_history_messages(chat_history, enable_visual_message)) messages.append(MessageBuilder().set_role(RoleType.User).add_text_content(instruction).build()) return messages + def _resolve_enable_visual_message(self, model_info: Optional[ModelInfo] = None) -> bool: + if self._enable_visual_message is not None: + return self._enable_visual_message + if self._replyer_mode == "multimodal": + if model_info is not None and not model_info.visual: + raise ValueError(f"replyer_mode=multimodal,但模型 '{model_info.name}' 未开启 visual,无法使用多模态 replyer") + return True + if self._replyer_mode == "text": + return False + return bool(model_info.visual) if model_info is not None else False + def _resolve_session_id(self, stream_id: Optional[str]) -> str: if stream_id: return stream_id @@ -494,7 +514,19 @@ class BaseMaisakaReplyGenerator: show_replyer_prompt = bool(getattr(global_config.debug, "show_replyer_prompt", False)) show_replyer_reasoning = bool(getattr(global_config.debug, "show_replyer_reasoning", False)) - def message_factory(_client: object) -> List[Message]: + def message_factory(_client: object, model_info: Optional[ModelInfo] = None) -> List[Message]: + nonlocal prompt_ms, prompt_preview, request_messages + prompt_started_at = time.perf_counter() + request_messages = self._build_request_messages( + chat_history=filtered_history, + reply_message=reply_message, + reply_reason=reply_reason or "", + expression_habits=merged_expression_habits, + stream_id=stream_id, + 
diff --git a/src/chat/replyer/replyer_manager.py b/src/chat/replyer/replyer_manager.py
index 8afb8c20..bd9bf9d3 100644
--- a/src/chat/replyer/replyer_manager.py
+++ b/src/chat/replyer/replyer_manager.py
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any, Dict, Optional
+from typing import Any, Dict, Optional
 
 from src.chat.message_receive.chat_manager import BotChatSession, chat_manager as _chat_manager
 from src.config.config import global_config
@@ -6,10 +6,6 @@ from src.common.logger import get_logger
 
 from .maisaka_generator import MaisakaReplyGenerator
 
-if TYPE_CHECKING:
-    from src.chat.replyer.group_generator import DefaultReplyer
-    from src.chat.replyer.private_generator import PrivateReplyer
-
 logger = get_logger("ReplyerManager")
 
 
@@ -22,15 +18,15 @@ class ReplyerManager:
     @staticmethod
     def _get_maisaka_generator_type() -> str:
         """返回当前配置下 Maisaka replyer 的消息模式。"""
-        return "multimodal" if global_config.visual.multimodal_replyer else "legacy"
+        return global_config.visual.replyer_mode
 
     def get_replyer(
         self,
         chat_stream: Optional[BotChatSession] = None,
         chat_id: Optional[str] = None,
         request_type: str = "replyer",
         replyer_type: str = "default",
-    ) -> Optional["DefaultReplyer | PrivateReplyer | Any"]:
+    ) -> Optional[MaisakaReplyGenerator]:
         """按会话和 replyer 类型获取实例。"""
         stream_id = chat_stream.session_id if chat_stream else chat_id
         if not stream_id:
diff --git a/src/common/data_models/llm_service_data_models.py b/src/common/data_models/llm_service_data_models.py
index 415707b0..4326d410 100644
--- a/src/common/data_models/llm_service_data_models.py
+++ b/src/common/data_models/llm_service_data_models.py
@@ -14,7 +14,6 @@ from src.llm_models.payload_content.resp_format import RespFormat
 from src.llm_models.payload_content.tool_option import ToolCall, ToolDefinitionInput
 
 if TYPE_CHECKING:
-    from src.llm_models.model_client.base_client import BaseClient
     from src.llm_models.payload_content.message import Message
 
 
@@ -24,7 +23,7 @@ PromptMessage: TypeAlias = Dict[str, Any]
 PromptInput: TypeAlias = str | List[PromptMessage]
 """统一的提示输入类型。"""
 
-MessageFactory: TypeAlias = Callable[["BaseClient"], List["Message"]]
+MessageFactory: TypeAlias = Callable[..., List["Message"]]
 """统一的消息工厂类型。"""
diff --git a/src/config/config.py b/src/config/config.py
index 6adcf706..1bb4776e 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -54,7 +54,7 @@ CONFIG_DIR: Path = PROJECT_ROOT / "config"
 BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute()
 MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute()
 MMC_VERSION: str = "1.0.0"
-CONFIG_VERSION: str = "8.5.5"
+CONFIG_VERSION: str = "8.6.0"
 MODEL_CONFIG_VERSION: str = "1.14.0"
 
 logger = get_logger("config")
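`MessageFactory` is loosened from `Callable[["BaseClient"], ...]` to `Callable[..., List["Message"]]` so the orchestrator can pass an extra `model_info` argument while existing one-argument factories keep type-checking. A hedged sketch with stand-in types:

```python
# Stand-in types only: Message here is a plain dict, not the project's class.
from typing import Any, Callable, Dict, List, Optional, TypeAlias

Message: TypeAlias = Dict[str, Any]
MessageFactory: TypeAlias = Callable[..., List[Message]]

def legacy_factory(client: object) -> List[Message]:
    return [{"role": "user", "content": "hi"}]

def model_aware_factory(client: object, model_info: Optional[Any] = None) -> List[Message]:
    # Prompt content can now depend on the concrete model that was selected.
    visual = bool(getattr(model_info, "visual", False))
    return [{"role": "user", "content": "hi", "visual": visual}]

factories: List[MessageFactory] = [legacy_factory, model_aware_factory]  # both fit
```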
diff --git a/src/config/legacy_migration.py b/src/config/legacy_migration.py
index bd9eb302..8dc2ff98 100644
--- a/src/config/legacy_migration.py
+++ b/src/config/legacy_migration.py
@@ -253,6 +253,8 @@ def _migrate_target_item_list(parent: dict[str, Any], key: str) -> bool:
     raw = _as_list(parent.get(key))
     if raw is None:
         return False
+    if not raw:
+        return False
     if raw and all(isinstance(i, dict) for i in raw):
         return False
     targets: list[dict[str, str]] = []
@@ -285,18 +287,18 @@ def _migrate_extra_prompt_list(exp: dict[str, Any], key: str) -> bool:
     return True
 
 
-def _parse_multimodal_replyer(v: Any) -> Optional[bool]:
-    """兼容旧 replyer_generator_type 到布尔开关的迁移。"""
+def _parse_replyer_mode(v: Any) -> Optional[str]:
+    """将旧的 multimodal_replyer / replyer_generator_type 取值解析为 replyer_mode。"""
     if isinstance(v, bool):
-        return v
+        return "multimodal" if v else "text"
     if not isinstance(v, str):
         return None
     normalized_value = v.strip().lower()
-    if normalized_value == "multimodal":
-        return True
+    if normalized_value in {"text", "multimodal", "auto"}:
+        return normalized_value
     if normalized_value == "legacy":
-        return False
+        return "text"
     return None
 
 
@@ -403,14 +405,23 @@ def try_migrate_legacy_bot_config_dict(data: dict[str, Any]) -> MigrationResult:
         migrated_any = True
         reasons.append("chat.multimodal_planner_moved_to_visual.multimodal_planner")
 
+    if visual is not None and "multimodal_replyer" in visual:
+        replyer_mode = _parse_replyer_mode(visual.get("multimodal_replyer"))
+        if "replyer_mode" not in visual and replyer_mode is not None:
+            visual["replyer_mode"] = replyer_mode
+        if "replyer_mode" in visual:
+            visual.pop("multimodal_replyer", None)
+            migrated_any = True
+            reasons.append("visual.multimodal_replyer_moved_to_visual.replyer_mode")
+
     if visual is not None and "replyer_generator_type" in chat:
-        multimodal_replyer = _parse_multimodal_replyer(chat["replyer_generator_type"])
-        if "multimodal_replyer" not in visual and multimodal_replyer is not None:
-            visual["multimodal_replyer"] = multimodal_replyer
-        if "multimodal_replyer" in visual:
+        replyer_mode = _parse_replyer_mode(chat["replyer_generator_type"])
+        if "replyer_mode" not in visual and replyer_mode is not None:
+            visual["replyer_mode"] = replyer_mode
+        if "replyer_mode" in visual:
             chat.pop("replyer_generator_type", None)
             migrated_any = True
-            reasons.append("chat.replyer_generator_type_moved_to_visual.multimodal_replyer")
+            reasons.append("chat.replyer_generator_type_moved_to_visual.replyer_mode")
 
     maisaka = _as_dict(data.get("maisaka"))
     mem = _as_dict(data.get("memory"))
diff --git a/src/config/official_configs.py b/src/config/official_configs.py
index 093c965c..d4313849 100644
--- a/src/config/official_configs.py
+++ b/src/config/official_configs.py
@@ -152,16 +152,16 @@ class VisualConfig(ConfigBase):
             "x-icon": "image",
         },
     )
-    """是否直接输入图片"""
+    """是否启用多模态planner"""
 
-    multimodal_replyer: bool = Field(
-        default=False,
+    replyer_mode: Literal["text", "multimodal", "auto"] = Field(
+        default="auto",
         json_schema_extra={
-            "x-widget": "switch",
+            "x-widget": "select",
             "x-icon": "git-branch",
         },
     )
-    """是否启用 Maisaka 多模态 replyer 生成器"""
+    """回复器模式,auto根据模型信息自动选择,text为纯文本模式,multimodal为多模态模式"""
 
     visual_style: str = Field(
         default="请用中文描述这张图片的内容。如果有文字,请把文字描述概括出来,请留意其主题,直观感受,输出为一段平文本,最多30字,请注意不要分点,就输出一段文本",
diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index c550f311..78cd5cae 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -3,6 +3,7 @@ from enum import Enum
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple
 
 import asyncio
+import inspect
 import random
 import re
 import time
@@ -910,7 +911,11 @@ class LLMOrchestrator:
             model_info, api_provider, client = self._select_model(exclude_models=failed_models_this_request)
             message_list = []
             if message_factory:
-                message_list = message_factory(client)
+                parameter_count = len(inspect.signature(message_factory).parameters)
+                if parameter_count >= 2:
+                    message_list = message_factory(client, model_info)
+                else:
+                    message_list = message_factory(client)
             try:
                 request = self._build_client_request(
                     request_type=request_type,
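The `inspect.signature` check added to `LLMOrchestrator` above dispatches on factory arity so old one-argument factories keep working. A minimal reproduction of the dispatch:

```python
# Minimal reproduction of the arity-based dispatch used above.
import inspect

def call_factory(factory, client, model_info):
    if len(inspect.signature(factory).parameters) >= 2:
        return factory(client, model_info)   # new-style factory
    return factory(client)                   # legacy one-argument factory

assert call_factory(lambda c: ["one-arg"], None, None) == ["one-arg"]
assert call_factory(lambda c, m=None: [m], None, "model") == ["model"]
```

One caveat: `inspect.signature` counts a `*args` parameter as a single entry, so a variadic factory would still receive only the client; the factories in this PR all declare explicit parameters.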
diff --git a/src/maisaka/builtin_tool/send_emoji.py b/src/maisaka/builtin_tool/send_emoji.py
index b409abb8..5cd73736 100644
--- a/src/maisaka/builtin_tool/send_emoji.py
+++ b/src/maisaka/builtin_tool/send_emoji.py
@@ -2,11 +2,11 @@ from datetime import datetime
 from io import BytesIO
-import math
 from random import sample
 from typing import Any, Dict, Optional
 
 import asyncio
+import math
 
 from PIL import Image as PILImage
 from PIL import ImageDraw, ImageFont
 
@@ -20,12 +20,14 @@ from src.common.logger import get_logger
 from src.config.config import global_config
 from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec
 from src.llm_models.payload_content.resp_format import RespFormat, RespFormatType
+from src.llm_models.payload_content.message import MessageBuilder, RoleType
 from src.maisaka.context_messages import (
     LLMContextMessage,
     ReferenceMessage,
     ReferenceMessageType,
     SessionBackedMessage,
 )
+from src.plugin_runtime.hook_payloads import serialize_prompt_messages
 
 from .context import BuiltinToolRuntimeContext
@@ -270,6 +272,7 @@ def _build_send_emoji_prompt_preview(
 def _build_send_emoji_monitor_detail(
     *,
     prompt_text: str = "",
+    request_messages: Optional[list[dict[str, Any]]] = None,
     reasoning_text: str = "",
     output_text: str = "",
     metrics: Optional[Dict[str, Any]] = None,
@@ -280,6 +283,8 @@ def _build_send_emoji_monitor_detail(
     detail: Dict[str, Any] = {}
     if prompt_text.strip():
         detail["prompt_text"] = prompt_text.strip()
+    if isinstance(request_messages, list) and request_messages:
+        detail["request_messages"] = request_messages
     if reasoning_text.strip():
         detail["reasoning_text"] = reasoning_text.strip()
     if output_text.strip():
@@ -394,6 +399,16 @@ async def _select_emoji_with_sub_agent(
         grid_columns=grid_columns,
         sampled_emojis=sampled_emojis,
     )
+    request_messages = [
+        MessageBuilder().set_role(RoleType.System).add_text_content(system_prompt).build(),
+    ]
+    prompt_llm_message = prompt_message.to_llm_message()
+    if prompt_llm_message is not None:
+        request_messages.append(prompt_llm_message)
+    candidate_llm_message = candidate_message.to_llm_message()
+    if candidate_llm_message is not None:
+        request_messages.append(candidate_llm_message)
+    serialized_request_messages = serialize_prompt_messages(request_messages)
 
     selection_started_at = datetime.now()
     response = await tool_ctx.runtime.run_sub_agent(
@@ -422,6 +437,7 @@ async def _select_emoji_with_sub_agent(
         if selection_metadata is not None:
             selection_metadata["monitor_detail"] = _build_send_emoji_monitor_detail(
                 prompt_text=prompt_preview,
+                request_messages=serialized_request_messages,
                 output_text=response.content or "",
                 metrics=selection_metrics,
                 extra_sections=[{
@@ -436,6 +452,7 @@ async def _select_emoji_with_sub_agent(
     selection_metadata["reason"] = selection.reason.strip()
     selection_metadata["monitor_detail"] = _build_send_emoji_monitor_detail(
         prompt_text=prompt_preview,
+        request_messages=serialized_request_messages,
         reasoning_text=selection.reason,
         output_text=response.content or "",
         metrics=selection_metrics,
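`_build_send_emoji_monitor_detail` attaches `request_messages` only when it is a non-empty list, mirroring the `strip()` guards on the text fields so the monitor payload never carries empty keys. A self-contained sketch of that guard pattern:

```python
# Self-contained sketch of the guard pattern; field names match the diff above.
from typing import Any, Dict, List, Optional

def build_monitor_detail(prompt_text: str = "",
                         request_messages: Optional[List[Dict[str, Any]]] = None,
                         output_text: str = "") -> Dict[str, Any]:
    detail: Dict[str, Any] = {}
    if prompt_text.strip():
        detail["prompt_text"] = prompt_text.strip()
    if isinstance(request_messages, list) and request_messages:
        detail["request_messages"] = request_messages
    if output_text.strip():
        detail["output_text"] = output_text.strip()
    return detail

assert build_monitor_detail(request_messages=[]) == {}  # empty list is dropped
```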
diff --git a/src/maisaka/display/prompt_cli_renderer.py b/src/maisaka/display/prompt_cli_renderer.py
index 3ac84f26..eeca0a5a 100644
--- a/src/maisaka/display/prompt_cli_renderer.py
+++ b/src/maisaka/display/prompt_cli_renderer.py
@@ -440,6 +440,9 @@ class PromptCLIVisualizer:
         return (
             "<div class=\"image-block\">"
             f"<div class=\"image-meta\">图片 image/{html.escape(normalized_format)} {html.escape(size_text)}</div>"
+            f"<a class=\"image-preview-link\" href=\"{html.escape(str(file_path))}\" target=\"_blank\">"
+            f"<img class=\"image-preview\" src=\"{html.escape(str(file_path))}\" alt=\"图片预览\" />"
+            "</a>"
             f"<div class=\"image-path\">{html.escape(str(file_path))}</div>"
             f"<a class=\"image-link\" href=\"{html.escape(str(file_path))}\">打开图片</a>"
" @@ -727,6 +730,22 @@ class PromptCLIVisualizer: font-family: "Cascadia Mono", "JetBrains Mono", "Consolas", monospace; word-break: break-all; }} + .image-preview-link {{ + display: block; + margin-top: 10px; + }} + .image-preview {{ + display: block; + max-width: min(100%, 560px); + max-height: 420px; + width: auto; + height: auto; + border-radius: 12px; + border: 1px solid #dbe4f0; + background: #fff; + box-shadow: 0 8px 20px rgba(15, 23, 42, 0.08); + object-fit: contain; + }} .image-link {{ display: inline-block; margin-top: 8px; diff --git a/src/services/generator_service.py b/src/services/generator_service.py index 616bacc8..bc5aa190 100644 --- a/src/services/generator_service.py +++ b/src/services/generator_service.py @@ -11,8 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING from rich.traceback import install from src.chat.message_receive.chat_manager import BotChatSession -from src.chat.replyer.group_generator import DefaultReplyer -from src.chat.replyer.private_generator import PrivateReplyer +from src.chat.replyer.maisaka_generator import MaisakaReplyGenerator from src.chat.replyer.replyer_manager import replyer_manager from src.chat.utils.utils import process_llm_response from src.common.data_models.message_component_data_model import MessageSequence, TextComponent @@ -38,7 +37,7 @@ def _get_replyer( chat_stream: Optional[BotChatSession] = None, chat_id: Optional[str] = None, request_type: str = "replyer", -) -> Optional[DefaultReplyer | PrivateReplyer]: +) -> Optional[MaisakaReplyGenerator]: """获取回复器对象""" if not chat_id and not chat_stream: raise ValueError("chat_stream 和 chat_id 不可均为空")