feat: unify replyer behavior with and without multimodal input, improving consistency and generality; add a model-level visual parameter

SengokuCola
2026-04-11 16:41:00 +08:00
parent 3ea2bf1059
commit c0230fc313
20 changed files with 323 additions and 1802 deletions
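
In effect, the separate legacy and multimodal replyer classes collapse into a single MaisakaReplyGenerator, switched by an enable_visual_message flag that defaults to global_config.visual.multimodal_replyer; the LLM client class and prompt loader become constructor-injectable for tests. A minimal sketch of the unified entry point, based on the test changes below (chat_stream=None mirrors the tests; real callers pass a BotChatSession):

from src.chat.replyer.maisaka_generator import MaisakaReplyGenerator

# Text-only pipeline: visual message parts are disabled explicitly.
text_generator = MaisakaReplyGenerator(
    chat_stream=None,
    request_type="maisaka_replyer",
    enable_visual_message=False,
)

# Multimodal pipeline: same class, visual parts enabled. Omitting
# enable_visual_message falls back to global_config.visual.multimodal_replyer.
visual_generator = MaisakaReplyGenerator(
    chat_stream=None,
    request_type="maisaka_replyer",
    enable_visual_message=True,
)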

View File

@@ -5,8 +5,7 @@ import pytest
from rich.panel import Panel
from rich.text import Text
from src.chat.replyer import maisaka_generator as legacy_replyer_module
from src.chat.replyer import maisaka_generator_multi as multimodal_replyer_module
from src.chat.replyer import maisaka_generator as replyer_module
from src.common.data_models.reply_generation_data_models import (
GenerationMetrics,
LLMCompletionResult,
@@ -37,8 +36,8 @@ class _FakeLegacyLLMServiceClient:
del args
del kwargs
async def generate_response(self, prompt: str) -> _FakeLLMResult:
assert prompt
async def generate_response_with_messages(self, *, message_factory: Callable[[object], list[Any]]) -> _FakeLLMResult:
assert message_factory(object())
return _FakeLLMResult()
@@ -54,13 +53,21 @@ class _FakeMultimodalLLMServiceClient:
@pytest.mark.asyncio
async def test_legacy_and_multimodal_replyer_monitor_detail_have_same_shape(monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setattr(legacy_replyer_module, "LLMServiceClient", _FakeLegacyLLMServiceClient)
monkeypatch.setattr(multimodal_replyer_module, "LLMServiceClient", _FakeMultimodalLLMServiceClient)
monkeypatch.setattr(legacy_replyer_module, "load_prompt", lambda *args, **kwargs: "legacy prompt")
monkeypatch.setattr(multimodal_replyer_module, "load_prompt", lambda *args, **kwargs: "multi prompt")
monkeypatch.setattr(replyer_module, "LLMServiceClient", _FakeLegacyLLMServiceClient)
monkeypatch.setattr(replyer_module, "load_prompt", lambda *args, **kwargs: "legacy prompt")
legacy_generator = legacy_replyer_module.MaisakaReplyGenerator(chat_stream=None, request_type="test_legacy")
multimodal_generator = multimodal_replyer_module.MaisakaReplyGenerator(chat_stream=None, request_type="test_multi")
legacy_generator = replyer_module.MaisakaReplyGenerator(
chat_stream=None,
request_type="test_legacy",
enable_visual_message=False,
)
multimodal_generator = replyer_module.MaisakaReplyGenerator(
chat_stream=None,
request_type="test_multi",
llm_client_cls=_FakeMultimodalLLMServiceClient,
load_prompt_func=lambda *args, **kwargs: "multi prompt",
enable_visual_message=True,
)
legacy_success, legacy_result = await legacy_generator.generate_reply_with_context(
stream_id="session-legacy",
@@ -84,6 +91,40 @@ async def test_legacy_and_multimodal_replyer_monitor_detail_have_same_shape(monk
assert legacy_result.monitor_detail["metrics"]["total_tokens"] == 19
def test_legacy_replyer_builds_message_sequence_like_multimodal() -> None:
legacy_generator = replyer_module.MaisakaReplyGenerator(
chat_stream=None,
request_type="test_legacy",
enable_visual_message=False,
)
legacy_prompt_loader = replyer_module.load_prompt
replyer_module.load_prompt = lambda *args, **kwargs: "legacy prompt"
try:
session_message = replyer_module.SessionBackedMessage(
raw_message=SimpleNamespace(),
visible_text="[Alice]你好\n[Bob]在吗",
timestamp=replyer_module.datetime.now(),
source_kind="user",
)
request_messages = legacy_generator._build_request_messages(
chat_history=[session_message],
reply_message=None,
reply_reason="测试原因",
stream_id="session-legacy",
)
finally:
replyer_module.load_prompt = legacy_prompt_loader
assert len(request_messages) == 4
assert request_messages[0].role.value == "system"
assert request_messages[1].role.value == "user"
assert request_messages[1].get_text_content() == "[Alice]你好"
assert request_messages[2].role.value == "user"
assert request_messages[2].get_text_content() == "[Bob]在吗"
assert request_messages[3].role.value == "user"
@pytest.mark.asyncio
async def test_reply_tool_puts_monitor_detail_into_metadata(monkeypatch: pytest.MonkeyPatch) -> None:
fake_monitor_detail = {
@@ -324,7 +365,7 @@ def test_reasoning_engine_build_tool_monitor_result_keeps_non_reply_tool_without
def test_runtime_build_tool_detail_panels_renders_reply_monitor_detail() -> None:
runtime = object.__new__(MaisakaHeartFlowChatting)
runtime.session_id = "session-1"
panels = runtime._build_tool_detail_panels(
panels = runtime._build_tool_detail_cards(
[
{
"tool_call_id": "call-reply-1",
@@ -348,7 +389,8 @@ def test_runtime_build_tool_detail_panels_renders_reply_monitor_detail() -> None
},
},
}
]
],
stage_title="工具调用",
)
assert len(panels) == 1
@@ -387,7 +429,7 @@ def test_runtime_build_tool_detail_panels_uses_prompt_access_panel(monkeypatch:
_fake_build_text_access_panel,
)
panels = runtime._build_tool_detail_panels(
panels = runtime._build_tool_detail_cards(
[
{
"tool_call_id": "call-reply-2",
@@ -401,7 +443,8 @@ def test_runtime_build_tool_detail_panels_uses_prompt_access_panel(monkeypatch:
"output_text": "reply output",
},
}
]
],
stage_title="工具调用",
)
assert len(panels) == 1
@@ -425,7 +468,7 @@ def test_runtime_build_tool_detail_panels_uses_emotion_prompt_access_panel(monke
_fake_build_text_access_panel,
)
panels = runtime._build_tool_detail_panels(
panels = runtime._build_tool_detail_cards(
[
{
"tool_call_id": "call-emoji-1",
@@ -439,7 +482,8 @@ def test_runtime_build_tool_detail_panels_uses_emotion_prompt_access_panel(monke
"output_text": '{"emoji_index": 1}',
},
}
]
],
stage_title="工具调用",
)
assert len(panels) == 1
@@ -448,6 +492,63 @@ def test_runtime_build_tool_detail_panels_uses_emotion_prompt_access_panel(monke
assert captured["kwargs"]["request_kind"] == "emotion"
def test_runtime_build_tool_detail_cards_uses_structured_prompt_messages_with_images(
monkeypatch: pytest.MonkeyPatch,
) -> None:
runtime = object.__new__(MaisakaHeartFlowChatting)
runtime.session_id = "session-image"
captured: dict[str, Any] = {}
def _fake_build_prompt_access_panel(messages: list[Any], **kwargs: Any) -> str:
captured["messages"] = messages
captured["kwargs"] = kwargs
return "IMAGE_PROMPT_LINK"
def _fake_build_text_access_panel(content: str, **kwargs: Any) -> str:
captured["text_content"] = content
captured["text_kwargs"] = kwargs
return "TEXT_PROMPT_LINK"
monkeypatch.setattr(
"src.maisaka.runtime.PromptCLIVisualizer.build_prompt_access_panel",
_fake_build_prompt_access_panel,
)
monkeypatch.setattr(
"src.maisaka.runtime.PromptCLIVisualizer.build_text_access_panel",
_fake_build_text_access_panel,
)
panels = runtime._build_tool_detail_cards(
[
{
"tool_call_id": "call-reply-image-1",
"tool_name": "reply",
"tool_args": {"msg_id": "m3"},
"success": True,
"duration_ms": 22.0,
"summary": "- reply [成功]: 已回复",
"detail": {
"prompt_text": "reply prompt image",
"request_messages": [
{
"role": "user",
"content": ["前缀文本", ["png", "ZmFrZQ=="]],
}
],
"output_text": "reply output",
},
}
],
stage_title="工具调用",
)
assert len(panels) == 1
assert "messages" in captured
assert "text_content" not in captured
assert captured["kwargs"]["chat_id"] == "session-image"
assert captured["kwargs"]["request_kind"] == "replyer"
def test_runtime_render_context_usage_panel_merges_timing_and_planner(monkeypatch: pytest.MonkeyPatch) -> None:
runtime = object.__new__(MaisakaHeartFlowChatting)
runtime.session_id = "session-merged"

View File

@@ -0,0 +1,18 @@
from src.llm_models.payload_content.message import MessageBuilder, RoleType
from src.plugin_runtime.hook_payloads import deserialize_prompt_messages, serialize_prompt_messages
def test_prompt_messages_roundtrip_preserves_image_parts() -> None:
messages = [
MessageBuilder().set_role(RoleType.User).add_text_content("你好").add_image_content("png", "ZmFrZQ==").build(),
]
serialized_messages = serialize_prompt_messages(messages)
restored_messages = deserialize_prompt_messages(serialized_messages)
assert len(restored_messages) == 1
assert restored_messages[0].role == RoleType.User
assert restored_messages[0].get_text_content() == "你好"
assert len(restored_messages[0].parts) == 2
assert restored_messages[0].parts[1].image_format == "png"
assert restored_messages[0].parts[1].image_base64 == "ZmFrZQ=="

File diff suppressed because it is too large

View File

@@ -1,530 +1,34 @@
from dataclasses import dataclass, field
from datetime import datetime
from typing import Awaitable, Callable, Dict, List, Optional, Tuple
import random
import time
from rich.panel import Panel
from typing import Any, Callable, Optional
from src.chat.message_receive.chat_manager import BotChatSession
from src.chat.message_receive.message import SessionMessage
from src.chat.utils.utils import get_chat_type_and_target_info
from src.cli.console import console
from src.common.data_models.reply_generation_data_models import (
GenerationMetrics,
LLMCompletionResult,
ReplyGenerationResult,
build_reply_monitor_detail,
)
from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt
from src.common.utils.utils_session import SessionUtils
from src.config.config import global_config
from src.core.types import ActionInfo
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.maisaka.context_messages import SessionBackedMessage
from src.services.llm_service import LLMServiceClient
from src.maisaka.context_messages import (
AssistantMessage,
LLMContextMessage,
ReferenceMessage,
SessionBackedMessage,
ToolResultMessage,
)
from src.maisaka.message_adapter import parse_speaker_content
from src.maisaka.prompt_cli_renderer import PromptCLIVisualizer
from src.plugin_runtime.hook_payloads import serialize_prompt_messages
from .maisaka_expression_selector import maisaka_expression_selector
logger = get_logger("replyer")
from .maisaka_generator_base import BaseMaisakaReplyGenerator
@dataclass
class MaisakaReplyContext:
"""Maisaka replyer 使用的回复上下文。"""
expression_habits: str = ""
selected_expression_ids: List[int] = field(default_factory=list)
class MaisakaReplyGenerator:
"""生成 Maisaka 的最终可见回复。"""
class MaisakaReplyGenerator(BaseMaisakaReplyGenerator):
"""Maisaka replyer。"""
def __init__(
self,
chat_stream: Optional[BotChatSession] = None,
request_type: str = "maisaka_replyer",
llm_client_cls: Optional[Any] = None,
load_prompt_func: Optional[Callable[..., str]] = None,
enable_visual_message: Optional[bool] = None,
) -> None:
self.chat_stream = chat_stream
self.request_type = request_type
self.express_model = LLMServiceClient(
task_name="replyer",
super().__init__(
chat_stream=chat_stream,
request_type=request_type,
llm_client_cls=llm_client_cls or LLMServiceClient,
load_prompt_func=load_prompt_func or load_prompt,
enable_visual_message=(
global_config.visual.multimodal_replyer
if enable_visual_message is None
else enable_visual_message
),
)
self._personality_prompt = self._build_personality_prompt()
def _build_personality_prompt(self) -> str:
"""构建 replyer 使用的人设提示。"""
try:
bot_name = global_config.bot.nickname
alias_names = global_config.bot.alias_names
bot_aliases = f",也有人叫你{','.join(alias_names)}" if alias_names else ""
prompt_personality = global_config.personality.personality
if (
hasattr(global_config.personality, "states")
and global_config.personality.states
and hasattr(global_config.personality, "state_probability")
and global_config.personality.state_probability > 0
and random.random() < global_config.personality.state_probability
):
prompt_personality = random.choice(global_config.personality.states)
return f"你的名字是{bot_name}{bot_aliases},你{prompt_personality};"
except Exception as exc:
logger.warning(f"构建 Maisaka 人设提示词失败: {exc}")
return "你的名字是麦麦,你是一个活泼可爱的 AI 助手。"
@staticmethod
def _normalize_content(content: str, limit: int = 500) -> str:
normalized = " ".join((content or "").split())
if len(normalized) > limit:
return normalized[:limit] + "..."
return normalized
@staticmethod
def _format_message_time(message: LLMContextMessage) -> str:
return message.timestamp.strftime("%H:%M:%S")
@staticmethod
def _extract_visible_assistant_reply(message: AssistantMessage) -> str:
del message
return ""
def _extract_guided_bot_reply(self, message: SessionBackedMessage) -> str:
speaker_name, body = parse_speaker_content(message.processed_plain_text.strip())
bot_nickname = global_config.bot.nickname.strip() or "Bot"
if speaker_name == bot_nickname:
return self._normalize_content(body.strip())
return ""
@staticmethod
def _split_user_message_segments(raw_content: str) -> List[tuple[Optional[str], str]]:
"""按说话人拆分用户消息。"""
segments: List[tuple[Optional[str], str]] = []
current_speaker: Optional[str] = None
current_lines: List[str] = []
for raw_line in raw_content.splitlines():
speaker_name, content_body = parse_speaker_content(raw_line)
if speaker_name is not None:
if current_lines:
segments.append((current_speaker, "\n".join(current_lines)))
current_speaker = speaker_name
current_lines = [content_body]
continue
current_lines.append(raw_line)
if current_lines:
segments.append((current_speaker, "\n".join(current_lines)))
return segments
def _format_chat_history(self, messages: List[LLMContextMessage]) -> str:
"""格式化 replyer 使用的可见聊天记录。"""
bot_nickname = global_config.bot.nickname.strip() or "Bot"
parts: List[str] = []
for message in messages:
timestamp = self._format_message_time(message)
if isinstance(message, (ReferenceMessage, ToolResultMessage)):
continue
if isinstance(message, SessionBackedMessage):
guided_reply = self._extract_guided_bot_reply(message)
if guided_reply:
parts.append(f"{timestamp} {bot_nickname}(you): {guided_reply}")
continue
raw_content = message.processed_plain_text
for speaker_name, content_body in self._split_user_message_segments(raw_content):
content = self._normalize_content(content_body)
if not content:
continue
visible_speaker = speaker_name or global_config.maisaka.cli_user_name.strip() or "User"
parts.append(f"{timestamp} {visible_speaker}: {content}")
continue
if isinstance(message, AssistantMessage):
visible_reply = self._extract_visible_assistant_reply(message)
if visible_reply:
parts.append(f"{timestamp} {bot_nickname}(you): {visible_reply}")
return "\n".join(parts)
def _build_target_message_block(self, reply_message: Optional[SessionMessage]) -> str:
"""构建当前需要回复的目标消息摘要。"""
if reply_message is None:
return ""
user_info = reply_message.message_info.user_info
sender_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
target_message_id = reply_message.message_id.strip() if reply_message.message_id else "未知"
target_content = self._normalize_content((reply_message.processed_plain_text or "").strip(), limit=300)
if not target_content:
target_content = "[无可见文本内容]"
return (
"【本次回复目标】\n"
f"- 目标消息ID{target_message_id}\n"
f"- 发送者:{sender_name}\n"
f"- 消息内容:{target_content}\n"
"- 你这次要回复的就是这条目标消息,请结合整段上下文理解,但不要误把其他历史消息当成当前回复对象。"
)
@staticmethod
def _get_chat_prompt_for_chat(chat_id: str, is_group_chat: Optional[bool]) -> str:
"""根据聊天流 ID 获取匹配的额外 prompt。"""
if not global_config.chat.chat_prompts:
return ""
for chat_prompt_item in global_config.chat.chat_prompts:
if hasattr(chat_prompt_item, "platform"):
platform = str(chat_prompt_item.platform or "").strip()
item_id = str(chat_prompt_item.item_id or "").strip()
rule_type = str(chat_prompt_item.rule_type or "").strip()
prompt_content = str(chat_prompt_item.prompt or "").strip()
elif isinstance(chat_prompt_item, str):
parts = chat_prompt_item.split(":", 3)
if len(parts) != 4:
continue
platform, item_id, rule_type, prompt_content = parts
platform = platform.strip()
item_id = item_id.strip()
rule_type = rule_type.strip()
prompt_content = prompt_content.strip()
else:
continue
if not platform or not item_id or not prompt_content:
continue
if rule_type == "group":
config_is_group = True
config_chat_id = SessionUtils.calculate_session_id(platform, group_id=item_id)
elif rule_type == "private":
config_is_group = False
config_chat_id = SessionUtils.calculate_session_id(platform, user_id=item_id)
else:
continue
if config_is_group != is_group_chat:
continue
if config_chat_id == chat_id:
return prompt_content
return ""
def _build_group_chat_attention_block(self, session_id: str) -> str:
"""构建当前聊天场景下的额外注意事项块。"""
if not session_id:
return ""
try:
is_group_chat, _ = get_chat_type_and_target_info(session_id)
except Exception:
is_group_chat = None
prompt_lines: List[str] = []
if is_group_chat is True:
if group_chat_prompt := global_config.chat.group_chat_prompt.strip():
prompt_lines.append(f"通用注意事项:\n{group_chat_prompt}")
elif is_group_chat is False:
if private_chat_prompt := global_config.chat.private_chat_prompts.strip():
prompt_lines.append(f"通用注意事项:\n{private_chat_prompt}")
if chat_prompt := self._get_chat_prompt_for_chat(session_id, is_group_chat).strip():
prompt_lines.append(f"当前聊天额外注意事项:\n{chat_prompt}")
if not prompt_lines:
return ""
return "在该聊天中的注意事项:\n" + "\n\n".join(prompt_lines) + "\n"
def _build_request_messages(
self,
chat_history: List[LLMContextMessage],
reply_message: Optional[SessionMessage],
reply_reason: str,
expression_habits: str = "",
stream_id: Optional[str] = None,
) -> List[Message]:
"""构建 Maisaka replyer 请求消息列表。"""
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
formatted_history = self._format_chat_history(chat_history)
target_message_block = self._build_target_message_block(reply_message)
session_id = self._resolve_session_id(stream_id)
try:
system_prompt = load_prompt(
"maisaka_replyer",
bot_name=global_config.bot.nickname,
group_chat_attention_block=self._build_group_chat_attention_block(session_id),
time_block=f"当前时间:{current_time}",
identity=self._personality_prompt,
reply_style=global_config.personality.reply_style,
)
except Exception:
system_prompt = "你是一个友好的 AI 助手,请根据聊天记录自然回复。"
extra_sections: List[str] = []
if expression_habits.strip():
extra_sections.append(expression_habits.strip())
user_sections = [
f"当前时间:{current_time}",
f"【聊天记录】\n{formatted_history}",
]
if target_message_block:
user_sections.append(target_message_block)
if extra_sections:
user_sections.append("\n\n".join(extra_sections))
user_sections.append(f"【回复信息参考】\n{reply_reason}")
user_sections.append("现在,你说:")
user_prompt = "\n\n".join(user_sections)
return [
MessageBuilder().set_role(RoleType.System).add_text_content(system_prompt).build(),
MessageBuilder().set_role(RoleType.User).add_text_content(user_prompt).build(),
]
def _resolve_session_id(self, stream_id: Optional[str]) -> str:
"""解析当前回复使用的会话 ID。"""
if stream_id:
return stream_id
if self.chat_stream is not None:
return self.chat_stream.session_id
return ""
async def _build_reply_context(
self,
chat_history: List[LLMContextMessage],
reply_message: Optional[SessionMessage],
reply_reason: str,
stream_id: Optional[str],
sub_agent_runner: Optional[Callable[[str], Awaitable[str]]],
) -> MaisakaReplyContext:
"""构建回复上下文:表达习惯和已选表达 ID。"""
session_id = self._resolve_session_id(stream_id)
if not session_id:
logger.warning("构建 Maisaka 回复上下文失败:缺少会话标识")
return MaisakaReplyContext()
if sub_agent_runner is None:
logger.info("表达方式选择跳过:缺少子代理执行器")
return MaisakaReplyContext()
selection_result = await maisaka_expression_selector.select_for_reply(
session_id=session_id,
chat_history=chat_history,
reply_message=reply_message,
reply_reason=reply_reason,
sub_agent_runner=sub_agent_runner,
)
return MaisakaReplyContext(
expression_habits=selection_result.expression_habits,
selected_expression_ids=selection_result.selected_expression_ids,
)
async def generate_reply_with_context(
self,
extra_info: str = "",
reply_reason: str = "",
available_actions: Optional[Dict[str, ActionInfo]] = None,
chosen_actions: Optional[List[object]] = None,
from_plugin: bool = True,
stream_id: Optional[str] = None,
reply_message: Optional[SessionMessage] = None,
reply_time_point: Optional[float] = None,
think_level: int = 1,
unknown_words: Optional[List[str]] = None,
log_reply: bool = True,
chat_history: Optional[List[LLMContextMessage]] = None,
expression_habits: str = "",
selected_expression_ids: Optional[List[int]] = None,
sub_agent_runner: Optional[Callable[[str], Awaitable[str]]] = None,
) -> Tuple[bool, ReplyGenerationResult]:
"""结合上下文生成 Maisaka 的最终可见回复。"""
def finalize(success_value: bool) -> Tuple[bool, ReplyGenerationResult]:
result.monitor_detail = build_reply_monitor_detail(result)
return success_value, result
del available_actions
del chosen_actions
del extra_info
del from_plugin
del log_reply
del reply_time_point
del think_level
del unknown_words
result = ReplyGenerationResult()
overall_started_at = time.perf_counter()
if chat_history is None:
result.error_message = "聊天历史为空"
return finalize(False)
logger.info(
f"Maisaka 回复器开始生成: 会话流标识={stream_id} 回复原因={reply_reason!r} "
f"历史消息数={len(chat_history)} 目标消息编号={reply_message.message_id if reply_message else None}"
)
filtered_history = [
message
for message in chat_history
if not isinstance(message, (ReferenceMessage, ToolResultMessage))
]
logger.debug(f"Maisaka 回复器过滤后历史消息数={len(filtered_history)}")
if self.express_model is None:
logger.error("Maisaka 回复器的回复模型未初始化")
result.error_message = "回复模型尚未初始化"
return finalize(False)
try:
reply_context = await self._build_reply_context(
chat_history=filtered_history,
reply_message=reply_message,
reply_reason=reply_reason or "",
stream_id=stream_id,
sub_agent_runner=sub_agent_runner,
)
except Exception as exc:
import traceback
logger.error(f"Maisaka 回复器构建回复上下文失败: {exc}\n{traceback.format_exc()}")
result.error_message = f"构建回复上下文失败: {exc}"
result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
merged_expression_habits = expression_habits.strip() or reply_context.expression_habits
result.selected_expression_ids = (
list(selected_expression_ids)
if selected_expression_ids is not None
else list(reply_context.selected_expression_ids)
)
logger.info(
f"Maisaka 回复上下文构建完成: 会话流标识={stream_id} "
f"已选表达编号={result.selected_expression_ids!r}"
)
prompt_started_at = time.perf_counter()
try:
request_messages = self._build_request_messages(
chat_history=filtered_history,
reply_message=reply_message,
reply_reason=reply_reason or "",
expression_habits=merged_expression_habits,
stream_id=stream_id,
)
except Exception as exc:
import traceback
logger.error(f"Maisaka 回复器构建提示词失败: {exc}\n{traceback.format_exc()}")
result.error_message = f"构建提示词失败: {exc}"
result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
prompt_ms = round((time.perf_counter() - prompt_started_at) * 1000, 2)
request_prompt = PromptCLIVisualizer._build_prompt_dump_text(request_messages)
result.completion.request_prompt = request_prompt
result.request_messages = serialize_prompt_messages(request_messages)
show_replyer_prompt = bool(getattr(global_config.debug, "show_replyer_prompt", False))
show_replyer_reasoning = bool(getattr(global_config.debug, "show_replyer_reasoning", False))
preview_chat_id = self._resolve_session_id(stream_id) or "unknown"
if show_replyer_prompt:
console.print(
Panel(
PromptCLIVisualizer.build_prompt_access_panel(
request_messages,
category="replyer",
chat_id=preview_chat_id,
request_kind="replyer",
selection_reason=f"ID: {preview_chat_id}",
image_display_mode="path_link" if global_config.maisaka.show_image_path else "legacy",
),
title="Maisaka Replyer Prompt",
border_style="bright_yellow",
padding=(0, 1),
)
)
def message_factory(_client: object) -> List[Message]:
return request_messages
llm_started_at = time.perf_counter()
try:
generation_result = await self.express_model.generate_response_with_messages(
message_factory=message_factory
)
except Exception as exc:
logger.exception("Maisaka 回复器调用失败")
result.error_message = str(exc)
result.metrics = GenerationMetrics(
prompt_ms=prompt_ms,
llm_ms=round((time.perf_counter() - llm_started_at) * 1000, 2),
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
llm_ms = round((time.perf_counter() - llm_started_at) * 1000, 2)
response_text = (generation_result.response or "").strip()
result.success = bool(response_text)
result.completion = LLMCompletionResult(
request_prompt=request_prompt,
response_text=response_text,
reasoning_text=generation_result.reasoning or "",
model_name=generation_result.model_name or "",
tool_calls=generation_result.tool_calls or [],
prompt_tokens=generation_result.prompt_tokens,
completion_tokens=generation_result.completion_tokens,
total_tokens=generation_result.total_tokens,
)
result.metrics = GenerationMetrics(
prompt_ms=prompt_ms,
llm_ms=llm_ms,
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
stage_logs=[
f"prompt: {prompt_ms} ms",
f"llm: {llm_ms} ms",
],
)
if show_replyer_reasoning and result.completion.reasoning_text:
logger.info(f"Maisaka 回复器思考内容:\n{result.completion.reasoning_text}")
if not result.success:
result.error_message = "回复器返回了空内容"
logger.warning("Maisaka 回复器返回了空内容")
return finalize(False)
logger.info(
f"Maisaka 回复器生成成功: 回复文本={response_text!r} "
f"总耗时毫秒={result.metrics.overall_ms} "
f"已选表达编号={result.selected_expression_ids!r}"
)
result.text_fragments = [response_text]
return finalize(True)

View File

@@ -1,8 +1,9 @@
import random
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Awaitable, Callable, Dict, List, Optional, Tuple
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
import random
from rich.console import Group, RenderableType
from rich.panel import Panel
@@ -20,13 +21,10 @@ from src.common.data_models.reply_generation_data_models import (
build_reply_monitor_detail,
)
from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt
from src.common.utils.utils_session import SessionUtils
from src.config.config import global_config
from src.core.types import ActionInfo
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.services.llm_service import LLMServiceClient
from src.maisaka.context_messages import (
AssistantMessage,
LLMContextMessage,
@@ -34,8 +32,8 @@ from src.maisaka.context_messages import (
SessionBackedMessage,
ToolResultMessage,
)
from src.maisaka.display.prompt_cli_renderer import PromptCLIVisualizer
from src.maisaka.message_adapter import clone_message_sequence, parse_speaker_content
from src.maisaka.prompt_cli_renderer import PromptCLIVisualizer
from src.plugin_runtime.hook_payloads import serialize_prompt_messages
from .maisaka_expression_selector import maisaka_expression_selector
@@ -51,17 +49,24 @@ class MaisakaReplyContext:
selected_expression_ids: List[int] = field(default_factory=list)
class MaisakaReplyGenerator:
"""生成 Maisaka 的最终可见回复(多模态管线)"""
class BaseMaisakaReplyGenerator:
"""Maisaka replyer 的共享实现"""
def __init__(
self,
*,
chat_stream: Optional[BotChatSession] = None,
request_type: str = "maisaka_replyer",
llm_client_cls: Any,
load_prompt_func: Callable[..., str],
enable_visual_message: bool,
) -> None:
self.chat_stream = chat_stream
self.request_type = request_type
self.express_model = LLMServiceClient(
self._llm_client_cls = llm_client_cls
self._load_prompt = load_prompt_func
self._enable_visual_message = enable_visual_message
self.express_model = llm_client_cls(
task_name="replyer",
request_type=request_type,
)
@@ -232,7 +237,7 @@ class MaisakaReplyGenerator:
session_id = self._resolve_session_id(stream_id)
try:
system_prompt = load_prompt(
system_prompt = self._load_prompt(
"maisaka_replyer",
bot_name=global_config.bot.nickname,
group_chat_attention_block=self._build_group_chat_attention_block(session_id),
@@ -255,17 +260,20 @@ class MaisakaReplyGenerator:
return f"{system_prompt}\n\n" + "\n\n".join(sections)
def _build_reply_instruction(self) -> str:
return "请自然地回复。不要输出多余说明、括号、at 或额外标记,只输出实际要发送的内容。"
return "请自然地回复。不要输出多余说明、括号、@ 或额外标记,只输出实际要发送的内容。"
def _build_multimodal_user_message(
def _build_visual_user_message(
self,
message: SessionBackedMessage,
) -> Optional[Message]:
if not self._enable_visual_message:
return None
raw_message = clone_message_sequence(message.raw_message)
if not raw_message.components:
raw_message = MessageSequence([TextComponent(message.processed_plain_text)])
multimodal_message = SessionBackedMessage(
visual_message = SessionBackedMessage(
raw_message=raw_message,
visible_text=message.processed_plain_text,
timestamp=message.timestamp,
@@ -273,7 +281,7 @@ class MaisakaReplyGenerator:
original_message=message.original_message,
source_kind=message.source_kind,
)
return multimodal_message.to_llm_message()
return visual_message.to_llm_message()
def _build_history_messages(self, chat_history: List[LLMContextMessage]) -> List[Message]:
bot_nickname = global_config.bot.nickname.strip() or "Bot"
@@ -292,9 +300,9 @@ class MaisakaReplyGenerator:
)
continue
multimodal_message = self._build_multimodal_user_message(message)
if multimodal_message is not None:
messages.append(multimodal_message)
visual_message = self._build_visual_user_message(message)
if visual_message is not None:
messages.append(visual_message)
continue
for speaker_name, content_body in self._split_user_message_segments(message.processed_plain_text):
@@ -398,7 +406,6 @@ class MaisakaReplyGenerator:
selected_expression_ids: Optional[List[int]] = None,
sub_agent_runner: Optional[Callable[[str], Awaitable[str]]] = None,
) -> Tuple[bool, ReplyGenerationResult]:
def finalize(success_value: bool) -> Tuple[bool, ReplyGenerationResult]:
result.monitor_detail = build_reply_monitor_detail(result)
return success_value, result
@@ -460,7 +467,7 @@ class MaisakaReplyGenerator:
)
logger.info(
f"回复上下文完成: 流={stream_id} 已选表达={result.selected_expression_ids!r}"
f"回复上下文完成 流={stream_id} 已选表达={result.selected_expression_ids!r}"
)
prompt_started_at = time.perf_counter()
@@ -556,7 +563,7 @@ class MaisakaReplyGenerator:
return finalize(False)
logger.info(
f"Maisaka 回复器生成成功: 文本={response_text!r} "
f"Maisaka 回复器生成成功 文本={response_text!r} "
f"总耗时ms={result.metrics.overall_ms} 已选表达={result.selected_expression_ids!r}"
)
if show_replyer_prompt or show_replyer_reasoning:

View File

@@ -1,21 +0,0 @@
from typing import Type
from src.config.config import global_config
def get_maisaka_replyer_class() -> Type[object]:
"""根据配置返回 Maisaka replyer 类。"""
generator_type = get_maisaka_replyer_generator_type()
if generator_type == "multimodal":
from .maisaka_generator_multi import MaisakaReplyGenerator
return MaisakaReplyGenerator
from .maisaka_generator import MaisakaReplyGenerator
return MaisakaReplyGenerator
def get_maisaka_replyer_generator_type() -> str:
"""返回当前配置的 Maisaka replyer 生成器类型。"""
return "multimodal" if global_config.visual.multimodal_replyer else "legacy"

View File

@@ -1,12 +1,11 @@
from typing import TYPE_CHECKING, Any, Dict, Optional
from src.chat.message_receive.chat_manager import BotChatSession, chat_manager as _chat_manager
from src.chat.replyer.maisaka_replyer_factory import (
get_maisaka_replyer_class,
get_maisaka_replyer_generator_type,
)
from src.config.config import global_config
from src.common.logger import get_logger
from .maisaka_generator import MaisakaReplyGenerator
if TYPE_CHECKING:
from src.chat.replyer.group_generator import DefaultReplyer
from src.chat.replyer.private_generator import PrivateReplyer
@@ -20,6 +19,11 @@ class ReplyerManager:
def __init__(self) -> None:
self._repliers: Dict[str, Any] = {}
@staticmethod
def _get_maisaka_generator_type() -> str:
"""返回当前配置下 Maisaka replyer 的消息模式。"""
return "multimodal" if global_config.visual.multimodal_replyer else "legacy"
def get_replyer(
self,
chat_stream: Optional[BotChatSession] = None,
@@ -33,7 +37,7 @@ class ReplyerManager:
logger.warning("[ReplyerManager] 缺少 stream_id无法获取 replyer")
return None
generator_type = get_maisaka_replyer_generator_type() if replyer_type == "maisaka" else ""
generator_type = self._get_maisaka_generator_type() if replyer_type == "maisaka" else ""
cache_key = f"{replyer_type}:{generator_type}:{stream_id}"
if cache_key in self._repliers:
logger.info(f"[ReplyerManager] 命中缓存 replyer: cache_key={cache_key}")
@@ -50,13 +54,14 @@ class ReplyerManager:
)
try:
maisaka_replyer_class = get_maisaka_replyer_class()
replyer = maisaka_replyer_class(
chat_stream=target_stream,
request_type=request_type,
)
if replyer_type == "maisaka":
replyer = MaisakaReplyGenerator(
chat_stream=target_stream,
request_type=request_type,
)
else:
logger.warning(f"[ReplyerManager] 不支持的 replyer_type={replyer_type}")
return None
except Exception:
logger.exception(f"[ReplyerManager] 创建 replyer 失败: cache_key={cache_key}")
raise
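
With the factory module deleted, the generator mode survives only inside ReplyerManager's cache key, so toggling visual.multimodal_replyer yields a fresh replyer per stream instead of reusing a stale cached one. An illustrative restatement of the key rule (this helper is not part of the codebase):

def _cache_key(replyer_type: str, multimodal: bool, stream_id: str) -> str:
    # Mirrors ReplyerManager.get_replyer: for maisaka replyers the configured
    # mode is folded into the key alongside the replyer type and stream id.
    generator_type = ("multimodal" if multimodal else "legacy") if replyer_type == "maisaka" else ""
    return f"{replyer_type}:{generator_type}:{stream_id}"

assert _cache_key("maisaka", True, "session-1") == "maisaka:multimodal:session-1"
assert _cache_key("maisaka", False, "session-1") == "maisaka:legacy:session-1"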

View File

@@ -55,7 +55,7 @@ BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute()
MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute()
MMC_VERSION: str = "1.0.0"
CONFIG_VERSION: str = "8.5.5"
MODEL_CONFIG_VERSION: str = "1.13.1"
MODEL_CONFIG_VERSION: str = "1.14.0"
logger = get_logger("config")

View File

@@ -307,6 +307,15 @@ class ModelInfo(ConfigBase):
)
"""强制流式输出模式 (若模型不支持非流式输出, 请设置为true启用强制流式输出, 默认值为false)"""
visual: bool = Field(
default=False,
json_schema_extra={
"x-widget": "switch",
"x-icon": "image",
},
)
"""是否为多模态模型。开启后表示该模型支持视觉输入。"""
extra_params: dict[str, Any] = Field(
default_factory=dict,
json_schema_extra={

View File

@@ -228,6 +228,7 @@ def serialize_model_info_snapshot(model_info: ModelInfo) -> dict[str, Any]:
"model_identifier": model_info.model_identifier,
"name": model_info.name,
"temperature": model_info.temperature,
"visual": model_info.visual,
}
@@ -244,6 +245,7 @@ def deserialize_model_info_snapshot(raw_model_info: Any) -> ModelInfo:
model_identifier=str(raw_model_info.get("model_identifier") or ""),
name=str(raw_model_info.get("name") or ""),
temperature=raw_model_info.get("temperature"),
visual=bool(raw_model_info.get("visual", False)),
)
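
Model snapshots carry the new flag too, defaulting to False when the key is absent, so snapshots written before this commit still deserialize cleanly. A hypothetical roundtrip (the two helpers appear in this diff, but their module path is not shown, so the import is an assumption; field values are illustrative):

from src.plugin_runtime.hook_payloads import deserialize_model_info_snapshot  # assumed module path

raw_snapshot = {
    "model_identifier": "gpt-4o",  # illustrative values
    "name": "replyer-vision",
    "temperature": 0.7,
    "visual": True,
}
assert deserialize_model_info_snapshot(raw_snapshot).visual is True

# A pre-1.14.0 snapshot simply lacks the key and falls back to False.
legacy_snapshot = {k: v for k, v in raw_snapshot.items() if k != "visual"}
assert deserialize_model_info_snapshot(legacy_snapshot).visual is False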

View File

@@ -18,7 +18,7 @@ from src.common.message_server.server import Server, get_global_server
from src.common.remote import TelemetryHeartBeatTask
from src.config.config import config_manager, global_config
from src.manager.async_task_manager import async_task_manager
from src.maisaka.stage_status_board import disable_stage_status_board, enable_stage_status_board
from src.maisaka.display.stage_status_board import disable_stage_status_board, enable_stage_status_board
from src.plugin_runtime.integration import get_plugin_runtime_manager
from src.prompt.prompt_manager import prompt_manager
from src.services.memory_flow_service import memory_automation_service

View File

@@ -32,7 +32,7 @@ from src.services.llm_service import LLMServiceClient
from .builtin_tool import get_builtin_tools
from .context_messages import AssistantMessage, LLMContextMessage, ToolResultMessage
from .history_utils import drop_orphan_tool_results
from .prompt_cli_renderer import PromptCLIVisualizer
from .display.prompt_cli_renderer import PromptCLIVisualizer
TIMING_GATE_TOOL_NAMES = {"continue", "no_reply", "wait"}

View File

@@ -0,0 +1,33 @@
"""Maisaka 展示模块。"""
from .display_utils import (
build_tool_call_summary_lines,
format_token_count,
format_tool_call_for_display,
get_request_panel_style,
get_role_badge_label,
get_role_badge_style,
)
from .prompt_cli_renderer import PromptCLIVisualizer
from .prompt_preview_logger import PromptPreviewLogger
from .stage_status_board import (
disable_stage_status_board,
enable_stage_status_board,
remove_stage_status,
update_stage_status,
)
__all__ = [
"PromptCLIVisualizer",
"PromptPreviewLogger",
"build_tool_call_summary_lines",
"disable_stage_status_board",
"enable_stage_status_board",
"format_token_count",
"format_tool_call_for_display",
"get_request_panel_style",
"get_role_badge_label",
"get_role_badge_style",
"remove_stage_status",
"update_stage_status",
]

View File

@@ -4,15 +4,15 @@ from typing import Any
_REQUEST_PANEL_STYLE_MAP: dict[str, tuple[str, str]] = {
"planner": ("\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u5927\u6a21\u578b\u8bf7\u6c42 - \u5bf9\u8bdd\u5355\u6b65", "green"),
"timing_gate": ("\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u5927\u6a21\u578b\u8bf7\u6c42 - Timing Gate \u5b50\u4ee3\u7406", "bright_magenta"),
"replyer": ("\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u56de\u590d\u5668 Prompt", "bright_yellow"),
"planner": ("MaiSaka 大模型请求 - 对话单步", "green"),
"timing_gate": ("MaiSaka 大模型请求 - Timing Gate 子代理", "bright_magenta"),
"replyer": ("MaiSaka 回复器 Prompt", "bright_yellow"),
"emotion": ("MaiSaka Emotion Tool Prompt", "bright_cyan"),
"sub_agent": ("\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u5927\u6a21\u578b\u8bf7\u6c42 - \u5b50\u4ee3\u7406", "bright_blue"),
"sub_agent": ("MaiSaka 大模型请求 - 子代理", "bright_blue"),
}
_DEFAULT_REQUEST_PANEL_STYLE: tuple[str, str] = (
"\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u5927\u6a21\u578b\u8bf7\u6c42 - \u5bf9\u8bdd\u5355\u6b65",
"MaiSaka 大模型请求 - 对话单步",
"cyan",
)
@@ -24,10 +24,10 @@ _ROLE_BADGE_STYLE_MAP: dict[str, str] = {
}
_ROLE_BADGE_LABEL_MAP: dict[str, str] = {
"system": "\u7cfb\u7edf",
"user": "\u7528\u6237",
"assistant": "\u52a9\u624b",
"tool": "\u5de5\u5177",
"system": "系统",
"user": "用户",
"assistant": "助手",
"tool": "工具",
}
@@ -55,7 +55,7 @@ def get_role_badge_style(role: str) -> str:
def get_role_badge_label(role: str) -> str:
"""返回角色标签对应的展示文案。"""
return _ROLE_BADGE_LABEL_MAP.get(role, "\u672a\u77e5")
return _ROLE_BADGE_LABEL_MAP.get(role, "未知")
def format_tool_call_for_display(tool_call: Any) -> dict[str, Any]:

View File

@@ -181,6 +181,16 @@ class PromptCLIVisualizer:
padding=(0, 1),
)
@staticmethod
def _extract_image_pair(item: Any) -> tuple[str, str] | None:
"""兼容图片片段被序列化为 tuple 或 list 的两种形式。"""
if isinstance(item, (tuple, list)) and len(item) == 2:
image_format, image_base64 = item
if isinstance(image_format, str) and isinstance(image_base64, str):
return image_format, image_base64
return None
@classmethod
def _render_message_content(cls, content: Any, settings: PromptImageDisplaySettings) -> RenderableType:
if isinstance(content, str):
@@ -192,11 +202,11 @@ class PromptCLIVisualizer:
if isinstance(item, str):
parts.append(Text(item))
continue
if isinstance(item, tuple) and len(item) == 2:
image_format, image_base64 = item
if isinstance(image_format, str) and isinstance(image_base64, str):
parts.append(cls._render_image_item(image_format, image_base64, settings))
continue
image_pair = cls._extract_image_pair(item)
if image_pair is not None:
image_format, image_base64 = image_pair
parts.append(cls._render_image_item(image_format, image_base64, settings))
continue
if isinstance(item, dict) and item.get("type") == "text" and isinstance(item.get("text"), str):
parts.append(Text(item["text"]))
else:
@@ -218,8 +228,9 @@ class PromptCLIVisualizer:
if isinstance(item, str):
parts.append(item)
continue
if isinstance(item, tuple) and len(item) == 2:
image_format, image_base64 = item
image_pair = cls._extract_image_pair(item)
if image_pair is not None:
image_format, image_base64 = image_pair
approx_size = max(0, len(str(image_base64)) * 3 // 4)
parts.append(f"[图片 image/{image_format} {approx_size} B]")
continue
@@ -395,8 +406,9 @@ class PromptCLIVisualizer:
if isinstance(item, str):
parts.append(f"<pre>{html.escape(item)}</pre>")
continue
if isinstance(item, tuple) and len(item) == 2:
image_format, image_base64 = item
image_pair = cls._extract_image_pair(item)
if image_pair is not None:
image_format, image_base64 = image_pair
image_html = cls._render_image_item_html(str(image_format), str(image_base64))
parts.append(image_html)
continue

View File

@@ -8,6 +8,7 @@ from pathlib import Path
from typing import Dict
from uuid import uuid4
class PromptPreviewLogger:
"""负责保存 Maisaka Prompt 预览文件并控制目录容量。"""

View File

@@ -8,8 +8,8 @@ from typing import Any
import json
import os
import sys
import traceback
import time
import traceback
def _clear_screen() -> None:

View File

@@ -34,10 +34,10 @@ from src.plugin_runtime.hook_payloads import deserialize_prompt_messages
from .chat_loop_service import ChatResponse, MaisakaChatLoopService
from .context_messages import LLMContextMessage
from .display_utils import build_tool_call_summary_lines, format_token_count
from .prompt_cli_renderer import PromptCLIVisualizer
from .display.display_utils import build_tool_call_summary_lines, format_token_count
from .display.prompt_cli_renderer import PromptCLIVisualizer
from .display.stage_status_board import remove_stage_status, update_stage_status
from .reasoning_engine import MaisakaReasoningEngine
from .stage_status_board import remove_stage_status, update_stage_status
from .tool_provider import MaisakaBuiltinToolProvider
logger = get_logger("maisaka_runtime")

View File

@@ -267,6 +267,46 @@ def _parse_data_url_image(image_url: str) -> Tuple[str, str]:
return image_format, image_base64
def _append_image_content(message_builder: MessageBuilder, content_item: Any) -> bool:
"""向消息构建器追加图片片段。
兼容两种输入格式:
1. 旧序列化格式中的 `(image_format, image_base64)` 元组。
2. 标准字典片段中的 Data URL 或 `image_format`/`image_base64` 字段。
"""
if isinstance(content_item, (tuple, list)) and len(content_item) == 2:
image_format, image_base64 = content_item
if not isinstance(image_format, str) or not isinstance(image_base64, str):
raise ValueError("图片元组片段必须包含字符串类型的 image_format 和 image_base64")
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
return True
if not isinstance(content_item, dict):
return False
part_type = str(content_item.get("type", "text")).strip().lower()
if part_type not in {"image", "image_url", "input_image"}:
return False
image_url = content_item.get("image_url")
if isinstance(image_url, dict):
image_url = image_url.get("url")
if isinstance(image_url, str):
image_format, image_base64 = _parse_data_url_image(image_url)
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
return True
image_format = content_item.get("image_format")
image_base64 = content_item.get("image_base64")
if isinstance(image_format, str) and isinstance(image_base64, str):
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
return True
raise ValueError("图片片段缺少可识别的图片数据")
def _append_content_parts(message_builder: MessageBuilder, content: Any) -> None:
"""将原始消息内容追加到内部消息构建器。
@@ -293,8 +333,10 @@ def _append_content_parts(message_builder: MessageBuilder, content: Any) -> None
if isinstance(content_item, str):
message_builder.add_text_content(content_item)
continue
if _append_image_content(message_builder, content_item):
continue
if not isinstance(content_item, dict):
raise ValueError("消息内容列表中仅支持字符串或字典片段")
raise ValueError("消息内容列表中仅支持字符串、图片元组或字典片段")
part_type = str(content_item.get("type", "text")).strip().lower()
if part_type == "text":
@@ -304,22 +346,6 @@ def _append_content_parts(message_builder: MessageBuilder, content: Any) -> None
message_builder.add_text_content(text_content)
continue
if part_type in {"image", "image_url", "input_image"}:
image_url = content_item.get("image_url")
if isinstance(image_url, dict):
image_url = image_url.get("url")
if isinstance(image_url, str):
image_format, image_base64 = _parse_data_url_image(image_url)
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
continue
image_format = content_item.get("image_format")
image_base64 = content_item.get("image_base64")
if isinstance(image_format, str) and isinstance(image_base64, str):
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
continue
raise ValueError("图片片段缺少可识别的图片数据")
raise ValueError(f"不支持的消息片段类型: {part_type}")