From 6297c500119bd14719a6651e77d7ac0edc6777a0 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Wed, 15 Apr 2026 11:46:22 +0800 Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E4=BF=AE=E5=A4=8D=E9=9D=9E?= =?UTF-8?q?=E5=A4=9A=E6=A8=A1=E6=80=81=E6=A8=A1=E5=9E=8B=E6=84=8F=E5=A4=96?= =?UTF-8?q?=E4=BC=A0=E5=85=A5=E5=9B=BE=E7=89=87=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/replyer/maisaka_generator_base.py | 36 ++---------- src/maisaka/chat_loop_service.py | 38 ++++++++++++- src/maisaka/context_messages.py | 64 ++++++++++++++++++---- src/maisaka/reasoning_engine.py | 42 +------------- src/maisaka/visual_mode_utils.py | 43 +++++++++++++++ 5 files changed, 139 insertions(+), 84 deletions(-) create mode 100644 src/maisaka/visual_mode_utils.py diff --git a/src/chat/replyer/maisaka_generator_base.py b/src/chat/replyer/maisaka_generator_base.py index 18e90fd4..812b82d5 100644 --- a/src/chat/replyer/maisaka_generator_base.py +++ b/src/chat/replyer/maisaka_generator_base.py @@ -13,7 +13,6 @@ from src.chat.message_receive.chat_manager import BotChatSession from src.chat.message_receive.message import SessionMessage from src.chat.utils.utils import get_chat_type_and_target_info from src.cli.console import console -from src.common.data_models.message_component_data_model import MessageSequence, TextComponent from src.common.data_models.reply_generation_data_models import ( GenerationMetrics, LLMCompletionResult, @@ -32,9 +31,10 @@ from src.maisaka.context_messages import ( ReferenceMessage, SessionBackedMessage, ToolResultMessage, + build_llm_message_from_context, ) from src.maisaka.display.prompt_cli_renderer import PromptCLIVisualizer -from src.maisaka.message_adapter import clone_message_sequence, parse_speaker_content +from src.maisaka.message_adapter import parse_speaker_content from src.plugin_runtime.hook_payloads import serialize_prompt_messages from .maisaka_expression_selector import maisaka_expression_selector @@ -253,28 +253,6 @@ class BaseMaisakaReplyGenerator: def _build_reply_instruction(self) -> str: return "请自然地回复。不要输出多余说明、括号、@ 或额外标记,只输出实际要发送的内容。" - def _build_visual_user_message( - self, - message: SessionBackedMessage, - enable_visual_message: bool, - ) -> Optional[Message]: - if not enable_visual_message: - return None - - raw_message = clone_message_sequence(message.raw_message) - if not raw_message.components: - raw_message = MessageSequence([TextComponent(message.processed_plain_text)]) - - visual_message = SessionBackedMessage( - raw_message=raw_message, - visible_text=message.processed_plain_text, - timestamp=message.timestamp, - message_id=message.message_id, - original_message=message.original_message, - source_kind=message.source_kind, - ) - return visual_message.to_llm_message() - def _build_history_messages( self, chat_history: List[LLMContextMessage], @@ -294,12 +272,10 @@ class BaseMaisakaReplyGenerator: ) continue - visual_message = self._build_visual_user_message(message, enable_visual_message) - if visual_message is not None: - messages.append(visual_message) - continue - - llm_message = message.to_llm_message() + llm_message = build_llm_message_from_context( + message, + enable_visual_message=enable_visual_message, + ) if llm_message is not None: messages.append(llm_message) continue diff --git a/src/maisaka/chat_loop_service.py b/src/maisaka/chat_loop_service.py index f12fb049..f4a8a7db 100644 --- a/src/maisaka/chat_loop_service.py +++ b/src/maisaka/chat_loop_service.py @@ -30,9 +30,15 @@ from src.plugin_runtime.host.hook_spec_registry import HookSpec, HookSpecRegistr from src.services.llm_service import LLMServiceClient from .builtin_tool import get_builtin_tools -from .context_messages import AssistantMessage, LLMContextMessage, ToolResultMessage +from .context_messages import ( + AssistantMessage, + LLMContextMessage, + ToolResultMessage, + build_llm_message_from_context, +) from .history_utils import drop_orphan_tool_results from .display.prompt_cli_renderer import PromptCLIVisualizer +from .visual_mode_utils import resolve_enable_visual_planner TIMING_GATE_TOOL_NAMES = {"continue", "no_reply", "wait"} @@ -395,6 +401,7 @@ class MaisakaChatLoopService: self, selected_history: List[LLMContextMessage], *, + enable_visual_message: bool, injected_user_messages: Sequence[str] | None = None, system_prompt: Optional[str] = None, ) -> List[Message]: @@ -413,7 +420,10 @@ class MaisakaChatLoopService: messages.append(system_msg.build()) for msg in selected_history: - llm_message = msg.to_llm_message() + llm_message = build_llm_message_from_context( + msg, + enable_visual_message=enable_visual_message, + ) if llm_message is not None: messages.append(llm_message) @@ -475,12 +485,15 @@ class MaisakaChatLoopService: if not self._prompts_loaded: await self.ensure_chat_prompt_loaded() + enable_visual_message = self._resolve_enable_visual_message(request_kind) selected_history, selection_reason = self.select_llm_context_messages( chat_history, request_kind=request_kind, + enable_visual_message=enable_visual_message, ) built_messages = self._build_request_messages( selected_history, + enable_visual_message=enable_visual_message, injected_user_messages=injected_user_messages, ) @@ -602,6 +615,7 @@ class MaisakaChatLoopService: def select_llm_context_messages( chat_history: List[LLMContextMessage], *, + enable_visual_message: Optional[bool] = None, request_kind: str = "planner", max_context_size: Optional[int] = None, ) -> tuple[List[LLMContextMessage], str]: @@ -615,9 +629,21 @@ class MaisakaChatLoopService: selected_indices: List[int] = [] counted_message_count = 0 + active_enable_visual_message = ( + enable_visual_message + if enable_visual_message is not None + else MaisakaChatLoopService._resolve_enable_visual_message(request_kind) + ) + for index in range(len(filtered_history) - 1, -1, -1): message = filtered_history[index] - if message.to_llm_message() is None: + if ( + build_llm_message_from_context( + message, + enable_visual_message=active_enable_visual_message, + ) + is None + ): continue selected_indices.append(index) @@ -683,6 +709,12 @@ class MaisakaChatLoopService: return filtered_history + @staticmethod + def _resolve_enable_visual_message(request_kind: str) -> bool: + if request_kind in {"planner", "timing_gate"}: + return resolve_enable_visual_planner() + return True + @staticmethod def _hide_early_assistant_messages( selected_history: List[LLMContextMessage], diff --git a/src/maisaka/context_messages.py b/src/maisaka/context_messages.py index c96e9993..cefa7dc4 100644 --- a/src/maisaka/context_messages.py +++ b/src/maisaka/context_messages.py @@ -40,10 +40,15 @@ def _guess_image_format(image_bytes: bytes) -> Optional[str]: return None -def _append_emoji_component(builder: MessageBuilder, component: EmojiComponent) -> bool: +def _append_emoji_component( + builder: MessageBuilder, + component: EmojiComponent, + *, + enable_visual_message: bool, +) -> bool: """将表情组件追加到 LLM 消息构建器。""" image_format = _guess_image_format(component.binary_data) - if image_format and component.binary_data: + if enable_visual_message and image_format and component.binary_data: builder.add_text_content("[消息类型]表情包") builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8")) return True @@ -56,10 +61,15 @@ def _append_emoji_component(builder: MessageBuilder, component: EmojiComponent) return True -def _append_image_component(builder: MessageBuilder, component: ImageComponent) -> bool: +def _append_image_component( + builder: MessageBuilder, + component: ImageComponent, + *, + enable_visual_message: bool, +) -> bool: """将图片组件追加到 LLM 消息构建器。""" image_format = _guess_image_format(component.binary_data) - if image_format and component.binary_data: + if enable_visual_message and image_format and component.binary_data: builder.add_text_content("[消息类型]图片") builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8")) return True @@ -216,6 +226,7 @@ def _build_message_from_sequence( message_sequence: MessageSequence, fallback_text: str, *, + enable_visual_message: bool = True, tool_call_id: Optional[str] = None, tool_name: Optional[str] = None, tool_calls: Optional[list[ToolCall]] = None, @@ -238,11 +249,25 @@ def _build_message_from_sequence( continue if isinstance(component, EmojiComponent): - has_content = _append_emoji_component(builder, component) or has_content + has_content = ( + _append_emoji_component( + builder, + component, + enable_visual_message=enable_visual_message, + ) + or has_content + ) continue if isinstance(component, ImageComponent): - has_content = _append_image_component(builder, component) or has_content + has_content = ( + _append_image_component( + builder, + component, + enable_visual_message=enable_visual_message, + ) + or has_content + ) continue if isinstance(component, AtComponent): @@ -297,7 +322,7 @@ class LLMContextMessage(ABC): return self.__class__.__name__ @abstractmethod - def to_llm_message(self) -> Optional[Message]: + def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]: """转换为统一 LLM 消息。""" def consume_once(self) -> bool: @@ -328,11 +353,12 @@ class SessionBackedMessage(LLMContextMessage): def source(self) -> str: return self.source_kind - def to_llm_message(self) -> Optional[Message]: + def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]: return _build_message_from_sequence( RoleType.User, self.raw_message, self.processed_plain_text, + enable_visual_message=enable_visual_message, ) @classmethod @@ -366,7 +392,8 @@ class ComplexSessionMessage(SessionBackedMessage): def source(self) -> str: return f"{self.source_kind}:{self.complex_message_type}" - def to_llm_message(self) -> Optional[Message]: + def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]: + del enable_visual_message message_sequence = MessageSequence([TextComponent(self.prompt_text)]) return _build_message_from_sequence( RoleType.User, @@ -426,7 +453,8 @@ class ReferenceMessage(LLMContextMessage): def source(self) -> str: return self.reference_type.value - def to_llm_message(self) -> Optional[Message]: + def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]: + del enable_visual_message message_sequence = MessageSequence([TextComponent(self.processed_plain_text)]) return _build_message_from_sequence(RoleType.User, message_sequence, self.processed_plain_text) @@ -463,7 +491,8 @@ class AssistantMessage(LLMContextMessage): def source(self) -> str: return self.source_kind - def to_llm_message(self) -> Optional[Message]: + def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]: + del enable_visual_message message_sequence = MessageSequence([]) if self.content: message_sequence.text(self.content) @@ -501,7 +530,8 @@ class ToolResultMessage(LLMContextMessage): def source(self) -> str: return self.tool_name or "tool" - def to_llm_message(self) -> Optional[Message]: + def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]: + del enable_visual_message message_sequence = MessageSequence([TextComponent(self.content)]) return _build_message_from_sequence( RoleType.Tool, @@ -510,3 +540,13 @@ class ToolResultMessage(LLMContextMessage): tool_call_id=self.tool_call_id, tool_name=self.tool_name, ) + + +def build_llm_message_from_context( + context_message: LLMContextMessage, + *, + enable_visual_message: bool = True, +) -> Optional[Message]: + """将 Maisaka 内部上下文消息转换为发给 LLM 的统一消息。""" + + return context_message.to_llm_message(enable_visual_message=enable_visual_message) diff --git a/src/maisaka/reasoning_engine.py b/src/maisaka/reasoning_engine.py index b30008ab..0632a77f 100644 --- a/src/maisaka/reasoning_engine.py +++ b/src/maisaka/reasoning_engine.py @@ -14,7 +14,7 @@ from src.chat.message_receive.message import SessionMessage from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence from src.common.logger import get_logger from src.common.prompt_i18n import load_prompt -from src.config.config import config_manager, global_config +from src.config.config import global_config from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec from src.llm_models.exceptions import ReqAbortException from src.llm_models.payload_content.tool_option import ToolCall @@ -43,6 +43,7 @@ from .monitor_events import ( emit_timing_gate_result, ) from .planner_message_utils import build_planner_user_prefix_from_session_message +from .visual_mode_utils import resolve_enable_visual_planner if TYPE_CHECKING: from .runtime import MaisakaHeartFlowChatting @@ -738,47 +739,10 @@ class MaisakaReasoningEngine: planner_prefix: str, ) -> MessageSequence: message_sequence = build_prefixed_message_sequence(message.raw_message, planner_prefix) - if self._resolve_enable_visual_planner(): + if resolve_enable_visual_planner(): await self._hydrate_visual_components(message_sequence.components) return message_sequence - @staticmethod - def _resolve_enable_visual_planner() -> bool: - planner_mode = global_config.visual.planner_mode - planner_task_config = config_manager.get_model_config().model_task_config.planner - models_by_name = {model.name: model for model in config_manager.get_model_config().models} - - if planner_mode == "text": - return False - - planner_models: list[str] = list(planner_task_config.model_list) - missing_models = [model_name for model_name in planner_models if model_name not in models_by_name] - non_visual_models = [ - model_name for model_name in planner_models if model_name in models_by_name and not models_by_name[model_name].visual - ] - - if planner_mode == "multimodal": - if missing_models: - raise ValueError( - "planner_mode=multimodal,但 planner 任务存在未定义的模型:" - f"{', '.join(missing_models)}" - ) - if non_visual_models: - raise ValueError( - "planner_mode=multimodal,但 planner 任务存在未开启 visual 的模型:" - f"{', '.join(non_visual_models)}" - ) - return True - - if missing_models: - logger.warning( - "planner_mode=auto 时发现 planner 任务存在未定义模型:" - f"{', '.join(missing_models)},将退化为纯文本 planner" - ) - return False - - return bool(planner_models) and not non_visual_models - async def _hydrate_visual_components(self, planner_components: list[object]) -> None: """在 Maisaka 真正需要图片或表情时,按需回填二进制数据。""" load_tasks: list[asyncio.Task[None]] = [] diff --git a/src/maisaka/visual_mode_utils.py b/src/maisaka/visual_mode_utils.py new file mode 100644 index 00000000..d9c15a6e --- /dev/null +++ b/src/maisaka/visual_mode_utils.py @@ -0,0 +1,43 @@ +from src.common.logger import get_logger +from src.config.config import config_manager, global_config + +logger = get_logger("maisaka_visual_mode") + + +def resolve_enable_visual_planner() -> bool: + """根据 planner 配置解析当前是否应启用视觉消息。""" + + planner_mode = global_config.visual.planner_mode + planner_task_config = config_manager.get_model_config().model_task_config.planner + models_by_name = {model.name: model for model in config_manager.get_model_config().models} + + if planner_mode == "text": + return False + + planner_models: list[str] = list(planner_task_config.model_list) + missing_models = [model_name for model_name in planner_models if model_name not in models_by_name] + non_visual_models = [ + model_name for model_name in planner_models if model_name in models_by_name and not models_by_name[model_name].visual + ] + + if planner_mode == "multimodal": + if missing_models: + raise ValueError( + "planner_mode=multimodal,但 planner 任务存在未定义的模型:" + f"{', '.join(missing_models)}" + ) + if non_visual_models: + raise ValueError( + "planner_mode=multimodal,但 planner 任务存在未开启 visual 的模型:" + f"{', '.join(non_visual_models)}" + ) + return True + + if missing_models: + logger.warning( + "planner_mode=auto 时发现 planner 任务存在未定义模型:" + f"{', '.join(missing_models)},将退化为纯文本 planner" + ) + return False + + return bool(planner_models) and not non_visual_models