fix:修复非多模态模型意外传入图片的问题
This commit is contained in:
@@ -13,7 +13,6 @@ from src.chat.message_receive.chat_manager import BotChatSession
|
||||
from src.chat.message_receive.message import SessionMessage
|
||||
from src.chat.utils.utils import get_chat_type_and_target_info
|
||||
from src.cli.console import console
|
||||
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
|
||||
from src.common.data_models.reply_generation_data_models import (
|
||||
GenerationMetrics,
|
||||
LLMCompletionResult,
|
||||
@@ -32,9 +31,10 @@ from src.maisaka.context_messages import (
|
||||
ReferenceMessage,
|
||||
SessionBackedMessage,
|
||||
ToolResultMessage,
|
||||
build_llm_message_from_context,
|
||||
)
|
||||
from src.maisaka.display.prompt_cli_renderer import PromptCLIVisualizer
|
||||
from src.maisaka.message_adapter import clone_message_sequence, parse_speaker_content
|
||||
from src.maisaka.message_adapter import parse_speaker_content
|
||||
from src.plugin_runtime.hook_payloads import serialize_prompt_messages
|
||||
|
||||
from .maisaka_expression_selector import maisaka_expression_selector
|
||||
@@ -253,28 +253,6 @@ class BaseMaisakaReplyGenerator:
|
||||
def _build_reply_instruction(self) -> str:
|
||||
return "请自然地回复。不要输出多余说明、括号、@ 或额外标记,只输出实际要发送的内容。"
|
||||
|
||||
def _build_visual_user_message(
|
||||
self,
|
||||
message: SessionBackedMessage,
|
||||
enable_visual_message: bool,
|
||||
) -> Optional[Message]:
|
||||
if not enable_visual_message:
|
||||
return None
|
||||
|
||||
raw_message = clone_message_sequence(message.raw_message)
|
||||
if not raw_message.components:
|
||||
raw_message = MessageSequence([TextComponent(message.processed_plain_text)])
|
||||
|
||||
visual_message = SessionBackedMessage(
|
||||
raw_message=raw_message,
|
||||
visible_text=message.processed_plain_text,
|
||||
timestamp=message.timestamp,
|
||||
message_id=message.message_id,
|
||||
original_message=message.original_message,
|
||||
source_kind=message.source_kind,
|
||||
)
|
||||
return visual_message.to_llm_message()
|
||||
|
||||
def _build_history_messages(
|
||||
self,
|
||||
chat_history: List[LLMContextMessage],
|
||||
@@ -294,12 +272,10 @@ class BaseMaisakaReplyGenerator:
|
||||
)
|
||||
continue
|
||||
|
||||
visual_message = self._build_visual_user_message(message, enable_visual_message)
|
||||
if visual_message is not None:
|
||||
messages.append(visual_message)
|
||||
continue
|
||||
|
||||
llm_message = message.to_llm_message()
|
||||
llm_message = build_llm_message_from_context(
|
||||
message,
|
||||
enable_visual_message=enable_visual_message,
|
||||
)
|
||||
if llm_message is not None:
|
||||
messages.append(llm_message)
|
||||
continue
|
||||
|
||||
@@ -30,9 +30,15 @@ from src.plugin_runtime.host.hook_spec_registry import HookSpec, HookSpecRegistr
|
||||
from src.services.llm_service import LLMServiceClient
|
||||
|
||||
from .builtin_tool import get_builtin_tools
|
||||
from .context_messages import AssistantMessage, LLMContextMessage, ToolResultMessage
|
||||
from .context_messages import (
|
||||
AssistantMessage,
|
||||
LLMContextMessage,
|
||||
ToolResultMessage,
|
||||
build_llm_message_from_context,
|
||||
)
|
||||
from .history_utils import drop_orphan_tool_results
|
||||
from .display.prompt_cli_renderer import PromptCLIVisualizer
|
||||
from .visual_mode_utils import resolve_enable_visual_planner
|
||||
|
||||
TIMING_GATE_TOOL_NAMES = {"continue", "no_reply", "wait"}
|
||||
|
||||
@@ -395,6 +401,7 @@ class MaisakaChatLoopService:
|
||||
self,
|
||||
selected_history: List[LLMContextMessage],
|
||||
*,
|
||||
enable_visual_message: bool,
|
||||
injected_user_messages: Sequence[str] | None = None,
|
||||
system_prompt: Optional[str] = None,
|
||||
) -> List[Message]:
|
||||
@@ -413,7 +420,10 @@ class MaisakaChatLoopService:
|
||||
messages.append(system_msg.build())
|
||||
|
||||
for msg in selected_history:
|
||||
llm_message = msg.to_llm_message()
|
||||
llm_message = build_llm_message_from_context(
|
||||
msg,
|
||||
enable_visual_message=enable_visual_message,
|
||||
)
|
||||
if llm_message is not None:
|
||||
messages.append(llm_message)
|
||||
|
||||
@@ -475,12 +485,15 @@ class MaisakaChatLoopService:
|
||||
|
||||
if not self._prompts_loaded:
|
||||
await self.ensure_chat_prompt_loaded()
|
||||
enable_visual_message = self._resolve_enable_visual_message(request_kind)
|
||||
selected_history, selection_reason = self.select_llm_context_messages(
|
||||
chat_history,
|
||||
request_kind=request_kind,
|
||||
enable_visual_message=enable_visual_message,
|
||||
)
|
||||
built_messages = self._build_request_messages(
|
||||
selected_history,
|
||||
enable_visual_message=enable_visual_message,
|
||||
injected_user_messages=injected_user_messages,
|
||||
)
|
||||
|
||||
@@ -602,6 +615,7 @@ class MaisakaChatLoopService:
|
||||
def select_llm_context_messages(
|
||||
chat_history: List[LLMContextMessage],
|
||||
*,
|
||||
enable_visual_message: Optional[bool] = None,
|
||||
request_kind: str = "planner",
|
||||
max_context_size: Optional[int] = None,
|
||||
) -> tuple[List[LLMContextMessage], str]:
|
||||
@@ -615,9 +629,21 @@ class MaisakaChatLoopService:
|
||||
selected_indices: List[int] = []
|
||||
counted_message_count = 0
|
||||
|
||||
active_enable_visual_message = (
|
||||
enable_visual_message
|
||||
if enable_visual_message is not None
|
||||
else MaisakaChatLoopService._resolve_enable_visual_message(request_kind)
|
||||
)
|
||||
|
||||
for index in range(len(filtered_history) - 1, -1, -1):
|
||||
message = filtered_history[index]
|
||||
if message.to_llm_message() is None:
|
||||
if (
|
||||
build_llm_message_from_context(
|
||||
message,
|
||||
enable_visual_message=active_enable_visual_message,
|
||||
)
|
||||
is None
|
||||
):
|
||||
continue
|
||||
|
||||
selected_indices.append(index)
|
||||
@@ -683,6 +709,12 @@ class MaisakaChatLoopService:
|
||||
|
||||
return filtered_history
|
||||
|
||||
@staticmethod
|
||||
def _resolve_enable_visual_message(request_kind: str) -> bool:
|
||||
if request_kind in {"planner", "timing_gate"}:
|
||||
return resolve_enable_visual_planner()
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _hide_early_assistant_messages(
|
||||
selected_history: List[LLMContextMessage],
|
||||
|
||||
@@ -40,10 +40,15 @@ def _guess_image_format(image_bytes: bytes) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _append_emoji_component(builder: MessageBuilder, component: EmojiComponent) -> bool:
|
||||
def _append_emoji_component(
|
||||
builder: MessageBuilder,
|
||||
component: EmojiComponent,
|
||||
*,
|
||||
enable_visual_message: bool,
|
||||
) -> bool:
|
||||
"""将表情组件追加到 LLM 消息构建器。"""
|
||||
image_format = _guess_image_format(component.binary_data)
|
||||
if image_format and component.binary_data:
|
||||
if enable_visual_message and image_format and component.binary_data:
|
||||
builder.add_text_content("[消息类型]表情包")
|
||||
builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8"))
|
||||
return True
|
||||
@@ -56,10 +61,15 @@ def _append_emoji_component(builder: MessageBuilder, component: EmojiComponent)
|
||||
return True
|
||||
|
||||
|
||||
def _append_image_component(builder: MessageBuilder, component: ImageComponent) -> bool:
|
||||
def _append_image_component(
|
||||
builder: MessageBuilder,
|
||||
component: ImageComponent,
|
||||
*,
|
||||
enable_visual_message: bool,
|
||||
) -> bool:
|
||||
"""将图片组件追加到 LLM 消息构建器。"""
|
||||
image_format = _guess_image_format(component.binary_data)
|
||||
if image_format and component.binary_data:
|
||||
if enable_visual_message and image_format and component.binary_data:
|
||||
builder.add_text_content("[消息类型]图片")
|
||||
builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8"))
|
||||
return True
|
||||
@@ -216,6 +226,7 @@ def _build_message_from_sequence(
|
||||
message_sequence: MessageSequence,
|
||||
fallback_text: str,
|
||||
*,
|
||||
enable_visual_message: bool = True,
|
||||
tool_call_id: Optional[str] = None,
|
||||
tool_name: Optional[str] = None,
|
||||
tool_calls: Optional[list[ToolCall]] = None,
|
||||
@@ -238,11 +249,25 @@ def _build_message_from_sequence(
|
||||
continue
|
||||
|
||||
if isinstance(component, EmojiComponent):
|
||||
has_content = _append_emoji_component(builder, component) or has_content
|
||||
has_content = (
|
||||
_append_emoji_component(
|
||||
builder,
|
||||
component,
|
||||
enable_visual_message=enable_visual_message,
|
||||
)
|
||||
or has_content
|
||||
)
|
||||
continue
|
||||
|
||||
if isinstance(component, ImageComponent):
|
||||
has_content = _append_image_component(builder, component) or has_content
|
||||
has_content = (
|
||||
_append_image_component(
|
||||
builder,
|
||||
component,
|
||||
enable_visual_message=enable_visual_message,
|
||||
)
|
||||
or has_content
|
||||
)
|
||||
continue
|
||||
|
||||
if isinstance(component, AtComponent):
|
||||
@@ -297,7 +322,7 @@ class LLMContextMessage(ABC):
|
||||
return self.__class__.__name__
|
||||
|
||||
@abstractmethod
def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]:
    """Convert this context message into the unified LLM message.

    Args:
        enable_visual_message: Flag supplied by the request builders. The
            overrides visible in this file either forward it to
            ``_build_message_from_sequence`` or explicitly discard it.

    Returns:
        The LLM message, or ``None``; callers skip ``None`` results.
    """
|
||||
|
||||
def consume_once(self) -> bool:
|
||||
@@ -328,11 +353,12 @@ class SessionBackedMessage(LLMContextMessage):
|
||||
def source(self) -> str:
|
||||
return self.source_kind
|
||||
|
||||
def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]:
    """Build the user-role LLM message for this session-backed message.

    Args:
        enable_visual_message: Forwarded unchanged to
            ``_build_message_from_sequence``.

    Returns:
        Whatever ``_build_message_from_sequence`` returns for the raw message
        sequence, with ``processed_plain_text`` as the fallback text.
    """
    return _build_message_from_sequence(
        RoleType.User,
        self.raw_message,
        self.processed_plain_text,
        enable_visual_message=enable_visual_message,
    )
|
||||
|
||||
@classmethod
|
||||
@@ -366,7 +392,8 @@ class ComplexSessionMessage(SessionBackedMessage):
|
||||
def source(self) -> str:
|
||||
return f"{self.source_kind}:{self.complex_message_type}"
|
||||
|
||||
def to_llm_message(self) -> Optional[Message]:
|
||||
def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]:
|
||||
del enable_visual_message
|
||||
message_sequence = MessageSequence([TextComponent(self.prompt_text)])
|
||||
return _build_message_from_sequence(
|
||||
RoleType.User,
|
||||
@@ -426,7 +453,8 @@ class ReferenceMessage(LLMContextMessage):
|
||||
def source(self) -> str:
|
||||
return self.reference_type.value
|
||||
|
||||
def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]:
    """Build the user-role LLM message for this reference message.

    Args:
        enable_visual_message: Accepted for signature parity with the base
            class and discarded; the message is built from a single text
            component.

    Returns:
        Whatever ``_build_message_from_sequence`` returns for a sequence
        holding only ``processed_plain_text``.
    """
    # The sequence below is text-only, so the flag is intentionally unused.
    del enable_visual_message
    message_sequence = MessageSequence([TextComponent(self.processed_plain_text)])
    return _build_message_from_sequence(RoleType.User, message_sequence, self.processed_plain_text)
|
||||
|
||||
@@ -463,7 +491,8 @@ class AssistantMessage(LLMContextMessage):
|
||||
def source(self) -> str:
|
||||
return self.source_kind
|
||||
|
||||
def to_llm_message(self) -> Optional[Message]:
|
||||
def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]:
|
||||
del enable_visual_message
|
||||
message_sequence = MessageSequence([])
|
||||
if self.content:
|
||||
message_sequence.text(self.content)
|
||||
@@ -501,7 +530,8 @@ class ToolResultMessage(LLMContextMessage):
|
||||
def source(self) -> str:
|
||||
return self.tool_name or "tool"
|
||||
|
||||
def to_llm_message(self) -> Optional[Message]:
|
||||
def to_llm_message(self, enable_visual_message: bool = True) -> Optional[Message]:
|
||||
del enable_visual_message
|
||||
message_sequence = MessageSequence([TextComponent(self.content)])
|
||||
return _build_message_from_sequence(
|
||||
RoleType.Tool,
|
||||
@@ -510,3 +540,13 @@ class ToolResultMessage(LLMContextMessage):
|
||||
tool_call_id=self.tool_call_id,
|
||||
tool_name=self.tool_name,
|
||||
)
|
||||
|
||||
|
||||
def build_llm_message_from_context(
    context_message: LLMContextMessage,
    *,
    enable_visual_message: bool = True,
) -> Optional[Message]:
    """Convert a Maisaka internal context message into the unified message sent to the LLM.

    Args:
        context_message: The context message to convert.
        enable_visual_message: Passed by keyword to
            ``context_message.to_llm_message``.

    Returns:
        The value returned by ``context_message.to_llm_message``.
    """
    llm_message = context_message.to_llm_message(enable_visual_message=enable_visual_message)
    return llm_message
|
||||
|
||||
@@ -14,7 +14,7 @@ from src.chat.message_receive.message import SessionMessage
|
||||
from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence
|
||||
from src.common.logger import get_logger
|
||||
from src.common.prompt_i18n import load_prompt
|
||||
from src.config.config import config_manager, global_config
|
||||
from src.config.config import global_config
|
||||
from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec
|
||||
from src.llm_models.exceptions import ReqAbortException
|
||||
from src.llm_models.payload_content.tool_option import ToolCall
|
||||
@@ -43,6 +43,7 @@ from .monitor_events import (
|
||||
emit_timing_gate_result,
|
||||
)
|
||||
from .planner_message_utils import build_planner_user_prefix_from_session_message
|
||||
from .visual_mode_utils import resolve_enable_visual_planner
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .runtime import MaisakaHeartFlowChatting
|
||||
@@ -738,47 +739,10 @@ class MaisakaReasoningEngine:
|
||||
planner_prefix: str,
|
||||
) -> MessageSequence:
|
||||
message_sequence = build_prefixed_message_sequence(message.raw_message, planner_prefix)
|
||||
if self._resolve_enable_visual_planner():
|
||||
if resolve_enable_visual_planner():
|
||||
await self._hydrate_visual_components(message_sequence.components)
|
||||
return message_sequence
|
||||
|
||||
@staticmethod
def _resolve_enable_visual_planner() -> bool:
    """Report whether the planner should receive visual content.

    Delegates to ``resolve_enable_visual_planner`` from ``.visual_mode_utils``,
    which this module imports, so the mode-resolution rules live in one place
    instead of being duplicated here.

    Returns:
        The helper's result.

    Raises:
        ValueError: Propagated from the helper when ``planner_mode`` is
            ``"multimodal"`` and a planner model is undefined or not visual.
    """
    return resolve_enable_visual_planner()
|
||||
|
||||
async def _hydrate_visual_components(self, planner_components: list[object]) -> None:
|
||||
"""在 Maisaka 真正需要图片或表情时,按需回填二进制数据。"""
|
||||
load_tasks: list[asyncio.Task[None]] = []
|
||||
|
||||
43
src/maisaka/visual_mode_utils.py
Normal file
43
src/maisaka/visual_mode_utils.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from src.common.logger import get_logger
|
||||
from src.config.config import config_manager, global_config
|
||||
|
||||
logger = get_logger("maisaka_visual_mode")
|
||||
|
||||
|
||||
def resolve_enable_visual_planner() -> bool:
    """Resolve from the planner configuration whether visual messages should be enabled.

    Behaviour by ``global_config.visual.planner_mode``:

    - ``"text"``: always ``False``; the model configuration is not consulted.
    - ``"multimodal"``: ``True``, after checking that every planner model is
      defined and has ``visual`` enabled.
    - any other value (handled as auto): ``True`` only when the planner task
      lists at least one model and all listed models are defined and visual.
      An undefined model logs a warning and yields ``False``.

    Returns:
        Whether visual messages should be enabled for the planner.

    Raises:
        ValueError: In ``"multimodal"`` mode, when a planner model is undefined
            or does not have ``visual`` enabled.
    """
    planner_mode = global_config.visual.planner_mode
    if planner_mode == "text":
        # Text mode never depends on the model list, so skip the lookups below.
        return False

    # Read the model configuration once so both views come from the same object.
    model_config = config_manager.get_model_config()
    models_by_name = {model.name: model for model in model_config.models}
    planner_models: list[str] = list(model_config.model_task_config.planner.model_list)

    missing_models = [model_name for model_name in planner_models if model_name not in models_by_name]
    non_visual_models = [
        model_name
        for model_name in planner_models
        if model_name in models_by_name and not models_by_name[model_name].visual
    ]

    if planner_mode == "multimodal":
        # Explicit multimodal mode is strict: misconfiguration is an error.
        if missing_models:
            raise ValueError(
                "planner_mode=multimodal,但 planner 任务存在未定义的模型:"
                f"{', '.join(missing_models)}"
            )
        if non_visual_models:
            raise ValueError(
                "planner_mode=multimodal,但 planner 任务存在未开启 visual 的模型:"
                f"{', '.join(non_visual_models)}"
            )
        return True

    # NOTE(review): every remaining mode value is handled as "auto" here;
    # confirm the config schema restricts planner_mode to the three known values.
    if missing_models:
        logger.warning(
            "planner_mode=auto 时发现 planner 任务存在未定义模型:"
            f"{', '.join(missing_models)},将退化为纯文本 planner"
        )
        return False

    # Auto mode enables visuals only for a non-empty, fully visual model list.
    return bool(planner_models) and not non_visual_models
|
||||
Reference in New Issue
Block a user