feat: maisaka 正确解析图片原始数据、reply 信息

This commit is contained in:
SengokuCola
2026-04-03 17:22:53 +08:00
parent b74b60cb1a
commit fc753f7e9f
11 changed files with 237 additions and 61 deletions

View File

@@ -6,7 +6,6 @@ from base64 import b64decode
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from src.chat.message_receive.message import SessionMessage
from src.chat.utils.utils import process_llm_response
from src.common.data_models.message_component_data_model import EmojiComponent, MessageSequence, TextComponent
from src.config.config import global_config

View File

@@ -15,7 +15,6 @@ from rich.panel import Panel
from src.cli.console import console
from src.common.data_models.llm_service_data_models import LLMGenerationOptions
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt
from src.common.utils.utils_session import SessionUtils
@@ -38,9 +37,7 @@ from src.plugin_runtime.host.hook_spec_registry import HookSpec, HookSpecRegistr
from src.services.llm_service import LLMServiceClient
from .builtin_tool import get_builtin_tools
from .context_messages import AssistantMessage, LLMContextMessage, SessionBackedMessage, ToolResultMessage
from .message_adapter import format_speaker_content
from .planner_message_utils import build_session_backed_text_message
from .context_messages import AssistantMessage, LLMContextMessage, ToolResultMessage
from .prompt_cli_renderer import PromptCLIVisualizer
@@ -324,7 +321,7 @@ class MaisakaChatLoopService:
if not prompt_lines:
return ""
return f"在该聊天中的注意事项:\n" + "\n\n".join(prompt_lines) + "\n"
return "在该聊天中的注意事项:\n" + "\n\n".join(prompt_lines) + "\n"
@staticmethod
def _get_chat_prompt_for_chat(chat_id: str, is_group_chat: Optional[bool]) -> str:

View File

@@ -11,7 +11,13 @@ import base64
from PIL import Image as PILImage
from src.chat.message_receive.message import SessionMessage
from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent
from src.common.data_models.message_component_data_model import (
EmojiComponent,
ImageComponent,
MessageSequence,
ReplyComponent,
TextComponent,
)
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.llm_models.payload_content.tool_option import ToolCall
@@ -27,11 +33,42 @@ def _guess_image_format(image_bytes: bytes) -> Optional[str]:
return None
def _build_binary_component_type_text(component: EmojiComponent | ImageComponent) -> str:
    """Return the explicit message-type marker text for an image-like component.

    An ``EmojiComponent`` yields the sticker marker; any other component
    passed in yields the plain image marker.
    """
    return "[消息类型]表情包" if isinstance(component, EmojiComponent) else "[消息类型]图片"
def _append_emoji_component(builder: MessageBuilder, component: EmojiComponent) -> bool:
    """Append an emoji component to the LLM message builder.

    When the component carries binary data whose format can be guessed, a
    type marker plus the base64-encoded image are added. Otherwise the
    component's ``content`` text is added, if it has any.

    Returns:
        True if anything was added to ``builder``, False otherwise.
    """
    raw_bytes = component.binary_data
    detected_format = _guess_image_format(raw_bytes)
    if not (detected_format and raw_bytes):
        # No usable image payload: fall back to the textual content, if any.
        fallback_text = component.content
        if not fallback_text:
            return False
        builder.add_text_content(fallback_text)
        return True

    encoded_payload = base64.b64encode(raw_bytes).decode("utf-8")
    builder.add_text_content("[消息类型]表情包")
    builder.add_image_content(detected_format, encoded_payload)
    return True
def _append_image_component(builder: MessageBuilder, component: ImageComponent) -> bool:
    """Append an image component to the LLM message builder.

    When the component carries binary data whose format can be guessed, a
    type marker plus the base64-encoded image are added. Otherwise the
    component's ``content`` text is added, if it has any.

    Returns:
        True if anything was added to ``builder``, False otherwise.
    """
    payload = component.binary_data
    fmt = _guess_image_format(payload)
    has_image = bool(fmt and payload)

    if has_image:
        builder.add_text_content("[消息类型]图片")
        builder.add_image_content(fmt, base64.b64encode(payload).decode("utf-8"))
        return True

    # No usable image payload: fall back to the textual content, if any.
    text = component.content
    if text:
        builder.add_text_content(text)
    return bool(text)
def _append_reply_component(builder: MessageBuilder, component: ReplyComponent) -> bool:
    """Append a quoted-reply marker for the component to the LLM message builder.

    The target message id is stripped of surrounding whitespace; if nothing
    remains, the builder is left untouched.

    Returns:
        True if a marker was added to ``builder``, False otherwise.
    """
    reply_target = component.target_message_id.strip()
    if reply_target:
        builder.add_text_content(f"[引用回复]({reply_target})")
        return True
    return False
def _build_message_from_sequence(
@@ -57,17 +94,17 @@ def _build_message_from_sequence(
has_content = True
continue
if isinstance(component, (EmojiComponent, ImageComponent)):
image_format = _guess_image_format(component.binary_data)
if image_format and component.binary_data:
builder.add_text_content(_build_binary_component_type_text(component))
builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8"))
has_content = True
continue
if isinstance(component, EmojiComponent):
has_content = _append_emoji_component(builder, component) or has_content
continue
if component.content:
builder.add_text_content(component.content)
has_content = True
if isinstance(component, ImageComponent):
has_content = _append_image_component(builder, component) or has_content
continue
if isinstance(component, ReplyComponent):
has_content = _append_reply_component(builder, component) or has_content
continue
if not has_content and fallback_text:
builder.add_text_content(fallback_text)

View File

@@ -5,7 +5,13 @@ from datetime import datetime
from typing import Optional
import re
from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent
from src.common.data_models.message_component_data_model import (
EmojiComponent,
ImageComponent,
MessageSequence,
ReplyComponent,
TextComponent,
)
SPEAKER_PREFIX_PATTERN = re.compile(
r"^(?:(?P<timestamp>\d{2}:\d{2}:\d{2}))?(?:\[msg_id:(?P<message_id>[^\]]+)\])?\[(?P<speaker>[^\]]+)\](?P<content>.*)$",
@@ -65,5 +71,11 @@ def build_visible_text_from_sequence(message_sequence: MessageSequence) -> str:
if isinstance(component, ImageComponent):
parts.append("[图片]")
continue
if isinstance(component, ReplyComponent):
target_message_id = component.target_message_id.strip()
if target_message_id:
parts.append(f"[引用回复]({target_message_id})")
return "".join(parts)

View File

@@ -12,7 +12,7 @@ import traceback
from src.chat.heart_flow.heartFC_utils import CycleDetail
from src.chat.message_receive.message import SessionMessage
from src.chat.utils.utils import process_llm_response
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent
from src.common.logger import get_logger
from src.config.config import global_config
from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec
@@ -230,12 +230,10 @@ class MaisakaReasoningEngine:
planner_prefix = build_planner_user_prefix_from_session_message(message)
appended_component = False
if global_config.maisaka.direct_image_input:
source_sequence = getattr(message, "maisaka_original_raw_message", message.raw_message)
else:
source_sequence = message.raw_message
source_sequence = message.raw_message
planner_components = clone_message_sequence(source_sequence).components
if global_config.maisaka.direct_image_input:
await self._hydrate_visual_components(planner_components)
if planner_components and isinstance(planner_components[0], TextComponent):
planner_components[0].text = planner_prefix + planner_components[0].text
else:
@@ -256,6 +254,24 @@ class MaisakaReasoningEngine:
return message_sequence, legacy_visible_text
async def _hydrate_visual_components(self, planner_components: list[object]) -> None:
    """Load binary data on demand for image and emoji components.

    For every ``ImageComponent`` or ``EmojiComponent`` in
    ``planner_components`` whose ``binary_data`` is empty, the matching
    loader coroutine is scheduled as a task. All tasks are awaited
    together; exceptions are collected rather than raised, and each one is
    logged as a warning.
    """
    pending: list[asyncio.Task[None]] = []
    for item in planner_components:
        # Pick the loader for this component; skip anything that needs no loading.
        if isinstance(item, ImageComponent) and not item.binary_data:
            loader = item.load_image_binary
        elif isinstance(item, EmojiComponent) and not item.binary_data:
            loader = item.load_emoji_binary
        else:
            continue
        pending.append(asyncio.create_task(loader()))

    if len(pending) == 0:
        return

    # return_exceptions=True: one failed load must not abort the others.
    outcomes = await asyncio.gather(*pending, return_exceptions=True)
    for result in outcomes:
        if not isinstance(result, Exception):
            continue
        logger.warning(f"{self._runtime.log_prefix} 回填图片或表情二进制数据失败Maisaka 将退化为文本占位: {result}")
def _build_legacy_visible_text(self, message: SessionMessage, source_sequence: MessageSequence) -> str:
user_info = message.message_info.user_info
speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id