diff --git a/src/chat/image_system/image_manager.py b/src/chat/image_system/image_manager.py index 492886d4..c15f647f 100644 --- a/src/chat/image_system/image_manager.py +++ b/src/chat/image_system/image_manager.py @@ -44,6 +44,12 @@ class ImageManager: logger.info("图片管理器初始化完成") + def _get_image_record(self, image_hash: str) -> Optional[Images]: + """根据哈希获取图片记录。""" + with get_db_session() as session: + statement = select(Images).filter_by(image_hash=image_hash, image_type=ImageType.IMAGE).limit(1) + return session.exec(statement).first() + async def get_image_description( self, *, @@ -76,9 +82,8 @@ class ImageManager: hash_str = hashlib.sha256(image_bytes).hexdigest() try: - with get_db_session() as session: - statement = select(Images).filter_by(image_hash=hash_str, image_type=ImageType.IMAGE).limit(1) - if record := session.exec(statement).first(): + if record := self._get_image_record(hash_str): + if record.vlm_processed and record.description: return record.description except Exception as e: logger.error(f"查询图片描述时发生错误: {e}") @@ -86,12 +91,17 @@ class ImageManager: if not image_bytes: logger.warning("图片哈希值未找到,且未提供图片字节数据,返回无描述") return "" + try: + await self.ensure_image_saved(image_bytes) + except Exception as e: + logger.error(f"保存图片文件时发生错误: {e}") + return "" if not wait_for_build: self._schedule_description_build(hash_str, image_bytes) return "" logger.info(f"图片描述未找到,哈希值: {hash_str},准备生成新描述") try: - image = await self.save_image_and_process(image_bytes) + image = await self.build_image_description(image_bytes) return image.description except Exception as e: logger.error(f"生成图片描述时发生错误: {e}") @@ -120,7 +130,7 @@ class ImageManager: """ try: logger.info(f"图片描述后台构建已开始,哈希值: {image_hash}") - await self.save_image_and_process(image_bytes) + await self.build_image_description(image_bytes) logger.info(f"图片描述后台构建完成,哈希值: {image_hash}") except Exception as exc: logger.warning(f"图片描述后台构建失败,哈希值: {image_hash},错误: {exc}") @@ -201,6 +211,7 @@ class ImageManager: return False record.description = image.description record.last_used_time = datetime.now() + record.vlm_processed = image.vlm_processed session.add(record) logger.info(f"成功更新图片描述: {image.file_hash},新描述: {image.description}") except Exception as e: @@ -239,22 +250,13 @@ class ImageManager: return False return True - async def save_image_and_process(self, image_bytes: bytes) -> MaiImage: - """ - 保存图片并生成描述 - - Args: - image_bytes (bytes): 图片的字节数据 - Returns: - return (MaiImage): 包含图片信息的 MaiImage 对象 - Raises: - Exception: 如果在保存或处理过程中发生错误 - """ + async def ensure_image_saved(self, image_bytes: bytes) -> MaiImage: + """先保存图片记录,确保后续可以按哈希回填图片内容。""" hash_str = hashlib.sha256(image_bytes).hexdigest() try: with get_db_session() as session: - statement = select(Images).filter_by(image_hash=hash_str).limit(1) + statement = select(Images).filter_by(image_hash=hash_str, image_type=ImageType.IMAGE).limit(1) if record := session.exec(statement).first(): logger.info(f"图片已存在于数据库中,哈希值: {hash_str}") record.last_used_time = datetime.now() @@ -270,18 +272,38 @@ class ImageManager: tmp_file_path = IMAGE_DIR / f"{hash_str}.tmp" with tmp_file_path.open("wb") as f: f.write(image_bytes) - mai_image = MaiImage(full_path=(IMAGE_DIR / f"{hash_str}.tmp"), image_bytes=image_bytes) + mai_image = MaiImage(full_path=tmp_file_path, image_bytes=image_bytes) await mai_image.calculate_hash_format() + if not self.register_image_to_db(mai_image): + raise RuntimeError(f"保存图片记录到数据库失败: {hash_str}") + return mai_image + + async def build_image_description(self, image_bytes: bytes) -> MaiImage: + """在图片已保存的前提下生成或补齐图片描述。""" + mai_image = await self.ensure_image_saved(image_bytes) + if mai_image.vlm_processed and mai_image.description: + return mai_image + desc = await self._generate_image_description(image_bytes, mai_image.image_format) mai_image.description = desc mai_image.vlm_processed = True - try: - self.register_image_to_db(mai_image) - except Exception as e: - logger.error(f"保存新图片记录到数据库时发生错误: {e}") - raise e + if not self.update_image_description(mai_image): + raise RuntimeError(f"更新图片描述失败: {mai_image.file_hash}") return mai_image + async def save_image_and_process(self, image_bytes: bytes) -> MaiImage: + """ + 保存图片并生成描述 + + Args: + image_bytes (bytes): 图片的字节数据 + Returns: + return (MaiImage): 包含图片信息的 MaiImage 对象 + Raises: + Exception: 如果在保存或处理过程中发生错误 + """ + return await self.build_image_description(image_bytes) + def cleanup_invalid_descriptions_in_db(self): """ 清理数据库中无效的图片记录 diff --git a/src/chat/message_receive/bot.py b/src/chat/message_receive/bot.py index 27a13821..1015bf07 100644 --- a/src/chat/message_receive/bot.py +++ b/src/chat/message_receive/bot.py @@ -1,7 +1,6 @@ """聊天消息入口与主链路调度。""" from contextlib import suppress -from copy import deepcopy from typing import Any, Dict, List, Optional import os @@ -550,9 +549,9 @@ class ChatBot: # if await self.handle_notice_message(message): # pass - # 处理消息内容,识别表情包等二进制数据并转化为文本描述 - if global_config.maisaka.direct_image_input: - message.maisaka_original_raw_message = deepcopy(message.raw_message) # type: ignore[attr-defined] + # 处理消息内容,识别表情包等二进制数据并转化为文本描述。 + # 如果 Maisaka 需要直接消费图片,会在后续构建 prompt 时按需回填图片二进制数据, + # 这里不再复制整条原始消息。 # 入站主链优先保证消息尽快入队,避免图片、表情包、语音分析阻塞适配器超时。 await message.process( enable_heavy_media_analysis=False, diff --git a/src/chat/message_receive/message.py b/src/chat/message_receive/message.py index 3cf5fdf5..b2cb3aa6 100644 --- a/src/chat/message_receive/message.py +++ b/src/chat/message_receive/message.py @@ -1,11 +1,10 @@ +import asyncio from asyncio import Task from typing import Dict, List, Sequence, Tuple from rich.traceback import install from sqlmodel import select -import asyncio - from src.common.logger import get_logger from src.common.database.database import get_db_session from src.common.database.database_model import Messages @@ -36,6 +35,102 @@ class MsgIDMapping: class SessionMessage(MaiMessage): + + #便于调试的打印函数 + def __str__(self) -> str: + """返回适合日志输出的消息摘要。""" + return self.to_debug_string() + + def __repr__(self) -> str: + """返回适合调试场景的消息摘要。""" + return self.to_debug_string() + + def to_debug_string(self) -> str: + """构建包含引用信息的调试字符串。 + + Returns: + str: 适合记录日志的消息摘要。 + """ + user_info = self.message_info.user_info + group_info = self.message_info.group_info + chat_type = "group" if group_info else "private" + group_id = group_info.group_id if group_info else None + group_name = group_info.group_name if group_info else None + component_summaries = [self._summarize_component(component) for component in self.raw_message.components] + raw_components = ", ".join(component_summaries) if component_summaries else "empty" + + return ( + "SessionMessage(" + f"message_id={self.message_id!r}, " + f"platform={self.platform!r}, " + f"chat_type={chat_type!r}, " + f"group_id={group_id!r}, " + f"group_name={group_name!r}, " + f"user_id={user_info.user_id!r}, " + f"user_nickname={user_info.user_nickname!r}, " + f"user_cardname={user_info.user_cardname!r}, " + f"reply_to={self.reply_to!r}, " + f"processed_plain_text={self._truncate_text(self.processed_plain_text)}, " + f"raw_components=[{raw_components}]" + ")" + ) + + @staticmethod + def _truncate_text(text: str | None, max_length: int = 120) -> str: + """截断较长文本,避免日志过长。 + + Args: + text: 原始文本。 + max_length: 最大保留长度。 + + Returns: + str: 截断后的文本表示。 + """ + if text is None: + return "None" + normalized_text = text.replace("\r", "\\r").replace("\n", "\\n") + if len(normalized_text) <= max_length: + return repr(normalized_text) + return repr(f"{normalized_text[:max_length]}...") + + def _summarize_component(self, component: StandardMessageComponents) -> str: + """生成单个消息组件的调试摘要。 + + Args: + component: 消息组件对象。 + + Returns: + str: 组件摘要文本。 + """ + if isinstance(component, TextComponent): + return f"Text(text={self._truncate_text(component.text, 80)})" + if isinstance(component, ImageComponent): + return f"Image(content={self._truncate_text(component.content or None, 60)})" + if isinstance(component, EmojiComponent): + return f"Emoji(content={self._truncate_text(component.content or None, 60)})" + if isinstance(component, AtComponent): + target_name = component.target_user_cardname or component.target_user_nickname or component.target_user_id + return f"At(target={target_name!r})" + if isinstance(component, VoiceComponent): + return f"Voice(content={self._truncate_text(component.content or None, 60)})" + if isinstance(component, ReplyComponent): + sender_name = ( + component.target_message_sender_cardname + or component.target_message_sender_nickname + or component.target_message_sender_id + ) + return ( + "Reply(" + f"target_message_id={component.target_message_id!r}, " + f"target_sender={sender_name!r}, " + f"target_content={self._truncate_text(component.target_message_content, 80)}" + ")" + ) + if isinstance(component, ForwardNodeComponent): + return f"ForwardNode(count={len(component.forward_components)})" + return f"{component.__class__.__name__}" + #便于调试的打印函数end + async def process( self, *, diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py index 6858ca19..48b3a2cb 100644 --- a/src/chat/replyer/group_generator.py +++ b/src/chat/replyer/group_generator.py @@ -35,7 +35,7 @@ from src.services import llm_service as llm_api from src.chat.logger.plan_reply_logger import PlanReplyLogger from src.memory_system.memory_retrieval import init_memory_retrieval_sys, build_memory_retrieval_prompt -from src.learners.jargon_explainer_old import explain_jargon_in_context, retrieve_concepts_with_jargon +from src.learners.jargon_explainer_old import explain_jargon_in_context from src.chat.utils.common_utils import TempMethodsExpression init_memory_retrieval_sys() diff --git a/src/chat/utils/statistic.py b/src/chat/utils/statistic.py index 25add4bf..28982f22 100644 --- a/src/chat/utils/statistic.py +++ b/src/chat/utils/statistic.py @@ -15,7 +15,6 @@ from src.common.database.database import get_db_session from src.common.database.database_model import Messages, ModelUsage, OnlineTime, ToolRecord from src.manager.async_task_manager import AsyncTask from src.manager.local_store_manager import local_storage -from src.config.config import global_config logger = get_logger("maibot_statistic") diff --git a/src/maisaka/builtin_tool/context.py b/src/maisaka/builtin_tool/context.py index 6bf3443d..4cf37986 100644 --- a/src/maisaka/builtin_tool/context.py +++ b/src/maisaka/builtin_tool/context.py @@ -6,7 +6,6 @@ from base64 import b64decode from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, List, Optional -from src.chat.message_receive.message import SessionMessage from src.chat.utils.utils import process_llm_response from src.common.data_models.message_component_data_model import EmojiComponent, MessageSequence, TextComponent from src.config.config import global_config diff --git a/src/maisaka/chat_loop_service.py b/src/maisaka/chat_loop_service.py index 8efb16bc..236277b6 100644 --- a/src/maisaka/chat_loop_service.py +++ b/src/maisaka/chat_loop_service.py @@ -15,7 +15,6 @@ from rich.panel import Panel from src.cli.console import console from src.common.data_models.llm_service_data_models import LLMGenerationOptions -from src.common.data_models.message_component_data_model import MessageSequence, TextComponent from src.common.logger import get_logger from src.common.prompt_i18n import load_prompt from src.common.utils.utils_session import SessionUtils @@ -38,9 +37,7 @@ from src.plugin_runtime.host.hook_spec_registry import HookSpec, HookSpecRegistr from src.services.llm_service import LLMServiceClient from .builtin_tool import get_builtin_tools -from .context_messages import AssistantMessage, LLMContextMessage, SessionBackedMessage, ToolResultMessage -from .message_adapter import format_speaker_content -from .planner_message_utils import build_session_backed_text_message +from .context_messages import AssistantMessage, LLMContextMessage, ToolResultMessage from .prompt_cli_renderer import PromptCLIVisualizer @@ -324,7 +321,7 @@ class MaisakaChatLoopService: if not prompt_lines: return "" - return f"在该聊天中的注意事项:\n" + "\n\n".join(prompt_lines) + "\n" + return "在该聊天中的注意事项:\n" + "\n\n".join(prompt_lines) + "\n" @staticmethod def _get_chat_prompt_for_chat(chat_id: str, is_group_chat: Optional[bool]) -> str: diff --git a/src/maisaka/context_messages.py b/src/maisaka/context_messages.py index 174da097..65404d20 100644 --- a/src/maisaka/context_messages.py +++ b/src/maisaka/context_messages.py @@ -11,7 +11,13 @@ import base64 from PIL import Image as PILImage from src.chat.message_receive.message import SessionMessage -from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent +from src.common.data_models.message_component_data_model import ( + EmojiComponent, + ImageComponent, + MessageSequence, + ReplyComponent, + TextComponent, +) from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType from src.llm_models.payload_content.tool_option import ToolCall @@ -27,11 +33,42 @@ def _guess_image_format(image_bytes: bytes) -> Optional[str]: return None -def _build_binary_component_type_text(component: EmojiComponent | ImageComponent) -> str: - """为图片类消息组件构造显式的消息类型标记。""" - if isinstance(component, EmojiComponent): - return "[消息类型]表情包" - return "[消息类型]图片" +def _append_emoji_component(builder: MessageBuilder, component: EmojiComponent) -> bool: + """将表情组件追加到 LLM 消息构建器。""" + image_format = _guess_image_format(component.binary_data) + if image_format and component.binary_data: + builder.add_text_content("[消息类型]表情包") + builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8")) + return True + + if component.content: + builder.add_text_content(component.content) + return True + return False + + +def _append_image_component(builder: MessageBuilder, component: ImageComponent) -> bool: + """将图片组件追加到 LLM 消息构建器。""" + image_format = _guess_image_format(component.binary_data) + if image_format and component.binary_data: + builder.add_text_content("[消息类型]图片") + builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8")) + return True + + if component.content: + builder.add_text_content(component.content) + return True + return False + + +def _append_reply_component(builder: MessageBuilder, component: ReplyComponent) -> bool: + """将回复组件追加到 LLM 消息构建器。""" + target_message_id = component.target_message_id.strip() + if not target_message_id: + return False + + builder.add_text_content(f"[引用回复]({target_message_id})") + return True def _build_message_from_sequence( @@ -57,17 +94,17 @@ def _build_message_from_sequence( has_content = True continue - if isinstance(component, (EmojiComponent, ImageComponent)): - image_format = _guess_image_format(component.binary_data) - if image_format and component.binary_data: - builder.add_text_content(_build_binary_component_type_text(component)) - builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8")) - has_content = True - continue + if isinstance(component, EmojiComponent): + has_content = _append_emoji_component(builder, component) or has_content + continue - if component.content: - builder.add_text_content(component.content) - has_content = True + if isinstance(component, ImageComponent): + has_content = _append_image_component(builder, component) or has_content + continue + + if isinstance(component, ReplyComponent): + has_content = _append_reply_component(builder, component) or has_content + continue if not has_content and fallback_text: builder.add_text_content(fallback_text) diff --git a/src/maisaka/message_adapter.py b/src/maisaka/message_adapter.py index b52d1baa..995f5b34 100644 --- a/src/maisaka/message_adapter.py +++ b/src/maisaka/message_adapter.py @@ -5,7 +5,13 @@ from datetime import datetime from typing import Optional import re -from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent +from src.common.data_models.message_component_data_model import ( + EmojiComponent, + ImageComponent, + MessageSequence, + ReplyComponent, + TextComponent, +) SPEAKER_PREFIX_PATTERN = re.compile( r"^(?:(?P\d{2}:\d{2}:\d{2}))?(?:\[msg_id:(?P[^\]]+)\])?\[(?P[^\]]+)\](?P.*)$", @@ -65,5 +71,11 @@ def build_visible_text_from_sequence(message_sequence: MessageSequence) -> str: if isinstance(component, ImageComponent): parts.append("[图片]") + continue + + if isinstance(component, ReplyComponent): + target_message_id = component.target_message_id.strip() + if target_message_id: + parts.append(f"[引用回复]({target_message_id})") return "".join(parts) diff --git a/src/maisaka/reasoning_engine.py b/src/maisaka/reasoning_engine.py index 1643f8c9..72539a5b 100644 --- a/src/maisaka/reasoning_engine.py +++ b/src/maisaka/reasoning_engine.py @@ -12,7 +12,7 @@ import traceback from src.chat.heart_flow.heartFC_utils import CycleDetail from src.chat.message_receive.message import SessionMessage from src.chat.utils.utils import process_llm_response -from src.common.data_models.message_component_data_model import MessageSequence, TextComponent +from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent from src.common.logger import get_logger from src.config.config import global_config from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec @@ -230,12 +230,10 @@ class MaisakaReasoningEngine: planner_prefix = build_planner_user_prefix_from_session_message(message) appended_component = False - if global_config.maisaka.direct_image_input: - source_sequence = getattr(message, "maisaka_original_raw_message", message.raw_message) - else: - source_sequence = message.raw_message - + source_sequence = message.raw_message planner_components = clone_message_sequence(source_sequence).components + if global_config.maisaka.direct_image_input: + await self._hydrate_visual_components(planner_components) if planner_components and isinstance(planner_components[0], TextComponent): planner_components[0].text = planner_prefix + planner_components[0].text else: @@ -256,6 +254,24 @@ class MaisakaReasoningEngine: return message_sequence, legacy_visible_text + async def _hydrate_visual_components(self, planner_components: list[object]) -> None: + """在 Maisaka 真正需要图片或表情时,按需回填二进制数据。""" + load_tasks: list[asyncio.Task[None]] = [] + for component in planner_components: + if isinstance(component, ImageComponent) and not component.binary_data: + load_tasks.append(asyncio.create_task(component.load_image_binary())) + continue + if isinstance(component, EmojiComponent) and not component.binary_data: + load_tasks.append(asyncio.create_task(component.load_emoji_binary())) + + if not load_tasks: + return + + results = await asyncio.gather(*load_tasks, return_exceptions=True) + for result in results: + if isinstance(result, Exception): + logger.warning(f"{self._runtime.log_prefix} 回填图片或表情二进制数据失败,Maisaka 将退化为文本占位: {result}") + def _build_legacy_visible_text(self, message: SessionMessage, source_sequence: MessageSequence) -> str: user_info = message.message_info.user_info speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id diff --git a/src/mcp_module/connection.py b/src/mcp_module/connection.py index c598e8bc..10a1ea5e 100644 --- a/src/mcp_module/connection.py +++ b/src/mcp_module/connection.py @@ -14,7 +14,7 @@ import httpx from src.cli.console import console from src.core.tooling import ToolExecutionResult -from .config import MCPClientRuntimeConfig, MCPRootRuntimeConfig, MCPServerRuntimeConfig +from .config import MCPClientRuntimeConfig, MCPServerRuntimeConfig from .hooks import MCPHostCallbacks from .models import ( MCPPromptResult,