feat:maisaka正确解析图片原始数据,reply信息

This commit is contained in:
SengokuCola
2026-04-03 17:22:53 +08:00
parent b74b60cb1a
commit fc753f7e9f
11 changed files with 237 additions and 61 deletions

View File

@@ -44,6 +44,12 @@ class ImageManager:
logger.info("图片管理器初始化完成") logger.info("图片管理器初始化完成")
def _get_image_record(self, image_hash: str) -> Optional[Images]:
    """Fetch the stored record for an image by its hash.

    Only rows whose ``image_type`` equals ``ImageType.IMAGE`` are considered.

    Args:
        image_hash: Value matched against ``Images.image_hash``.

    Returns:
        The first matching ``Images`` row, or ``None`` if no row matches.
    """
    lookup = select(Images).filter_by(image_hash=image_hash, image_type=ImageType.IMAGE).limit(1)
    # NOTE(review): the row leaves this method after the session context has
    # exited; whether its attributes remain readable afterwards depends on how
    # get_db_session() configures the session — confirm against callers.
    with get_db_session() as db:
        return db.exec(lookup).first()
async def get_image_description( async def get_image_description(
self, self,
*, *,
@@ -76,9 +82,8 @@ class ImageManager:
hash_str = hashlib.sha256(image_bytes).hexdigest() hash_str = hashlib.sha256(image_bytes).hexdigest()
try: try:
with get_db_session() as session: if record := self._get_image_record(hash_str):
statement = select(Images).filter_by(image_hash=hash_str, image_type=ImageType.IMAGE).limit(1) if record.vlm_processed and record.description:
if record := session.exec(statement).first():
return record.description return record.description
except Exception as e: except Exception as e:
logger.error(f"查询图片描述时发生错误: {e}") logger.error(f"查询图片描述时发生错误: {e}")
@@ -86,12 +91,17 @@ class ImageManager:
if not image_bytes: if not image_bytes:
logger.warning("图片哈希值未找到,且未提供图片字节数据,返回无描述") logger.warning("图片哈希值未找到,且未提供图片字节数据,返回无描述")
return "" return ""
try:
await self.ensure_image_saved(image_bytes)
except Exception as e:
logger.error(f"保存图片文件时发生错误: {e}")
return ""
if not wait_for_build: if not wait_for_build:
self._schedule_description_build(hash_str, image_bytes) self._schedule_description_build(hash_str, image_bytes)
return "" return ""
logger.info(f"图片描述未找到,哈希值: {hash_str},准备生成新描述") logger.info(f"图片描述未找到,哈希值: {hash_str},准备生成新描述")
try: try:
image = await self.save_image_and_process(image_bytes) image = await self.build_image_description(image_bytes)
return image.description return image.description
except Exception as e: except Exception as e:
logger.error(f"生成图片描述时发生错误: {e}") logger.error(f"生成图片描述时发生错误: {e}")
@@ -120,7 +130,7 @@ class ImageManager:
""" """
try: try:
logger.info(f"图片描述后台构建已开始,哈希值: {image_hash}") logger.info(f"图片描述后台构建已开始,哈希值: {image_hash}")
await self.save_image_and_process(image_bytes) await self.build_image_description(image_bytes)
logger.info(f"图片描述后台构建完成,哈希值: {image_hash}") logger.info(f"图片描述后台构建完成,哈希值: {image_hash}")
except Exception as exc: except Exception as exc:
logger.warning(f"图片描述后台构建失败,哈希值: {image_hash},错误: {exc}") logger.warning(f"图片描述后台构建失败,哈希值: {image_hash},错误: {exc}")
@@ -201,6 +211,7 @@ class ImageManager:
return False return False
record.description = image.description record.description = image.description
record.last_used_time = datetime.now() record.last_used_time = datetime.now()
record.vlm_processed = image.vlm_processed
session.add(record) session.add(record)
logger.info(f"成功更新图片描述: {image.file_hash},新描述: {image.description}") logger.info(f"成功更新图片描述: {image.file_hash},新描述: {image.description}")
except Exception as e: except Exception as e:
@@ -239,22 +250,13 @@ class ImageManager:
return False return False
return True return True
async def save_image_and_process(self, image_bytes: bytes) -> MaiImage: async def ensure_image_saved(self, image_bytes: bytes) -> MaiImage:
""" """先保存图片记录,确保后续可以按哈希回填图片内容。"""
保存图片并生成描述
Args:
image_bytes (bytes): 图片的字节数据
Returns:
return (MaiImage): 包含图片信息的 MaiImage 对象
Raises:
Exception: 如果在保存或处理过程中发生错误
"""
hash_str = hashlib.sha256(image_bytes).hexdigest() hash_str = hashlib.sha256(image_bytes).hexdigest()
try: try:
with get_db_session() as session: with get_db_session() as session:
statement = select(Images).filter_by(image_hash=hash_str).limit(1) statement = select(Images).filter_by(image_hash=hash_str, image_type=ImageType.IMAGE).limit(1)
if record := session.exec(statement).first(): if record := session.exec(statement).first():
logger.info(f"图片已存在于数据库中,哈希值: {hash_str}") logger.info(f"图片已存在于数据库中,哈希值: {hash_str}")
record.last_used_time = datetime.now() record.last_used_time = datetime.now()
@@ -270,18 +272,38 @@ class ImageManager:
tmp_file_path = IMAGE_DIR / f"{hash_str}.tmp" tmp_file_path = IMAGE_DIR / f"{hash_str}.tmp"
with tmp_file_path.open("wb") as f: with tmp_file_path.open("wb") as f:
f.write(image_bytes) f.write(image_bytes)
mai_image = MaiImage(full_path=(IMAGE_DIR / f"{hash_str}.tmp"), image_bytes=image_bytes) mai_image = MaiImage(full_path=tmp_file_path, image_bytes=image_bytes)
await mai_image.calculate_hash_format() await mai_image.calculate_hash_format()
if not self.register_image_to_db(mai_image):
raise RuntimeError(f"保存图片记录到数据库失败: {hash_str}")
return mai_image
async def build_image_description(self, image_bytes: bytes) -> MaiImage:
"""在图片已保存的前提下生成或补齐图片描述。"""
mai_image = await self.ensure_image_saved(image_bytes)
if mai_image.vlm_processed and mai_image.description:
return mai_image
desc = await self._generate_image_description(image_bytes, mai_image.image_format) desc = await self._generate_image_description(image_bytes, mai_image.image_format)
mai_image.description = desc mai_image.description = desc
mai_image.vlm_processed = True mai_image.vlm_processed = True
try: if not self.update_image_description(mai_image):
self.register_image_to_db(mai_image) raise RuntimeError(f"更新图片描述失败: {mai_image.file_hash}")
except Exception as e:
logger.error(f"保存新图片记录到数据库时发生错误: {e}")
raise e
return mai_image return mai_image
async def save_image_and_process(self, image_bytes: bytes) -> MaiImage:
    """Save an image and generate its description.

    Thin wrapper that delegates all work to ``build_image_description``.

    Args:
        image_bytes (bytes): Raw bytes of the image.

    Returns:
        MaiImage: The object returned by ``build_image_description``.

    Raises:
        Exception: Any error raised by ``build_image_description`` propagates.
    """
    described_image = await self.build_image_description(image_bytes)
    return described_image
def cleanup_invalid_descriptions_in_db(self): def cleanup_invalid_descriptions_in_db(self):
""" """
清理数据库中无效的图片记录 清理数据库中无效的图片记录

View File

@@ -1,7 +1,6 @@
"""聊天消息入口与主链路调度。""" """聊天消息入口与主链路调度。"""
from contextlib import suppress from contextlib import suppress
from copy import deepcopy
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import os import os
@@ -550,9 +549,9 @@ class ChatBot:
# if await self.handle_notice_message(message): # if await self.handle_notice_message(message):
# pass # pass
# 处理消息内容,识别表情包等二进制数据并转化为文本描述 # 处理消息内容,识别表情包等二进制数据并转化为文本描述
if global_config.maisaka.direct_image_input: # 如果 Maisaka 需要直接消费图片,会在后续构建 prompt 时按需回填图片二进制数据,
message.maisaka_original_raw_message = deepcopy(message.raw_message) # type: ignore[attr-defined] # 这里不再复制整条原始消息。
# 入站主链优先保证消息尽快入队,避免图片、表情包、语音分析阻塞适配器超时。 # 入站主链优先保证消息尽快入队,避免图片、表情包、语音分析阻塞适配器超时。
await message.process( await message.process(
enable_heavy_media_analysis=False, enable_heavy_media_analysis=False,

View File

@@ -1,11 +1,10 @@
import asyncio
from asyncio import Task from asyncio import Task
from typing import Dict, List, Sequence, Tuple from typing import Dict, List, Sequence, Tuple
from rich.traceback import install from rich.traceback import install
from sqlmodel import select from sqlmodel import select
import asyncio
from src.common.logger import get_logger from src.common.logger import get_logger
from src.common.database.database import get_db_session from src.common.database.database import get_db_session
from src.common.database.database_model import Messages from src.common.database.database_model import Messages
@@ -36,6 +35,102 @@ class MsgIDMapping:
class SessionMessage(MaiMessage): class SessionMessage(MaiMessage):
#便于调试的打印函数
def __str__(self) -> str:
    """Return the log-friendly summary built by ``to_debug_string``."""
    summary = self.to_debug_string()
    return summary
def __repr__(self) -> str:
    """Return the debug summary built by ``to_debug_string``."""
    summary = self.to_debug_string()
    return summary
def to_debug_string(self) -> str:
    """Build a one-line debug summary of this message, including reply info.

    The summary lists the message id, platform, chat type, group and sender
    identity, ``reply_to``, the truncated processed text, and a short summary
    of every component in ``raw_message``.

    Returns:
        str: A ``SessionMessage(...)`` string suitable for log output.
    """
    info = self.message_info
    sender = info.user_info
    group = info.group_info

    # A message without group info is reported as a private chat.
    if group:
        chat_type, group_id, group_name = "group", group.group_id, group.group_name
    else:
        chat_type, group_id, group_name = "private", None, None

    summaries = [self._summarize_component(part) for part in self.raw_message.components]
    if summaries:
        raw_components = ", ".join(summaries)
    else:
        raw_components = "empty"

    fields = (
        f"message_id={self.message_id!r}",
        f"platform={self.platform!r}",
        f"chat_type={chat_type!r}",
        f"group_id={group_id!r}",
        f"group_name={group_name!r}",
        f"user_id={sender.user_id!r}",
        f"user_nickname={sender.user_nickname!r}",
        f"user_cardname={sender.user_cardname!r}",
        f"reply_to={self.reply_to!r}",
        f"processed_plain_text={self._truncate_text(self.processed_plain_text)}",
        f"raw_components=[{raw_components}]",
    )
    return "SessionMessage(" + ", ".join(fields) + ")"
@staticmethod
def _truncate_text(text: str | None, max_length: int = 120) -> str:
"""截断较长文本,避免日志过长。
Args:
text: 原始文本。
max_length: 最大保留长度。
Returns:
str: 截断后的文本表示。
"""
if text is None:
return "None"
normalized_text = text.replace("\r", "\\r").replace("\n", "\\n")
if len(normalized_text) <= max_length:
return repr(normalized_text)
return repr(f"{normalized_text[:max_length]}...")
def _summarize_component(self, component: StandardMessageComponents) -> str:
    """Produce a short debug summary for a single message component.

    Text, image, emoji and voice content is truncated; ``At`` and ``Reply``
    components show the first non-empty of card name, nickname and user id.

    Args:
        component: The message component to summarise.

    Returns:
        str: A compact ``Kind(field=value)`` description, or just the class
        name for component types not handled explicitly.
    """
    clip = self._truncate_text

    if isinstance(component, TextComponent):
        return f"Text(text={clip(component.text, 80)})"

    if isinstance(component, ImageComponent):
        return f"Image(content={clip(component.content or None, 60)})"

    if isinstance(component, EmojiComponent):
        return f"Emoji(content={clip(component.content or None, 60)})"

    if isinstance(component, AtComponent):
        target_name = component.target_user_cardname or component.target_user_nickname or component.target_user_id
        return f"At(target={target_name!r})"

    if isinstance(component, VoiceComponent):
        return f"Voice(content={clip(component.content or None, 60)})"

    if isinstance(component, ReplyComponent):
        sender_name = (
            component.target_message_sender_cardname
            or component.target_message_sender_nickname
            or component.target_message_sender_id
        )
        return (
            f"Reply(target_message_id={component.target_message_id!r}, "
            f"target_sender={sender_name!r}, "
            f"target_content={clip(component.target_message_content, 80)})"
        )

    if isinstance(component, ForwardNodeComponent):
        return f"ForwardNode(count={len(component.forward_components)})"

    # Any other component type is identified by its class name only.
    return component.__class__.__name__
#便于调试的打印函数end
async def process( async def process(
self, self,
*, *,

View File

@@ -35,7 +35,7 @@ from src.services import llm_service as llm_api
from src.chat.logger.plan_reply_logger import PlanReplyLogger from src.chat.logger.plan_reply_logger import PlanReplyLogger
from src.memory_system.memory_retrieval import init_memory_retrieval_sys, build_memory_retrieval_prompt from src.memory_system.memory_retrieval import init_memory_retrieval_sys, build_memory_retrieval_prompt
from src.learners.jargon_explainer_old import explain_jargon_in_context, retrieve_concepts_with_jargon from src.learners.jargon_explainer_old import explain_jargon_in_context
from src.chat.utils.common_utils import TempMethodsExpression from src.chat.utils.common_utils import TempMethodsExpression
init_memory_retrieval_sys() init_memory_retrieval_sys()

View File

@@ -15,7 +15,6 @@ from src.common.database.database import get_db_session
from src.common.database.database_model import Messages, ModelUsage, OnlineTime, ToolRecord from src.common.database.database_model import Messages, ModelUsage, OnlineTime, ToolRecord
from src.manager.async_task_manager import AsyncTask from src.manager.async_task_manager import AsyncTask
from src.manager.local_store_manager import local_storage from src.manager.local_store_manager import local_storage
from src.config.config import global_config
logger = get_logger("maibot_statistic") logger = get_logger("maibot_statistic")

View File

@@ -6,7 +6,6 @@ from base64 import b64decode
from datetime import datetime from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional from typing import TYPE_CHECKING, Any, Dict, List, Optional
from src.chat.message_receive.message import SessionMessage
from src.chat.utils.utils import process_llm_response from src.chat.utils.utils import process_llm_response
from src.common.data_models.message_component_data_model import EmojiComponent, MessageSequence, TextComponent from src.common.data_models.message_component_data_model import EmojiComponent, MessageSequence, TextComponent
from src.config.config import global_config from src.config.config import global_config

View File

@@ -15,7 +15,6 @@ from rich.panel import Panel
from src.cli.console import console from src.cli.console import console
from src.common.data_models.llm_service_data_models import LLMGenerationOptions from src.common.data_models.llm_service_data_models import LLMGenerationOptions
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
from src.common.logger import get_logger from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt from src.common.prompt_i18n import load_prompt
from src.common.utils.utils_session import SessionUtils from src.common.utils.utils_session import SessionUtils
@@ -38,9 +37,7 @@ from src.plugin_runtime.host.hook_spec_registry import HookSpec, HookSpecRegistr
from src.services.llm_service import LLMServiceClient from src.services.llm_service import LLMServiceClient
from .builtin_tool import get_builtin_tools from .builtin_tool import get_builtin_tools
from .context_messages import AssistantMessage, LLMContextMessage, SessionBackedMessage, ToolResultMessage from .context_messages import AssistantMessage, LLMContextMessage, ToolResultMessage
from .message_adapter import format_speaker_content
from .planner_message_utils import build_session_backed_text_message
from .prompt_cli_renderer import PromptCLIVisualizer from .prompt_cli_renderer import PromptCLIVisualizer
@@ -324,7 +321,7 @@ class MaisakaChatLoopService:
if not prompt_lines: if not prompt_lines:
return "" return ""
return f"在该聊天中的注意事项:\n" + "\n\n".join(prompt_lines) + "\n" return "在该聊天中的注意事项:\n" + "\n\n".join(prompt_lines) + "\n"
@staticmethod @staticmethod
def _get_chat_prompt_for_chat(chat_id: str, is_group_chat: Optional[bool]) -> str: def _get_chat_prompt_for_chat(chat_id: str, is_group_chat: Optional[bool]) -> str:

View File

@@ -11,7 +11,13 @@ import base64
from PIL import Image as PILImage from PIL import Image as PILImage
from src.chat.message_receive.message import SessionMessage from src.chat.message_receive.message import SessionMessage
from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent from src.common.data_models.message_component_data_model import (
EmojiComponent,
ImageComponent,
MessageSequence,
ReplyComponent,
TextComponent,
)
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.llm_models.payload_content.tool_option import ToolCall from src.llm_models.payload_content.tool_option import ToolCall
@@ -27,11 +33,42 @@ def _guess_image_format(image_bytes: bytes) -> Optional[str]:
return None return None
def _build_binary_component_type_text(component: EmojiComponent | ImageComponent) -> str: def _append_emoji_component(builder: MessageBuilder, component: EmojiComponent) -> bool:
"""为图片类消息组件构造显式的消息类型标记""" """将表情组件追加到 LLM 消息构建器"""
if isinstance(component, EmojiComponent): image_format = _guess_image_format(component.binary_data)
return "[消息类型]表情包" if image_format and component.binary_data:
return "[消息类型]图片" builder.add_text_content("[消息类型]表情包")
builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8"))
return True
if component.content:
builder.add_text_content(component.content)
return True
return False
def _append_image_component(builder: MessageBuilder, component: ImageComponent) -> bool:
    """Append an image component to the LLM message builder.

    When the component carries binary data whose format can be guessed, a
    type marker and the base64-encoded image are added. Otherwise the
    component's text ``content`` is added, if it has any.

    Args:
        builder: Message builder that receives the content.
        component: Image component to append.

    Returns:
        bool: ``True`` if anything was added to ``builder``, else ``False``.
    """
    raw_bytes = component.binary_data
    detected_format = _guess_image_format(raw_bytes)

    if detected_format and raw_bytes:
        builder.add_text_content("[消息类型]图片")
        encoded = base64.b64encode(raw_bytes).decode("utf-8")
        builder.add_image_content(detected_format, encoded)
        return True

    # No usable binary payload: fall back to the text content, if present.
    fallback_text = component.content
    if not fallback_text:
        return False
    builder.add_text_content(fallback_text)
    return True
def _append_reply_component(builder: MessageBuilder, component: ReplyComponent) -> bool:
    """Append a reply component to the LLM message builder.

    The reply is rendered as a text marker carrying the id of the quoted
    message, with surrounding whitespace stripped from the id.

    Args:
        builder: Message builder that receives the marker.
        component: Reply component whose ``target_message_id`` is rendered.

    Returns:
        bool: ``True`` if a marker was added, ``False`` when the stripped id
        is empty.
    """
    quoted_id = component.target_message_id.strip()
    if quoted_id:
        builder.add_text_content(f"[引用回复]({quoted_id})")
        return True
    return False
def _build_message_from_sequence( def _build_message_from_sequence(
@@ -57,17 +94,17 @@ def _build_message_from_sequence(
has_content = True has_content = True
continue continue
if isinstance(component, (EmojiComponent, ImageComponent)): if isinstance(component, EmojiComponent):
image_format = _guess_image_format(component.binary_data) has_content = _append_emoji_component(builder, component) or has_content
if image_format and component.binary_data: continue
builder.add_text_content(_build_binary_component_type_text(component))
builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8"))
has_content = True
continue
if component.content: if isinstance(component, ImageComponent):
builder.add_text_content(component.content) has_content = _append_image_component(builder, component) or has_content
has_content = True continue
if isinstance(component, ReplyComponent):
has_content = _append_reply_component(builder, component) or has_content
continue
if not has_content and fallback_text: if not has_content and fallback_text:
builder.add_text_content(fallback_text) builder.add_text_content(fallback_text)

View File

@@ -5,7 +5,13 @@ from datetime import datetime
from typing import Optional from typing import Optional
import re import re
from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent from src.common.data_models.message_component_data_model import (
EmojiComponent,
ImageComponent,
MessageSequence,
ReplyComponent,
TextComponent,
)
SPEAKER_PREFIX_PATTERN = re.compile( SPEAKER_PREFIX_PATTERN = re.compile(
r"^(?:(?P<timestamp>\d{2}:\d{2}:\d{2}))?(?:\[msg_id:(?P<message_id>[^\]]+)\])?\[(?P<speaker>[^\]]+)\](?P<content>.*)$", r"^(?:(?P<timestamp>\d{2}:\d{2}:\d{2}))?(?:\[msg_id:(?P<message_id>[^\]]+)\])?\[(?P<speaker>[^\]]+)\](?P<content>.*)$",
@@ -65,5 +71,11 @@ def build_visible_text_from_sequence(message_sequence: MessageSequence) -> str:
if isinstance(component, ImageComponent): if isinstance(component, ImageComponent):
parts.append("[图片]") parts.append("[图片]")
continue
if isinstance(component, ReplyComponent):
target_message_id = component.target_message_id.strip()
if target_message_id:
parts.append(f"[引用回复]({target_message_id})")
return "".join(parts) return "".join(parts)

View File

@@ -12,7 +12,7 @@ import traceback
from src.chat.heart_flow.heartFC_utils import CycleDetail from src.chat.heart_flow.heartFC_utils import CycleDetail
from src.chat.message_receive.message import SessionMessage from src.chat.message_receive.message import SessionMessage
from src.chat.utils.utils import process_llm_response from src.chat.utils.utils import process_llm_response
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent
from src.common.logger import get_logger from src.common.logger import get_logger
from src.config.config import global_config from src.config.config import global_config
from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec
@@ -230,12 +230,10 @@ class MaisakaReasoningEngine:
planner_prefix = build_planner_user_prefix_from_session_message(message) planner_prefix = build_planner_user_prefix_from_session_message(message)
appended_component = False appended_component = False
if global_config.maisaka.direct_image_input: source_sequence = message.raw_message
source_sequence = getattr(message, "maisaka_original_raw_message", message.raw_message)
else:
source_sequence = message.raw_message
planner_components = clone_message_sequence(source_sequence).components planner_components = clone_message_sequence(source_sequence).components
if global_config.maisaka.direct_image_input:
await self._hydrate_visual_components(planner_components)
if planner_components and isinstance(planner_components[0], TextComponent): if planner_components and isinstance(planner_components[0], TextComponent):
planner_components[0].text = planner_prefix + planner_components[0].text planner_components[0].text = planner_prefix + planner_components[0].text
else: else:
@@ -256,6 +254,24 @@ class MaisakaReasoningEngine:
return message_sequence, legacy_visible_text return message_sequence, legacy_visible_text
async def _hydrate_visual_components(self, planner_components: list[object]) -> None:
    """Load missing image/emoji binary data on demand for the planner.

    For every ``ImageComponent`` or ``EmojiComponent`` whose ``binary_data``
    is empty, the component's ``load_image_binary()`` / ``load_emoji_binary()``
    coroutine is scheduled. All loads run concurrently. Failures are logged
    as warnings and never raised to the caller.

    Args:
        planner_components: Components of the planner message; items of other
            types, or that already have binary data, are skipped.
    """
    load_tasks: list[asyncio.Task[None]] = []
    for component in planner_components:
        if isinstance(component, ImageComponent) and not component.binary_data:
            load_tasks.append(asyncio.create_task(component.load_image_binary()))
            continue

        if isinstance(component, EmojiComponent) and not component.binary_data:
            load_tasks.append(asyncio.create_task(component.load_emoji_binary()))

    if not load_tasks:
        return

    # return_exceptions=True keeps one failed load from aborting the others.
    results = await asyncio.gather(*load_tasks, return_exceptions=True)
    for result in results:
        # gather can also hand back BaseException instances (for example a
        # cancelled load), so check the base class to avoid dropping those
        # failures without a log line.
        if isinstance(result, BaseException):
            logger.warning(f"{self._runtime.log_prefix} 回填图片或表情二进制数据失败Maisaka 将退化为文本占位: {result}")
def _build_legacy_visible_text(self, message: SessionMessage, source_sequence: MessageSequence) -> str: def _build_legacy_visible_text(self, message: SessionMessage, source_sequence: MessageSequence) -> str:
user_info = message.message_info.user_info user_info = message.message_info.user_info
speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id

View File

@@ -14,7 +14,7 @@ import httpx
from src.cli.console import console from src.cli.console import console
from src.core.tooling import ToolExecutionResult from src.core.tooling import ToolExecutionResult
from .config import MCPClientRuntimeConfig, MCPRootRuntimeConfig, MCPServerRuntimeConfig from .config import MCPClientRuntimeConfig, MCPServerRuntimeConfig
from .hooks import MCPHostCallbacks from .hooks import MCPHostCallbacks
from .models import ( from .models import (
MCPPromptResult, MCPPromptResult,