feat: maisaka 正确解析图片原始数据、reply 信息
This commit is contained in:
@@ -44,6 +44,12 @@ class ImageManager:
|
||||
|
||||
logger.info("图片管理器初始化完成")
|
||||
|
||||
def _get_image_record(self, image_hash: str) -> Optional[Images]:
    """Look up the stored record of a regular image by its hash.

    Only rows whose ``image_type`` is ``ImageType.IMAGE`` are considered.

    Args:
        image_hash: Hash string identifying the image (callers in this file
            pass the SHA-256 hex digest of the image bytes).

    Returns:
        The first matching ``Images`` row, or ``None`` when nothing matches.
    """
    with get_db_session() as session:
        statement = select(Images).filter_by(image_hash=image_hash, image_type=ImageType.IMAGE).limit(1)
        record = session.exec(statement).first()
        if record is not None:
            # Callers read the row's attributes after this session has ended.
            # Detach it while its columns are still loaded so that a commit or
            # close performed on exit cannot expire them.
            # NOTE(review): only matters if get_db_session expires objects on
            # commit -- confirm against its implementation.
            session.expunge(record)
        return record
|
||||
|
||||
async def get_image_description(
|
||||
self,
|
||||
*,
|
||||
@@ -76,9 +82,8 @@ class ImageManager:
|
||||
hash_str = hashlib.sha256(image_bytes).hexdigest()
|
||||
|
||||
try:
|
||||
with get_db_session() as session:
|
||||
statement = select(Images).filter_by(image_hash=hash_str, image_type=ImageType.IMAGE).limit(1)
|
||||
if record := session.exec(statement).first():
|
||||
if record := self._get_image_record(hash_str):
|
||||
if record.vlm_processed and record.description:
|
||||
return record.description
|
||||
except Exception as e:
|
||||
logger.error(f"查询图片描述时发生错误: {e}")
|
||||
@@ -86,12 +91,17 @@ class ImageManager:
|
||||
if not image_bytes:
|
||||
logger.warning("图片哈希值未找到,且未提供图片字节数据,返回无描述")
|
||||
return ""
|
||||
try:
|
||||
await self.ensure_image_saved(image_bytes)
|
||||
except Exception as e:
|
||||
logger.error(f"保存图片文件时发生错误: {e}")
|
||||
return ""
|
||||
if not wait_for_build:
|
||||
self._schedule_description_build(hash_str, image_bytes)
|
||||
return ""
|
||||
logger.info(f"图片描述未找到,哈希值: {hash_str},准备生成新描述")
|
||||
try:
|
||||
image = await self.save_image_and_process(image_bytes)
|
||||
image = await self.build_image_description(image_bytes)
|
||||
return image.description
|
||||
except Exception as e:
|
||||
logger.error(f"生成图片描述时发生错误: {e}")
|
||||
@@ -120,7 +130,7 @@ class ImageManager:
|
||||
"""
|
||||
try:
|
||||
logger.info(f"图片描述后台构建已开始,哈希值: {image_hash}")
|
||||
await self.save_image_and_process(image_bytes)
|
||||
await self.build_image_description(image_bytes)
|
||||
logger.info(f"图片描述后台构建完成,哈希值: {image_hash}")
|
||||
except Exception as exc:
|
||||
logger.warning(f"图片描述后台构建失败,哈希值: {image_hash},错误: {exc}")
|
||||
@@ -201,6 +211,7 @@ class ImageManager:
|
||||
return False
|
||||
record.description = image.description
|
||||
record.last_used_time = datetime.now()
|
||||
record.vlm_processed = image.vlm_processed
|
||||
session.add(record)
|
||||
logger.info(f"成功更新图片描述: {image.file_hash},新描述: {image.description}")
|
||||
except Exception as e:
|
||||
@@ -239,22 +250,13 @@ class ImageManager:
|
||||
return False
|
||||
return True
|
||||
|
||||
async def save_image_and_process(self, image_bytes: bytes) -> MaiImage:
|
||||
"""
|
||||
保存图片并生成描述
|
||||
|
||||
Args:
|
||||
image_bytes (bytes): 图片的字节数据
|
||||
Returns:
|
||||
return (MaiImage): 包含图片信息的 MaiImage 对象
|
||||
Raises:
|
||||
Exception: 如果在保存或处理过程中发生错误
|
||||
"""
|
||||
async def ensure_image_saved(self, image_bytes: bytes) -> MaiImage:
|
||||
"""先保存图片记录,确保后续可以按哈希回填图片内容。"""
|
||||
hash_str = hashlib.sha256(image_bytes).hexdigest()
|
||||
|
||||
try:
|
||||
with get_db_session() as session:
|
||||
statement = select(Images).filter_by(image_hash=hash_str).limit(1)
|
||||
statement = select(Images).filter_by(image_hash=hash_str, image_type=ImageType.IMAGE).limit(1)
|
||||
if record := session.exec(statement).first():
|
||||
logger.info(f"图片已存在于数据库中,哈希值: {hash_str}")
|
||||
record.last_used_time = datetime.now()
|
||||
@@ -270,18 +272,38 @@ class ImageManager:
|
||||
tmp_file_path = IMAGE_DIR / f"{hash_str}.tmp"
|
||||
with tmp_file_path.open("wb") as f:
|
||||
f.write(image_bytes)
|
||||
mai_image = MaiImage(full_path=(IMAGE_DIR / f"{hash_str}.tmp"), image_bytes=image_bytes)
|
||||
mai_image = MaiImage(full_path=tmp_file_path, image_bytes=image_bytes)
|
||||
await mai_image.calculate_hash_format()
|
||||
if not self.register_image_to_db(mai_image):
|
||||
raise RuntimeError(f"保存图片记录到数据库失败: {hash_str}")
|
||||
return mai_image
|
||||
|
||||
async def build_image_description(self, image_bytes: bytes) -> MaiImage:
    """Generate or complete the description of an image that is already saved.

    The image is first passed through ``ensure_image_saved``. If the returned
    object is already marked as processed and carries a description, it is
    returned unchanged; otherwise a description is generated and persisted
    through ``update_image_description``.

    Args:
        image_bytes: Raw bytes of the image.

    Returns:
        MaiImage: The image object with ``description`` filled in and
        ``vlm_processed`` set to ``True``.

    Raises:
        RuntimeError: If ``update_image_description`` reports failure.
        Exception: Anything raised by ``ensure_image_saved`` or
            ``_generate_image_description`` propagates unchanged.
    """
    mai_image = await self.ensure_image_saved(image_bytes)
    if mai_image.vlm_processed and mai_image.description:
        return mai_image

    mai_image.description = await self._generate_image_description(image_bytes, mai_image.image_format)
    mai_image.vlm_processed = True

    # The record was already stored by ensure_image_saved, so registering it a
    # second time is redundant (and its boolean result used to be ignored).
    # Only the description fields need to be written back here.
    if not self.update_image_description(mai_image):
        raise RuntimeError(f"更新图片描述失败: {mai_image.file_hash}")
    return mai_image
|
||||
|
||||
async def save_image_and_process(self, image_bytes: bytes) -> MaiImage:
    """
    Save the image and generate its description.

    Thin wrapper that delegates to ``build_image_description``.

    Args:
        image_bytes (bytes): Raw bytes of the image.
    Returns:
        return (MaiImage): The MaiImage object returned by ``build_image_description``.
    Raises:
        Exception: If an error occurs while saving or processing.
    """
    return await self.build_image_description(image_bytes)
|
||||
|
||||
def cleanup_invalid_descriptions_in_db(self):
|
||||
"""
|
||||
清理数据库中无效的图片记录
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
"""聊天消息入口与主链路调度。"""
|
||||
|
||||
from contextlib import suppress
|
||||
from copy import deepcopy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import os
|
||||
@@ -550,9 +549,9 @@ class ChatBot:
|
||||
# if await self.handle_notice_message(message):
|
||||
# pass
|
||||
|
||||
# 处理消息内容,识别表情包等二进制数据并转化为文本描述
|
||||
if global_config.maisaka.direct_image_input:
|
||||
message.maisaka_original_raw_message = deepcopy(message.raw_message) # type: ignore[attr-defined]
|
||||
# 处理消息内容,识别表情包等二进制数据并转化为文本描述。
|
||||
# 如果 Maisaka 需要直接消费图片,会在后续构建 prompt 时按需回填图片二进制数据,
|
||||
# 这里不再复制整条原始消息。
|
||||
# 入站主链优先保证消息尽快入队,避免图片、表情包、语音分析阻塞适配器超时。
|
||||
await message.process(
|
||||
enable_heavy_media_analysis=False,
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
import asyncio
|
||||
from asyncio import Task
|
||||
from typing import Dict, List, Sequence, Tuple
|
||||
|
||||
from rich.traceback import install
|
||||
from sqlmodel import select
|
||||
|
||||
import asyncio
|
||||
|
||||
from src.common.logger import get_logger
|
||||
from src.common.database.database import get_db_session
|
||||
from src.common.database.database_model import Messages
|
||||
@@ -36,6 +35,102 @@ class MsgIDMapping:
|
||||
|
||||
|
||||
class SessionMessage(MaiMessage):
|
||||
|
||||
# --- Debug-friendly printing helpers: begin ---
|
||||
def __str__(self) -> str:
    """Return a message summary suitable for log output.

    Delegates to ``to_debug_string``.
    """
    return self.to_debug_string()
|
||||
|
||||
def __repr__(self) -> str:
    """Return a message summary suitable for debugging.

    Delegates to ``to_debug_string``, so it yields the same text as ``__str__``.
    """
    return self.to_debug_string()
|
||||
|
||||
def to_debug_string(self) -> str:
    """Build a one-line debug summary of this message, including reply info.

    The summary lists the message id, platform, chat type (``group`` when
    group info is present, otherwise ``private``), group and sender fields,
    ``reply_to``, the truncated processed text and a per-component summary
    of ``raw_message.components`` (``empty`` when there are none).

    Returns:
        str: A message summary suitable for logging.
    """
    info = self.message_info
    sender = info.user_info
    group = info.group_info

    if group:
        chat_type, group_id, group_name = "group", group.group_id, group.group_name
    else:
        chat_type, group_id, group_name = "private", None, None

    summaries = list(map(self._summarize_component, self.raw_message.components))
    rendered_components = "empty" if not summaries else ", ".join(summaries)

    fields = [
        f"message_id={self.message_id!r}",
        f"platform={self.platform!r}",
        f"chat_type={chat_type!r}",
        f"group_id={group_id!r}",
        f"group_name={group_name!r}",
        f"user_id={sender.user_id!r}",
        f"user_nickname={sender.user_nickname!r}",
        f"user_cardname={sender.user_cardname!r}",
        f"reply_to={self.reply_to!r}",
        f"processed_plain_text={self._truncate_text(self.processed_plain_text)}",
        f"raw_components=[{rendered_components}]",
    ]
    return "SessionMessage(" + ", ".join(fields) + ")"
|
||||
|
||||
@staticmethod
|
||||
def _truncate_text(text: str | None, max_length: int = 120) -> str:
|
||||
"""截断较长文本,避免日志过长。
|
||||
|
||||
Args:
|
||||
text: 原始文本。
|
||||
max_length: 最大保留长度。
|
||||
|
||||
Returns:
|
||||
str: 截断后的文本表示。
|
||||
"""
|
||||
if text is None:
|
||||
return "None"
|
||||
normalized_text = text.replace("\r", "\\r").replace("\n", "\\n")
|
||||
if len(normalized_text) <= max_length:
|
||||
return repr(normalized_text)
|
||||
return repr(f"{normalized_text[:max_length]}...")
|
||||
|
||||
def _summarize_component(self, component: StandardMessageComponents) -> str:
    """Produce a short debug summary for a single message component.

    The component is matched against the known component classes in a fixed
    order (text, image, emoji, at, voice, reply, forward node); anything
    else is summarised by its class name alone.

    Args:
        component: The message component object.

    Returns:
        str: The summary text for the component.
    """
    shorten = self._truncate_text

    if isinstance(component, TextComponent):
        return "Text(text={})".format(shorten(component.text, 80))

    if isinstance(component, ImageComponent):
        return "Image(content={})".format(shorten(component.content or None, 60))

    if isinstance(component, EmojiComponent):
        return "Emoji(content={})".format(shorten(component.content or None, 60))

    if isinstance(component, AtComponent):
        # Prefer the card name, then the nickname, then the bare user id.
        at_target = (
            component.target_user_cardname
            or component.target_user_nickname
            or component.target_user_id
        )
        return "At(target={!r})".format(at_target)

    if isinstance(component, VoiceComponent):
        return "Voice(content={})".format(shorten(component.content or None, 60))

    if isinstance(component, ReplyComponent):
        # Same fallback order as for At: card name, nickname, then sender id.
        quoted_sender = (
            component.target_message_sender_cardname
            or component.target_message_sender_nickname
            or component.target_message_sender_id
        )
        reply_fields = [
            f"target_message_id={component.target_message_id!r}",
            f"target_sender={quoted_sender!r}",
            f"target_content={shorten(component.target_message_content, 80)}",
        ]
        return "Reply(" + ", ".join(reply_fields) + ")"

    if isinstance(component, ForwardNodeComponent):
        return "ForwardNode(count={})".format(len(component.forward_components))

    # Unknown component type: fall back to the class name.
    return "{}".format(component.__class__.__name__)
|
||||
# --- Debug-friendly printing helpers: end ---
|
||||
|
||||
async def process(
|
||||
self,
|
||||
*,
|
||||
|
||||
@@ -35,7 +35,7 @@ from src.services import llm_service as llm_api
|
||||
|
||||
from src.chat.logger.plan_reply_logger import PlanReplyLogger
|
||||
from src.memory_system.memory_retrieval import init_memory_retrieval_sys, build_memory_retrieval_prompt
|
||||
from src.learners.jargon_explainer_old import explain_jargon_in_context, retrieve_concepts_with_jargon
|
||||
from src.learners.jargon_explainer_old import explain_jargon_in_context
|
||||
from src.chat.utils.common_utils import TempMethodsExpression
|
||||
|
||||
init_memory_retrieval_sys()
|
||||
|
||||
@@ -15,7 +15,6 @@ from src.common.database.database import get_db_session
|
||||
from src.common.database.database_model import Messages, ModelUsage, OnlineTime, ToolRecord
|
||||
from src.manager.async_task_manager import AsyncTask
|
||||
from src.manager.local_store_manager import local_storage
|
||||
from src.config.config import global_config
|
||||
|
||||
logger = get_logger("maibot_statistic")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user