feat: replyer采用多模态

This commit is contained in:
SengokuCola
2026-04-03 23:47:19 +08:00
parent 8255b13cea
commit 87bdb1f12a
6 changed files with 73 additions and 13 deletions

View File

@@ -1,7 +1,6 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime from datetime import datetime
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple
import random import random
import time import time
@@ -9,6 +8,7 @@ from sqlmodel import select
from src.chat.message_receive.chat_manager import BotChatSession from src.chat.message_receive.chat_manager import BotChatSession
from src.common.database.database import get_db_session from src.common.database.database import get_db_session
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
from src.common.database.database_model import Expression from src.common.database.database_model import Expression
from src.common.data_models.reply_generation_data_models import ( from src.common.data_models.reply_generation_data_models import (
GenerationMetrics, GenerationMetrics,
@@ -19,12 +19,12 @@ from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt from src.common.prompt_i18n import load_prompt
from src.config.config import global_config from src.config.config import global_config
from src.core.types import ActionInfo from src.core.types import ActionInfo
from src.llm_models.payload_content.message import ImageMessagePart, Message, MessageBuilder, RoleType, TextMessagePart
from src.services.llm_service import LLMServiceClient from src.services.llm_service import LLMServiceClient
from src.chat.message_receive.message import SessionMessage from src.chat.message_receive.message import SessionMessage
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.maisaka.context_messages import AssistantMessage, LLMContextMessage, ReferenceMessage, SessionBackedMessage, ToolResultMessage from src.maisaka.context_messages import AssistantMessage, LLMContextMessage, ReferenceMessage, SessionBackedMessage, ToolResultMessage
from src.maisaka.message_adapter import parse_speaker_content from src.maisaka.message_adapter import clone_message_sequence, parse_speaker_content
logger = get_logger("replyer") logger = get_logger("replyer")
@@ -159,6 +159,34 @@ class MaisakaReplyGenerator:
"""构建追加在上下文末尾的回复指令。""" """构建追加在上下文末尾的回复指令。"""
return "请基于以上逐条对话消息,自然地继续回复。直接输出你要说的话,不要额外解释。" return "请基于以上逐条对话消息,自然地继续回复。直接输出你要说的话,不要额外解释。"
def _build_multimodal_user_message(
    self,
    message: SessionBackedMessage,
    default_user_name: str,
) -> Optional[Message]:
    """Build a user message that keeps images and other multimodal parts.

    The speaker name is parsed from the stripped ``processed_plain_text``;
    when none is found, ``default_user_name`` is used instead. A cloned copy
    of ``message.raw_message`` is tagged with a leading ``[speaker]`` marker
    and wrapped in a new ``SessionBackedMessage`` that carries over the
    source message's timestamp, id, original message and source kind.

    Args:
        message: Session-backed context message to convert.
        default_user_name: Speaker name used when none can be parsed.

    Returns:
        Whatever ``SessionBackedMessage.to_llm_message()`` produces for the
        tagged copy.
    """
    parsed_speaker, _ = parse_speaker_content(message.processed_plain_text.strip())
    visible_speaker = parsed_speaker if parsed_speaker else default_user_name

    # Work on a clone so the stored raw message is never modified here.
    sequence = clone_message_sequence(message.raw_message)
    if sequence.components and isinstance(sequence.components[0], TextComponent):
        # Merge the speaker tag into the existing leading text component.
        leading_text = sequence.components[0].text or ""
        sequence.components[0] = TextComponent(f"[{visible_speaker}]{leading_text}")
    elif sequence.components:
        # The first component is not text: put the tag in front of it.
        sequence.components.insert(0, TextComponent(f"[{visible_speaker}]"))
    else:
        # Nothing in the sequence at all: the tag becomes the only component.
        sequence = MessageSequence([TextComponent(f"[{visible_speaker}]")])

    return SessionBackedMessage(
        raw_message=sequence,
        visible_text=f"[{visible_speaker}]{message.processed_plain_text}",
        timestamp=message.timestamp,
        message_id=message.message_id,
        original_message=message.original_message,
        source_kind=message.source_kind,
    ).to_llm_message()
def _build_history_messages(self, chat_history: List[LLMContextMessage]) -> List[Message]: def _build_history_messages(self, chat_history: List[LLMContextMessage]) -> List[Message]:
"""将 replyer 上下文拆成多条 LLM 消息。""" """将 replyer 上下文拆成多条 LLM 消息。"""
bot_nickname = global_config.bot.nickname.strip() or "Bot" bot_nickname = global_config.bot.nickname.strip() or "Bot"
@@ -177,6 +205,11 @@ class MaisakaReplyGenerator:
) )
continue continue
multimodal_message = self._build_multimodal_user_message(message, default_user_name)
if multimodal_message is not None:
messages.append(multimodal_message)
continue
for speaker_name, content_body in self._split_user_message_segments(message.processed_plain_text): for speaker_name, content_body in self._split_user_message_segments(message.processed_plain_text):
content = self._normalize_content(content_body) content = self._normalize_content(content_body)
if not content: if not content:
@@ -227,7 +260,14 @@ class MaisakaReplyGenerator:
preview_lines: List[str] = [] preview_lines: List[str] = []
for message in messages: for message in messages:
role_name = message.role.value.capitalize() role_name = message.role.value.capitalize()
preview_lines.append(f"{role_name}: {message.get_text_content()}") part_previews: List[str] = []
for part in message.parts:
if isinstance(part, TextMessagePart):
part_previews.append(part.text)
continue
if isinstance(part, ImageMessagePart):
part_previews.append(f"[图片:{part.normalized_image_format}]")
preview_lines.append(f"{role_name}: {''.join(part_previews)}")
return "\n\n".join(preview_lines) return "\n\n".join(preview_lines)
def _resolve_session_id(self, stream_id: Optional[str]) -> str: def _resolve_session_id(self, stream_id: Optional[str]) -> str:

View File

@@ -5,8 +5,8 @@ from src.config.config import global_config
def get_maisaka_replyer_class() -> Type[object]: def get_maisaka_replyer_class() -> Type[object]:
"""根据配置返回 Maisaka replyer 类。""" """根据配置返回 Maisaka replyer 类。"""
generator_type = global_config.maisaka.replyer_generator_type generator_type = get_maisaka_replyer_generator_type()
if generator_type == "multi": if generator_type == "multimodal":
from .maisaka_generator_multi import MaisakaReplyGenerator from .maisaka_generator_multi import MaisakaReplyGenerator
return MaisakaReplyGenerator return MaisakaReplyGenerator
@@ -18,4 +18,4 @@ def get_maisaka_replyer_class() -> Type[object]:
def get_maisaka_replyer_generator_type() -> str: def get_maisaka_replyer_generator_type() -> str:
"""返回当前配置的 Maisaka replyer 生成器类型。""" """返回当前配置的 Maisaka replyer 生成器类型。"""
return global_config.maisaka.replyer_generator_type return global_config.chat.replyer_generator_type

View File

@@ -16,8 +16,6 @@ class ExampleConfig(ConfigBase):
\"""This is an example field\""" \"""This is an example field\"""
- 注释前面增加_warp_标记可以实现配置文件中注释在配置项前面单独一行显示 - 注释前面增加_warp_标记可以实现配置文件中注释在配置项前面单独一行显示
""" """
class BotConfig(ConfigBase): class BotConfig(ConfigBase):
"""机器人配置类""" """机器人配置类"""
@@ -283,7 +281,7 @@ class ChatConfig(ConfigBase):
}, },
) )
direct_image_input: bool = Field( multimodal_planner: bool = Field(
default=True, default=True,
json_schema_extra={ json_schema_extra={
"x-widget": "switch", "x-widget": "switch",
@@ -292,14 +290,14 @@ class ChatConfig(ConfigBase):
) )
"""是否直接输入图片""" """是否直接输入图片"""
replyer_generator_type: Literal["legacy", "multi"] = Field( replyer_generator_type: Literal["legacy", "multimodal"] = Field(
default="legacy", default="legacy",
json_schema_extra={ json_schema_extra={
"x-widget": "select", "x-widget": "select",
"x-icon": "git-branch", "x-icon": "git-branch",
}, },
) )
"""Maisaka replyer 生成器类型legacy旧版单 prompt/ multi(多消息版""" """Maisaka replyer 生成器类型legacy旧版单 prompt/ multimodal多模态版适合主循环直接展示图片"""
enable_talk_value_rules: bool = Field( enable_talk_value_rules: bool = Field(
default=True, default=True,

View File

@@ -1,5 +1,6 @@
"""reply 内置工具。""" """reply 内置工具。"""
import traceback
from typing import Optional from typing import Optional
from src.chat.replyer.replyer_manager import replyer_manager from src.chat.replyer.replyer_manager import replyer_manager
@@ -82,6 +83,7 @@ async def handle_tool(
logger.exception( logger.exception(
f"{tool_ctx.runtime.log_prefix} 获取回复生成器时发生异常: 目标消息编号={target_message_id}" f"{tool_ctx.runtime.log_prefix} 获取回复生成器时发生异常: 目标消息编号={target_message_id}"
) )
logger.info(traceback.format_exc())
return tool_ctx.build_failure_result( return tool_ctx.build_failure_result(
invocation.tool_name, invocation.tool_name,
"获取 Maisaka 回复生成器时发生异常。", "获取 Maisaka 回复生成器时发生异常。",

View File

@@ -266,7 +266,7 @@ class MaisakaReasoningEngine:
source_sequence = message.raw_message source_sequence = message.raw_message
planner_components = clone_message_sequence(source_sequence).components planner_components = clone_message_sequence(source_sequence).components
if global_config.chat.direct_image_input: if global_config.chat.multimodal_planner:
await self._hydrate_visual_components(planner_components) await self._hydrate_visual_components(planner_components)
if planner_components and isinstance(planner_components[0], TextComponent): if planner_components and isinstance(planner_components[0], TextComponent):
planner_components[0].text = planner_prefix + planner_components[0].text planner_components[0].text = planner_prefix + planner_components[0].text

View File

@@ -235,8 +235,18 @@ class PluginMessageUtils:
if isinstance(raw_forward_nodes, list): if isinstance(raw_forward_nodes, list):
for node in raw_forward_nodes: for node in raw_forward_nodes:
if not isinstance(node, dict): if not isinstance(node, dict):
logger.info(f"解析转发节点时跳过非字典节点: {node!r}")
continue continue
raw_content = node.get("content", []) raw_content = node.get("content", [])
logger.info(
"开始解析转发节点: "
f"message_id={node.get('message_id')!r} "
f"user_id={node.get('user_id')!r} "
f"user_nickname={node.get('user_nickname')!r} "
f"user_cardname={node.get('user_cardname')!r} "
f"raw_content_type={type(raw_content).__name__} "
f"raw_content={raw_content!r}"
)
node_components: List[StandardMessageComponents] = [] node_components: List[StandardMessageComponents] = []
if isinstance(raw_content, list): if isinstance(raw_content, list):
node_components = [ node_components = [
@@ -244,7 +254,17 @@ class PluginMessageUtils:
for content in raw_content for content in raw_content
if isinstance(content, dict) if isinstance(content, dict)
] ]
logger.info(
"转发节点解析结果: "
f"message_id={node.get('message_id')!r} "
f"component_types={[component.__class__.__name__ for component in node_components]!r} "
f"component_values={[getattr(component, 'text', None) for component in node_components]!r}"
)
if not node_components: if not node_components:
logger.warning(
"转发节点内容为空,使用占位文本回退: "
f"message_id={node.get('message_id')!r} raw_content={raw_content!r}"
)
node_components = [TextComponent(text="[empty forward node]")] node_components = [TextComponent(text="[empty forward node]")]
forward_nodes.append( forward_nodes.append(
ForwardComponent( ForwardComponent(