feat: replyer采用多模态

This commit is contained in:
SengokuCola
2026-04-03 23:47:19 +08:00
parent 8255b13cea
commit 87bdb1f12a
6 changed files with 73 additions and 13 deletions

View File

@@ -1,7 +1,6 @@
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional, Tuple
import random
import time
@@ -9,6 +8,7 @@ from sqlmodel import select
from src.chat.message_receive.chat_manager import BotChatSession
from src.common.database.database import get_db_session
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
from src.common.database.database_model import Expression
from src.common.data_models.reply_generation_data_models import (
GenerationMetrics,
@@ -19,12 +19,12 @@ from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt
from src.config.config import global_config
from src.core.types import ActionInfo
from src.llm_models.payload_content.message import ImageMessagePart, Message, MessageBuilder, RoleType, TextMessagePart
from src.services.llm_service import LLMServiceClient
from src.chat.message_receive.message import SessionMessage
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.maisaka.context_messages import AssistantMessage, LLMContextMessage, ReferenceMessage, SessionBackedMessage, ToolResultMessage
from src.maisaka.message_adapter import parse_speaker_content
from src.maisaka.message_adapter import clone_message_sequence, parse_speaker_content
logger = get_logger("replyer")
@@ -159,6 +159,34 @@ class MaisakaReplyGenerator:
"""构建追加在上下文末尾的回复指令。"""
return "请基于以上逐条对话消息,自然地继续回复。直接输出你要说的话,不要额外解释。"
def _build_multimodal_user_message(
    self,
    message: SessionBackedMessage,
    default_user_name: str,
) -> Optional[Message]:
    """Build a user message that preserves multimodal parts (e.g. images).

    Re-attaches a ``[speaker]`` text prefix to the message's raw component
    sequence instead of flattening it to plain text, so image components
    survive into the LLM payload.

    Args:
        message: The session-backed source message to convert.
        default_user_name: Fallback speaker label when no speaker name can
            be parsed from the message text.

    Returns:
        The LLM-ready message, or ``None`` when ``to_llm_message()`` yields
        none (the caller skips the multimodal path in that case).
    """
    # NOTE(review): assumes processed_plain_text carries a parseable
    # "[speaker]..." prefix — confirm against parse_speaker_content.
    speaker_name, _ = parse_speaker_content(message.processed_plain_text.strip())
    visible_speaker = speaker_name or default_user_name
    # Clone so the original message's component sequence is not mutated
    # when the speaker tag is spliced in below.
    raw_message = clone_message_sequence(message.raw_message)
    if not raw_message.components:
        # Empty sequence: fall back to a sequence holding only the tag.
        raw_message = MessageSequence([TextComponent(f"[{visible_speaker}]")])
    elif isinstance(raw_message.components[0], TextComponent):
        # Merge the tag into the leading text component.
        first_text = raw_message.components[0].text or ""
        raw_message.components[0] = TextComponent(f"[{visible_speaker}]{first_text}")
    else:
        # Leading component is non-text (e.g. an image): prepend the tag.
        raw_message.components.insert(0, TextComponent(f"[{visible_speaker}]"))
    # Rebuild a message around the tagged sequence, keeping the original
    # identity/metadata fields intact.
    multimodal_message = SessionBackedMessage(
        raw_message=raw_message,
        visible_text=f"[{visible_speaker}]{message.processed_plain_text}",
        timestamp=message.timestamp,
        message_id=message.message_id,
        original_message=message.original_message,
        source_kind=message.source_kind,
    )
    return multimodal_message.to_llm_message()
def _build_history_messages(self, chat_history: List[LLMContextMessage]) -> List[Message]:
"""将 replyer 上下文拆成多条 LLM 消息。"""
bot_nickname = global_config.bot.nickname.strip() or "Bot"
@@ -177,6 +205,11 @@ class MaisakaReplyGenerator:
)
continue
multimodal_message = self._build_multimodal_user_message(message, default_user_name)
if multimodal_message is not None:
messages.append(multimodal_message)
continue
for speaker_name, content_body in self._split_user_message_segments(message.processed_plain_text):
content = self._normalize_content(content_body)
if not content:
@@ -227,7 +260,14 @@ class MaisakaReplyGenerator:
preview_lines: List[str] = []
for message in messages:
role_name = message.role.value.capitalize()
preview_lines.append(f"{role_name}: {message.get_text_content()}")
part_previews: List[str] = []
for part in message.parts:
if isinstance(part, TextMessagePart):
part_previews.append(part.text)
continue
if isinstance(part, ImageMessagePart):
part_previews.append(f"[图片:{part.normalized_image_format}]")
preview_lines.append(f"{role_name}: {''.join(part_previews)}")
return "\n\n".join(preview_lines)
def _resolve_session_id(self, stream_id: Optional[str]) -> str:

View File

@@ -5,8 +5,8 @@ from src.config.config import global_config
def get_maisaka_replyer_class() -> Type[object]:
"""根据配置返回 Maisaka replyer 类。"""
generator_type = global_config.maisaka.replyer_generator_type
if generator_type == "multi":
generator_type = get_maisaka_replyer_generator_type()
if generator_type == "multimodal":
from .maisaka_generator_multi import MaisakaReplyGenerator
return MaisakaReplyGenerator
@@ -18,4 +18,4 @@ def get_maisaka_replyer_class() -> Type[object]:
def get_maisaka_replyer_generator_type() -> str:
"""返回当前配置的 Maisaka replyer 生成器类型。"""
return global_config.maisaka.replyer_generator_type
return global_config.chat.replyer_generator_type

View File

@@ -16,8 +16,6 @@ class ExampleConfig(ConfigBase):
\"""This is an example field\"""
- 注释前面增加_warp_标记可以实现配置文件中注释在配置项前面单独一行显示
"""
class BotConfig(ConfigBase):
"""机器人配置类"""
@@ -283,7 +281,7 @@ class ChatConfig(ConfigBase):
},
)
direct_image_input: bool = Field(
multimodal_planner: bool = Field(
default=True,
json_schema_extra={
"x-widget": "switch",
@@ -292,14 +290,14 @@ class ChatConfig(ConfigBase):
)
"""是否直接输入图片"""
replyer_generator_type: Literal["legacy", "multi"] = Field(
replyer_generator_type: Literal["legacy", "multimodal"] = Field(
default="legacy",
json_schema_extra={
"x-widget": "select",
"x-icon": "git-branch",
},
)
"""Maisaka replyer 生成器类型legacy旧版单 prompt/ multi(多消息版"""
"""Maisaka replyer 生成器类型legacy旧版单 prompt/ multimodal多模态版适合主循环直接展示图片"""
enable_talk_value_rules: bool = Field(
default=True,

View File

@@ -1,5 +1,6 @@
"""reply 内置工具。"""
import traceback
from typing import Optional
from src.chat.replyer.replyer_manager import replyer_manager
@@ -82,6 +83,7 @@ async def handle_tool(
logger.exception(
f"{tool_ctx.runtime.log_prefix} 获取回复生成器时发生异常: 目标消息编号={target_message_id}"
)
logger.info(traceback.format_exc())
return tool_ctx.build_failure_result(
invocation.tool_name,
"获取 Maisaka 回复生成器时发生异常。",

View File

@@ -266,7 +266,7 @@ class MaisakaReasoningEngine:
source_sequence = message.raw_message
planner_components = clone_message_sequence(source_sequence).components
if global_config.chat.direct_image_input:
if global_config.chat.multimodal_planner:
await self._hydrate_visual_components(planner_components)
if planner_components and isinstance(planner_components[0], TextComponent):
planner_components[0].text = planner_prefix + planner_components[0].text

View File

@@ -235,8 +235,18 @@ class PluginMessageUtils:
if isinstance(raw_forward_nodes, list):
for node in raw_forward_nodes:
if not isinstance(node, dict):
logger.info(f"解析转发节点时跳过非字典节点: {node!r}")
continue
raw_content = node.get("content", [])
logger.info(
"开始解析转发节点: "
f"message_id={node.get('message_id')!r} "
f"user_id={node.get('user_id')!r} "
f"user_nickname={node.get('user_nickname')!r} "
f"user_cardname={node.get('user_cardname')!r} "
f"raw_content_type={type(raw_content).__name__} "
f"raw_content={raw_content!r}"
)
node_components: List[StandardMessageComponents] = []
if isinstance(raw_content, list):
node_components = [
@@ -244,7 +254,17 @@ class PluginMessageUtils:
for content in raw_content
if isinstance(content, dict)
]
logger.info(
"转发节点解析结果: "
f"message_id={node.get('message_id')!r} "
f"component_types={[component.__class__.__name__ for component in node_components]!r} "
f"component_values={[getattr(component, 'text', None) for component in node_components]!r}"
)
if not node_components:
logger.warning(
"转发节点内容为空,使用占位文本回退: "
f"message_id={node.get('message_id')!r} raw_content={raw_content!r}"
)
node_components = [TextComponent(text="[empty forward node]")]
forward_nodes.append(
ForwardComponent(