feat;replyer采用多模态
This commit is contained in:
@@ -1,7 +1,6 @@
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, List, Optional, Tuple
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@@ -9,6 +8,7 @@ from sqlmodel import select
|
|||||||
|
|
||||||
from src.chat.message_receive.chat_manager import BotChatSession
|
from src.chat.message_receive.chat_manager import BotChatSession
|
||||||
from src.common.database.database import get_db_session
|
from src.common.database.database import get_db_session
|
||||||
|
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
|
||||||
from src.common.database.database_model import Expression
|
from src.common.database.database_model import Expression
|
||||||
from src.common.data_models.reply_generation_data_models import (
|
from src.common.data_models.reply_generation_data_models import (
|
||||||
GenerationMetrics,
|
GenerationMetrics,
|
||||||
@@ -19,12 +19,12 @@ from src.common.logger import get_logger
|
|||||||
from src.common.prompt_i18n import load_prompt
|
from src.common.prompt_i18n import load_prompt
|
||||||
from src.config.config import global_config
|
from src.config.config import global_config
|
||||||
from src.core.types import ActionInfo
|
from src.core.types import ActionInfo
|
||||||
|
from src.llm_models.payload_content.message import ImageMessagePart, Message, MessageBuilder, RoleType, TextMessagePart
|
||||||
from src.services.llm_service import LLMServiceClient
|
from src.services.llm_service import LLMServiceClient
|
||||||
|
|
||||||
from src.chat.message_receive.message import SessionMessage
|
from src.chat.message_receive.message import SessionMessage
|
||||||
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
|
|
||||||
from src.maisaka.context_messages import AssistantMessage, LLMContextMessage, ReferenceMessage, SessionBackedMessage, ToolResultMessage
|
from src.maisaka.context_messages import AssistantMessage, LLMContextMessage, ReferenceMessage, SessionBackedMessage, ToolResultMessage
|
||||||
from src.maisaka.message_adapter import parse_speaker_content
|
from src.maisaka.message_adapter import clone_message_sequence, parse_speaker_content
|
||||||
|
|
||||||
logger = get_logger("replyer")
|
logger = get_logger("replyer")
|
||||||
|
|
||||||
@@ -159,6 +159,34 @@ class MaisakaReplyGenerator:
|
|||||||
"""构建追加在上下文末尾的回复指令。"""
|
"""构建追加在上下文末尾的回复指令。"""
|
||||||
return "请基于以上逐条对话消息,自然地继续回复。直接输出你要说的话,不要额外解释。"
|
return "请基于以上逐条对话消息,自然地继续回复。直接输出你要说的话,不要额外解释。"
|
||||||
|
|
||||||
|
def _build_multimodal_user_message(
|
||||||
|
self,
|
||||||
|
message: SessionBackedMessage,
|
||||||
|
default_user_name: str,
|
||||||
|
) -> Optional[Message]:
|
||||||
|
"""构建保留图片等多模态片段的用户消息。"""
|
||||||
|
speaker_name, _ = parse_speaker_content(message.processed_plain_text.strip())
|
||||||
|
visible_speaker = speaker_name or default_user_name
|
||||||
|
|
||||||
|
raw_message = clone_message_sequence(message.raw_message)
|
||||||
|
if not raw_message.components:
|
||||||
|
raw_message = MessageSequence([TextComponent(f"[{visible_speaker}]")])
|
||||||
|
elif isinstance(raw_message.components[0], TextComponent):
|
||||||
|
first_text = raw_message.components[0].text or ""
|
||||||
|
raw_message.components[0] = TextComponent(f"[{visible_speaker}]{first_text}")
|
||||||
|
else:
|
||||||
|
raw_message.components.insert(0, TextComponent(f"[{visible_speaker}]"))
|
||||||
|
|
||||||
|
multimodal_message = SessionBackedMessage(
|
||||||
|
raw_message=raw_message,
|
||||||
|
visible_text=f"[{visible_speaker}]{message.processed_plain_text}",
|
||||||
|
timestamp=message.timestamp,
|
||||||
|
message_id=message.message_id,
|
||||||
|
original_message=message.original_message,
|
||||||
|
source_kind=message.source_kind,
|
||||||
|
)
|
||||||
|
return multimodal_message.to_llm_message()
|
||||||
|
|
||||||
def _build_history_messages(self, chat_history: List[LLMContextMessage]) -> List[Message]:
|
def _build_history_messages(self, chat_history: List[LLMContextMessage]) -> List[Message]:
|
||||||
"""将 replyer 上下文拆成多条 LLM 消息。"""
|
"""将 replyer 上下文拆成多条 LLM 消息。"""
|
||||||
bot_nickname = global_config.bot.nickname.strip() or "Bot"
|
bot_nickname = global_config.bot.nickname.strip() or "Bot"
|
||||||
@@ -177,6 +205,11 @@ class MaisakaReplyGenerator:
|
|||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
multimodal_message = self._build_multimodal_user_message(message, default_user_name)
|
||||||
|
if multimodal_message is not None:
|
||||||
|
messages.append(multimodal_message)
|
||||||
|
continue
|
||||||
|
|
||||||
for speaker_name, content_body in self._split_user_message_segments(message.processed_plain_text):
|
for speaker_name, content_body in self._split_user_message_segments(message.processed_plain_text):
|
||||||
content = self._normalize_content(content_body)
|
content = self._normalize_content(content_body)
|
||||||
if not content:
|
if not content:
|
||||||
@@ -227,7 +260,14 @@ class MaisakaReplyGenerator:
|
|||||||
preview_lines: List[str] = []
|
preview_lines: List[str] = []
|
||||||
for message in messages:
|
for message in messages:
|
||||||
role_name = message.role.value.capitalize()
|
role_name = message.role.value.capitalize()
|
||||||
preview_lines.append(f"{role_name}: {message.get_text_content()}")
|
part_previews: List[str] = []
|
||||||
|
for part in message.parts:
|
||||||
|
if isinstance(part, TextMessagePart):
|
||||||
|
part_previews.append(part.text)
|
||||||
|
continue
|
||||||
|
if isinstance(part, ImageMessagePart):
|
||||||
|
part_previews.append(f"[图片:{part.normalized_image_format}]")
|
||||||
|
preview_lines.append(f"{role_name}: {''.join(part_previews)}")
|
||||||
return "\n\n".join(preview_lines)
|
return "\n\n".join(preview_lines)
|
||||||
|
|
||||||
def _resolve_session_id(self, stream_id: Optional[str]) -> str:
|
def _resolve_session_id(self, stream_id: Optional[str]) -> str:
|
||||||
|
|||||||
@@ -5,8 +5,8 @@ from src.config.config import global_config
|
|||||||
|
|
||||||
def get_maisaka_replyer_class() -> Type[object]:
|
def get_maisaka_replyer_class() -> Type[object]:
|
||||||
"""根据配置返回 Maisaka replyer 类。"""
|
"""根据配置返回 Maisaka replyer 类。"""
|
||||||
generator_type = global_config.maisaka.replyer_generator_type
|
generator_type = get_maisaka_replyer_generator_type()
|
||||||
if generator_type == "multi":
|
if generator_type == "multimodal":
|
||||||
from .maisaka_generator_multi import MaisakaReplyGenerator
|
from .maisaka_generator_multi import MaisakaReplyGenerator
|
||||||
|
|
||||||
return MaisakaReplyGenerator
|
return MaisakaReplyGenerator
|
||||||
@@ -18,4 +18,4 @@ def get_maisaka_replyer_class() -> Type[object]:
|
|||||||
|
|
||||||
def get_maisaka_replyer_generator_type() -> str:
|
def get_maisaka_replyer_generator_type() -> str:
|
||||||
"""返回当前配置的 Maisaka replyer 生成器类型。"""
|
"""返回当前配置的 Maisaka replyer 生成器类型。"""
|
||||||
return global_config.maisaka.replyer_generator_type
|
return global_config.chat.replyer_generator_type
|
||||||
|
|||||||
@@ -16,8 +16,6 @@ class ExampleConfig(ConfigBase):
|
|||||||
\"""This is an example field\"""
|
\"""This is an example field\"""
|
||||||
- 注释前面增加_warp_标记可以实现配置文件中注释在配置项前面单独一行显示
|
- 注释前面增加_warp_标记可以实现配置文件中注释在配置项前面单独一行显示
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class BotConfig(ConfigBase):
|
class BotConfig(ConfigBase):
|
||||||
"""机器人配置类"""
|
"""机器人配置类"""
|
||||||
|
|
||||||
@@ -283,7 +281,7 @@ class ChatConfig(ConfigBase):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
direct_image_input: bool = Field(
|
multimodal_planner: bool = Field(
|
||||||
default=True,
|
default=True,
|
||||||
json_schema_extra={
|
json_schema_extra={
|
||||||
"x-widget": "switch",
|
"x-widget": "switch",
|
||||||
@@ -292,14 +290,14 @@ class ChatConfig(ConfigBase):
|
|||||||
)
|
)
|
||||||
"""是否直接输入图片"""
|
"""是否直接输入图片"""
|
||||||
|
|
||||||
replyer_generator_type: Literal["legacy", "multi"] = Field(
|
replyer_generator_type: Literal["legacy", "multimodal"] = Field(
|
||||||
default="legacy",
|
default="legacy",
|
||||||
json_schema_extra={
|
json_schema_extra={
|
||||||
"x-widget": "select",
|
"x-widget": "select",
|
||||||
"x-icon": "git-branch",
|
"x-icon": "git-branch",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
"""Maisaka replyer 生成器类型:legacy(旧版单 prompt)/ multi(多消息版)"""
|
"""Maisaka replyer 生成器类型:legacy(旧版单 prompt)/ multimodal(多模态版,适合主循环直接展示图片)"""
|
||||||
|
|
||||||
enable_talk_value_rules: bool = Field(
|
enable_talk_value_rules: bool = Field(
|
||||||
default=True,
|
default=True,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
"""reply 内置工具。"""
|
"""reply 内置工具。"""
|
||||||
|
|
||||||
|
import traceback
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from src.chat.replyer.replyer_manager import replyer_manager
|
from src.chat.replyer.replyer_manager import replyer_manager
|
||||||
@@ -82,6 +83,7 @@ async def handle_tool(
|
|||||||
logger.exception(
|
logger.exception(
|
||||||
f"{tool_ctx.runtime.log_prefix} 获取回复生成器时发生异常: 目标消息编号={target_message_id}"
|
f"{tool_ctx.runtime.log_prefix} 获取回复生成器时发生异常: 目标消息编号={target_message_id}"
|
||||||
)
|
)
|
||||||
|
logger.info(traceback.format_exc())
|
||||||
return tool_ctx.build_failure_result(
|
return tool_ctx.build_failure_result(
|
||||||
invocation.tool_name,
|
invocation.tool_name,
|
||||||
"获取 Maisaka 回复生成器时发生异常。",
|
"获取 Maisaka 回复生成器时发生异常。",
|
||||||
|
|||||||
@@ -266,7 +266,7 @@ class MaisakaReasoningEngine:
|
|||||||
source_sequence = message.raw_message
|
source_sequence = message.raw_message
|
||||||
|
|
||||||
planner_components = clone_message_sequence(source_sequence).components
|
planner_components = clone_message_sequence(source_sequence).components
|
||||||
if global_config.chat.direct_image_input:
|
if global_config.chat.multimodal_planner:
|
||||||
await self._hydrate_visual_components(planner_components)
|
await self._hydrate_visual_components(planner_components)
|
||||||
if planner_components and isinstance(planner_components[0], TextComponent):
|
if planner_components and isinstance(planner_components[0], TextComponent):
|
||||||
planner_components[0].text = planner_prefix + planner_components[0].text
|
planner_components[0].text = planner_prefix + planner_components[0].text
|
||||||
|
|||||||
@@ -235,8 +235,18 @@ class PluginMessageUtils:
|
|||||||
if isinstance(raw_forward_nodes, list):
|
if isinstance(raw_forward_nodes, list):
|
||||||
for node in raw_forward_nodes:
|
for node in raw_forward_nodes:
|
||||||
if not isinstance(node, dict):
|
if not isinstance(node, dict):
|
||||||
|
logger.info(f"解析转发节点时跳过非字典节点: {node!r}")
|
||||||
continue
|
continue
|
||||||
raw_content = node.get("content", [])
|
raw_content = node.get("content", [])
|
||||||
|
logger.info(
|
||||||
|
"开始解析转发节点: "
|
||||||
|
f"message_id={node.get('message_id')!r} "
|
||||||
|
f"user_id={node.get('user_id')!r} "
|
||||||
|
f"user_nickname={node.get('user_nickname')!r} "
|
||||||
|
f"user_cardname={node.get('user_cardname')!r} "
|
||||||
|
f"raw_content_type={type(raw_content).__name__} "
|
||||||
|
f"raw_content={raw_content!r}"
|
||||||
|
)
|
||||||
node_components: List[StandardMessageComponents] = []
|
node_components: List[StandardMessageComponents] = []
|
||||||
if isinstance(raw_content, list):
|
if isinstance(raw_content, list):
|
||||||
node_components = [
|
node_components = [
|
||||||
@@ -244,7 +254,17 @@ class PluginMessageUtils:
|
|||||||
for content in raw_content
|
for content in raw_content
|
||||||
if isinstance(content, dict)
|
if isinstance(content, dict)
|
||||||
]
|
]
|
||||||
|
logger.info(
|
||||||
|
"转发节点解析结果: "
|
||||||
|
f"message_id={node.get('message_id')!r} "
|
||||||
|
f"component_types={[component.__class__.__name__ for component in node_components]!r} "
|
||||||
|
f"component_values={[getattr(component, 'text', None) for component in node_components]!r}"
|
||||||
|
)
|
||||||
if not node_components:
|
if not node_components:
|
||||||
|
logger.warning(
|
||||||
|
"转发节点内容为空,使用占位文本回退: "
|
||||||
|
f"message_id={node.get('message_id')!r} raw_content={raw_content!r}"
|
||||||
|
)
|
||||||
node_components = [TextComponent(text="[empty forward node]")]
|
node_components = [TextComponent(text="[empty forward node]")]
|
||||||
forward_nodes.append(
|
forward_nodes.append(
|
||||||
ForwardComponent(
|
ForwardComponent(
|
||||||
|
|||||||
Reference in New Issue
Block a user