feat: unify replyer behavior with and without multimodal input, improving consistency and generality; add a model-level visual parameter

SengokuCola
2026-04-11 16:41:00 +08:00
parent 3ea2bf1059
commit c0230fc313
20 changed files with 323 additions and 1802 deletions
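
In effect, the separate legacy and multimodal replyer classes collapse into a single MaisakaReplyGenerator, switched by an enable_visual_message flag that defaults to global_config.visual.multimodal_replyer; the LLM client class and prompt loader become constructor-injectable for tests. A minimal sketch of the unified entry point, based on the test changes below (chat_stream=None mirrors the tests; real callers pass a BotChatSession):

from src.chat.replyer.maisaka_generator import MaisakaReplyGenerator

# Text-only pipeline: visual message parts are disabled explicitly.
text_generator = MaisakaReplyGenerator(
    chat_stream=None,
    request_type="maisaka_replyer",
    enable_visual_message=False,
)

# Multimodal pipeline: same class, visual parts enabled. Omitting
# enable_visual_message falls back to global_config.visual.multimodal_replyer.
visual_generator = MaisakaReplyGenerator(
    chat_stream=None,
    request_type="maisaka_replyer",
    enable_visual_message=True,
)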

View File

@@ -5,8 +5,7 @@ import pytest
from rich.panel import Panel
from rich.text import Text
from src.chat.replyer import maisaka_generator as legacy_replyer_module
from src.chat.replyer import maisaka_generator_multi as multimodal_replyer_module
from src.chat.replyer import maisaka_generator as replyer_module
from src.common.data_models.reply_generation_data_models import (
GenerationMetrics,
LLMCompletionResult,
@@ -37,8 +36,8 @@ class _FakeLegacyLLMServiceClient:
del args
del kwargs
async def generate_response(self, prompt: str) -> _FakeLLMResult:
assert prompt
async def generate_response_with_messages(self, *, message_factory: Callable[[object], list[Any]]) -> _FakeLLMResult:
assert message_factory(object())
return _FakeLLMResult()
@@ -54,13 +53,21 @@ class _FakeMultimodalLLMServiceClient:
@pytest.mark.asyncio
async def test_legacy_and_multimodal_replyer_monitor_detail_have_same_shape(monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setattr(legacy_replyer_module, "LLMServiceClient", _FakeLegacyLLMServiceClient)
monkeypatch.setattr(multimodal_replyer_module, "LLMServiceClient", _FakeMultimodalLLMServiceClient)
monkeypatch.setattr(legacy_replyer_module, "load_prompt", lambda *args, **kwargs: "legacy prompt")
monkeypatch.setattr(multimodal_replyer_module, "load_prompt", lambda *args, **kwargs: "multi prompt")
monkeypatch.setattr(replyer_module, "LLMServiceClient", _FakeLegacyLLMServiceClient)
monkeypatch.setattr(replyer_module, "load_prompt", lambda *args, **kwargs: "legacy prompt")
legacy_generator = legacy_replyer_module.MaisakaReplyGenerator(chat_stream=None, request_type="test_legacy")
multimodal_generator = multimodal_replyer_module.MaisakaReplyGenerator(chat_stream=None, request_type="test_multi")
legacy_generator = replyer_module.MaisakaReplyGenerator(
chat_stream=None,
request_type="test_legacy",
enable_visual_message=False,
)
multimodal_generator = replyer_module.MaisakaReplyGenerator(
chat_stream=None,
request_type="test_multi",
llm_client_cls=_FakeMultimodalLLMServiceClient,
load_prompt_func=lambda *args, **kwargs: "multi prompt",
enable_visual_message=True,
)
legacy_success, legacy_result = await legacy_generator.generate_reply_with_context(
stream_id="session-legacy",
@@ -84,6 +91,40 @@ async def test_legacy_and_multimodal_replyer_monitor_detail_have_same_shape(monk
assert legacy_result.monitor_detail["metrics"]["total_tokens"] == 19
def test_legacy_replyer_builds_message_sequence_like_multimodal() -> None:
legacy_generator = replyer_module.MaisakaReplyGenerator(
chat_stream=None,
request_type="test_legacy",
enable_visual_message=False,
)
legacy_prompt_loader = replyer_module.load_prompt
replyer_module.load_prompt = lambda *args, **kwargs: "legacy prompt"
try:
session_message = replyer_module.SessionBackedMessage(
raw_message=SimpleNamespace(),
visible_text="[Alice]你好\n[Bob]在吗",
timestamp=replyer_module.datetime.now(),
source_kind="user",
)
request_messages = legacy_generator._build_request_messages(
chat_history=[session_message],
reply_message=None,
reply_reason="测试原因",
stream_id="session-legacy",
)
finally:
replyer_module.load_prompt = legacy_prompt_loader
assert len(request_messages) == 4
assert request_messages[0].role.value == "system"
assert request_messages[1].role.value == "user"
assert request_messages[1].get_text_content() == "[Alice]你好"
assert request_messages[2].role.value == "user"
assert request_messages[2].get_text_content() == "[Bob]在吗"
assert request_messages[3].role.value == "user"
@pytest.mark.asyncio
async def test_reply_tool_puts_monitor_detail_into_metadata(monkeypatch: pytest.MonkeyPatch) -> None:
fake_monitor_detail = {
@@ -324,7 +365,7 @@ def test_reasoning_engine_build_tool_monitor_result_keeps_non_reply_tool_without
def test_runtime_build_tool_detail_panels_renders_reply_monitor_detail() -> None:
runtime = object.__new__(MaisakaHeartFlowChatting)
runtime.session_id = "session-1"
panels = runtime._build_tool_detail_panels(
panels = runtime._build_tool_detail_cards(
[
{
"tool_call_id": "call-reply-1",
@@ -348,7 +389,8 @@ def test_runtime_build_tool_detail_panels_renders_reply_monitor_detail() -> None
},
},
}
]
],
stage_title="工具调用",
)
assert len(panels) == 1
@@ -387,7 +429,7 @@ def test_runtime_build_tool_detail_panels_uses_prompt_access_panel(monkeypatch:
_fake_build_text_access_panel,
)
panels = runtime._build_tool_detail_panels(
panels = runtime._build_tool_detail_cards(
[
{
"tool_call_id": "call-reply-2",
@@ -401,7 +443,8 @@ def test_runtime_build_tool_detail_panels_uses_prompt_access_panel(monkeypatch:
"output_text": "reply output",
},
}
]
],
stage_title="工具调用",
)
assert len(panels) == 1
@@ -425,7 +468,7 @@ def test_runtime_build_tool_detail_panels_uses_emotion_prompt_access_panel(monke
_fake_build_text_access_panel,
)
panels = runtime._build_tool_detail_panels(
panels = runtime._build_tool_detail_cards(
[
{
"tool_call_id": "call-emoji-1",
@@ -439,7 +482,8 @@ def test_runtime_build_tool_detail_panels_uses_emotion_prompt_access_panel(monke
"output_text": '{"emoji_index": 1}',
},
}
]
],
stage_title="工具调用",
)
assert len(panels) == 1
@@ -448,6 +492,63 @@ def test_runtime_build_tool_detail_panels_uses_emotion_prompt_access_panel(monke
assert captured["kwargs"]["request_kind"] == "emotion"
def test_runtime_build_tool_detail_cards_uses_structured_prompt_messages_with_images(
monkeypatch: pytest.MonkeyPatch,
) -> None:
runtime = object.__new__(MaisakaHeartFlowChatting)
runtime.session_id = "session-image"
captured: dict[str, Any] = {}
def _fake_build_prompt_access_panel(messages: list[Any], **kwargs: Any) -> str:
captured["messages"] = messages
captured["kwargs"] = kwargs
return "IMAGE_PROMPT_LINK"
def _fake_build_text_access_panel(content: str, **kwargs: Any) -> str:
captured["text_content"] = content
captured["text_kwargs"] = kwargs
return "TEXT_PROMPT_LINK"
monkeypatch.setattr(
"src.maisaka.runtime.PromptCLIVisualizer.build_prompt_access_panel",
_fake_build_prompt_access_panel,
)
monkeypatch.setattr(
"src.maisaka.runtime.PromptCLIVisualizer.build_text_access_panel",
_fake_build_text_access_panel,
)
panels = runtime._build_tool_detail_cards(
[
{
"tool_call_id": "call-reply-image-1",
"tool_name": "reply",
"tool_args": {"msg_id": "m3"},
"success": True,
"duration_ms": 22.0,
"summary": "- reply [成功]: 已回复",
"detail": {
"prompt_text": "reply prompt image",
"request_messages": [
{
"role": "user",
"content": ["前缀文本", ["png", "ZmFrZQ=="]],
}
],
"output_text": "reply output",
},
}
],
stage_title="工具调用",
)
assert len(panels) == 1
assert "messages" in captured
assert "text_content" not in captured
assert captured["kwargs"]["chat_id"] == "session-image"
assert captured["kwargs"]["request_kind"] == "replyer"
def test_runtime_render_context_usage_panel_merges_timing_and_planner(monkeypatch: pytest.MonkeyPatch) -> None:
runtime = object.__new__(MaisakaHeartFlowChatting)
runtime.session_id = "session-merged"

View File

@@ -0,0 +1,18 @@
from src.llm_models.payload_content.message import MessageBuilder, RoleType
from src.plugin_runtime.hook_payloads import deserialize_prompt_messages, serialize_prompt_messages
def test_prompt_messages_roundtrip_preserves_image_parts() -> None:
messages = [
MessageBuilder().set_role(RoleType.User).add_text_content("你好").add_image_content("png", "ZmFrZQ==").build(),
]
serialized_messages = serialize_prompt_messages(messages)
restored_messages = deserialize_prompt_messages(serialized_messages)
assert len(restored_messages) == 1
assert restored_messages[0].role == RoleType.User
assert restored_messages[0].get_text_content() == "你好"
assert len(restored_messages[0].parts) == 2
assert restored_messages[0].parts[1].image_format == "png"
assert restored_messages[0].parts[1].image_base64 == "ZmFrZQ=="

File diff suppressed because it is too large

View File

@@ -1,530 +1,34 @@
from dataclasses import dataclass, field
from datetime import datetime
from typing import Awaitable, Callable, Dict, List, Optional, Tuple
import random
import time
from rich.panel import Panel
from typing import Any, Callable, Optional
from src.chat.message_receive.chat_manager import BotChatSession
from src.chat.message_receive.message import SessionMessage
from src.chat.utils.utils import get_chat_type_and_target_info
from src.cli.console import console
from src.common.data_models.reply_generation_data_models import (
GenerationMetrics,
LLMCompletionResult,
ReplyGenerationResult,
build_reply_monitor_detail,
)
from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt
from src.common.utils.utils_session import SessionUtils
from src.config.config import global_config
from src.core.types import ActionInfo
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.maisaka.context_messages import SessionBackedMessage
from src.services.llm_service import LLMServiceClient
from src.maisaka.context_messages import (
AssistantMessage,
LLMContextMessage,
ReferenceMessage,
SessionBackedMessage,
ToolResultMessage,
)
from src.maisaka.message_adapter import parse_speaker_content
from src.maisaka.prompt_cli_renderer import PromptCLIVisualizer
from src.plugin_runtime.hook_payloads import serialize_prompt_messages
from .maisaka_expression_selector import maisaka_expression_selector
logger = get_logger("replyer")
from .maisaka_generator_base import BaseMaisakaReplyGenerator
@dataclass
class MaisakaReplyContext:
"""Maisaka replyer 使用的回复上下文。"""
expression_habits: str = ""
selected_expression_ids: List[int] = field(default_factory=list)
class MaisakaReplyGenerator:
"""生成 Maisaka 的最终可见回复。"""
class MaisakaReplyGenerator(BaseMaisakaReplyGenerator):
"""Maisaka replyer。"""
def __init__(
self,
chat_stream: Optional[BotChatSession] = None,
request_type: str = "maisaka_replyer",
llm_client_cls: Optional[Any] = None,
load_prompt_func: Optional[Callable[..., str]] = None,
enable_visual_message: Optional[bool] = None,
) -> None:
self.chat_stream = chat_stream
self.request_type = request_type
self.express_model = LLMServiceClient(
task_name="replyer",
super().__init__(
chat_stream=chat_stream,
request_type=request_type,
llm_client_cls=llm_client_cls or LLMServiceClient,
load_prompt_func=load_prompt_func or load_prompt,
enable_visual_message=(
global_config.visual.multimodal_replyer
if enable_visual_message is None
else enable_visual_message
),
)
self._personality_prompt = self._build_personality_prompt()
def _build_personality_prompt(self) -> str:
"""构建 replyer 使用的人设提示。"""
try:
bot_name = global_config.bot.nickname
alias_names = global_config.bot.alias_names
bot_aliases = f",也有人叫你{','.join(alias_names)}" if alias_names else ""
prompt_personality = global_config.personality.personality
if (
hasattr(global_config.personality, "states")
and global_config.personality.states
and hasattr(global_config.personality, "state_probability")
and global_config.personality.state_probability > 0
and random.random() < global_config.personality.state_probability
):
prompt_personality = random.choice(global_config.personality.states)
return f"你的名字是{bot_name}{bot_aliases},你{prompt_personality};"
except Exception as exc:
logger.warning(f"构建 Maisaka 人设提示词失败: {exc}")
return "你的名字是麦麦,你是一个活泼可爱的 AI 助手。"
@staticmethod
def _normalize_content(content: str, limit: int = 500) -> str:
normalized = " ".join((content or "").split())
if len(normalized) > limit:
return normalized[:limit] + "..."
return normalized
@staticmethod
def _format_message_time(message: LLMContextMessage) -> str:
return message.timestamp.strftime("%H:%M:%S")
@staticmethod
def _extract_visible_assistant_reply(message: AssistantMessage) -> str:
del message
return ""
def _extract_guided_bot_reply(self, message: SessionBackedMessage) -> str:
speaker_name, body = parse_speaker_content(message.processed_plain_text.strip())
bot_nickname = global_config.bot.nickname.strip() or "Bot"
if speaker_name == bot_nickname:
return self._normalize_content(body.strip())
return ""
@staticmethod
def _split_user_message_segments(raw_content: str) -> List[tuple[Optional[str], str]]:
"""按说话人拆分用户消息。"""
segments: List[tuple[Optional[str], str]] = []
current_speaker: Optional[str] = None
current_lines: List[str] = []
for raw_line in raw_content.splitlines():
speaker_name, content_body = parse_speaker_content(raw_line)
if speaker_name is not None:
if current_lines:
segments.append((current_speaker, "\n".join(current_lines)))
current_speaker = speaker_name
current_lines = [content_body]
continue
current_lines.append(raw_line)
if current_lines:
segments.append((current_speaker, "\n".join(current_lines)))
return segments
def _format_chat_history(self, messages: List[LLMContextMessage]) -> str:
"""格式化 replyer 使用的可见聊天记录。"""
bot_nickname = global_config.bot.nickname.strip() or "Bot"
parts: List[str] = []
for message in messages:
timestamp = self._format_message_time(message)
if isinstance(message, (ReferenceMessage, ToolResultMessage)):
continue
if isinstance(message, SessionBackedMessage):
guided_reply = self._extract_guided_bot_reply(message)
if guided_reply:
parts.append(f"{timestamp} {bot_nickname}(you): {guided_reply}")
continue
raw_content = message.processed_plain_text
for speaker_name, content_body in self._split_user_message_segments(raw_content):
content = self._normalize_content(content_body)
if not content:
continue
visible_speaker = speaker_name or global_config.maisaka.cli_user_name.strip() or "User"
parts.append(f"{timestamp} {visible_speaker}: {content}")
continue
if isinstance(message, AssistantMessage):
visible_reply = self._extract_visible_assistant_reply(message)
if visible_reply:
parts.append(f"{timestamp} {bot_nickname}(you): {visible_reply}")
return "\n".join(parts)
def _build_target_message_block(self, reply_message: Optional[SessionMessage]) -> str:
"""构建当前需要回复的目标消息摘要。"""
if reply_message is None:
return ""
user_info = reply_message.message_info.user_info
sender_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
target_message_id = reply_message.message_id.strip() if reply_message.message_id else "未知"
target_content = self._normalize_content((reply_message.processed_plain_text or "").strip(), limit=300)
if not target_content:
target_content = "[无可见文本内容]"
return (
"【本次回复目标】\n"
f"- 目标消息ID{target_message_id}\n"
f"- 发送者:{sender_name}\n"
f"- 消息内容:{target_content}\n"
"- 你这次要回复的就是这条目标消息,请结合整段上下文理解,但不要误把其他历史消息当成当前回复对象。"
)
@staticmethod
def _get_chat_prompt_for_chat(chat_id: str, is_group_chat: Optional[bool]) -> str:
"""根据聊天流 ID 获取匹配的额外 prompt。"""
if not global_config.chat.chat_prompts:
return ""
for chat_prompt_item in global_config.chat.chat_prompts:
if hasattr(chat_prompt_item, "platform"):
platform = str(chat_prompt_item.platform or "").strip()
item_id = str(chat_prompt_item.item_id or "").strip()
rule_type = str(chat_prompt_item.rule_type or "").strip()
prompt_content = str(chat_prompt_item.prompt or "").strip()
elif isinstance(chat_prompt_item, str):
parts = chat_prompt_item.split(":", 3)
if len(parts) != 4:
continue
platform, item_id, rule_type, prompt_content = parts
platform = platform.strip()
item_id = item_id.strip()
rule_type = rule_type.strip()
prompt_content = prompt_content.strip()
else:
continue
if not platform or not item_id or not prompt_content:
continue
if rule_type == "group":
config_is_group = True
config_chat_id = SessionUtils.calculate_session_id(platform, group_id=item_id)
elif rule_type == "private":
config_is_group = False
config_chat_id = SessionUtils.calculate_session_id(platform, user_id=item_id)
else:
continue
if config_is_group != is_group_chat:
continue
if config_chat_id == chat_id:
return prompt_content
return ""
def _build_group_chat_attention_block(self, session_id: str) -> str:
"""构建当前聊天场景下的额外注意事项块。"""
if not session_id:
return ""
try:
is_group_chat, _ = get_chat_type_and_target_info(session_id)
except Exception:
is_group_chat = None
prompt_lines: List[str] = []
if is_group_chat is True:
if group_chat_prompt := global_config.chat.group_chat_prompt.strip():
prompt_lines.append(f"通用注意事项:\n{group_chat_prompt}")
elif is_group_chat is False:
if private_chat_prompt := global_config.chat.private_chat_prompts.strip():
prompt_lines.append(f"通用注意事项:\n{private_chat_prompt}")
if chat_prompt := self._get_chat_prompt_for_chat(session_id, is_group_chat).strip():
prompt_lines.append(f"当前聊天额外注意事项:\n{chat_prompt}")
if not prompt_lines:
return ""
return "在该聊天中的注意事项:\n" + "\n\n".join(prompt_lines) + "\n"
def _build_request_messages(
self,
chat_history: List[LLMContextMessage],
reply_message: Optional[SessionMessage],
reply_reason: str,
expression_habits: str = "",
stream_id: Optional[str] = None,
) -> List[Message]:
"""构建 Maisaka replyer 请求消息列表。"""
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
formatted_history = self._format_chat_history(chat_history)
target_message_block = self._build_target_message_block(reply_message)
session_id = self._resolve_session_id(stream_id)
try:
system_prompt = load_prompt(
"maisaka_replyer",
bot_name=global_config.bot.nickname,
group_chat_attention_block=self._build_group_chat_attention_block(session_id),
time_block=f"当前时间:{current_time}",
identity=self._personality_prompt,
reply_style=global_config.personality.reply_style,
)
except Exception:
system_prompt = "你是一个友好的 AI 助手,请根据聊天记录自然回复。"
extra_sections: List[str] = []
if expression_habits.strip():
extra_sections.append(expression_habits.strip())
user_sections = [
f"当前时间:{current_time}",
f"【聊天记录】\n{formatted_history}",
]
if target_message_block:
user_sections.append(target_message_block)
if extra_sections:
user_sections.append("\n\n".join(extra_sections))
user_sections.append(f"【回复信息参考】\n{reply_reason}")
user_sections.append("现在,你说:")
user_prompt = "\n\n".join(user_sections)
return [
MessageBuilder().set_role(RoleType.System).add_text_content(system_prompt).build(),
MessageBuilder().set_role(RoleType.User).add_text_content(user_prompt).build(),
]
def _resolve_session_id(self, stream_id: Optional[str]) -> str:
"""解析当前回复使用的会话 ID。"""
if stream_id:
return stream_id
if self.chat_stream is not None:
return self.chat_stream.session_id
return ""
async def _build_reply_context(
self,
chat_history: List[LLMContextMessage],
reply_message: Optional[SessionMessage],
reply_reason: str,
stream_id: Optional[str],
sub_agent_runner: Optional[Callable[[str], Awaitable[str]]],
) -> MaisakaReplyContext:
"""构建回复上下文:表达习惯和已选表达 ID。"""
session_id = self._resolve_session_id(stream_id)
if not session_id:
logger.warning("构建 Maisaka 回复上下文失败:缺少会话标识")
return MaisakaReplyContext()
if sub_agent_runner is None:
logger.info("表达方式选择跳过:缺少子代理执行器")
return MaisakaReplyContext()
selection_result = await maisaka_expression_selector.select_for_reply(
session_id=session_id,
chat_history=chat_history,
reply_message=reply_message,
reply_reason=reply_reason,
sub_agent_runner=sub_agent_runner,
)
return MaisakaReplyContext(
expression_habits=selection_result.expression_habits,
selected_expression_ids=selection_result.selected_expression_ids,
)
async def generate_reply_with_context(
self,
extra_info: str = "",
reply_reason: str = "",
available_actions: Optional[Dict[str, ActionInfo]] = None,
chosen_actions: Optional[List[object]] = None,
from_plugin: bool = True,
stream_id: Optional[str] = None,
reply_message: Optional[SessionMessage] = None,
reply_time_point: Optional[float] = None,
think_level: int = 1,
unknown_words: Optional[List[str]] = None,
log_reply: bool = True,
chat_history: Optional[List[LLMContextMessage]] = None,
expression_habits: str = "",
selected_expression_ids: Optional[List[int]] = None,
sub_agent_runner: Optional[Callable[[str], Awaitable[str]]] = None,
) -> Tuple[bool, ReplyGenerationResult]:
"""结合上下文生成 Maisaka 的最终可见回复。"""
def finalize(success_value: bool) -> Tuple[bool, ReplyGenerationResult]:
result.monitor_detail = build_reply_monitor_detail(result)
return success_value, result
del available_actions
del chosen_actions
del extra_info
del from_plugin
del log_reply
del reply_time_point
del think_level
del unknown_words
result = ReplyGenerationResult()
overall_started_at = time.perf_counter()
if chat_history is None:
result.error_message = "聊天历史为空"
return finalize(False)
logger.info(
f"Maisaka 回复器开始生成: 会话流标识={stream_id} 回复原因={reply_reason!r} "
f"历史消息数={len(chat_history)} 目标消息编号={reply_message.message_id if reply_message else None}"
)
filtered_history = [
message
for message in chat_history
if not isinstance(message, (ReferenceMessage, ToolResultMessage))
]
logger.debug(f"Maisaka 回复器过滤后历史消息数={len(filtered_history)}")
if self.express_model is None:
logger.error("Maisaka 回复器的回复模型未初始化")
result.error_message = "回复模型尚未初始化"
return finalize(False)
try:
reply_context = await self._build_reply_context(
chat_history=filtered_history,
reply_message=reply_message,
reply_reason=reply_reason or "",
stream_id=stream_id,
sub_agent_runner=sub_agent_runner,
)
except Exception as exc:
import traceback
logger.error(f"Maisaka 回复器构建回复上下文失败: {exc}\n{traceback.format_exc()}")
result.error_message = f"构建回复上下文失败: {exc}"
result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
merged_expression_habits = expression_habits.strip() or reply_context.expression_habits
result.selected_expression_ids = (
list(selected_expression_ids)
if selected_expression_ids is not None
else list(reply_context.selected_expression_ids)
)
logger.info(
f"Maisaka 回复上下文构建完成: 会话流标识={stream_id} "
f"已选表达编号={result.selected_expression_ids!r}"
)
prompt_started_at = time.perf_counter()
try:
request_messages = self._build_request_messages(
chat_history=filtered_history,
reply_message=reply_message,
reply_reason=reply_reason or "",
expression_habits=merged_expression_habits,
stream_id=stream_id,
)
except Exception as exc:
import traceback
logger.error(f"Maisaka 回复器构建提示词失败: {exc}\n{traceback.format_exc()}")
result.error_message = f"构建提示词失败: {exc}"
result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
prompt_ms = round((time.perf_counter() - prompt_started_at) * 1000, 2)
request_prompt = PromptCLIVisualizer._build_prompt_dump_text(request_messages)
result.completion.request_prompt = request_prompt
result.request_messages = serialize_prompt_messages(request_messages)
show_replyer_prompt = bool(getattr(global_config.debug, "show_replyer_prompt", False))
show_replyer_reasoning = bool(getattr(global_config.debug, "show_replyer_reasoning", False))
preview_chat_id = self._resolve_session_id(stream_id) or "unknown"
if show_replyer_prompt:
console.print(
Panel(
PromptCLIVisualizer.build_prompt_access_panel(
request_messages,
category="replyer",
chat_id=preview_chat_id,
request_kind="replyer",
selection_reason=f"ID: {preview_chat_id}",
image_display_mode="path_link" if global_config.maisaka.show_image_path else "legacy",
),
title="Maisaka Replyer Prompt",
border_style="bright_yellow",
padding=(0, 1),
)
)
def message_factory(_client: object) -> List[Message]:
return request_messages
llm_started_at = time.perf_counter()
try:
generation_result = await self.express_model.generate_response_with_messages(
message_factory=message_factory
)
except Exception as exc:
logger.exception("Maisaka 回复器调用失败")
result.error_message = str(exc)
result.metrics = GenerationMetrics(
prompt_ms=prompt_ms,
llm_ms=round((time.perf_counter() - llm_started_at) * 1000, 2),
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
llm_ms = round((time.perf_counter() - llm_started_at) * 1000, 2)
response_text = (generation_result.response or "").strip()
result.success = bool(response_text)
result.completion = LLMCompletionResult(
request_prompt=request_prompt,
response_text=response_text,
reasoning_text=generation_result.reasoning or "",
model_name=generation_result.model_name or "",
tool_calls=generation_result.tool_calls or [],
prompt_tokens=generation_result.prompt_tokens,
completion_tokens=generation_result.completion_tokens,
total_tokens=generation_result.total_tokens,
)
result.metrics = GenerationMetrics(
prompt_ms=prompt_ms,
llm_ms=llm_ms,
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
stage_logs=[
f"prompt: {prompt_ms} ms",
f"llm: {llm_ms} ms",
],
)
if show_replyer_reasoning and result.completion.reasoning_text:
logger.info(f"Maisaka 回复器思考内容:\n{result.completion.reasoning_text}")
if not result.success:
result.error_message = "回复器返回了空内容"
logger.warning("Maisaka 回复器返回了空内容")
return finalize(False)
logger.info(
f"Maisaka 回复器生成成功: 回复文本={response_text!r} "
f"总耗时毫秒={result.metrics.overall_ms} "
f"已选表达编号={result.selected_expression_ids!r}"
)
result.text_fragments = [response_text]
return finalize(True)

View File

@@ -1,8 +1,9 @@
import random
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Awaitable, Callable, Dict, List, Optional, Tuple
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
import random
from rich.console import Group, RenderableType
from rich.panel import Panel
@@ -20,13 +21,10 @@ from src.common.data_models.reply_generation_data_models import (
build_reply_monitor_detail,
)
from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt
from src.common.utils.utils_session import SessionUtils
from src.config.config import global_config
from src.core.types import ActionInfo
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.services.llm_service import LLMServiceClient
from src.maisaka.context_messages import (
AssistantMessage,
LLMContextMessage,
@@ -34,8 +32,8 @@ from src.maisaka.context_messages import (
SessionBackedMessage,
ToolResultMessage,
)
from src.maisaka.display.prompt_cli_renderer import PromptCLIVisualizer
from src.maisaka.message_adapter import clone_message_sequence, parse_speaker_content
from src.maisaka.prompt_cli_renderer import PromptCLIVisualizer
from src.plugin_runtime.hook_payloads import serialize_prompt_messages
from .maisaka_expression_selector import maisaka_expression_selector
@@ -51,17 +49,24 @@ class MaisakaReplyContext:
selected_expression_ids: List[int] = field(default_factory=list)
class MaisakaReplyGenerator:
"""生成 Maisaka 的最终可见回复(多模态管线)"""
class BaseMaisakaReplyGenerator:
"""Maisaka replyer 的共享实现"""
def __init__(
self,
*,
chat_stream: Optional[BotChatSession] = None,
request_type: str = "maisaka_replyer",
llm_client_cls: Any,
load_prompt_func: Callable[..., str],
enable_visual_message: bool,
) -> None:
self.chat_stream = chat_stream
self.request_type = request_type
self.express_model = LLMServiceClient(
self._llm_client_cls = llm_client_cls
self._load_prompt = load_prompt_func
self._enable_visual_message = enable_visual_message
self.express_model = llm_client_cls(
task_name="replyer",
request_type=request_type,
)
@@ -232,7 +237,7 @@ class MaisakaReplyGenerator:
session_id = self._resolve_session_id(stream_id)
try:
system_prompt = load_prompt(
system_prompt = self._load_prompt(
"maisaka_replyer",
bot_name=global_config.bot.nickname,
group_chat_attention_block=self._build_group_chat_attention_block(session_id),
@@ -255,17 +260,20 @@ class MaisakaReplyGenerator:
return f"{system_prompt}\n\n" + "\n\n".join(sections)
def _build_reply_instruction(self) -> str:
return "请自然地回复。不要输出多余说明、括号、at 或额外标记,只输出实际要发送的内容。"
return "请自然地回复。不要输出多余说明、括号、@ 或额外标记,只输出实际要发送的内容。"
def _build_multimodal_user_message(
def _build_visual_user_message(
self,
message: SessionBackedMessage,
) -> Optional[Message]:
if not self._enable_visual_message:
return None
raw_message = clone_message_sequence(message.raw_message)
if not raw_message.components:
raw_message = MessageSequence([TextComponent(message.processed_plain_text)])
multimodal_message = SessionBackedMessage(
visual_message = SessionBackedMessage(
raw_message=raw_message,
visible_text=message.processed_plain_text,
timestamp=message.timestamp,
@@ -273,7 +281,7 @@ class MaisakaReplyGenerator:
original_message=message.original_message,
source_kind=message.source_kind,
)
return multimodal_message.to_llm_message()
return visual_message.to_llm_message()
def _build_history_messages(self, chat_history: List[LLMContextMessage]) -> List[Message]:
bot_nickname = global_config.bot.nickname.strip() or "Bot"
@@ -292,9 +300,9 @@ class MaisakaReplyGenerator:
)
continue
multimodal_message = self._build_multimodal_user_message(message)
if multimodal_message is not None:
messages.append(multimodal_message)
visual_message = self._build_visual_user_message(message)
if visual_message is not None:
messages.append(visual_message)
continue
for speaker_name, content_body in self._split_user_message_segments(message.processed_plain_text):
@@ -398,7 +406,6 @@ class MaisakaReplyGenerator:
selected_expression_ids: Optional[List[int]] = None,
sub_agent_runner: Optional[Callable[[str], Awaitable[str]]] = None,
) -> Tuple[bool, ReplyGenerationResult]:
def finalize(success_value: bool) -> Tuple[bool, ReplyGenerationResult]:
result.monitor_detail = build_reply_monitor_detail(result)
return success_value, result
@@ -460,7 +467,7 @@ class MaisakaReplyGenerator:
)
logger.info(
f"回复上下文完成: 流={stream_id} 已选表达={result.selected_expression_ids!r}"
f"回复上下文完成 流={stream_id} 已选表达={result.selected_expression_ids!r}"
)
prompt_started_at = time.perf_counter()
@@ -556,7 +563,7 @@ class MaisakaReplyGenerator:
return finalize(False)
logger.info(
f"Maisaka 回复器生成成功: 文本={response_text!r} "
f"Maisaka 回复器生成成功 文本={response_text!r} "
f"总耗时ms={result.metrics.overall_ms} 已选表达={result.selected_expression_ids!r}"
)
if show_replyer_prompt or show_replyer_reasoning:

View File

@@ -1,21 +0,0 @@
from typing import Type
from src.config.config import global_config
def get_maisaka_replyer_class() -> Type[object]:
"""根据配置返回 Maisaka replyer 类。"""
generator_type = get_maisaka_replyer_generator_type()
if generator_type == "multimodal":
from .maisaka_generator_multi import MaisakaReplyGenerator
return MaisakaReplyGenerator
from .maisaka_generator import MaisakaReplyGenerator
return MaisakaReplyGenerator
def get_maisaka_replyer_generator_type() -> str:
"""返回当前配置的 Maisaka replyer 生成器类型。"""
return "multimodal" if global_config.visual.multimodal_replyer else "legacy"

View File

@@ -1,12 +1,11 @@
from typing import TYPE_CHECKING, Any, Dict, Optional
from src.chat.message_receive.chat_manager import BotChatSession, chat_manager as _chat_manager
from src.chat.replyer.maisaka_replyer_factory import (
get_maisaka_replyer_class,
get_maisaka_replyer_generator_type,
)
from src.config.config import global_config
from src.common.logger import get_logger
from .maisaka_generator import MaisakaReplyGenerator
if TYPE_CHECKING:
from src.chat.replyer.group_generator import DefaultReplyer
from src.chat.replyer.private_generator import PrivateReplyer
@@ -20,6 +19,11 @@ class ReplyerManager:
def __init__(self) -> None:
self._repliers: Dict[str, Any] = {}
@staticmethod
def _get_maisaka_generator_type() -> str:
"""返回当前配置下 Maisaka replyer 的消息模式。"""
return "multimodal" if global_config.visual.multimodal_replyer else "legacy"
def get_replyer(
self,
chat_stream: Optional[BotChatSession] = None,
@@ -33,7 +37,7 @@ class ReplyerManager:
logger.warning("[ReplyerManager] 缺少 stream_id无法获取 replyer")
return None
generator_type = get_maisaka_replyer_generator_type() if replyer_type == "maisaka" else ""
generator_type = self._get_maisaka_generator_type() if replyer_type == "maisaka" else ""
cache_key = f"{replyer_type}:{generator_type}:{stream_id}"
if cache_key in self._repliers:
logger.info(f"[ReplyerManager] 命中缓存 replyer: cache_key={cache_key}")
@@ -50,13 +54,14 @@ class ReplyerManager:
)
try:
maisaka_replyer_class = get_maisaka_replyer_class()
replyer = maisaka_replyer_class(
chat_stream=target_stream,
request_type=request_type,
)
if replyer_type == "maisaka":
replyer = MaisakaReplyGenerator(
chat_stream=target_stream,
request_type=request_type,
)
else:
logger.warning(f"[ReplyerManager] 不支持的 replyer_type={replyer_type}")
return None
except Exception:
logger.exception(f"[ReplyerManager] 创建 replyer 失败: cache_key={cache_key}")
raise
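
With the factory module deleted, the generator mode survives only inside ReplyerManager's cache key, so toggling visual.multimodal_replyer yields a fresh replyer per stream instead of reusing a stale cached one. An illustrative restatement of the key rule (this helper is not part of the codebase):

def _cache_key(replyer_type: str, multimodal: bool, stream_id: str) -> str:
    # Mirrors ReplyerManager.get_replyer: for maisaka replyers the configured
    # mode is folded into the key alongside the replyer type and stream id.
    generator_type = ("multimodal" if multimodal else "legacy") if replyer_type == "maisaka" else ""
    return f"{replyer_type}:{generator_type}:{stream_id}"

assert _cache_key("maisaka", True, "session-1") == "maisaka:multimodal:session-1"
assert _cache_key("maisaka", False, "session-1") == "maisaka:legacy:session-1"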

View File

@@ -55,7 +55,7 @@ BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute()
MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute()
MMC_VERSION: str = "1.0.0"
CONFIG_VERSION: str = "8.5.5"
MODEL_CONFIG_VERSION: str = "1.13.1"
MODEL_CONFIG_VERSION: str = "1.14.0"
logger = get_logger("config")

View File

@@ -307,6 +307,15 @@ class ModelInfo(ConfigBase):
)
"""强制流式输出模式 (若模型不支持非流式输出, 请设置为true启用强制流式输出, 默认值为false)"""
visual: bool = Field(
default=False,
json_schema_extra={
"x-widget": "switch",
"x-icon": "image",
},
)
"""是否为多模态模型。开启后表示该模型支持视觉输入。"""
extra_params: dict[str, Any] = Field(
default_factory=dict,
json_schema_extra={

View File

@@ -228,6 +228,7 @@ def serialize_model_info_snapshot(model_info: ModelInfo) -> dict[str, Any]:
"model_identifier": model_info.model_identifier,
"name": model_info.name,
"temperature": model_info.temperature,
"visual": model_info.visual,
}
@@ -244,6 +245,7 @@ def deserialize_model_info_snapshot(raw_model_info: Any) -> ModelInfo:
model_identifier=str(raw_model_info.get("model_identifier") or ""),
name=str(raw_model_info.get("name") or ""),
temperature=raw_model_info.get("temperature"),
visual=bool(raw_model_info.get("visual", False)),
)
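
Model snapshots carry the new flag too, defaulting to False when the key is absent, so snapshots written before this commit still deserialize cleanly. A hypothetical roundtrip (the two helpers appear in this diff, but their module path is not shown, so the import is an assumption; field values are illustrative):

from src.plugin_runtime.hook_payloads import deserialize_model_info_snapshot  # assumed module path

raw_snapshot = {
    "model_identifier": "gpt-4o",  # illustrative values
    "name": "replyer-vision",
    "temperature": 0.7,
    "visual": True,
}
assert deserialize_model_info_snapshot(raw_snapshot).visual is True

# A pre-1.14.0 snapshot simply lacks the key and falls back to False.
legacy_snapshot = {k: v for k, v in raw_snapshot.items() if k != "visual"}
assert deserialize_model_info_snapshot(legacy_snapshot).visual is False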

View File

@@ -18,7 +18,7 @@ from src.common.message_server.server import Server, get_global_server
from src.common.remote import TelemetryHeartBeatTask
from src.config.config import config_manager, global_config
from src.manager.async_task_manager import async_task_manager
from src.maisaka.stage_status_board import disable_stage_status_board, enable_stage_status_board
from src.maisaka.display.stage_status_board import disable_stage_status_board, enable_stage_status_board
from src.plugin_runtime.integration import get_plugin_runtime_manager
from src.prompt.prompt_manager import prompt_manager
from src.services.memory_flow_service import memory_automation_service

View File

@@ -32,7 +32,7 @@ from src.services.llm_service import LLMServiceClient
from .builtin_tool import get_builtin_tools
from .context_messages import AssistantMessage, LLMContextMessage, ToolResultMessage
from .history_utils import drop_orphan_tool_results
from .prompt_cli_renderer import PromptCLIVisualizer
from .display.prompt_cli_renderer import PromptCLIVisualizer
TIMING_GATE_TOOL_NAMES = {"continue", "no_reply", "wait"}

View File

@@ -0,0 +1,33 @@
"""Maisaka 展示模块。"""
from .display_utils import (
build_tool_call_summary_lines,
format_token_count,
format_tool_call_for_display,
get_request_panel_style,
get_role_badge_label,
get_role_badge_style,
)
from .prompt_cli_renderer import PromptCLIVisualizer
from .prompt_preview_logger import PromptPreviewLogger
from .stage_status_board import (
disable_stage_status_board,
enable_stage_status_board,
remove_stage_status,
update_stage_status,
)
__all__ = [
"PromptCLIVisualizer",
"PromptPreviewLogger",
"build_tool_call_summary_lines",
"disable_stage_status_board",
"enable_stage_status_board",
"format_token_count",
"format_tool_call_for_display",
"get_request_panel_style",
"get_role_badge_label",
"get_role_badge_style",
"remove_stage_status",
"update_stage_status",
]

View File

@@ -4,15 +4,15 @@ from typing import Any
_REQUEST_PANEL_STYLE_MAP: dict[str, tuple[str, str]] = {
"planner": ("\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u5927\u6a21\u578b\u8bf7\u6c42 - \u5bf9\u8bdd\u5355\u6b65", "green"),
"timing_gate": ("\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u5927\u6a21\u578b\u8bf7\u6c42 - Timing Gate \u5b50\u4ee3\u7406", "bright_magenta"),
"replyer": ("\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u56de\u590d\u5668 Prompt", "bright_yellow"),
"planner": ("MaiSaka 大模型请求 - 对话单步", "green"),
"timing_gate": ("MaiSaka 大模型请求 - Timing Gate 子代理", "bright_magenta"),
"replyer": ("MaiSaka 回复器 Prompt", "bright_yellow"),
"emotion": ("MaiSaka Emotion Tool Prompt", "bright_cyan"),
"sub_agent": ("\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u5927\u6a21\u578b\u8bf7\u6c42 - \u5b50\u4ee3\u7406", "bright_blue"),
"sub_agent": ("MaiSaka 大模型请求 - 子代理", "bright_blue"),
}
_DEFAULT_REQUEST_PANEL_STYLE: tuple[str, str] = (
"\u004d\u0061\u0069\u0053\u0061\u006b\u0061 \u5927\u6a21\u578b\u8bf7\u6c42 - \u5bf9\u8bdd\u5355\u6b65",
"MaiSaka 大模型请求 - 对话单步",
"cyan",
)
@@ -24,10 +24,10 @@ _ROLE_BADGE_STYLE_MAP: dict[str, str] = {
}
_ROLE_BADGE_LABEL_MAP: dict[str, str] = {
"system": "\u7cfb\u7edf",
"user": "\u7528\u6237",
"assistant": "\u52a9\u624b",
"tool": "\u5de5\u5177",
"system": "系统",
"user": "用户",
"assistant": "助手",
"tool": "工具",
}
@@ -55,7 +55,7 @@ def get_role_badge_style(role: str) -> str:
def get_role_badge_label(role: str) -> str:
"""返回角色标签对应的展示文案。"""
return _ROLE_BADGE_LABEL_MAP.get(role, "\u672a\u77e5")
return _ROLE_BADGE_LABEL_MAP.get(role, "未知")
def format_tool_call_for_display(tool_call: Any) -> dict[str, Any]:

View File

@@ -181,6 +181,16 @@ class PromptCLIVisualizer:
padding=(0, 1),
)
@staticmethod
def _extract_image_pair(item: Any) -> tuple[str, str] | None:
"""兼容图片片段被序列化为 tuple 或 list 的两种形式。"""
if isinstance(item, (tuple, list)) and len(item) == 2:
image_format, image_base64 = item
if isinstance(image_format, str) and isinstance(image_base64, str):
return image_format, image_base64
return None
@classmethod
def _render_message_content(cls, content: Any, settings: PromptImageDisplaySettings) -> RenderableType:
if isinstance(content, str):
@@ -192,11 +202,11 @@ class PromptCLIVisualizer:
if isinstance(item, str):
parts.append(Text(item))
continue
if isinstance(item, tuple) and len(item) == 2:
image_format, image_base64 = item
if isinstance(image_format, str) and isinstance(image_base64, str):
parts.append(cls._render_image_item(image_format, image_base64, settings))
continue
image_pair = cls._extract_image_pair(item)
if image_pair is not None:
image_format, image_base64 = image_pair
parts.append(cls._render_image_item(image_format, image_base64, settings))
continue
if isinstance(item, dict) and item.get("type") == "text" and isinstance(item.get("text"), str):
parts.append(Text(item["text"]))
else:
@@ -218,8 +228,9 @@ class PromptCLIVisualizer:
if isinstance(item, str):
parts.append(item)
continue
if isinstance(item, tuple) and len(item) == 2:
image_format, image_base64 = item
image_pair = cls._extract_image_pair(item)
if image_pair is not None:
image_format, image_base64 = image_pair
approx_size = max(0, len(str(image_base64)) * 3 // 4)
parts.append(f"[图片 image/{image_format} {approx_size} B]")
continue
@@ -395,8 +406,9 @@ class PromptCLIVisualizer:
if isinstance(item, str):
parts.append(f"<pre>{html.escape(item)}</pre>")
continue
if isinstance(item, tuple) and len(item) == 2:
image_format, image_base64 = item
image_pair = cls._extract_image_pair(item)
if image_pair is not None:
image_format, image_base64 = image_pair
image_html = cls._render_image_item_html(str(image_format), str(image_base64))
parts.append(image_html)
continue

View File

@@ -8,6 +8,7 @@ from pathlib import Path
from typing import Dict
from uuid import uuid4
class PromptPreviewLogger:
"""负责保存 Maisaka Prompt 预览文件并控制目录容量。"""

View File

@@ -8,8 +8,8 @@ from typing import Any
import json
import os
import sys
import traceback
import time
import traceback
def _clear_screen() -> None:

View File

@@ -34,10 +34,10 @@ from src.plugin_runtime.hook_payloads import deserialize_prompt_messages
from .chat_loop_service import ChatResponse, MaisakaChatLoopService
from .context_messages import LLMContextMessage
from .display_utils import build_tool_call_summary_lines, format_token_count
from .prompt_cli_renderer import PromptCLIVisualizer
from .display.display_utils import build_tool_call_summary_lines, format_token_count
from .display.prompt_cli_renderer import PromptCLIVisualizer
from .display.stage_status_board import remove_stage_status, update_stage_status
from .reasoning_engine import MaisakaReasoningEngine
from .stage_status_board import remove_stage_status, update_stage_status
from .tool_provider import MaisakaBuiltinToolProvider
logger = get_logger("maisaka_runtime")

View File

@@ -267,6 +267,46 @@ def _parse_data_url_image(image_url: str) -> Tuple[str, str]:
return image_format, image_base64
def _append_image_content(message_builder: MessageBuilder, content_item: Any) -> bool:
"""向消息构建器追加图片片段。
兼容两种输入格式:
1. 旧序列化格式中的 `(image_format, image_base64)` 元组。
2. 标准字典片段中的 Data URL 或 `image_format`/`image_base64` 字段。
"""
if isinstance(content_item, (tuple, list)) and len(content_item) == 2:
image_format, image_base64 = content_item
if not isinstance(image_format, str) or not isinstance(image_base64, str):
raise ValueError("图片元组片段必须包含字符串类型的 image_format 和 image_base64")
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
return True
if not isinstance(content_item, dict):
return False
part_type = str(content_item.get("type", "text")).strip().lower()
if part_type not in {"image", "image_url", "input_image"}:
return False
image_url = content_item.get("image_url")
if isinstance(image_url, dict):
image_url = image_url.get("url")
if isinstance(image_url, str):
image_format, image_base64 = _parse_data_url_image(image_url)
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
return True
image_format = content_item.get("image_format")
image_base64 = content_item.get("image_base64")
if isinstance(image_format, str) and isinstance(image_base64, str):
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
return True
raise ValueError("图片片段缺少可识别的图片数据")
def _append_content_parts(message_builder: MessageBuilder, content: Any) -> None:
"""将原始消息内容追加到内部消息构建器。
@@ -293,8 +333,10 @@ def _append_content_parts(message_builder: MessageBuilder, content: Any) -> None
if isinstance(content_item, str):
message_builder.add_text_content(content_item)
continue
if _append_image_content(message_builder, content_item):
continue
if not isinstance(content_item, dict):
raise ValueError("消息内容列表中仅支持字符串或字典片段")
raise ValueError("消息内容列表中仅支持字符串、图片元组或字典片段")
part_type = str(content_item.get("type", "text")).strip().lower()
if part_type == "text":
@@ -304,22 +346,6 @@ def _append_content_parts(message_builder: MessageBuilder, content: Any) -> None
message_builder.add_text_content(text_content)
continue
if part_type in {"image", "image_url", "input_image"}:
image_url = content_item.get("image_url")
if isinstance(image_url, dict):
image_url = image_url.get("url")
if isinstance(image_url, str):
image_format, image_base64 = _parse_data_url_image(image_url)
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
continue
image_format = content_item.get("image_format")
image_base64 = content_item.get("image_base64")
if isinstance(image_format, str) and isinstance(image_base64, str):
message_builder.add_image_content(image_format=image_format, image_base64=image_base64)
continue
raise ValueError("图片片段缺少可识别的图片数据")
raise ValueError(f"不支持的消息片段类型: {part_type}")