feat:优化对多模态/非多模态replyer的配置
This commit is contained in:
@@ -33,3 +33,34 @@ def test_legacy_learning_list_with_numeric_fourth_column_is_migrated():
|
||||
"enable_jargon_learning": False,
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def test_visual_multimodal_replyer_is_migrated_to_replyer_mode() -> None:
    """A legacy boolean ``visual.multimodal_replyer`` is rewritten to ``visual.replyer_mode``.

    With ``multimodal_replyer=True`` the migration must report that it ran,
    record the matching reason, store ``"multimodal"`` under the new key and
    drop the old key.
    """
    legacy_config = {"visual": {"multimodal_replyer": True}}

    outcome = try_migrate_legacy_bot_config_dict(legacy_config)

    assert outcome.migrated is True
    assert "visual.multimodal_replyer_moved_to_visual.replyer_mode" in outcome.reason

    migrated_visual = outcome.data["visual"]
    assert migrated_visual["replyer_mode"] == "multimodal"
    assert "multimodal_replyer" not in migrated_visual
|
||||
|
||||
|
||||
def test_chat_replyer_generator_type_is_migrated_to_replyer_mode() -> None:
    """A legacy ``chat.replyer_generator_type`` is moved to ``visual.replyer_mode``.

    The value ``"legacy"`` must end up as ``"text"`` under the new key, and the
    old key must be removed from the ``chat`` section.
    """
    legacy_config = {
        "chat": {"replyer_generator_type": "legacy"},
        "visual": {},
    }

    outcome = try_migrate_legacy_bot_config_dict(legacy_config)

    assert outcome.migrated is True
    assert "chat.replyer_generator_type_moved_to_visual.replyer_mode" in outcome.reason

    migrated = outcome.data
    assert migrated["visual"]["replyer_mode"] == "text"
    assert "replyer_generator_type" not in migrated["chat"]
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
from datetime import datetime
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from src.chat.message_receive.chat_manager import BotChatSession
|
||||
from src.common.prompt_i18n import load_prompt
|
||||
from src.config.config import global_config
|
||||
from src.maisaka.context_messages import SessionBackedMessage
|
||||
from src.services.llm_service import LLMServiceClient
|
||||
|
||||
from .maisaka_generator_base import BaseMaisakaReplyGenerator
|
||||
@@ -26,9 +24,6 @@ class MaisakaReplyGenerator(BaseMaisakaReplyGenerator):
|
||||
request_type=request_type,
|
||||
llm_client_cls=llm_client_cls or LLMServiceClient,
|
||||
load_prompt_func=load_prompt_func or load_prompt,
|
||||
enable_visual_message=(
|
||||
global_config.visual.multimodal_replyer
|
||||
if enable_visual_message is None
|
||||
else enable_visual_message
|
||||
),
|
||||
enable_visual_message=enable_visual_message,
|
||||
replyer_mode=global_config.visual.replyer_mode,
|
||||
)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
|
||||
from typing import Any, Awaitable, Callable, Dict, List, Literal, Optional, Tuple
|
||||
|
||||
import random
|
||||
|
||||
@@ -23,6 +23,7 @@ from src.common.data_models.reply_generation_data_models import (
|
||||
from src.common.logger import get_logger
|
||||
from src.common.utils.utils_session import SessionUtils
|
||||
from src.config.config import global_config
|
||||
from src.config.model_configs import ModelInfo
|
||||
from src.core.types import ActionInfo
|
||||
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
|
||||
from src.maisaka.context_messages import (
|
||||
@@ -59,13 +60,15 @@ class BaseMaisakaReplyGenerator:
|
||||
request_type: str = "maisaka_replyer",
|
||||
llm_client_cls: Any,
|
||||
load_prompt_func: Callable[..., str],
|
||||
enable_visual_message: bool,
|
||||
enable_visual_message: Optional[bool],
|
||||
replyer_mode: Literal["text", "multimodal", "auto"],
|
||||
) -> None:
|
||||
self.chat_stream = chat_stream
|
||||
self.request_type = request_type
|
||||
self._llm_client_cls = llm_client_cls
|
||||
self._load_prompt = load_prompt_func
|
||||
self._enable_visual_message = enable_visual_message
|
||||
self._replyer_mode = replyer_mode
|
||||
self.express_model = llm_client_cls(
|
||||
task_name="replyer",
|
||||
request_type=request_type,
|
||||
@@ -265,8 +268,9 @@ class BaseMaisakaReplyGenerator:
|
||||
def _build_visual_user_message(
|
||||
self,
|
||||
message: SessionBackedMessage,
|
||||
enable_visual_message: bool,
|
||||
) -> Optional[Message]:
|
||||
if not self._enable_visual_message:
|
||||
if not enable_visual_message:
|
||||
return None
|
||||
|
||||
raw_message = clone_message_sequence(message.raw_message)
|
||||
@@ -283,7 +287,11 @@ class BaseMaisakaReplyGenerator:
|
||||
)
|
||||
return visual_message.to_llm_message()
|
||||
|
||||
def _build_history_messages(self, chat_history: List[LLMContextMessage]) -> List[Message]:
|
||||
def _build_history_messages(
|
||||
self,
|
||||
chat_history: List[LLMContextMessage],
|
||||
enable_visual_message: bool,
|
||||
) -> List[Message]:
|
||||
bot_nickname = global_config.bot.nickname.strip() or "Bot"
|
||||
default_user_name = global_config.maisaka.cli_user_name.strip() or "User"
|
||||
messages: List[Message] = []
|
||||
@@ -300,7 +308,7 @@ class BaseMaisakaReplyGenerator:
|
||||
)
|
||||
continue
|
||||
|
||||
visual_message = self._build_visual_user_message(message)
|
||||
visual_message = self._build_visual_user_message(message, enable_visual_message)
|
||||
if visual_message is not None:
|
||||
messages.append(visual_message)
|
||||
continue
|
||||
@@ -337,6 +345,7 @@ class BaseMaisakaReplyGenerator:
|
||||
reply_reason: str,
|
||||
expression_habits: str = "",
|
||||
stream_id: Optional[str] = None,
|
||||
enable_visual_message: bool = False,
|
||||
) -> List[Message]:
|
||||
messages: List[Message] = []
|
||||
system_prompt = self._build_system_prompt(
|
||||
@@ -348,10 +357,21 @@ class BaseMaisakaReplyGenerator:
|
||||
instruction = self._build_reply_instruction()
|
||||
|
||||
messages.append(MessageBuilder().set_role(RoleType.System).add_text_content(system_prompt).build())
|
||||
messages.extend(self._build_history_messages(chat_history))
|
||||
messages.extend(self._build_history_messages(chat_history, enable_visual_message))
|
||||
messages.append(MessageBuilder().set_role(RoleType.User).add_text_content(instruction).build())
|
||||
return messages
|
||||
|
||||
def _resolve_enable_visual_message(self, model_info: Optional[ModelInfo] = None) -> bool:
|
||||
if self._enable_visual_message is not None:
|
||||
return self._enable_visual_message
|
||||
if self._replyer_mode == "multimodal":
|
||||
if model_info is not None and not model_info.visual:
|
||||
raise ValueError(f"replyer_mode=multimodal,但模型 '{model_info.name}' 未开启 visual,无法使用多模态 replyer")
|
||||
return True
|
||||
if self._replyer_mode == "text":
|
||||
return False
|
||||
return bool(model_info.visual) if model_info is not None else False
|
||||
|
||||
def _resolve_session_id(self, stream_id: Optional[str]) -> str:
|
||||
if stream_id:
|
||||
return stream_id
|
||||
@@ -494,7 +514,19 @@ class BaseMaisakaReplyGenerator:
|
||||
show_replyer_prompt = bool(getattr(global_config.debug, "show_replyer_prompt", False))
|
||||
show_replyer_reasoning = bool(getattr(global_config.debug, "show_replyer_reasoning", False))
|
||||
|
||||
def message_factory(_client: object) -> List[Message]:
|
||||
def message_factory(_client: object, model_info: Optional[ModelInfo] = None) -> List[Message]:
|
||||
nonlocal prompt_ms, prompt_preview, request_messages
|
||||
prompt_started_at = time.perf_counter()
|
||||
request_messages = self._build_request_messages(
|
||||
chat_history=filtered_history,
|
||||
reply_message=reply_message,
|
||||
reply_reason=reply_reason or "",
|
||||
expression_habits=merged_expression_habits,
|
||||
stream_id=stream_id,
|
||||
enable_visual_message=self._resolve_enable_visual_message(model_info),
|
||||
)
|
||||
prompt_ms = round((time.perf_counter() - prompt_started_at) * 1000, 2)
|
||||
prompt_preview = PromptCLIVisualizer._build_prompt_dump_text(request_messages)
|
||||
return request_messages
|
||||
|
||||
result.completion.request_prompt = prompt_preview
|
||||
@@ -531,6 +563,8 @@ class BaseMaisakaReplyGenerator:
|
||||
)
|
||||
return finalize(False)
|
||||
|
||||
result.completion.request_prompt = prompt_preview
|
||||
result.request_messages = serialize_prompt_messages(request_messages)
|
||||
llm_ms = round((time.perf_counter() - llm_started_at) * 1000, 2)
|
||||
response_text = (generation_result.response or "").strip()
|
||||
result.success = bool(response_text)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from src.chat.message_receive.chat_manager import BotChatSession, chat_manager as _chat_manager
|
||||
from src.config.config import global_config
|
||||
@@ -6,10 +6,6 @@ from src.common.logger import get_logger
|
||||
|
||||
from .maisaka_generator import MaisakaReplyGenerator
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.chat.replyer.group_generator import DefaultReplyer
|
||||
from src.chat.replyer.private_generator import PrivateReplyer
|
||||
|
||||
logger = get_logger("ReplyerManager")
|
||||
|
||||
|
||||
@@ -22,7 +18,7 @@ class ReplyerManager:
|
||||
@staticmethod
def _get_maisaka_generator_type() -> str:
    """Return the Maisaka replyer message mode from the current configuration.

    The value is read directly from ``global_config.visual.replyer_mode``,
    which the visual config declares as one of ``"text"``, ``"multimodal"`` or
    ``"auto"``. The earlier lookup of ``visual.multimodal_replyer`` is gone
    because that field was replaced by ``replyer_mode``.
    """
    return global_config.visual.replyer_mode
|
||||
|
||||
def get_replyer(
|
||||
self,
|
||||
@@ -30,7 +26,7 @@ class ReplyerManager:
|
||||
chat_id: Optional[str] = None,
|
||||
request_type: str = "replyer",
|
||||
replyer_type: str = "default",
|
||||
) -> Optional["DefaultReplyer | PrivateReplyer | Any"]:
|
||||
) -> Optional[MaisakaReplyGenerator]:
|
||||
"""按会话和 replyer 类型获取实例。"""
|
||||
stream_id = chat_stream.session_id if chat_stream else chat_id
|
||||
if not stream_id:
|
||||
|
||||
@@ -14,7 +14,6 @@ from src.llm_models.payload_content.resp_format import RespFormat
|
||||
from src.llm_models.payload_content.tool_option import ToolCall, ToolDefinitionInput
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.llm_models.model_client.base_client import BaseClient
|
||||
from src.llm_models.payload_content.message import Message
|
||||
|
||||
|
||||
@@ -24,7 +23,7 @@ PromptMessage: TypeAlias = Dict[str, Any]
|
||||
PromptInput: TypeAlias = str | List[PromptMessage]
|
||||
"""统一的提示输入类型。"""
|
||||
|
||||
MessageFactory: TypeAlias = Callable[["BaseClient"], List["Message"]]
|
||||
MessageFactory: TypeAlias = Callable[..., List["Message"]]
|
||||
"""统一的消息工厂类型。"""
|
||||
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ CONFIG_DIR: Path = PROJECT_ROOT / "config"
|
||||
BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute()
|
||||
MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute()
|
||||
MMC_VERSION: str = "1.0.0"
|
||||
CONFIG_VERSION: str = "8.5.5"
|
||||
CONFIG_VERSION: str = "8.6.0"
|
||||
MODEL_CONFIG_VERSION: str = "1.14.0"
|
||||
|
||||
logger = get_logger("config")
|
||||
|
||||
@@ -253,6 +253,8 @@ def _migrate_target_item_list(parent: dict[str, Any], key: str) -> bool:
|
||||
raw = _as_list(parent.get(key))
|
||||
if raw is None:
|
||||
return False
|
||||
if not raw:
|
||||
return False
|
||||
if raw and all(isinstance(i, dict) for i in raw):
|
||||
return False
|
||||
targets: list[dict[str, str]] = []
|
||||
@@ -285,18 +287,18 @@ def _migrate_extra_prompt_list(exp: dict[str, Any], key: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _parse_multimodal_replyer(v: Any) -> Optional[bool]:
|
||||
def _parse_replyer_mode(v: Any) -> Optional[str]:
|
||||
"""兼容旧 replyer_generator_type 到布尔开关的迁移。"""
|
||||
if isinstance(v, bool):
|
||||
return v
|
||||
return "multimodal" if v else "text"
|
||||
if not isinstance(v, str):
|
||||
return None
|
||||
|
||||
normalized_value = v.strip().lower()
|
||||
if normalized_value == "multimodal":
|
||||
return True
|
||||
if normalized_value in {"text", "multimodal", "auto"}:
|
||||
return normalized_value
|
||||
if normalized_value == "legacy":
|
||||
return False
|
||||
return "text"
|
||||
return None
|
||||
|
||||
|
||||
@@ -403,14 +405,23 @@ def try_migrate_legacy_bot_config_dict(data: dict[str, Any]) -> MigrationResult:
|
||||
migrated_any = True
|
||||
reasons.append("chat.multimodal_planner_moved_to_visual.multimodal_planner")
|
||||
|
||||
if visual is not None and "multimodal_replyer" in visual:
|
||||
replyer_mode = _parse_replyer_mode(visual.get("multimodal_replyer"))
|
||||
if "replyer_mode" not in visual and replyer_mode is not None:
|
||||
visual["replyer_mode"] = replyer_mode
|
||||
if "replyer_mode" in visual:
|
||||
visual.pop("multimodal_replyer", None)
|
||||
migrated_any = True
|
||||
reasons.append("visual.multimodal_replyer_moved_to_visual.replyer_mode")
|
||||
|
||||
if visual is not None and "replyer_generator_type" in chat:
|
||||
multimodal_replyer = _parse_multimodal_replyer(chat["replyer_generator_type"])
|
||||
if "multimodal_replyer" not in visual and multimodal_replyer is not None:
|
||||
visual["multimodal_replyer"] = multimodal_replyer
|
||||
if "multimodal_replyer" in visual:
|
||||
replyer_mode = _parse_replyer_mode(chat["replyer_generator_type"])
|
||||
if "replyer_mode" not in visual and replyer_mode is not None:
|
||||
visual["replyer_mode"] = replyer_mode
|
||||
if "replyer_mode" in visual:
|
||||
chat.pop("replyer_generator_type", None)
|
||||
migrated_any = True
|
||||
reasons.append("chat.replyer_generator_type_moved_to_visual.multimodal_replyer")
|
||||
reasons.append("chat.replyer_generator_type_moved_to_visual.replyer_mode")
|
||||
|
||||
maisaka = _as_dict(data.get("maisaka"))
|
||||
mem = _as_dict(data.get("memory"))
|
||||
|
||||
@@ -152,16 +152,16 @@ class VisualConfig(ConfigBase):
|
||||
"x-icon": "image",
|
||||
},
|
||||
)
|
||||
"""是否直接输入图片"""
|
||||
"""是否启用多模态planner"""
|
||||
|
||||
multimodal_replyer: bool = Field(
|
||||
default=False,
|
||||
replyer_mode: Literal["text", "multimodal", "auto"] = Field(
|
||||
default="auto",
|
||||
json_schema_extra={
|
||||
"x-widget": "switch",
|
||||
"x-widget": "select",
|
||||
"x-icon": "git-branch",
|
||||
},
|
||||
)
|
||||
"""是否启用 Maisaka 多模态 replyer 生成器"""
|
||||
"""回复器模式,auto根据模型信息自动选择,text为纯文本模式,multimodal为多模态模式"""
|
||||
|
||||
visual_style: str = Field(
|
||||
default="请用中文描述这张图片的内容。如果有文字,请把文字描述概括出来,请留意其主题,直观感受,输出为一段平文本,最多30字,请注意不要分点,就输出一段文本",
|
||||
|
||||
@@ -3,6 +3,7 @@ from enum import Enum
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -910,7 +911,11 @@ class LLMOrchestrator:
|
||||
model_info, api_provider, client = self._select_model(exclude_models=failed_models_this_request)
|
||||
message_list = []
|
||||
if message_factory:
|
||||
message_list = message_factory(client)
|
||||
parameter_count = len(inspect.signature(message_factory).parameters)
|
||||
if parameter_count >= 2:
|
||||
message_list = message_factory(client, model_info)
|
||||
else:
|
||||
message_list = message_factory(client)
|
||||
try:
|
||||
request = self._build_client_request(
|
||||
request_type=request_type,
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
import math
|
||||
from random import sample
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import asyncio
|
||||
import math
|
||||
|
||||
from PIL import Image as PILImage
|
||||
from PIL import ImageDraw, ImageFont
|
||||
@@ -20,12 +20,14 @@ from src.common.logger import get_logger
|
||||
from src.config.config import global_config
|
||||
from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec
|
||||
from src.llm_models.payload_content.resp_format import RespFormat, RespFormatType
|
||||
from src.llm_models.payload_content.message import MessageBuilder, RoleType
|
||||
from src.maisaka.context_messages import (
|
||||
LLMContextMessage,
|
||||
ReferenceMessage,
|
||||
ReferenceMessageType,
|
||||
SessionBackedMessage,
|
||||
)
|
||||
from src.plugin_runtime.hook_payloads import serialize_prompt_messages
|
||||
|
||||
from .context import BuiltinToolRuntimeContext
|
||||
|
||||
@@ -270,6 +272,7 @@ def _build_send_emoji_prompt_preview(
|
||||
def _build_send_emoji_monitor_detail(
|
||||
*,
|
||||
prompt_text: str = "",
|
||||
request_messages: Optional[list[dict[str, Any]]] = None,
|
||||
reasoning_text: str = "",
|
||||
output_text: str = "",
|
||||
metrics: Optional[Dict[str, Any]] = None,
|
||||
@@ -280,6 +283,8 @@ def _build_send_emoji_monitor_detail(
|
||||
detail: Dict[str, Any] = {}
|
||||
if prompt_text.strip():
|
||||
detail["prompt_text"] = prompt_text.strip()
|
||||
if isinstance(request_messages, list) and request_messages:
|
||||
detail["request_messages"] = request_messages
|
||||
if reasoning_text.strip():
|
||||
detail["reasoning_text"] = reasoning_text.strip()
|
||||
if output_text.strip():
|
||||
@@ -394,6 +399,16 @@ async def _select_emoji_with_sub_agent(
|
||||
grid_columns=grid_columns,
|
||||
sampled_emojis=sampled_emojis,
|
||||
)
|
||||
request_messages = [
|
||||
MessageBuilder().set_role(RoleType.System).add_text_content(system_prompt).build(),
|
||||
]
|
||||
prompt_llm_message = prompt_message.to_llm_message()
|
||||
if prompt_llm_message is not None:
|
||||
request_messages.append(prompt_llm_message)
|
||||
candidate_llm_message = candidate_message.to_llm_message()
|
||||
if candidate_llm_message is not None:
|
||||
request_messages.append(candidate_llm_message)
|
||||
serialized_request_messages = serialize_prompt_messages(request_messages)
|
||||
|
||||
selection_started_at = datetime.now()
|
||||
response = await tool_ctx.runtime.run_sub_agent(
|
||||
@@ -422,6 +437,7 @@ async def _select_emoji_with_sub_agent(
|
||||
if selection_metadata is not None:
|
||||
selection_metadata["monitor_detail"] = _build_send_emoji_monitor_detail(
|
||||
prompt_text=prompt_preview,
|
||||
request_messages=serialized_request_messages,
|
||||
output_text=response.content or "",
|
||||
metrics=selection_metrics,
|
||||
extra_sections=[{
|
||||
@@ -436,6 +452,7 @@ async def _select_emoji_with_sub_agent(
|
||||
selection_metadata["reason"] = selection.reason.strip()
|
||||
selection_metadata["monitor_detail"] = _build_send_emoji_monitor_detail(
|
||||
prompt_text=prompt_preview,
|
||||
request_messages=serialized_request_messages,
|
||||
reasoning_text=selection.reason,
|
||||
output_text=response.content or "",
|
||||
metrics=selection_metrics,
|
||||
|
||||
@@ -440,6 +440,9 @@ class PromptCLIVisualizer:
|
||||
return (
|
||||
"<div class='image-card'>"
|
||||
f"<div class='image-meta'>图片 image/{html.escape(normalized_format)} {html.escape(size_text)}</div>"
|
||||
f"<a class='image-preview-link' href='{html.escape(file_uri, quote=True)}'>"
|
||||
f"<img class='image-preview' src='{html.escape(file_uri, quote=True)}' alt='图片预览' />"
|
||||
"</a>"
|
||||
f"<div class='image-path'>{html.escape(str(file_path))}</div>"
|
||||
f"<a class='image-link' href='{html.escape(file_uri, quote=True)}'>打开图片</a>"
|
||||
"</div>"
|
||||
@@ -727,6 +730,22 @@ class PromptCLIVisualizer:
|
||||
font-family: "Cascadia Mono", "JetBrains Mono", "Consolas", monospace;
|
||||
word-break: break-all;
|
||||
}}
|
||||
.image-preview-link {{
|
||||
display: block;
|
||||
margin-top: 10px;
|
||||
}}
|
||||
.image-preview {{
|
||||
display: block;
|
||||
max-width: min(100%, 560px);
|
||||
max-height: 420px;
|
||||
width: auto;
|
||||
height: auto;
|
||||
border-radius: 12px;
|
||||
border: 1px solid #dbe4f0;
|
||||
background: #fff;
|
||||
box-shadow: 0 8px 20px rgba(15, 23, 42, 0.08);
|
||||
object-fit: contain;
|
||||
}}
|
||||
.image-link {{
|
||||
display: inline-block;
|
||||
margin-top: 8px;
|
||||
|
||||
@@ -11,8 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
|
||||
from rich.traceback import install
|
||||
|
||||
from src.chat.message_receive.chat_manager import BotChatSession
|
||||
from src.chat.replyer.group_generator import DefaultReplyer
|
||||
from src.chat.replyer.private_generator import PrivateReplyer
|
||||
from src.chat.replyer.maisaka_generator import MaisakaReplyGenerator
|
||||
from src.chat.replyer.replyer_manager import replyer_manager
|
||||
from src.chat.utils.utils import process_llm_response
|
||||
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
|
||||
@@ -38,7 +37,7 @@ def _get_replyer(
|
||||
chat_stream: Optional[BotChatSession] = None,
|
||||
chat_id: Optional[str] = None,
|
||||
request_type: str = "replyer",
|
||||
) -> Optional[DefaultReplyer | PrivateReplyer]:
|
||||
) -> Optional[MaisakaReplyGenerator]:
|
||||
"""获取回复器对象"""
|
||||
if not chat_id and not chat_stream:
|
||||
raise ValueError("chat_stream 和 chat_id 不可均为空")
|
||||
|
||||
Reference in New Issue
Block a user