diff --git a/src/emoji_system/maisaka_tool.py b/src/emoji_system/maisaka_tool.py index 75c0bb19..33601000 100644 --- a/src/emoji_system/maisaka_tool.py +++ b/src/emoji_system/maisaka_tool.py @@ -4,8 +4,6 @@ from collections.abc import Awaitable, Callable, Sequence from dataclasses import dataclass, field from typing import Any, Optional, TYPE_CHECKING -import random - from src.chat.message_receive.chat_manager import chat_manager from src.cli.maisaka_cli_sender import CLI_PLATFORM_NAME, render_cli_message from src.common.data_models.image_data_model import MaiEmoji @@ -121,45 +119,13 @@ def _normalize_emotions(emoji: MaiEmoji) -> list[str]: return [] -async def select_emoji_for_maisaka( - *, - requested_emotion: str = "", - reasoning: str = "", - context_texts: Sequence[str] | None = None, - sample_size: int = 30, -) -> tuple[MaiEmoji | None, str]: - """为 Maisaka 选择一个合适的表情。""" - - del reasoning, context_texts - - available_emojis = list(emoji_manager.emojis) - if not available_emojis: - return None, "" - - normalized_requested_emotion = requested_emotion.strip() - if normalized_requested_emotion: - matched_emojis = [ - emoji - for emoji in available_emojis - if normalized_requested_emotion.lower() in (emotion.lower() for emotion in _normalize_emotions(emoji)) - ] - if matched_emojis: - return random.choice(matched_emojis), normalized_requested_emotion - - sampled_emojis = random.sample( - available_emojis, - min(max(sample_size, 1), len(available_emojis)), - ) - return random.choice(sampled_emojis), "" - - async def send_emoji_for_maisaka( *, stream_id: str, + emoji_selector: EmojiSelector, requested_emotion: str = "", reasoning: str = "", context_texts: Sequence[str] | None = None, - emoji_selector: EmojiSelector | None = None, ) -> MaisakaEmojiSendResult: """为 Maisaka 选择并发送一个表情。""" @@ -194,20 +160,12 @@ async def send_emoji_for_maisaka( normalized_context_texts = _normalize_context_texts(before_select_kwargs.get("context_texts")) sample_size = _coerce_positive_int(before_select_kwargs.get("sample_size"), sample_size) - if emoji_selector is None: - selected_emoji, matched_emotion = await select_emoji_for_maisaka( - requested_emotion=normalized_requested_emotion, - reasoning=normalized_reasoning, - context_texts=normalized_context_texts, - sample_size=sample_size, - ) - else: - selected_emoji, matched_emotion = await emoji_selector( - normalized_requested_emotion, - normalized_reasoning, - normalized_context_texts, - sample_size, - ) + selected_emoji, matched_emotion = await emoji_selector( + normalized_requested_emotion, + normalized_reasoning, + normalized_context_texts, + sample_size, + ) after_select_result = await _get_runtime_manager().invoke_hook( "emoji.maisaka.after_select", stream_id=stream_id, diff --git a/src/maisaka/builtin_tool/send_emoji.py b/src/maisaka/builtin_tool/send_emoji.py index b1853452..e9bd292a 100644 --- a/src/maisaka/builtin_tool/send_emoji.py +++ b/src/maisaka/builtin_tool/send_emoji.py @@ -2,6 +2,7 @@ from datetime import datetime from io import BytesIO +from json import dumps from random import sample from typing import Any, Dict, Optional @@ -17,9 +18,8 @@ from src.emoji_system.maisaka_tool import send_emoji_for_maisaka from src.common.data_models.image_data_model import MaiEmoji from src.common.data_models.message_component_data_model import ImageComponent, MessageSequence, TextComponent from src.common.logger import get_logger -from src.config.config import global_config +from src.config.config import config_manager, global_config from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec -from src.llm_models.payload_content.resp_format import RespFormat, RespFormatType from src.llm_models.payload_content.message import MessageBuilder, RoleType from src.maisaka.context_messages import ( LLMContextMessage, @@ -221,6 +221,7 @@ def _build_send_emoji_monitor_detail( detail: Dict[str, Any] = {} if isinstance(request_messages, list) and request_messages: detail["request_messages"] = request_messages + detail["prompt_text"] = dumps(request_messages, ensure_ascii=False, indent=2) if reasoning_text.strip(): detail["reasoning_text"] = reasoning_text.strip() if output_text.strip(): @@ -279,6 +280,24 @@ def _build_send_emoji_monitor_metadata( return {} +def _resolve_emoji_selector_model_task_name() -> str: + """根据 planner 模型视觉能力选择表情选择子代理的模型任务。""" + + model_config = config_manager.get_model_config() + planner_models = [ + model_name + for model_name in model_config.model_task_config.planner.model_list + if str(model_name).strip() + ] + models_by_name = {model.name: model for model in model_config.models} + if planner_models and all( + model_name in models_by_name and models_by_name[model_name].visual + for model_name in planner_models + ): + return "planner" + return "vlm" + + async def _select_emoji_with_sub_agent( tool_ctx: BuiltinToolRuntimeContext, reasoning: str, @@ -326,7 +345,8 @@ async def _select_emoji_with_sub_agent( prompt_llm_message = prompt_message.to_llm_message() if prompt_llm_message is not None: request_messages.append(prompt_llm_message) - candidate_llm_message = candidate_message.to_llm_message() + candidate_to_llm_message = getattr(candidate_message, "to_llm_message", None) + candidate_llm_message = candidate_to_llm_message() if callable(candidate_to_llm_message) else None if candidate_llm_message is not None: request_messages.append(candidate_llm_message) serialized_request_messages = serialize_prompt_messages(request_messages) @@ -337,10 +357,7 @@ async def _select_emoji_with_sub_agent( system_prompt=system_prompt, extra_messages=[prompt_message, candidate_message], max_tokens=_EMOJI_SUB_AGENT_MAX_TOKENS, - response_format=RespFormat( - format_type=RespFormatType.JSON_SCHEMA, - schema=EmojiSelectionResult, - ), + model_task_name=_resolve_emoji_selector_model_task_name(), ) selection_duration_ms = round((datetime.now() - selection_started_at).total_seconds() * 1000, 2) @@ -409,12 +426,16 @@ async def handle_tool( "reason": "", } selection_metadata: Dict[str, Any] = {"reason": "", "monitor_detail": {}} + requested_emotion = "" + if isinstance(invocation.arguments, dict): + requested_emotion = str(invocation.arguments.get("emotion") or "").strip() logger.info(f"{tool_ctx.runtime.log_prefix} 触发表情包发送工具") try: send_result = await send_emoji_for_maisaka( stream_id=tool_ctx.runtime.session_id, + requested_emotion=requested_emotion, reasoning=tool_ctx.engine.last_reasoning_content, context_texts=context_texts, emoji_selector=lambda _requested_emotion, reasoning, context_texts, sample_size: _select_emoji_with_sub_agent( diff --git a/src/maisaka/chat_loop_service.py b/src/maisaka/chat_loop_service.py index 63ec38e8..f32f48c7 100644 --- a/src/maisaka/chat_loop_service.py +++ b/src/maisaka/chat_loop_service.py @@ -194,6 +194,7 @@ class MaisakaChatLoopService: session_id: Optional[str] = None, is_group_chat: Optional[bool] = None, max_tokens: int = 2048, + model_task_name: str = "planner", ) -> None: """初始化 Maisaka 对话循环服务。 @@ -205,6 +206,7 @@ class MaisakaChatLoopService: """ self._max_tokens = max_tokens + self._model_task_name = model_task_name.strip() or "planner" self._is_group_chat = is_group_chat self._session_id = session_id or "" self._extra_tools: List[ToolOption] = [] @@ -236,17 +238,18 @@ class MaisakaChatLoopService: ) def _get_llm_chat_client(self, request_kind: str) -> LLMServiceClient: - """获取当前请求类型对应的 planner LLM 客户端。""" + """获取当前请求类型对应的 LLM 客户端。""" request_type = self._resolve_llm_request_type(request_kind) - llm_client = self._llm_chat_clients.get(request_type) + client_key = f"{self._model_task_name}:{request_type}" + llm_client = self._llm_chat_clients.get(client_key) if llm_client is None: llm_client = LLMServiceClient( - task_name="planner", + task_name=self._model_task_name, request_type=request_type, session_id=self._session_id, ) - self._llm_chat_clients[request_type] = llm_client + self._llm_chat_clients[client_key] = llm_client return llm_client @staticmethod diff --git a/src/maisaka/runtime.py b/src/maisaka/runtime.py index 12e9fcf5..77eed427 100644 --- a/src/maisaka/runtime.py +++ b/src/maisaka/runtime.py @@ -600,6 +600,7 @@ class MaisakaHeartFlowChatting: extra_messages: Optional[Sequence[LLMContextMessage]] = None, interrupt_flag: asyncio.Event | None = None, max_tokens: int = 512, + model_task_name: str = "planner", response_format: RespFormat | None = None, tool_definitions: Optional[Sequence[ToolDefinitionInput]] = None, ) -> ChatResponse: @@ -622,6 +623,7 @@ class MaisakaHeartFlowChatting: session_id=self.session_id, is_group_chat=self.chat_stream.is_group_session, max_tokens=max_tokens, + model_task_name=model_task_name, ) sub_agent.set_interrupt_flag(interrupt_flag) return await sub_agent.chat_loop_step(