diff --git a/pyproject.toml b/pyproject.toml index 5b88f461..f02add00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "json-repair>=0.47.6", "maim-message>=0.6.2", "maibot-plugin-sdk>=2.0.0", + "mcp", "msgpack>=1.1.2", "numpy>=2.2.6", "openai>=1.95.0", diff --git a/requirements.txt b/requirements.txt index a1160c4a..a654374f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ jieba>=0.42.1 json-repair>=0.47.6 maim-message>=0.6.2 maibot-plugin-sdk>=1.2.3,<2.0.0 +mcp msgpack>=1.1.2 numpy>=2.2.6 openai>=1.95.0 @@ -29,4 +30,4 @@ structlog>=25.4.0 tomlkit>=0.13.3 typing-extensions uvicorn>=0.35.0 -watchfiles>=1.1.1 \ No newline at end of file +watchfiles>=1.1.1 diff --git a/src/chat/message_receive/bot.py b/src/chat/message_receive/bot.py index 1fc4ef53..2d8fdaa6 100644 --- a/src/chat/message_receive/bot.py +++ b/src/chat/message_receive/bot.py @@ -1,4 +1,5 @@ from contextlib import suppress +from copy import deepcopy from typing import Any, Dict, Optional import os @@ -10,6 +11,7 @@ from src.chat.heart_flow.heartflow_message_processor import HeartFCMessageReceiv from src.common.logger import get_logger from src.common.utils.utils_message import MessageUtils from src.common.utils.utils_session import SessionUtils +from src.config.config import global_config from src.platform_io.route_key_factory import RouteKeyFactory # from src.chat.brain_chat.PFC.pfc_manager import PFCManager @@ -301,6 +303,8 @@ class ChatBot: # pass # 处理消息内容,识别表情包等二进制数据并转化为文本描述 + if global_config.maisaka.take_over_hfc and global_config.maisaka.direct_image_input: + message.maisaka_original_raw_message = deepcopy(message.raw_message) # type: ignore[attr-defined] await message.process() # 平台层的 @ 检测由底层 is_mentioned_bot_in_message 统一处理;此处不做用户名硬编码匹配 diff --git a/src/config/config.py b/src/config/config.py index 81bbae07..6cc73331 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -56,7 +56,7 @@ CONFIG_DIR: Path = PROJECT_ROOT / "config" BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute() MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute() MMC_VERSION: str = "1.0.0" -CONFIG_VERSION: str = "8.1.7" +CONFIG_VERSION: str = "8.1.10" MODEL_CONFIG_VERSION: str = "1.12.0" logger = get_logger("config") diff --git a/src/config/official_configs.py b/src/config/official_configs.py index bfd97f15..d0084073 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -1609,6 +1609,34 @@ class MaiSakaConfig(ConfigBase): ) """是否将图片直接作为多模态消息传入 Maisaka 主循环,而不是仅使用转译文本""" + merge_user_messages: bool = Field( + default=True, + json_schema_extra={ + "x-widget": "switch", + "x-icon": "merge", + }, + ) + """Whether Maisaka should merge newly received user utterances into a single user message per round""" + + terminal_image_preview: bool = Field( + default=False, + json_schema_extra={ + "x-widget": "switch", + "x-icon": "image", + }, + ) + """Whether Maisaka should render a low-resolution terminal preview for images in prompt display""" + + terminal_image_preview_width: int = Field( + default=24, + ge=8, + json_schema_extra={ + "x-widget": "input", + "x-icon": "columns", + }, + ) + """Character width for Maisaka terminal image previews""" + take_over_hfc: bool = Field( default=False, json_schema_extra={ diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index a3bfb74f..c84a4f34 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -298,8 +298,17 @@ class 
LLMRequest: """ self._refresh_task_config() start_time = time.time() + if self.request_type.startswith("maisaka_"): + logger.info( + f"LLMRequest[{self.request_type}] generate_response_with_message_async started " + f"(temperature={temperature}, max_tokens={max_tokens}, tools={len(tools or [])})" + ) + if self.request_type.startswith("maisaka_"): + logger.info(f"LLMRequest[{self.request_type}] building internal tool options from {len(tools or [])} tool(s)") tool_built = self._build_tool_options(tools) + if self.request_type.startswith("maisaka_"): + logger.info(f"LLMRequest[{self.request_type}] built {len(tool_built or [])} internal tool option(s)") response, model_info = await self._execute_request( request_type=RequestType.RESPONSE, @@ -309,6 +318,11 @@ class LLMRequest: tool_options=tool_built, response_format=response_format, ) + if self.request_type.startswith("maisaka_"): + logger.info( + f"LLMRequest[{self.request_type}] generate_response_with_message_async finished " + f"(model={model_info.name}, time_cost={time.time() - start_time:.2f}s)" + ) time_cost = time.time() - start_time logger.debug(f"LLM请求总耗时: {time_cost}") @@ -676,12 +690,28 @@ class LLMRequest: for _ in range(max_attempts): model_info, api_provider, client = self._select_model(exclude_models=failed_models_this_request) + if self.request_type.startswith("maisaka_"): + logger.info( + f"LLMRequest[{self.request_type}] selected model={model_info.name} " + f"provider={api_provider.name} request_type={request_type.value}" + ) message_list = [] if message_factory: + if self.request_type.startswith("maisaka_"): + logger.info(f"LLMRequest[{self.request_type}] building message list via message_factory") message_list = message_factory(client) + if self.request_type.startswith("maisaka_"): + logger.info( + f"LLMRequest[{self.request_type}] message_factory returned {len(message_list)} message(s)" + ) try: + if self.request_type.startswith("maisaka_"): + logger.info( + f"LLMRequest[{self.request_type}] sending request to model={model_info.name} " + f"with tool_options={len(tool_options or [])}" + ) response = await self._attempt_request_on_model( model_info, api_provider, @@ -697,6 +727,10 @@ class LLMRequest: embedding_input=embedding_input, audio_base64=audio_base64, ) + if self.request_type.startswith("maisaka_"): + logger.info( + f"LLMRequest[{self.request_type}] model={model_info.name} returned API response" + ) total_tokens, penalty, usage_penalty = self.model_usage[model_info.name] if response_usage := response.usage: total_tokens += response_usage.total_tokens diff --git a/src/maisaka/builtin_tools.py b/src/maisaka/builtin_tools.py index 0017f1fb..21ba448a 100644 --- a/src/maisaka/builtin_tools.py +++ b/src/maisaka/builtin_tools.py @@ -27,7 +27,14 @@ def create_builtin_tools() -> List[ToolOption]: reply_builder = ToolOptionBuilder() reply_builder.set_name("reply") - reply_builder.set_description("Generate and emit a visible reply based on the current thought.") + reply_builder.set_description("Generate and emit a visible reply based on the current thought. 
You must specify the target user message_id to reply to.") + reply_builder.add_param( + name="message_id", + param_type=ToolParamType.STRING, + description="The message_id of the specific user message that this reply should target.", + required=True, + enum_values=None, + ) tools.append(reply_builder.build()) no_reply_builder = ToolOptionBuilder() diff --git a/src/maisaka/config.py b/src/maisaka/config.py index bef79ee7..38826cd2 100644 --- a/src/maisaka/config.py +++ b/src/maisaka/config.py @@ -28,6 +28,9 @@ SHOW_ANALYZE_COGNITION_PROMPT = global_config.maisaka.show_analyze_cognition_pro SHOW_THINKING = global_config.maisaka.show_thinking USER_NAME = global_config.maisaka.user_name.strip() or "用户" DIRECT_IMAGE_INPUT = global_config.maisaka.direct_image_input +MERGE_USER_MESSAGES = global_config.maisaka.merge_user_messages +TERMINAL_IMAGE_PREVIEW = global_config.maisaka.terminal_image_preview +TERMINAL_IMAGE_PREVIEW_WIDTH = global_config.maisaka.terminal_image_preview_width TAKE_OVER_HFC = global_config.maisaka.take_over_hfc diff --git a/src/maisaka/llm_service.py b/src/maisaka/llm_service.py index 3a6f4796..e955cb66 100644 --- a/src/maisaka/llm_service.py +++ b/src/maisaka/llm_service.py @@ -3,13 +3,17 @@ MaiSaka LLM 服务 - 使用主项目 LLM 系统 将主项目的 LLMRequest 适配为 MaiSaka 需要的接口 """ +from base64 import b64decode +from dataclasses import dataclass from datetime import datetime +from io import BytesIO +from time import perf_counter +from typing import Any, List, Optional import asyncio import random -from dataclasses import dataclass -from typing import Any, List, Optional +from PIL import Image as PILImage from rich.console import Group from rich.panel import Panel from rich.pretty import Pretty @@ -20,7 +24,7 @@ from src.common.logger import get_logger from src.common.prompt_i18n import load_prompt from src.config.config import config_manager, global_config from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType -from src.llm_models.payload_content.tool_option import ToolCall, ToolOption +from src.llm_models.payload_content.tool_option import ToolCall, ToolOption, ToolParamType from src.llm_models.utils_model import LLMRequest from . 
import config @@ -167,7 +171,55 @@ class MaiSakaLLMService: def set_extra_tools(self, tools: List[dict]) -> None: """设置额外的工具定义(如 MCP 工具)""" - self._extra_tools = list(tools) + self._extra_tools = [self._normalize_extra_tool(tool) for tool in tools] + logger.info(f"Normalized {len(self._extra_tools)} extra tool(s) for Maisaka") + + @staticmethod + def _json_type_to_tool_param_type(json_type: str) -> ToolParamType: + normalized = (json_type or "").lower() + if normalized == "integer": + return ToolParamType.INTEGER + if normalized == "number": + return ToolParamType.FLOAT + if normalized == "boolean": + return ToolParamType.BOOLEAN + return ToolParamType.STRING + + @classmethod + def _normalize_extra_tool(cls, tool: dict) -> dict: + """Normalize external/OpenAI-style tool definitions into the internal tool schema.""" + if "name" in tool and "description" in tool: + return tool + + if tool.get("type") != "function": + return tool + + function_info = tool.get("function", {}) + parameters_schema = function_info.get("parameters", {}) or {} + required_names = set(parameters_schema.get("required", []) or []) + properties = parameters_schema.get("properties", {}) or {} + parameters: list[tuple[str, ToolParamType, str, bool, list[str] | None]] = [] + + for param_name, param_schema in properties.items(): + if not isinstance(param_schema, dict): + continue + enum_values = param_schema.get("enum") + normalized_enum = [str(value) for value in enum_values] if isinstance(enum_values, list) else None + parameters.append( + ( + str(param_name), + cls._json_type_to_tool_param_type(str(param_schema.get("type", "string"))), + str(param_schema.get("description", "")), + param_name in required_names, + normalized_enum, + ) + ) + + return { + "name": str(function_info.get("name", "")), + "description": str(function_info.get("description", "")), + "parameters": parameters, + } async def _ensure_prompts_loaded(self) -> None: """异步懒加载提示词,避免在运行中的事件循环里同步渲染 prompt。""" @@ -219,6 +271,34 @@ class MaiSakaLLMService: return "bold white on magenta" return "bold white on bright_black" + @staticmethod + def _build_terminal_image_preview(image_base64: str) -> Optional[str]: + """Build a low-resolution ASCII preview for terminals without inline-image support.""" + ascii_chars = " .:-=+*#%@" + + try: + image_bytes = b64decode(image_base64) + with PILImage.open(BytesIO(image_bytes)) as image: + grayscale = image.convert("L") + width, height = grayscale.size + if width <= 0 or height <= 0: + return None + + preview_width = max(8, int(config.TERMINAL_IMAGE_PREVIEW_WIDTH)) + preview_height = max(1, int(height * (preview_width / width) * 0.5)) + resized = grayscale.resize((preview_width, preview_height)) + pixels = list(resized.getdata()) + except Exception: + return None + + rows: list[str] = [] + for row_index in range(preview_height): + row_pixels = pixels[row_index * preview_width : (row_index + 1) * preview_width] + row = "".join(ascii_chars[min(len(ascii_chars) - 1, pixel * len(ascii_chars) // 256)] for pixel in row_pixels) + rows.append(row) + + return "\n".join(rows) + @staticmethod def _render_message_content(content: Any) -> object: """把消息内容转成适合 Rich 输出的 renderable。""" @@ -236,9 +316,16 @@ class MaiSakaLLMService: if isinstance(image_format, str) and isinstance(image_base64, str): approx_size = max(0, len(image_base64) * 3 // 4) size_text = f"{approx_size / 1024:.1f} KB" if approx_size >= 1024 else f"{approx_size} B" + preview_parts: list[object] = [ + Text(f"image/{image_format} {size_text}\nbase64 omitted", 
style="magenta") + ] + if config.TERMINAL_IMAGE_PREVIEW: + preview_text = MaiSakaLLMService._build_terminal_image_preview(image_base64) + if preview_text: + preview_parts.append(Text(preview_text, style="white")) parts.append( Panel( - Text(f"image/{image_format} {size_text}\nbase64 omitted", style="magenta"), + Group(*preview_parts), border_style="magenta", padding=(0, 1), ) @@ -392,20 +479,29 @@ class MaiSakaLLMService: padding=(0, 1), ) ) + logger.info(f"chat_loop_step prompt display finished ({len(built_messages)} messages, {len(all_tools)} tools)") + request_started_at = perf_counter() + logger.info("chat_loop_step calling planner model generate_response_with_message_async") response, (reasoning, model, tool_calls) = await self._llm_chat.generate_response_with_message_async( message_factory=message_factory, tools=all_tools if all_tools else None, temperature=self._temperature, max_tokens=self._max_tokens, ) + elapsed = perf_counter() - request_started_at + logger.info( + f"chat_loop_step planner model returned in {elapsed:.2f}s " + f"(model={model}, tool_calls={len(tool_calls or [])}, response_len={len(response or '')})" + ) raw_message = build_message( role=RoleType.Assistant.value, content=response or "", source="assistant", tool_calls=tool_calls or None, ) + logger.info("chat_loop_step converted planner response into MaiMessage") return ChatResponse( content=response, diff --git a/src/maisaka/mcp_client/manager.py b/src/maisaka/mcp_client/manager.py index 5409a39d..d9e20e57 100644 --- a/src/maisaka/mcp_client/manager.py +++ b/src/maisaka/mcp_client/manager.py @@ -12,8 +12,13 @@ from .connection import MCPConnection, MCP_AVAILABLE # 内置工具名称集合 —— MCP 工具不允许与这些名称冲突 BUILTIN_TOOL_NAMES = frozenset( { + "reply", + "no_reply", "wait", "stop", + "write_file", + "read_file", + "list_files", "create_table", "list_tables", "view_table", diff --git a/src/maisaka/message_adapter.py b/src/maisaka/message_adapter.py index caa9d6dd..f079ab83 100644 --- a/src/maisaka/message_adapter.py +++ b/src/maisaka/message_adapter.py @@ -2,13 +2,18 @@ MaiSaka message adapters built on top of the main project's MaiMessage model. 
""" +from copy import deepcopy from datetime import datetime -import re +from io import BytesIO from typing import Optional from uuid import uuid4 +import base64 +import re -from src.common.data_models.mai_message_data_model import MaiMessage, MessageInfo, UserInfo -from src.common.data_models.message_component_data_model import MessageSequence +from PIL import Image as PILImage + +from src.common.data_models.mai_message_data_model import GroupInfo, MaiMessage, MessageInfo, UserInfo +from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent from src.config.config import global_config from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType from src.llm_models.payload_content.tool_option import ToolCall @@ -22,7 +27,10 @@ SOURCE_KEY = "maisaka_source" LLM_ROLE_KEY = "maisaka_llm_role" TOOL_CALL_ID_KEY = "maisaka_tool_call_id" TOOL_CALLS_KEY = "maisaka_tool_calls" -SPEAKER_PREFIX_PATTERN = re.compile(r"^\[(?P[^\]]+)\](?P.*)$", re.DOTALL) +SPEAKER_PREFIX_PATTERN = re.compile( + r"^(?:(?P\d{2}:\d{2}:\d{2}))?(?:[^>]+)>)?\[(?P[^\]]+)\](?P.*)$", + re.DOTALL, +) def _build_user_info_for_role(role: str) -> UserInfo: @@ -55,7 +63,7 @@ def _deserialize_tool_call(data: dict) -> ToolCall: def build_message( role: str, - content: str, + content: str = "", *, message_kind: str = "normal", source: Optional[str] = None, @@ -63,6 +71,12 @@ def build_message( tool_calls: Optional[list[ToolCall]] = None, timestamp: Optional[datetime] = None, message_id: Optional[str] = None, + platform: str = MAISAKA_PLATFORM, + session_id: str = MAISAKA_SESSION_ID, + user_info: Optional[UserInfo] = None, + group_info: Optional[GroupInfo] = None, + raw_message: Optional[MessageSequence] = None, + display_text: Optional[str] = None, ) -> MaiMessage: """Build a MaiMessage for the Maisaka session history.""" resolved_timestamp = timestamp or datetime.now() @@ -70,11 +84,11 @@ def build_message( message = MaiMessage( message_id=message_id or f"maisaka_{uuid4().hex}", timestamp=resolved_timestamp, - platform=MAISAKA_PLATFORM, + platform=platform, ) message.message_info = MessageInfo( - user_info=_build_user_info_for_role(resolved_role), - group_info=None, + user_info=user_info or _build_user_info_for_role(resolved_role), + group_info=group_info, additional_config={ LLM_ROLE_KEY: resolved_role, MESSAGE_KIND_KEY: message_kind, @@ -83,18 +97,26 @@ def build_message( TOOL_CALLS_KEY: [_serialize_tool_call(tool_call) for tool_call in (tool_calls or [])], }, ) - message.session_id = MAISAKA_SESSION_ID - message.raw_message = MessageSequence([]) - if content: + message.session_id = session_id + message.raw_message = raw_message if raw_message is not None else MessageSequence([]) + if raw_message is None and content: message.raw_message.text(content) - message.processed_plain_text = content - message.display_message = content + visible_text = display_text if display_text is not None else content + message.processed_plain_text = visible_text + message.display_message = visible_text return message -def format_speaker_content(speaker_name: str, content: str) -> str: +def format_speaker_content( + speaker_name: str, + content: str, + timestamp: Optional[datetime] = None, + message_id: Optional[str] = None, +) -> str: """Format visible conversation content with an explicit speaker label.""" - return f"[{speaker_name}]{content}" + time_prefix = timestamp.strftime("%H:%M:%S") if timestamp is not None else "" + message_id_prefix = f"" if message_id 
else "" + return f"{time_prefix}{message_id_prefix}[{speaker_name}]{content}" def parse_speaker_content(content: str) -> tuple[Optional[str], str]: @@ -105,6 +127,39 @@ def parse_speaker_content(content: str) -> tuple[Optional[str], str]: return match.group("speaker"), match.group("content") +def clone_message_sequence(message_sequence: MessageSequence) -> MessageSequence: + """Create a detached copy of a message sequence.""" + return MessageSequence([deepcopy(component) for component in message_sequence.components]) + + +def build_visible_text_from_sequence(message_sequence: MessageSequence) -> str: + """Extract visible text from a message sequence without forcing image descriptions.""" + parts: list[str] = [] + for component in message_sequence.components: + if isinstance(component, TextComponent): + parts.append(SPEAKER_PREFIX_PATTERN.sub(r"\g[\g]\g", component.text)) + continue + + if isinstance(component, EmojiComponent): + parts.append("[表情包]") + continue + + if isinstance(component, ImageComponent): + parts.append("[图片]") + return "".join(parts) + + +def _guess_image_format(image_bytes: bytes) -> Optional[str]: + if not image_bytes: + return None + + try: + with PILImage.open(BytesIO(image_bytes)) as image: + return image.format.lower() if image.format else None + except Exception: + return None + + def get_message_text(message: MaiMessage) -> str: if message.processed_plain_text is not None: return message.processed_plain_text @@ -156,7 +211,6 @@ def remove_last_perception(messages: list[MaiMessage]) -> None: def to_llm_message(message: MaiMessage) -> Optional[Message]: role = get_message_role(message) - content = get_message_text(message) tool_call_id = get_tool_call_id(message) tool_calls = get_tool_calls(message) @@ -176,6 +230,28 @@ def to_llm_message(message: MaiMessage) -> Optional[Message]: builder.set_tool_calls(tool_calls) if role_type == RoleType.Tool and tool_call_id: builder.add_tool_call(tool_call_id) - if content: - builder.add_text_content(content) + + has_content = False + for component in message.raw_message.components: + if isinstance(component, TextComponent): + if component.text: + builder.add_text_content(component.text) + has_content = True + continue + + if isinstance(component, (ImageComponent, EmojiComponent)): + image_format = _guess_image_format(component.binary_data) + if image_format and component.binary_data: + builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8")) + has_content = True + continue + + if component.content: + builder.add_text_content(component.content) + has_content = True + + if not has_content: + content = get_message_text(message) + if content: + builder.add_text_content(content) return builder.build() diff --git a/src/maisaka/runtime.py b/src/maisaka/runtime.py index 82437b20..ed69964d 100644 --- a/src/maisaka/runtime.py +++ b/src/maisaka/runtime.py @@ -3,6 +3,7 @@ Maisaka runtime for non-CLI integrations. 
""" from datetime import datetime +from pathlib import Path from typing import Optional import asyncio @@ -21,9 +22,15 @@ from .config import ( ENABLE_COGNITION_MODULE, ENABLE_EMOTION_MODULE, ENABLE_KNOWLEDGE_MODULE, + ENABLE_LIST_FILES, + ENABLE_MCP, + ENABLE_READ_FILE, + ENABLE_WRITE_FILE, + MERGE_USER_MESSAGES, ) from .knowledge import retrieve_relevant_knowledge from .llm_service import MaiSakaLLMService +from .mcp_client import MCPManager from .message_adapter import ( build_message, build_visible_text_from_sequence, @@ -32,6 +39,7 @@ from .message_adapter import ( get_message_role, remove_last_perception, ) +from .tool_handlers import handle_list_files, handle_mcp_tool, handle_read_file, handle_unknown_tool, handle_write_file logger = get_logger("maisaka_runtime") @@ -49,7 +57,9 @@ class MaisakaHeartFlowChatting: self.log_prefix = f"[{session_name}]" self._llm_service = MaiSakaLLMService(api_key="", base_url=None, model="") self._chat_history: list[MaiMessage] = [] + self._mcp_manager: Optional[MCPManager] = None self._pending_messages: list[SessionMessage] = [] + self._source_messages_by_id: dict[str, SessionMessage] = {} self._running = False self._loop_task: Optional[asyncio.Task] = None self._loop_lock = asyncio.Lock() @@ -66,6 +76,9 @@ class MaisakaHeartFlowChatting: if self._running: return + if ENABLE_MCP: + await self._init_mcp() + self._running = True self._loop_task = asyncio.create_task(self._main_loop()) logger.info(f"{self.log_prefix} Maisaka runtime started") @@ -87,6 +100,10 @@ class MaisakaHeartFlowChatting: finally: self._loop_task = None + if self._mcp_manager is not None: + await self._mcp_manager.close() + self._mcp_manager = None + logger.info(f"{self.log_prefix} Maisaka runtime stopped") def adjust_talk_frequency(self, frequency: float) -> None: @@ -96,6 +113,7 @@ class MaisakaHeartFlowChatting: async def register_message(self, message: SessionMessage) -> None: """Queue a newly received message for Maisaka processing.""" self._pending_messages.append(message) + self._source_messages_by_id[message.message_id] = message self._new_message_event.set() async def _main_loop(self) -> None: @@ -118,32 +136,60 @@ class MaisakaHeartFlowChatting: self._pending_messages.clear() return drained_messages - async def _ingest_messages(self, messages: list[SessionMessage]) -> None: - merged_sequence = await self._merge_messages(messages) - merged_content = build_visible_text_from_sequence(merged_sequence).strip() - if not merged_sequence.components: + async def _init_mcp(self) -> None: + """Initialize MCP tools for the runtime and inject them into the planner.""" + config_path = Path(__file__).with_name("mcp_config.json") + self._mcp_manager = await MCPManager.from_config(str(config_path)) + if self._mcp_manager is None: + logger.info(f"{self.log_prefix} MCP not available for Maisaka runtime") return + mcp_tools = self._mcp_manager.get_openai_tools() + if not mcp_tools: + logger.info(f"{self.log_prefix} MCP manager initialized without exposed tools") + return + + self._llm_service.set_extra_tools(mcp_tools) + logger.info( + f"{self.log_prefix} Loaded {len(mcp_tools)} MCP tool(s) for Maisaka runtime:\n" + f"{self._mcp_manager.get_tool_summary()}" + ) + + async def _ingest_messages(self, messages: list[SessionMessage]) -> None: if self._chat_start_time is None: self._chat_start_time = messages[0].timestamp self._last_user_input_time = messages[-1].timestamp self._user_input_times.extend(message.timestamp for message in messages) - self._chat_history.append( - build_message( - 
role="user", - content=merged_content, - source="user", - timestamp=messages[-1].timestamp, - platform=messages[-1].platform, - session_id=self.session_id, - group_info=self._build_group_info(messages[-1]), - user_info=self._build_runtime_user_info(), - raw_message=merged_sequence, - display_text=merged_content, + if MERGE_USER_MESSAGES: + merged_sequence = await self._merge_messages(messages) + merged_content = build_visible_text_from_sequence(merged_sequence).strip() + if not merged_sequence.components: + return + + self._chat_history.append( + build_message( + role="user", + content=merged_content, + source="user", + timestamp=messages[-1].timestamp, + platform=messages[-1].platform, + session_id=self.session_id, + group_info=self._build_group_info(messages[-1]), + user_info=self._build_runtime_user_info(), + raw_message=merged_sequence, + display_text=merged_content, + ) ) - ) - self._trim_chat_history() + self._trim_chat_history() + return + + for message in messages: + history_message = await self._build_user_history_message(message) + if history_message is None: + continue + self._chat_history.append(history_message) + self._trim_chat_history() async def _merge_messages(self, messages: list[SessionMessage]) -> MessageSequence: merged_sequence = MessageSequence([]) @@ -151,7 +197,7 @@ class MaisakaHeartFlowChatting: for message in messages: user_info = message.message_info.user_info speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id - prefix = format_speaker_content(speaker_name, "", message.timestamp) + prefix = format_speaker_content(speaker_name, "", message.timestamp, message.message_id) merged_sequence.text(prefix) appended_component = False @@ -174,14 +220,69 @@ class MaisakaHeartFlowChatting: return merged_sequence + async def _build_user_history_message(self, message: SessionMessage) -> Optional[MaiMessage]: + user_sequence = await self._build_message_sequence(message) + visible_text = build_visible_text_from_sequence(user_sequence).strip() + if not user_sequence.components: + return None + + return build_message( + role="user", + content=visible_text, + source="user", + timestamp=message.timestamp, + platform=message.platform, + session_id=self.session_id, + group_info=self._build_group_info(message), + user_info=self._build_runtime_user_info(), + raw_message=user_sequence, + display_text=visible_text, + ) + + async def _build_message_sequence(self, message: SessionMessage) -> MessageSequence: + message_sequence = MessageSequence([]) + user_info = message.message_info.user_info + speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id + message_sequence.text(format_speaker_content(speaker_name, "", message.timestamp, message.message_id)) + + appended_component = False + if DIRECT_IMAGE_INPUT: + source_sequence = getattr(message, "maisaka_original_raw_message", message.raw_message) + else: + source_sequence = message.raw_message + + for component in clone_message_sequence(source_sequence).components: + message_sequence.components.append(component) + appended_component = True + + if not appended_component: + if not message.processed_plain_text: + await message.process() + content = (message.processed_plain_text or "").strip() + if content: + message_sequence.text(content) + + return message_sequence + async def _run_internal_loop(self, anchor_message: SessionMessage) -> None: last_had_tool_calls = True - for _ in range(self._max_internal_rounds): + for round_index in range(self._max_internal_rounds): + 
logger.info( + f"{self.log_prefix} Internal loop round {round_index + 1}/{self._max_internal_rounds} started " + f"(history={len(self._chat_history)})" + ) if last_had_tool_calls: + logger.info(f"{self.log_prefix} Building perception snapshot before planner call") await self._append_perception_snapshot() + logger.info(f"{self.log_prefix} Perception snapshot step finished") + logger.info(f"{self.log_prefix} Calling Maisaka chat_loop_step") response = await self._llm_service.chat_loop_step(self._chat_history) + logger.info( + f"{self.log_prefix} chat_loop_step returned " + f"(content_len={len(response.content or '')}, tool_calls={len(response.tool_calls)})" + ) response.raw_message.platform = anchor_message.platform response.raw_message.session_id = self.session_id response.raw_message.message_info.group_info = self._build_group_info(anchor_message) @@ -189,16 +290,20 @@ class MaisakaHeartFlowChatting: self._last_assistant_response_time = datetime.now() if response.tool_calls: + logger.info(f"{self.log_prefix} Handling {len(response.tool_calls)} tool call(s)") should_pause = await self._handle_tool_calls(response.tool_calls, response.content or "", anchor_message) + logger.info(f"{self.log_prefix} Tool handling finished (should_pause={should_pause})") if should_pause: return last_had_tool_calls = True continue if response.content: + logger.info(f"{self.log_prefix} Planner returned content without tool calls; continuing inner loop") last_had_tool_calls = False continue + logger.info(f"{self.log_prefix} Planner returned empty content and no tool calls; leaving inner loop") return logger.info(f"{self.log_prefix} Maisaka internal loop reached max rounds and paused") @@ -271,8 +376,10 @@ class MaisakaHeartFlowChatting: ) -> bool: for tool_call in tool_calls: if tool_call.func_name == "reply": - await self._handle_reply(tool_call, latest_thought, anchor_message) - return True + reply_sent = await self._handle_reply(tool_call, latest_thought, anchor_message) + if reply_sent: + return True + continue if tool_call.func_name == "no_reply": self._chat_history.append( @@ -302,28 +409,53 @@ class MaisakaHeartFlowChatting: ) return True - self._chat_history.append( - self._build_tool_message( - tool_call, - f"Unsupported runtime tool: {tool_call.func_name}", - ) - ) + if tool_call.func_name == "write_file" and ENABLE_WRITE_FILE: + await handle_write_file(tool_call, self._chat_history) + continue + + if tool_call.func_name == "read_file" and ENABLE_READ_FILE: + await handle_read_file(tool_call, self._chat_history) + continue + + if tool_call.func_name == "list_files" and ENABLE_LIST_FILES: + await handle_list_files(tool_call, self._chat_history) + continue + + if self._mcp_manager and self._mcp_manager.is_mcp_tool(tool_call.func_name): + await handle_mcp_tool(tool_call, self._chat_history, self._mcp_manager) + continue + + await handle_unknown_tool(tool_call, self._chat_history) return False - async def _handle_reply(self, tool_call: ToolCall, latest_thought: str, anchor_message: SessionMessage) -> None: + async def _handle_reply(self, tool_call: ToolCall, latest_thought: str, anchor_message: SessionMessage) -> bool: + target_message_id = str((tool_call.args or {}).get("message_id", "")).strip() + if not target_message_id: + self._chat_history.append( + self._build_tool_message(tool_call, "reply requires a valid message_id argument.") + ) + return False + + target_message = self._source_messages_by_id.get(target_message_id) + if target_message is None: + self._chat_history.append( + 
self._build_tool_message(tool_call, f"reply target message_id not found: {target_message_id}") + ) + return False + reply_text = await self._llm_service.generate_reply(latest_thought, self._chat_history) sent = await send_service.text_to_stream( text=reply_text, stream_id=self.session_id, set_reply=True, - reply_message=anchor_message, + reply_message=target_message, typing=False, ) tool_result = "Visible reply generated and sent." if sent else "Visible reply generation succeeded but send failed." self._chat_history.append(self._build_tool_message(tool_call, tool_result)) if not sent: - return + return False bot_name = global_config.bot.nickname.strip() or "MaiSaka" self._chat_history.append( @@ -331,12 +463,13 @@ class MaisakaHeartFlowChatting: role="user", content=format_speaker_content(bot_name, reply_text, datetime.now()), source="guided_reply", - platform=anchor_message.platform, + platform=target_message.platform or anchor_message.platform, session_id=self.session_id, - group_info=self._build_group_info(anchor_message), + group_info=self._build_group_info(target_message), user_info=self._build_runtime_user_info(), ) ) + return True def _build_tool_message(self, tool_call: ToolCall, content: str) -> MaiMessage: return build_message(
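A few notes on this patch. First, the new history prefix: with a timestamp and message id present, each user line visible to the model now carries a `HH:MM:SS<message_id>[speaker]` prefix. The angle-bracket delimiter around the message id is my reading of the pattern in `message_adapter.py`; treat the exact delimiter as an assumption. A minimal round-trip check, and note the pattern still matches the old bare `[speaker]content` form since both prefixes are optional:

```python
# Round-trip for the Maisaka speaker prefix (delimiter reconstructed from this patch).
import re
from datetime import datetime

SPEAKER_PREFIX_PATTERN = re.compile(
    r"^(?:(?P<time>\d{2}:\d{2}:\d{2}))?(?:<(?P<message_id>[^>]+)>)?\[(?P<speaker>[^\]]+)\](?P<content>.*)$",
    re.DOTALL,
)


def format_speaker_content(speaker, content, timestamp=None, message_id=None):
    time_prefix = timestamp.strftime("%H:%M:%S") if timestamp is not None else ""
    message_id_prefix = f"<{message_id}>" if message_id else ""
    return f"{time_prefix}{message_id_prefix}[{speaker}]{content}"


line = format_speaker_content("Alice", "hello", datetime(2024, 1, 1, 12, 34, 56), "msg_42")
assert line == "12:34:56<msg_42>[Alice]hello"

match = SPEAKER_PREFIX_PATTERN.match(line)
assert match is not None
assert match.group("speaker") == "Alice" and match.group("message_id") == "msg_42"

# build_visible_text_from_sequence drops the id but keeps time and speaker:
assert SPEAKER_PREFIX_PATTERN.sub(r"\g<time>[\g<speaker>]\g<content>", line) == "12:34:56[Alice]hello"
```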
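Second, tool normalization: `_normalize_extra_tool` flattens OpenAI-style function definitions (the shape `MCPManager.get_openai_tools()` emits) into the internal `(name, type, description, required, enum)` tuples. A self-contained sketch of the mapping; `ToolParamType` here is a stand-in for the project enum and `search_docs` is a made-up tool:

```python
from enum import Enum


class ToolParamType(Enum):  # stand-in for src.llm_models.payload_content.tool_option.ToolParamType
    STRING = "string"
    INTEGER = "integer"
    FLOAT = "float"
    BOOLEAN = "boolean"


def json_type_to_tool_param_type(json_type: str) -> ToolParamType:
    # mirrors MaiSakaLLMService._json_type_to_tool_param_type in this patch
    normalized = (json_type or "").lower()
    if normalized == "integer":
        return ToolParamType.INTEGER
    if normalized == "number":
        return ToolParamType.FLOAT
    if normalized == "boolean":
        return ToolParamType.BOOLEAN
    return ToolParamType.STRING


# "search_docs" is a hypothetical MCP tool in OpenAI function-call format:
openai_tool = {
    "type": "function",
    "function": {
        "name": "search_docs",
        "description": "Search documentation.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query."},
                "limit": {"type": "integer", "description": "Max results."},
            },
            "required": ["query"],
        },
    },
}

function_info = openai_tool["function"]
schema = function_info.get("parameters", {}) or {}
required = set(schema.get("required", []) or [])
normalized = {
    "name": function_info["name"],
    "description": function_info["description"],
    "parameters": [
        (
            name,
            json_type_to_tool_param_type(prop.get("type", "string")),
            prop.get("description", ""),
            name in required,
            [str(v) for v in prop["enum"]] if isinstance(prop.get("enum"), list) else None,
        )
        for name, prop in (schema.get("properties", {}) or {}).items()
    ],
}
print(normalized["parameters"])
# [('query', ToolParamType.STRING, 'Search query.', True, None),
#  ('limit', ToolParamType.INTEGER, 'Max results.', False, None)]
```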
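Third, the terminal preview in `_build_terminal_image_preview` is a plain downsample-to-ASCII pass: character width from `terminal_image_preview_width` (clamped to at least 8), height scaled by 0.5 to offset tall terminal cells, and a 10-step grayscale ramp. A standalone sketch of the same idea, fed a synthetic gradient so it runs without a real image:

```python
from base64 import b64decode, b64encode
from io import BytesIO

from PIL import Image

ASCII_CHARS = " .:-=+*#%@"  # 10 steps, dark -> bright


def preview(image_base64: str, width: int = 24) -> str:
    with Image.open(BytesIO(b64decode(image_base64))) as img:
        gray = img.convert("L")
        w, h = gray.size
        height = max(1, int(h * (width / w) * 0.5))  # 0.5: terminal cells are ~2x as tall as wide
        pixels = list(gray.resize((width, height)).getdata())
    return "\n".join(
        "".join(ASCII_CHARS[min(9, p * 10 // 256)] for p in pixels[r * width : (r + 1) * width])
        for r in range(height)
    )


# Synthetic 256x256 vertical gradient, so the demo needs no image file:
buf = BytesIO()
Image.linear_gradient("L").save(buf, format="PNG")
print(preview(b64encode(buf.getvalue()).decode("ascii")))
```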
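Finally, the `reply` contract: replies are no longer anchored to the latest message. The planner must pass the `message_id` it saw in the history prefix, and `_handle_reply` resolves it against `_source_messages_by_id`, appending a tool-error message and returning False (so the loop continues) when the id is missing or unknown. A toy sketch of that resolution with a stand-in `ToolCall`:

```python
# Stand-in resolution logic matching _handle_reply's id handling in this patch.
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class ToolCall:  # stand-in for src.llm_models.payload_content.tool_option.ToolCall
    func_name: str
    args: dict = field(default_factory=dict)


source_messages_by_id = {"msg_42": "<SessionMessage for msg_42>"}  # illustrative registry


def resolve_reply_target(tool_call: ToolCall) -> Optional[str]:
    target_id = str((tool_call.args or {}).get("message_id", "")).strip()
    if not target_id:
        return None  # runtime appends "reply requires a valid message_id argument."
    return source_messages_by_id.get(target_id)  # None -> "reply target message_id not found"


assert resolve_reply_target(ToolCall("reply", {"message_id": "msg_42"})) is not None
assert resolve_reply_target(ToolCall("reply", {})) is None
```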