feat: provide native VLM support

SengokuCola
2026-03-24 20:57:57 +08:00
parent 6cfc92e66f
commit a5fc4d172d
12 changed files with 447 additions and 59 deletions

View File

@@ -20,6 +20,7 @@ dependencies = [
"json-repair>=0.47.6",
"maim-message>=0.6.2",
"maibot-plugin-sdk>=2.0.0",
"mcp",
"msgpack>=1.1.2",
"numpy>=2.2.6",
"openai>=1.95.0",

View File

@@ -10,6 +10,7 @@ jieba>=0.42.1
json-repair>=0.47.6
maim-message>=0.6.2
maibot-plugin-sdk>=1.2.3,<2.0.0
mcp
msgpack>=1.1.2
numpy>=2.2.6
openai>=1.95.0
@@ -29,4 +30,4 @@ structlog>=25.4.0
tomlkit>=0.13.3
typing-extensions
uvicorn>=0.35.0
watchfiles>=1.1.1
watchfiles>=1.1.1

View File

@@ -1,4 +1,5 @@
from contextlib import suppress
from copy import deepcopy
from typing import Any, Dict, Optional
import os
@@ -10,6 +11,7 @@ from src.chat.heart_flow.heartflow_message_processor import HeartFCMessageReceiv
from src.common.logger import get_logger
from src.common.utils.utils_message import MessageUtils
from src.common.utils.utils_session import SessionUtils
from src.config.config import global_config
from src.platform_io.route_key_factory import RouteKeyFactory
# from src.chat.brain_chat.PFC.pfc_manager import PFCManager
@@ -301,6 +303,8 @@ class ChatBot:
# pass
# Process the message content: detect binary data such as sticker/emoji images and convert it into text descriptions
if global_config.maisaka.take_over_hfc and global_config.maisaka.direct_image_input:
message.maisaka_original_raw_message = deepcopy(message.raw_message) # type: ignore[attr-defined]
await message.process()
# Platform-level @ detection is handled uniformly by the underlying is_mentioned_bot_in_message; no hard-coded username matching is done here
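Note: the snapshot above is taken with deepcopy because message.process() rewrites raw_message in place (images become text descriptions). A minimal sketch of the pattern with toy stand-ins (ToyMessage and its payloads are hypothetical, not part of this commit):

import asyncio
from copy import deepcopy

class ToyMessage:
    """Toy stand-in for the session message; names are hypothetical."""
    def __init__(self, raw: list):
        self.raw_message = raw  # may contain binary image payloads

    async def process(self) -> None:
        # Destructive step: binary payloads are replaced by text descriptions in place.
        self.raw_message = [
            "[image description]" if isinstance(part, bytes) else part
            for part in self.raw_message
        ]

async def main() -> None:
    msg = ToyMessage(["hello", b"\x89PNG"])
    msg.maisaka_original_raw_message = deepcopy(msg.raw_message)  # snapshot first
    await msg.process()
    assert msg.maisaka_original_raw_message[1] == b"\x89PNG"      # original survives
    assert msg.raw_message[1] == "[image description]"            # processed view

asyncio.run(main())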

View File

@@ -56,7 +56,7 @@ CONFIG_DIR: Path = PROJECT_ROOT / "config"
BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute()
MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute()
MMC_VERSION: str = "1.0.0"
CONFIG_VERSION: str = "8.1.7"
CONFIG_VERSION: str = "8.1.10"
MODEL_CONFIG_VERSION: str = "1.12.0"
logger = get_logger("config")

View File

@@ -1609,6 +1609,34 @@ class MaiSakaConfig(ConfigBase):
)
"""是否将图片直接作为多模态消息传入 Maisaka 主循环,而不是仅使用转译文本"""
merge_user_messages: bool = Field(
default=True,
json_schema_extra={
"x-widget": "switch",
"x-icon": "merge",
},
)
"""Whether Maisaka should merge newly received user utterances into a single user message per round"""
terminal_image_preview: bool = Field(
default=False,
json_schema_extra={
"x-widget": "switch",
"x-icon": "image",
},
)
"""Whether Maisaka should render a low-resolution terminal preview for images in prompt display"""
terminal_image_preview_width: int = Field(
default=24,
ge=8,
json_schema_extra={
"x-widget": "input",
"x-icon": "columns",
},
)
"""Character width for Maisaka terminal image previews"""
take_over_hfc: bool = Field(
default=False,
json_schema_extra={

View File

@@ -298,8 +298,17 @@ class LLMRequest:
"""
self._refresh_task_config()
start_time = time.time()
if self.request_type.startswith("maisaka_"):
logger.info(
f"LLMRequest[{self.request_type}] generate_response_with_message_async started "
f"(temperature={temperature}, max_tokens={max_tokens}, tools={len(tools or [])})"
)
if self.request_type.startswith("maisaka_"):
logger.info(f"LLMRequest[{self.request_type}] building internal tool options from {len(tools or [])} tool(s)")
tool_built = self._build_tool_options(tools)
if self.request_type.startswith("maisaka_"):
logger.info(f"LLMRequest[{self.request_type}] built {len(tool_built or [])} internal tool option(s)")
response, model_info = await self._execute_request(
request_type=RequestType.RESPONSE,
@@ -309,6 +318,11 @@ class LLMRequest:
tool_options=tool_built,
response_format=response_format,
)
if self.request_type.startswith("maisaka_"):
logger.info(
f"LLMRequest[{self.request_type}] generate_response_with_message_async finished "
f"(model={model_info.name}, time_cost={time.time() - start_time:.2f}s)"
)
time_cost = time.time() - start_time
logger.debug(f"LLM请求总耗时: {time_cost}")
@@ -676,12 +690,28 @@ class LLMRequest:
for _ in range(max_attempts):
model_info, api_provider, client = self._select_model(exclude_models=failed_models_this_request)
if self.request_type.startswith("maisaka_"):
logger.info(
f"LLMRequest[{self.request_type}] selected model={model_info.name} "
f"provider={api_provider.name} request_type={request_type.value}"
)
message_list = []
if message_factory:
if self.request_type.startswith("maisaka_"):
logger.info(f"LLMRequest[{self.request_type}] building message list via message_factory")
message_list = message_factory(client)
if self.request_type.startswith("maisaka_"):
logger.info(
f"LLMRequest[{self.request_type}] message_factory returned {len(message_list)} message(s)"
)
try:
if self.request_type.startswith("maisaka_"):
logger.info(
f"LLMRequest[{self.request_type}] sending request to model={model_info.name} "
f"with tool_options={len(tool_options or [])}"
)
response = await self._attempt_request_on_model(
model_info,
api_provider,
@@ -697,6 +727,10 @@ class LLMRequest:
embedding_input=embedding_input,
audio_base64=audio_base64,
)
if self.request_type.startswith("maisaka_"):
logger.info(
f"LLMRequest[{self.request_type}] model={model_info.name} returned API response"
)
total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
if response_usage := response.usage:
total_tokens += response_usage.total_tokens
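The startswith("maisaka_") guard repeats before every progress log in this hunk; it could be factored into a single helper. A sketch only, assuming the request_type attribute shown above (the mixin and method names are hypothetical):

import logging

logger = logging.getLogger("model_utils")

class MaisakaLogMixin:
    """Hypothetical mixin for LLMRequest-style classes with a request_type attribute."""
    request_type: str

    def _maisaka_log(self, message: str) -> None:
        # Emit verbose progress logs only for maisaka_* request types.
        if self.request_type.startswith("maisaka_"):
            logger.info(f"LLMRequest[{self.request_type}] {message}")

Each inline guarded block above would then collapse to one self._maisaka_log(...) call.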

View File

@@ -27,7 +27,14 @@ def create_builtin_tools() -> List[ToolOption]:
reply_builder = ToolOptionBuilder()
reply_builder.set_name("reply")
reply_builder.set_description("Generate and emit a visible reply based on the current thought.")
reply_builder.set_description("Generate and emit a visible reply based on the current thought. You must specify the target user message_id to reply to.")
reply_builder.add_param(
name="message_id",
param_type=ToolParamType.STRING,
description="The message_id of the specific user message that this reply should target.",
required=True,
enum_values=None,
)
tools.append(reply_builder.build())
no_reply_builder = ToolOptionBuilder()
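For reference, the reply builder calls above should correspond to an OpenAI-style function definition roughly like the following (a hand-written sketch of the expected wire shape, not output captured from the code):

reply_tool = {
    "type": "function",
    "function": {
        "name": "reply",
        "description": (
            "Generate and emit a visible reply based on the current thought. "
            "You must specify the target user message_id to reply to."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "message_id": {
                    "type": "string",
                    "description": "The message_id of the specific user message that this reply should target.",
                },
            },
            "required": ["message_id"],
        },
    },
}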

View File

@@ -28,6 +28,9 @@ SHOW_ANALYZE_COGNITION_PROMPT = global_config.maisaka.show_analyze_cognition_pro
SHOW_THINKING = global_config.maisaka.show_thinking
USER_NAME = global_config.maisaka.user_name.strip() or "用户"  # fallback literal is Chinese for "user"
DIRECT_IMAGE_INPUT = global_config.maisaka.direct_image_input
MERGE_USER_MESSAGES = global_config.maisaka.merge_user_messages
TERMINAL_IMAGE_PREVIEW = global_config.maisaka.terminal_image_preview
TERMINAL_IMAGE_PREVIEW_WIDTH = global_config.maisaka.terminal_image_preview_width
TAKE_OVER_HFC = global_config.maisaka.take_over_hfc

View File

@@ -3,13 +3,17 @@ MaiSaka LLM service - uses the main project's LLM system
Adapts the main project's LLMRequest to the interface MaiSaka needs
"""
from base64 import b64decode
from dataclasses import dataclass
from datetime import datetime
from io import BytesIO
from time import perf_counter
from typing import Any, List, Optional
import asyncio
import random
from dataclasses import dataclass
from typing import Any, List, Optional
from PIL import Image as PILImage
from rich.console import Group
from rich.panel import Panel
from rich.pretty import Pretty
@@ -20,7 +24,7 @@ from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt
from src.config.config import config_manager, global_config
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.llm_models.payload_content.tool_option import ToolCall, ToolOption
from src.llm_models.payload_content.tool_option import ToolCall, ToolOption, ToolParamType
from src.llm_models.utils_model import LLMRequest
from . import config
@@ -167,7 +171,55 @@ class MaiSakaLLMService:
def set_extra_tools(self, tools: List[dict]) -> None:
"""设置额外的工具定义(如 MCP 工具)"""
self._extra_tools = list(tools)
self._extra_tools = [self._normalize_extra_tool(tool) for tool in tools]
logger.info(f"Normalized {len(self._extra_tools)} extra tool(s) for Maisaka")
@staticmethod
def _json_type_to_tool_param_type(json_type: str) -> ToolParamType:
normalized = (json_type or "").lower()
if normalized == "integer":
return ToolParamType.INTEGER
if normalized == "number":
return ToolParamType.FLOAT
if normalized == "boolean":
return ToolParamType.BOOLEAN
return ToolParamType.STRING
@classmethod
def _normalize_extra_tool(cls, tool: dict) -> dict:
"""Normalize external/OpenAI-style tool definitions into the internal tool schema."""
if "name" in tool and "description" in tool:
return tool
if tool.get("type") != "function":
return tool
function_info = tool.get("function", {})
parameters_schema = function_info.get("parameters", {}) or {}
required_names = set(parameters_schema.get("required", []) or [])
properties = parameters_schema.get("properties", {}) or {}
parameters: list[tuple[str, ToolParamType, str, bool, list[str] | None]] = []
for param_name, param_schema in properties.items():
if not isinstance(param_schema, dict):
continue
enum_values = param_schema.get("enum")
normalized_enum = [str(value) for value in enum_values] if isinstance(enum_values, list) else None
parameters.append(
(
str(param_name),
cls._json_type_to_tool_param_type(str(param_schema.get("type", "string"))),
str(param_schema.get("description", "")),
param_name in required_names,
normalized_enum,
)
)
return {
"name": str(function_info.get("name", "")),
"description": str(function_info.get("description", "")),
"parameters": parameters,
}
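Given an OpenAI-style definition, _normalize_extra_tool flattens each JSON-schema property into a (name, type, description, required, enum) tuple. A worked example of the mapping (input values are illustrative):

# Input (OpenAI-style, as produced by e.g. MCPManager.get_openai_tools()):
openai_tool = {
    "type": "function",
    "function": {
        "name": "search",
        "description": "Search the knowledge base.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search text."},
                "limit": {"type": "integer", "description": "Max results.", "enum": [5, 10]},
            },
            "required": ["query"],
        },
    },
}

# Expected normalized shape (enum values are stringified by the method above):
# {
#     "name": "search",
#     "description": "Search the knowledge base.",
#     "parameters": [
#         ("query", ToolParamType.STRING, "Search text.", True, None),
#         ("limit", ToolParamType.INTEGER, "Max results.", False, ["5", "10"]),
#     ],
# }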
async def _ensure_prompts_loaded(self) -> None:
"""异步懒加载提示词,避免在运行中的事件循环里同步渲染 prompt。"""
@@ -219,6 +271,34 @@ class MaiSakaLLMService:
return "bold white on magenta"
return "bold white on bright_black"
@staticmethod
def _build_terminal_image_preview(image_base64: str) -> Optional[str]:
"""Build a low-resolution ASCII preview for terminals without inline-image support."""
ascii_chars = " .:-=+*#%@"
try:
image_bytes = b64decode(image_base64)
with PILImage.open(BytesIO(image_bytes)) as image:
grayscale = image.convert("L")
width, height = grayscale.size
if width <= 0 or height <= 0:
return None
preview_width = max(8, int(config.TERMINAL_IMAGE_PREVIEW_WIDTH))
preview_height = max(1, int(height * (preview_width / width) * 0.5))
resized = grayscale.resize((preview_width, preview_height))
pixels = list(resized.getdata())
except Exception:
return None
rows: list[str] = []
for row_index in range(preview_height):
row_pixels = pixels[row_index * preview_width : (row_index + 1) * preview_width]
row = "".join(ascii_chars[min(len(ascii_chars) - 1, pixel * len(ascii_chars) // 256)] for pixel in row_pixels)
rows.append(row)
return "\n".join(rows)
@staticmethod
def _render_message_content(content: Any) -> object:
"""把消息内容转成适合 Rich 输出的 renderable。"""
@@ -236,9 +316,16 @@ class MaiSakaLLMService:
if isinstance(image_format, str) and isinstance(image_base64, str):
approx_size = max(0, len(image_base64) * 3 // 4)
size_text = f"{approx_size / 1024:.1f} KB" if approx_size >= 1024 else f"{approx_size} B"
preview_parts: list[object] = [
Text(f"image/{image_format} {size_text}\nbase64 omitted", style="magenta")
]
if config.TERMINAL_IMAGE_PREVIEW:
preview_text = MaiSakaLLMService._build_terminal_image_preview(image_base64)
if preview_text:
preview_parts.append(Text(preview_text, style="white"))
parts.append(
Panel(
Text(f"image/{image_format} {size_text}\nbase64 omitted", style="magenta"),
Group(*preview_parts),
border_style="magenta",
padding=(0, 1),
)
@@ -392,20 +479,29 @@ class MaiSakaLLMService:
padding=(0, 1),
)
)
logger.info(f"chat_loop_step prompt display finished ({len(built_messages)} messages, {len(all_tools)} tools)")
request_started_at = perf_counter()
logger.info("chat_loop_step calling planner model generate_response_with_message_async")
response, (reasoning, model, tool_calls) = await self._llm_chat.generate_response_with_message_async(
message_factory=message_factory,
tools=all_tools if all_tools else None,
temperature=self._temperature,
max_tokens=self._max_tokens,
)
elapsed = perf_counter() - request_started_at
logger.info(
f"chat_loop_step planner model returned in {elapsed:.2f}s "
f"(model={model}, tool_calls={len(tool_calls or [])}, response_len={len(response or '')})"
)
raw_message = build_message(
role=RoleType.Assistant.value,
content=response or "",
source="assistant",
tool_calls=tool_calls or None,
)
logger.info("chat_loop_step converted planner response into MaiMessage")
return ChatResponse(
content=response,

View File

@@ -12,8 +12,13 @@ from .connection import MCPConnection, MCP_AVAILABLE
# Set of built-in tool names; MCP tool names must not collide with these
BUILTIN_TOOL_NAMES = frozenset(
{
"reply",
"no_reply",
"wait",
"stop",
"write_file",
"read_file",
"list_files",
"create_table",
"list_tables",
"view_table",

View File

@@ -2,13 +2,18 @@
MaiSaka message adapters built on top of the main project's MaiMessage model.
"""
from copy import deepcopy
from datetime import datetime
import re
from io import BytesIO
from typing import Optional
from uuid import uuid4
import base64
import re
from src.common.data_models.mai_message_data_model import MaiMessage, MessageInfo, UserInfo
from src.common.data_models.message_component_data_model import MessageSequence
from PIL import Image as PILImage
from src.common.data_models.mai_message_data_model import GroupInfo, MaiMessage, MessageInfo, UserInfo
from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent
from src.config.config import global_config
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.llm_models.payload_content.tool_option import ToolCall
@@ -22,7 +27,10 @@ SOURCE_KEY = "maisaka_source"
LLM_ROLE_KEY = "maisaka_llm_role"
TOOL_CALL_ID_KEY = "maisaka_tool_call_id"
TOOL_CALLS_KEY = "maisaka_tool_calls"
SPEAKER_PREFIX_PATTERN = re.compile(r"^\[(?P<speaker>[^\]]+)\](?P<content>.*)$", re.DOTALL)
SPEAKER_PREFIX_PATTERN = re.compile(
r"^(?:(?P<timestamp>\d{2}:\d{2}:\d{2}))?(?:<mid:(?P<message_id>[^>]+)>)?\[(?P<speaker>[^\]]+)\](?P<content>.*)$",
re.DOTALL,
)
def _build_user_info_for_role(role: str) -> UserInfo:
@@ -55,7 +63,7 @@ def _deserialize_tool_call(data: dict) -> ToolCall:
def build_message(
role: str,
content: str,
content: str = "",
*,
message_kind: str = "normal",
source: Optional[str] = None,
@@ -63,6 +71,12 @@ def build_message(
tool_calls: Optional[list[ToolCall]] = None,
timestamp: Optional[datetime] = None,
message_id: Optional[str] = None,
platform: str = MAISAKA_PLATFORM,
session_id: str = MAISAKA_SESSION_ID,
user_info: Optional[UserInfo] = None,
group_info: Optional[GroupInfo] = None,
raw_message: Optional[MessageSequence] = None,
display_text: Optional[str] = None,
) -> MaiMessage:
"""Build a MaiMessage for the Maisaka session history."""
resolved_timestamp = timestamp or datetime.now()
@@ -70,11 +84,11 @@ def build_message(
message = MaiMessage(
message_id=message_id or f"maisaka_{uuid4().hex}",
timestamp=resolved_timestamp,
platform=MAISAKA_PLATFORM,
platform=platform,
)
message.message_info = MessageInfo(
user_info=_build_user_info_for_role(resolved_role),
group_info=None,
user_info=user_info or _build_user_info_for_role(resolved_role),
group_info=group_info,
additional_config={
LLM_ROLE_KEY: resolved_role,
MESSAGE_KIND_KEY: message_kind,
@@ -83,18 +97,26 @@ def build_message(
TOOL_CALLS_KEY: [_serialize_tool_call(tool_call) for tool_call in (tool_calls or [])],
},
)
message.session_id = MAISAKA_SESSION_ID
message.raw_message = MessageSequence([])
if content:
message.session_id = session_id
message.raw_message = raw_message if raw_message is not None else MessageSequence([])
if raw_message is None and content:
message.raw_message.text(content)
message.processed_plain_text = content
message.display_message = content
visible_text = display_text if display_text is not None else content
message.processed_plain_text = visible_text
message.display_message = visible_text
return message
def format_speaker_content(speaker_name: str, content: str) -> str:
def format_speaker_content(
speaker_name: str,
content: str,
timestamp: Optional[datetime] = None,
message_id: Optional[str] = None,
) -> str:
"""Format visible conversation content with an explicit speaker label."""
return f"[{speaker_name}]{content}"
time_prefix = timestamp.strftime("%H:%M:%S") if timestamp is not None else ""
message_id_prefix = f"<mid:{message_id}>" if message_id else ""
return f"{time_prefix}{message_id_prefix}[{speaker_name}]{content}"
def parse_speaker_content(content: str) -> tuple[Optional[str], str]:
@@ -105,6 +127,39 @@ def parse_speaker_content(content: str) -> tuple[Optional[str], str]:
return match.group("speaker"), match.group("content")
def clone_message_sequence(message_sequence: MessageSequence) -> MessageSequence:
"""Create a detached copy of a message sequence."""
return MessageSequence([deepcopy(component) for component in message_sequence.components])
def build_visible_text_from_sequence(message_sequence: MessageSequence) -> str:
"""Extract visible text from a message sequence without forcing image descriptions."""
parts: list[str] = []
for component in message_sequence.components:
if isinstance(component, TextComponent):
parts.append(SPEAKER_PREFIX_PATTERN.sub(r"\g<timestamp>[\g<speaker>]\g<content>", component.text))
continue
if isinstance(component, EmojiComponent):
parts.append("[表情包]")
continue
if isinstance(component, ImageComponent):
parts.append("[图片]")
return "".join(parts)
def _guess_image_format(image_bytes: bytes) -> Optional[str]:
if not image_bytes:
return None
try:
with PILImage.open(BytesIO(image_bytes)) as image:
return image.format.lower() if image.format else None
except Exception:
return None
def get_message_text(message: MaiMessage) -> str:
if message.processed_plain_text is not None:
return message.processed_plain_text
@@ -156,7 +211,6 @@ def remove_last_perception(messages: list[MaiMessage]) -> None:
def to_llm_message(message: MaiMessage) -> Optional[Message]:
role = get_message_role(message)
content = get_message_text(message)
tool_call_id = get_tool_call_id(message)
tool_calls = get_tool_calls(message)
@@ -176,6 +230,28 @@ def to_llm_message(message: MaiMessage) -> Optional[Message]:
builder.set_tool_calls(tool_calls)
if role_type == RoleType.Tool and tool_call_id:
builder.add_tool_call(tool_call_id)
if content:
builder.add_text_content(content)
has_content = False
for component in message.raw_message.components:
if isinstance(component, TextComponent):
if component.text:
builder.add_text_content(component.text)
has_content = True
continue
if isinstance(component, (ImageComponent, EmojiComponent)):
image_format = _guess_image_format(component.binary_data)
if image_format and component.binary_data:
builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8"))
has_content = True
continue
if component.content:
builder.add_text_content(component.content)
has_content = True
if not has_content:
content = get_message_text(message)
if content:
builder.add_text_content(content)
return builder.build()
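The net effect is that to_llm_message now prefers raw components: text components become text parts, image and emoji components become base64 image parts, and the processed plain text is only a fallback. A simplified standalone sketch of the same branching, using dicts in the common OpenAI-style part shape instead of MessageBuilder (an assumption, since the builder's actual wire format is provider-specific):

import base64

def components_to_parts(components: list) -> list[dict]:
    """Hypothetical dict-based version of the component loop above."""
    parts: list[dict] = []
    for component in components:
        if isinstance(component, str):      # stands in for TextComponent.text
            parts.append({"type": "text", "text": component})
        elif isinstance(component, bytes):  # stands in for ImageComponent.binary_data
            encoded = base64.b64encode(component).decode("utf-8")
            parts.append({
                "type": "image_url",
                # image/png is assumed here; the real code sniffs the format via PIL.
                "image_url": {"url": f"data:image/png;base64,{encoded}"},
            })
    return parts

# Mixed text + image input stays mixed instead of collapsing to plain text:
parts = components_to_parts(["look at this", b"\x89PNG"])
assert [part["type"] for part in parts] == ["text", "image_url"]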

View File

@@ -3,6 +3,7 @@ Maisaka runtime for non-CLI integrations.
"""
from datetime import datetime
from pathlib import Path
from typing import Optional
import asyncio
@@ -21,9 +22,15 @@ from .config import (
ENABLE_COGNITION_MODULE,
ENABLE_EMOTION_MODULE,
ENABLE_KNOWLEDGE_MODULE,
ENABLE_LIST_FILES,
ENABLE_MCP,
ENABLE_READ_FILE,
ENABLE_WRITE_FILE,
MERGE_USER_MESSAGES,
)
from .knowledge import retrieve_relevant_knowledge
from .llm_service import MaiSakaLLMService
from .mcp_client import MCPManager
from .message_adapter import (
build_message,
build_visible_text_from_sequence,
@@ -32,6 +39,7 @@ from .message_adapter import (
get_message_role,
remove_last_perception,
)
from .tool_handlers import handle_list_files, handle_mcp_tool, handle_read_file, handle_unknown_tool, handle_write_file
logger = get_logger("maisaka_runtime")
@@ -49,7 +57,9 @@ class MaisakaHeartFlowChatting:
self.log_prefix = f"[{session_name}]"
self._llm_service = MaiSakaLLMService(api_key="", base_url=None, model="")
self._chat_history: list[MaiMessage] = []
self._mcp_manager: Optional[MCPManager] = None
self._pending_messages: list[SessionMessage] = []
self._source_messages_by_id: dict[str, SessionMessage] = {}
self._running = False
self._loop_task: Optional[asyncio.Task] = None
self._loop_lock = asyncio.Lock()
@@ -66,6 +76,9 @@ class MaisakaHeartFlowChatting:
if self._running:
return
if ENABLE_MCP:
await self._init_mcp()
self._running = True
self._loop_task = asyncio.create_task(self._main_loop())
logger.info(f"{self.log_prefix} Maisaka runtime started")
@@ -87,6 +100,10 @@ class MaisakaHeartFlowChatting:
finally:
self._loop_task = None
if self._mcp_manager is not None:
await self._mcp_manager.close()
self._mcp_manager = None
logger.info(f"{self.log_prefix} Maisaka runtime stopped")
def adjust_talk_frequency(self, frequency: float) -> None:
@@ -96,6 +113,7 @@ class MaisakaHeartFlowChatting:
async def register_message(self, message: SessionMessage) -> None:
"""Queue a newly received message for Maisaka processing."""
self._pending_messages.append(message)
self._source_messages_by_id[message.message_id] = message
self._new_message_event.set()
async def _main_loop(self) -> None:
@@ -118,32 +136,60 @@ class MaisakaHeartFlowChatting:
self._pending_messages.clear()
return drained_messages
async def _ingest_messages(self, messages: list[SessionMessage]) -> None:
merged_sequence = await self._merge_messages(messages)
merged_content = build_visible_text_from_sequence(merged_sequence).strip()
if not merged_sequence.components:
async def _init_mcp(self) -> None:
"""Initialize MCP tools for the runtime and inject them into the planner."""
config_path = Path(__file__).with_name("mcp_config.json")
self._mcp_manager = await MCPManager.from_config(str(config_path))
if self._mcp_manager is None:
logger.info(f"{self.log_prefix} MCP not available for Maisaka runtime")
return
mcp_tools = self._mcp_manager.get_openai_tools()
if not mcp_tools:
logger.info(f"{self.log_prefix} MCP manager initialized without exposed tools")
return
self._llm_service.set_extra_tools(mcp_tools)
logger.info(
f"{self.log_prefix} Loaded {len(mcp_tools)} MCP tool(s) for Maisaka runtime:\n"
f"{self._mcp_manager.get_tool_summary()}"
)
async def _ingest_messages(self, messages: list[SessionMessage]) -> None:
if self._chat_start_time is None:
self._chat_start_time = messages[0].timestamp
self._last_user_input_time = messages[-1].timestamp
self._user_input_times.extend(message.timestamp for message in messages)
self._chat_history.append(
build_message(
role="user",
content=merged_content,
source="user",
timestamp=messages[-1].timestamp,
platform=messages[-1].platform,
session_id=self.session_id,
group_info=self._build_group_info(messages[-1]),
user_info=self._build_runtime_user_info(),
raw_message=merged_sequence,
display_text=merged_content,
if MERGE_USER_MESSAGES:
merged_sequence = await self._merge_messages(messages)
merged_content = build_visible_text_from_sequence(merged_sequence).strip()
if not merged_sequence.components:
return
self._chat_history.append(
build_message(
role="user",
content=merged_content,
source="user",
timestamp=messages[-1].timestamp,
platform=messages[-1].platform,
session_id=self.session_id,
group_info=self._build_group_info(messages[-1]),
user_info=self._build_runtime_user_info(),
raw_message=merged_sequence,
display_text=merged_content,
)
)
)
self._trim_chat_history()
self._trim_chat_history()
return
for message in messages:
history_message = await self._build_user_history_message(message)
if history_message is None:
continue
self._chat_history.append(history_message)
self._trim_chat_history()
async def _merge_messages(self, messages: list[SessionMessage]) -> MessageSequence:
merged_sequence = MessageSequence([])
@@ -151,7 +197,7 @@ class MaisakaHeartFlowChatting:
for message in messages:
user_info = message.message_info.user_info
speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
prefix = format_speaker_content(speaker_name, "", message.timestamp)
prefix = format_speaker_content(speaker_name, "", message.timestamp, message.message_id)
merged_sequence.text(prefix)
appended_component = False
@@ -174,14 +220,69 @@ class MaisakaHeartFlowChatting:
return merged_sequence
async def _build_user_history_message(self, message: SessionMessage) -> Optional[MaiMessage]:
user_sequence = await self._build_message_sequence(message)
visible_text = build_visible_text_from_sequence(user_sequence).strip()
if not user_sequence.components:
return None
return build_message(
role="user",
content=visible_text,
source="user",
timestamp=message.timestamp,
platform=message.platform,
session_id=self.session_id,
group_info=self._build_group_info(message),
user_info=self._build_runtime_user_info(),
raw_message=user_sequence,
display_text=visible_text,
)
async def _build_message_sequence(self, message: SessionMessage) -> MessageSequence:
message_sequence = MessageSequence([])
user_info = message.message_info.user_info
speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
message_sequence.text(format_speaker_content(speaker_name, "", message.timestamp, message.message_id))
appended_component = False
if DIRECT_IMAGE_INPUT:
source_sequence = getattr(message, "maisaka_original_raw_message", message.raw_message)
else:
source_sequence = message.raw_message
for component in clone_message_sequence(source_sequence).components:
message_sequence.components.append(component)
appended_component = True
if not appended_component:
if not message.processed_plain_text:
await message.process()
content = (message.processed_plain_text or "").strip()
if content:
message_sequence.text(content)
return message_sequence
async def _run_internal_loop(self, anchor_message: SessionMessage) -> None:
last_had_tool_calls = True
for _ in range(self._max_internal_rounds):
for round_index in range(self._max_internal_rounds):
logger.info(
f"{self.log_prefix} Internal loop round {round_index + 1}/{self._max_internal_rounds} started "
f"(history={len(self._chat_history)})"
)
if last_had_tool_calls:
logger.info(f"{self.log_prefix} Building perception snapshot before planner call")
await self._append_perception_snapshot()
logger.info(f"{self.log_prefix} Perception snapshot step finished")
logger.info(f"{self.log_prefix} Calling Maisaka chat_loop_step")
response = await self._llm_service.chat_loop_step(self._chat_history)
logger.info(
f"{self.log_prefix} chat_loop_step returned "
f"(content_len={len(response.content or '')}, tool_calls={len(response.tool_calls)})"
)
response.raw_message.platform = anchor_message.platform
response.raw_message.session_id = self.session_id
response.raw_message.message_info.group_info = self._build_group_info(anchor_message)
@@ -189,16 +290,20 @@ class MaisakaHeartFlowChatting:
self._last_assistant_response_time = datetime.now()
if response.tool_calls:
logger.info(f"{self.log_prefix} Handling {len(response.tool_calls)} tool call(s)")
should_pause = await self._handle_tool_calls(response.tool_calls, response.content or "", anchor_message)
logger.info(f"{self.log_prefix} Tool handling finished (should_pause={should_pause})")
if should_pause:
return
last_had_tool_calls = True
continue
if response.content:
logger.info(f"{self.log_prefix} Planner returned content without tool calls; continuing inner loop")
last_had_tool_calls = False
continue
logger.info(f"{self.log_prefix} Planner returned empty content and no tool calls; leaving inner loop")
return
logger.info(f"{self.log_prefix} Maisaka internal loop reached max rounds and paused")
@@ -271,8 +376,10 @@ class MaisakaHeartFlowChatting:
) -> bool:
for tool_call in tool_calls:
if tool_call.func_name == "reply":
await self._handle_reply(tool_call, latest_thought, anchor_message)
return True
reply_sent = await self._handle_reply(tool_call, latest_thought, anchor_message)
if reply_sent:
return True
continue
if tool_call.func_name == "no_reply":
self._chat_history.append(
@@ -302,28 +409,53 @@ class MaisakaHeartFlowChatting:
)
return True
self._chat_history.append(
self._build_tool_message(
tool_call,
f"Unsupported runtime tool: {tool_call.func_name}",
)
)
if tool_call.func_name == "write_file" and ENABLE_WRITE_FILE:
await handle_write_file(tool_call, self._chat_history)
continue
if tool_call.func_name == "read_file" and ENABLE_READ_FILE:
await handle_read_file(tool_call, self._chat_history)
continue
if tool_call.func_name == "list_files" and ENABLE_LIST_FILES:
await handle_list_files(tool_call, self._chat_history)
continue
if self._mcp_manager and self._mcp_manager.is_mcp_tool(tool_call.func_name):
await handle_mcp_tool(tool_call, self._chat_history, self._mcp_manager)
continue
await handle_unknown_tool(tool_call, self._chat_history)
return False
async def _handle_reply(self, tool_call: ToolCall, latest_thought: str, anchor_message: SessionMessage) -> None:
async def _handle_reply(self, tool_call: ToolCall, latest_thought: str, anchor_message: SessionMessage) -> bool:
target_message_id = str((tool_call.args or {}).get("message_id", "")).strip()
if not target_message_id:
self._chat_history.append(
self._build_tool_message(tool_call, "reply requires a valid message_id argument.")
)
return False
target_message = self._source_messages_by_id.get(target_message_id)
if target_message is None:
self._chat_history.append(
self._build_tool_message(tool_call, f"reply target message_id not found: {target_message_id}")
)
return False
reply_text = await self._llm_service.generate_reply(latest_thought, self._chat_history)
sent = await send_service.text_to_stream(
text=reply_text,
stream_id=self.session_id,
set_reply=True,
reply_message=anchor_message,
reply_message=target_message,
typing=False,
)
tool_result = "Visible reply generated and sent." if sent else "Visible reply generation succeeded but send failed."
self._chat_history.append(self._build_tool_message(tool_call, tool_result))
if not sent:
return
return False
bot_name = global_config.bot.nickname.strip() or "MaiSaka"
self._chat_history.append(
@@ -331,12 +463,13 @@ class MaisakaHeartFlowChatting:
role="user",
content=format_speaker_content(bot_name, reply_text, datetime.now()),
source="guided_reply",
platform=anchor_message.platform,
platform=target_message.platform or anchor_message.platform,
session_id=self.session_id,
group_info=self._build_group_info(anchor_message),
group_info=self._build_group_info(target_message),
user_info=self._build_runtime_user_info(),
)
)
return True
def _build_tool_message(self, tool_call: ToolCall, content: str) -> MaiMessage:
return build_message(