feat:提供原生vlm支持
This commit is contained in:
@@ -20,6 +20,7 @@ dependencies = [
|
||||
"json-repair>=0.47.6",
|
||||
"maim-message>=0.6.2",
|
||||
"maibot-plugin-sdk>=2.0.0",
|
||||
"mcp",
|
||||
"msgpack>=1.1.2",
|
||||
"numpy>=2.2.6",
|
||||
"openai>=1.95.0",
|
||||
|
||||
@@ -10,6 +10,7 @@ jieba>=0.42.1
|
||||
json-repair>=0.47.6
|
||||
maim-message>=0.6.2
|
||||
maibot-plugin-sdk>=1.2.3,<2.0.0
|
||||
mcp
|
||||
msgpack>=1.1.2
|
||||
numpy>=2.2.6
|
||||
openai>=1.95.0
|
||||
@@ -29,4 +30,4 @@ structlog>=25.4.0
|
||||
tomlkit>=0.13.3
|
||||
typing-extensions
|
||||
uvicorn>=0.35.0
|
||||
watchfiles>=1.1.1
|
||||
watchfiles>=1.1.1
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from contextlib import suppress
|
||||
from copy import deepcopy
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import os
|
||||
@@ -10,6 +11,7 @@ from src.chat.heart_flow.heartflow_message_processor import HeartFCMessageReceiv
|
||||
from src.common.logger import get_logger
|
||||
from src.common.utils.utils_message import MessageUtils
|
||||
from src.common.utils.utils_session import SessionUtils
|
||||
from src.config.config import global_config
|
||||
from src.platform_io.route_key_factory import RouteKeyFactory
|
||||
|
||||
# from src.chat.brain_chat.PFC.pfc_manager import PFCManager
|
||||
@@ -301,6 +303,8 @@ class ChatBot:
|
||||
# pass
|
||||
|
||||
# 处理消息内容,识别表情包等二进制数据并转化为文本描述
|
||||
if global_config.maisaka.take_over_hfc and global_config.maisaka.direct_image_input:
|
||||
message.maisaka_original_raw_message = deepcopy(message.raw_message) # type: ignore[attr-defined]
|
||||
await message.process()
|
||||
|
||||
# 平台层的 @ 检测由底层 is_mentioned_bot_in_message 统一处理;此处不做用户名硬编码匹配
|
||||
|
||||
@@ -56,7 +56,7 @@ CONFIG_DIR: Path = PROJECT_ROOT / "config"
|
||||
BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute()
|
||||
MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute()
|
||||
MMC_VERSION: str = "1.0.0"
|
||||
CONFIG_VERSION: str = "8.1.7"
|
||||
CONFIG_VERSION: str = "8.1.10"
|
||||
MODEL_CONFIG_VERSION: str = "1.12.0"
|
||||
|
||||
logger = get_logger("config")
|
||||
|
||||
@@ -1609,6 +1609,34 @@ class MaiSakaConfig(ConfigBase):
|
||||
)
|
||||
"""是å¦å°†å›¾ç‰‡ç›´æŽ¥ä½œä¸ºå¤šæ¨¡æ€æ¶ˆæ¯ä¼ å…¥ Maisaka ä¸»å¾ªçŽ¯ï¼Œè€Œä¸æ˜¯ä»…使用转译文本"""
|
||||
|
||||
merge_user_messages: bool = Field(
|
||||
default=True,
|
||||
json_schema_extra={
|
||||
"x-widget": "switch",
|
||||
"x-icon": "merge",
|
||||
},
|
||||
)
|
||||
"""Whether Maisaka should merge newly received user utterances into a single user message per round"""
|
||||
|
||||
terminal_image_preview: bool = Field(
|
||||
default=False,
|
||||
json_schema_extra={
|
||||
"x-widget": "switch",
|
||||
"x-icon": "image",
|
||||
},
|
||||
)
|
||||
"""Whether Maisaka should render a low-resolution terminal preview for images in prompt display"""
|
||||
|
||||
terminal_image_preview_width: int = Field(
|
||||
default=24,
|
||||
ge=8,
|
||||
json_schema_extra={
|
||||
"x-widget": "input",
|
||||
"x-icon": "columns",
|
||||
},
|
||||
)
|
||||
"""Character width for Maisaka terminal image previews"""
|
||||
|
||||
take_over_hfc: bool = Field(
|
||||
default=False,
|
||||
json_schema_extra={
|
||||
|
||||
@@ -298,8 +298,17 @@ class LLMRequest:
|
||||
"""
|
||||
self._refresh_task_config()
|
||||
start_time = time.time()
|
||||
if self.request_type.startswith("maisaka_"):
|
||||
logger.info(
|
||||
f"LLMRequest[{self.request_type}] generate_response_with_message_async started "
|
||||
f"(temperature={temperature}, max_tokens={max_tokens}, tools={len(tools or [])})"
|
||||
)
|
||||
|
||||
if self.request_type.startswith("maisaka_"):
|
||||
logger.info(f"LLMRequest[{self.request_type}] building internal tool options from {len(tools or [])} tool(s)")
|
||||
tool_built = self._build_tool_options(tools)
|
||||
if self.request_type.startswith("maisaka_"):
|
||||
logger.info(f"LLMRequest[{self.request_type}] built {len(tool_built or [])} internal tool option(s)")
|
||||
|
||||
response, model_info = await self._execute_request(
|
||||
request_type=RequestType.RESPONSE,
|
||||
@@ -309,6 +318,11 @@ class LLMRequest:
|
||||
tool_options=tool_built,
|
||||
response_format=response_format,
|
||||
)
|
||||
if self.request_type.startswith("maisaka_"):
|
||||
logger.info(
|
||||
f"LLMRequest[{self.request_type}] generate_response_with_message_async finished "
|
||||
f"(model={model_info.name}, time_cost={time.time() - start_time:.2f}s)"
|
||||
)
|
||||
|
||||
time_cost = time.time() - start_time
|
||||
logger.debug(f"LLM请求总耗时: {time_cost}")
|
||||
@@ -676,12 +690,28 @@ class LLMRequest:
|
||||
|
||||
for _ in range(max_attempts):
|
||||
model_info, api_provider, client = self._select_model(exclude_models=failed_models_this_request)
|
||||
if self.request_type.startswith("maisaka_"):
|
||||
logger.info(
|
||||
f"LLMRequest[{self.request_type}] selected model={model_info.name} "
|
||||
f"provider={api_provider.name} request_type={request_type.value}"
|
||||
)
|
||||
|
||||
message_list = []
|
||||
if message_factory:
|
||||
if self.request_type.startswith("maisaka_"):
|
||||
logger.info(f"LLMRequest[{self.request_type}] building message list via message_factory")
|
||||
message_list = message_factory(client)
|
||||
if self.request_type.startswith("maisaka_"):
|
||||
logger.info(
|
||||
f"LLMRequest[{self.request_type}] message_factory returned {len(message_list)} message(s)"
|
||||
)
|
||||
|
||||
try:
|
||||
if self.request_type.startswith("maisaka_"):
|
||||
logger.info(
|
||||
f"LLMRequest[{self.request_type}] sending request to model={model_info.name} "
|
||||
f"with tool_options={len(tool_options or [])}"
|
||||
)
|
||||
response = await self._attempt_request_on_model(
|
||||
model_info,
|
||||
api_provider,
|
||||
@@ -697,6 +727,10 @@ class LLMRequest:
|
||||
embedding_input=embedding_input,
|
||||
audio_base64=audio_base64,
|
||||
)
|
||||
if self.request_type.startswith("maisaka_"):
|
||||
logger.info(
|
||||
f"LLMRequest[{self.request_type}] model={model_info.name} returned API response"
|
||||
)
|
||||
total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
|
||||
if response_usage := response.usage:
|
||||
total_tokens += response_usage.total_tokens
|
||||
|
||||
@@ -27,7 +27,14 @@ def create_builtin_tools() -> List[ToolOption]:
|
||||
|
||||
reply_builder = ToolOptionBuilder()
|
||||
reply_builder.set_name("reply")
|
||||
reply_builder.set_description("Generate and emit a visible reply based on the current thought.")
|
||||
reply_builder.set_description("Generate and emit a visible reply based on the current thought. You must specify the target user message_id to reply to.")
|
||||
reply_builder.add_param(
|
||||
name="message_id",
|
||||
param_type=ToolParamType.STRING,
|
||||
description="The message_id of the specific user message that this reply should target.",
|
||||
required=True,
|
||||
enum_values=None,
|
||||
)
|
||||
tools.append(reply_builder.build())
|
||||
|
||||
no_reply_builder = ToolOptionBuilder()
|
||||
|
||||
@@ -28,6 +28,9 @@ SHOW_ANALYZE_COGNITION_PROMPT = global_config.maisaka.show_analyze_cognition_pro
|
||||
SHOW_THINKING = global_config.maisaka.show_thinking
|
||||
USER_NAME = global_config.maisaka.user_name.strip() or "用户"
|
||||
DIRECT_IMAGE_INPUT = global_config.maisaka.direct_image_input
|
||||
MERGE_USER_MESSAGES = global_config.maisaka.merge_user_messages
|
||||
TERMINAL_IMAGE_PREVIEW = global_config.maisaka.terminal_image_preview
|
||||
TERMINAL_IMAGE_PREVIEW_WIDTH = global_config.maisaka.terminal_image_preview_width
|
||||
TAKE_OVER_HFC = global_config.maisaka.take_over_hfc
|
||||
|
||||
|
||||
|
||||
@@ -3,13 +3,17 @@ MaiSaka LLM 服务 - 使用主项目 LLM 系统
|
||||
将主项目的 LLMRequest 适配为 MaiSaka 需要的接口
|
||||
"""
|
||||
|
||||
from base64 import b64decode
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
from time import perf_counter
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import asyncio
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from PIL import Image as PILImage
|
||||
from rich.console import Group
|
||||
from rich.panel import Panel
|
||||
from rich.pretty import Pretty
|
||||
@@ -20,7 +24,7 @@ from src.common.logger import get_logger
|
||||
from src.common.prompt_i18n import load_prompt
|
||||
from src.config.config import config_manager, global_config
|
||||
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
|
||||
from src.llm_models.payload_content.tool_option import ToolCall, ToolOption
|
||||
from src.llm_models.payload_content.tool_option import ToolCall, ToolOption, ToolParamType
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
|
||||
from . import config
|
||||
@@ -167,7 +171,55 @@ class MaiSakaLLMService:
|
||||
|
||||
def set_extra_tools(self, tools: List[dict]) -> None:
|
||||
"""设置额外的工具定义(如 MCP 工具)"""
|
||||
self._extra_tools = list(tools)
|
||||
self._extra_tools = [self._normalize_extra_tool(tool) for tool in tools]
|
||||
logger.info(f"Normalized {len(self._extra_tools)} extra tool(s) for Maisaka")
|
||||
|
||||
@staticmethod
|
||||
def _json_type_to_tool_param_type(json_type: str) -> ToolParamType:
|
||||
normalized = (json_type or "").lower()
|
||||
if normalized == "integer":
|
||||
return ToolParamType.INTEGER
|
||||
if normalized == "number":
|
||||
return ToolParamType.FLOAT
|
||||
if normalized == "boolean":
|
||||
return ToolParamType.BOOLEAN
|
||||
return ToolParamType.STRING
|
||||
|
||||
@classmethod
|
||||
def _normalize_extra_tool(cls, tool: dict) -> dict:
|
||||
"""Normalize external/OpenAI-style tool definitions into the internal tool schema."""
|
||||
if "name" in tool and "description" in tool:
|
||||
return tool
|
||||
|
||||
if tool.get("type") != "function":
|
||||
return tool
|
||||
|
||||
function_info = tool.get("function", {})
|
||||
parameters_schema = function_info.get("parameters", {}) or {}
|
||||
required_names = set(parameters_schema.get("required", []) or [])
|
||||
properties = parameters_schema.get("properties", {}) or {}
|
||||
parameters: list[tuple[str, ToolParamType, str, bool, list[str] | None]] = []
|
||||
|
||||
for param_name, param_schema in properties.items():
|
||||
if not isinstance(param_schema, dict):
|
||||
continue
|
||||
enum_values = param_schema.get("enum")
|
||||
normalized_enum = [str(value) for value in enum_values] if isinstance(enum_values, list) else None
|
||||
parameters.append(
|
||||
(
|
||||
str(param_name),
|
||||
cls._json_type_to_tool_param_type(str(param_schema.get("type", "string"))),
|
||||
str(param_schema.get("description", "")),
|
||||
param_name in required_names,
|
||||
normalized_enum,
|
||||
)
|
||||
)
|
||||
|
||||
return {
|
||||
"name": str(function_info.get("name", "")),
|
||||
"description": str(function_info.get("description", "")),
|
||||
"parameters": parameters,
|
||||
}
|
||||
|
||||
async def _ensure_prompts_loaded(self) -> None:
|
||||
"""异步懒加载提示词,避免在运行中的事件循环里同步渲染 prompt。"""
|
||||
@@ -219,6 +271,34 @@ class MaiSakaLLMService:
|
||||
return "bold white on magenta"
|
||||
return "bold white on bright_black"
|
||||
|
||||
@staticmethod
|
||||
def _build_terminal_image_preview(image_base64: str) -> Optional[str]:
|
||||
"""Build a low-resolution ASCII preview for terminals without inline-image support."""
|
||||
ascii_chars = " .:-=+*#%@"
|
||||
|
||||
try:
|
||||
image_bytes = b64decode(image_base64)
|
||||
with PILImage.open(BytesIO(image_bytes)) as image:
|
||||
grayscale = image.convert("L")
|
||||
width, height = grayscale.size
|
||||
if width <= 0 or height <= 0:
|
||||
return None
|
||||
|
||||
preview_width = max(8, int(config.TERMINAL_IMAGE_PREVIEW_WIDTH))
|
||||
preview_height = max(1, int(height * (preview_width / width) * 0.5))
|
||||
resized = grayscale.resize((preview_width, preview_height))
|
||||
pixels = list(resized.getdata())
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
rows: list[str] = []
|
||||
for row_index in range(preview_height):
|
||||
row_pixels = pixels[row_index * preview_width : (row_index + 1) * preview_width]
|
||||
row = "".join(ascii_chars[min(len(ascii_chars) - 1, pixel * len(ascii_chars) // 256)] for pixel in row_pixels)
|
||||
rows.append(row)
|
||||
|
||||
return "\n".join(rows)
|
||||
|
||||
@staticmethod
|
||||
def _render_message_content(content: Any) -> object:
|
||||
"""把消息内容转成适合 Rich 输出的 renderable。"""
|
||||
@@ -236,9 +316,16 @@ class MaiSakaLLMService:
|
||||
if isinstance(image_format, str) and isinstance(image_base64, str):
|
||||
approx_size = max(0, len(image_base64) * 3 // 4)
|
||||
size_text = f"{approx_size / 1024:.1f} KB" if approx_size >= 1024 else f"{approx_size} B"
|
||||
preview_parts: list[object] = [
|
||||
Text(f"image/{image_format} {size_text}\nbase64 omitted", style="magenta")
|
||||
]
|
||||
if config.TERMINAL_IMAGE_PREVIEW:
|
||||
preview_text = MaiSakaLLMService._build_terminal_image_preview(image_base64)
|
||||
if preview_text:
|
||||
preview_parts.append(Text(preview_text, style="white"))
|
||||
parts.append(
|
||||
Panel(
|
||||
Text(f"image/{image_format} {size_text}\nbase64 omitted", style="magenta"),
|
||||
Group(*preview_parts),
|
||||
border_style="magenta",
|
||||
padding=(0, 1),
|
||||
)
|
||||
@@ -392,20 +479,29 @@ class MaiSakaLLMService:
|
||||
padding=(0, 1),
|
||||
)
|
||||
)
|
||||
logger.info(f"chat_loop_step prompt display finished ({len(built_messages)} messages, {len(all_tools)} tools)")
|
||||
|
||||
|
||||
request_started_at = perf_counter()
|
||||
logger.info("chat_loop_step calling planner model generate_response_with_message_async")
|
||||
response, (reasoning, model, tool_calls) = await self._llm_chat.generate_response_with_message_async(
|
||||
message_factory=message_factory,
|
||||
tools=all_tools if all_tools else None,
|
||||
temperature=self._temperature,
|
||||
max_tokens=self._max_tokens,
|
||||
)
|
||||
elapsed = perf_counter() - request_started_at
|
||||
logger.info(
|
||||
f"chat_loop_step planner model returned in {elapsed:.2f}s "
|
||||
f"(model={model}, tool_calls={len(tool_calls or [])}, response_len={len(response or '')})"
|
||||
)
|
||||
raw_message = build_message(
|
||||
role=RoleType.Assistant.value,
|
||||
content=response or "",
|
||||
source="assistant",
|
||||
tool_calls=tool_calls or None,
|
||||
)
|
||||
logger.info("chat_loop_step converted planner response into MaiMessage")
|
||||
|
||||
return ChatResponse(
|
||||
content=response,
|
||||
|
||||
@@ -12,8 +12,13 @@ from .connection import MCPConnection, MCP_AVAILABLE
|
||||
# 内置工具名称集合 —— MCP 工具不允许与这些名称冲突
|
||||
BUILTIN_TOOL_NAMES = frozenset(
|
||||
{
|
||||
"reply",
|
||||
"no_reply",
|
||||
"wait",
|
||||
"stop",
|
||||
"write_file",
|
||||
"read_file",
|
||||
"list_files",
|
||||
"create_table",
|
||||
"list_tables",
|
||||
"view_table",
|
||||
|
||||
@@ -2,13 +2,18 @@
|
||||
MaiSaka message adapters built on top of the main project's MaiMessage model.
|
||||
"""
|
||||
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
import re
|
||||
from io import BytesIO
|
||||
from typing import Optional
|
||||
from uuid import uuid4
|
||||
import base64
|
||||
import re
|
||||
|
||||
from src.common.data_models.mai_message_data_model import MaiMessage, MessageInfo, UserInfo
|
||||
from src.common.data_models.message_component_data_model import MessageSequence
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from src.common.data_models.mai_message_data_model import GroupInfo, MaiMessage, MessageInfo, UserInfo
|
||||
from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent
|
||||
from src.config.config import global_config
|
||||
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
|
||||
from src.llm_models.payload_content.tool_option import ToolCall
|
||||
@@ -22,7 +27,10 @@ SOURCE_KEY = "maisaka_source"
|
||||
LLM_ROLE_KEY = "maisaka_llm_role"
|
||||
TOOL_CALL_ID_KEY = "maisaka_tool_call_id"
|
||||
TOOL_CALLS_KEY = "maisaka_tool_calls"
|
||||
SPEAKER_PREFIX_PATTERN = re.compile(r"^\[(?P<speaker>[^\]]+)\](?P<content>.*)$", re.DOTALL)
|
||||
SPEAKER_PREFIX_PATTERN = re.compile(
|
||||
r"^(?:(?P<timestamp>\d{2}:\d{2}:\d{2}))?(?:<mid:(?P<message_id>[^>]+)>)?\[(?P<speaker>[^\]]+)\](?P<content>.*)$",
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
|
||||
def _build_user_info_for_role(role: str) -> UserInfo:
|
||||
@@ -55,7 +63,7 @@ def _deserialize_tool_call(data: dict) -> ToolCall:
|
||||
|
||||
def build_message(
|
||||
role: str,
|
||||
content: str,
|
||||
content: str = "",
|
||||
*,
|
||||
message_kind: str = "normal",
|
||||
source: Optional[str] = None,
|
||||
@@ -63,6 +71,12 @@ def build_message(
|
||||
tool_calls: Optional[list[ToolCall]] = None,
|
||||
timestamp: Optional[datetime] = None,
|
||||
message_id: Optional[str] = None,
|
||||
platform: str = MAISAKA_PLATFORM,
|
||||
session_id: str = MAISAKA_SESSION_ID,
|
||||
user_info: Optional[UserInfo] = None,
|
||||
group_info: Optional[GroupInfo] = None,
|
||||
raw_message: Optional[MessageSequence] = None,
|
||||
display_text: Optional[str] = None,
|
||||
) -> MaiMessage:
|
||||
"""Build a MaiMessage for the Maisaka session history."""
|
||||
resolved_timestamp = timestamp or datetime.now()
|
||||
@@ -70,11 +84,11 @@ def build_message(
|
||||
message = MaiMessage(
|
||||
message_id=message_id or f"maisaka_{uuid4().hex}",
|
||||
timestamp=resolved_timestamp,
|
||||
platform=MAISAKA_PLATFORM,
|
||||
platform=platform,
|
||||
)
|
||||
message.message_info = MessageInfo(
|
||||
user_info=_build_user_info_for_role(resolved_role),
|
||||
group_info=None,
|
||||
user_info=user_info or _build_user_info_for_role(resolved_role),
|
||||
group_info=group_info,
|
||||
additional_config={
|
||||
LLM_ROLE_KEY: resolved_role,
|
||||
MESSAGE_KIND_KEY: message_kind,
|
||||
@@ -83,18 +97,26 @@ def build_message(
|
||||
TOOL_CALLS_KEY: [_serialize_tool_call(tool_call) for tool_call in (tool_calls or [])],
|
||||
},
|
||||
)
|
||||
message.session_id = MAISAKA_SESSION_ID
|
||||
message.raw_message = MessageSequence([])
|
||||
if content:
|
||||
message.session_id = session_id
|
||||
message.raw_message = raw_message if raw_message is not None else MessageSequence([])
|
||||
if raw_message is None and content:
|
||||
message.raw_message.text(content)
|
||||
message.processed_plain_text = content
|
||||
message.display_message = content
|
||||
visible_text = display_text if display_text is not None else content
|
||||
message.processed_plain_text = visible_text
|
||||
message.display_message = visible_text
|
||||
return message
|
||||
|
||||
|
||||
def format_speaker_content(speaker_name: str, content: str) -> str:
|
||||
def format_speaker_content(
|
||||
speaker_name: str,
|
||||
content: str,
|
||||
timestamp: Optional[datetime] = None,
|
||||
message_id: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Format visible conversation content with an explicit speaker label."""
|
||||
return f"[{speaker_name}]{content}"
|
||||
time_prefix = timestamp.strftime("%H:%M:%S") if timestamp is not None else ""
|
||||
message_id_prefix = f"<mid:{message_id}>" if message_id else ""
|
||||
return f"{time_prefix}{message_id_prefix}[{speaker_name}]{content}"
|
||||
|
||||
|
||||
def parse_speaker_content(content: str) -> tuple[Optional[str], str]:
|
||||
@@ -105,6 +127,39 @@ def parse_speaker_content(content: str) -> tuple[Optional[str], str]:
|
||||
return match.group("speaker"), match.group("content")
|
||||
|
||||
|
||||
def clone_message_sequence(message_sequence: MessageSequence) -> MessageSequence:
|
||||
"""Create a detached copy of a message sequence."""
|
||||
return MessageSequence([deepcopy(component) for component in message_sequence.components])
|
||||
|
||||
|
||||
def build_visible_text_from_sequence(message_sequence: MessageSequence) -> str:
|
||||
"""Extract visible text from a message sequence without forcing image descriptions."""
|
||||
parts: list[str] = []
|
||||
for component in message_sequence.components:
|
||||
if isinstance(component, TextComponent):
|
||||
parts.append(SPEAKER_PREFIX_PATTERN.sub(r"\g<timestamp>[\g<speaker>]\g<content>", component.text))
|
||||
continue
|
||||
|
||||
if isinstance(component, EmojiComponent):
|
||||
parts.append("[表情包]")
|
||||
continue
|
||||
|
||||
if isinstance(component, ImageComponent):
|
||||
parts.append("[图片]")
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
def _guess_image_format(image_bytes: bytes) -> Optional[str]:
|
||||
if not image_bytes:
|
||||
return None
|
||||
|
||||
try:
|
||||
with PILImage.open(BytesIO(image_bytes)) as image:
|
||||
return image.format.lower() if image.format else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def get_message_text(message: MaiMessage) -> str:
|
||||
if message.processed_plain_text is not None:
|
||||
return message.processed_plain_text
|
||||
@@ -156,7 +211,6 @@ def remove_last_perception(messages: list[MaiMessage]) -> None:
|
||||
|
||||
def to_llm_message(message: MaiMessage) -> Optional[Message]:
|
||||
role = get_message_role(message)
|
||||
content = get_message_text(message)
|
||||
tool_call_id = get_tool_call_id(message)
|
||||
tool_calls = get_tool_calls(message)
|
||||
|
||||
@@ -176,6 +230,28 @@ def to_llm_message(message: MaiMessage) -> Optional[Message]:
|
||||
builder.set_tool_calls(tool_calls)
|
||||
if role_type == RoleType.Tool and tool_call_id:
|
||||
builder.add_tool_call(tool_call_id)
|
||||
if content:
|
||||
builder.add_text_content(content)
|
||||
|
||||
has_content = False
|
||||
for component in message.raw_message.components:
|
||||
if isinstance(component, TextComponent):
|
||||
if component.text:
|
||||
builder.add_text_content(component.text)
|
||||
has_content = True
|
||||
continue
|
||||
|
||||
if isinstance(component, (ImageComponent, EmojiComponent)):
|
||||
image_format = _guess_image_format(component.binary_data)
|
||||
if image_format and component.binary_data:
|
||||
builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8"))
|
||||
has_content = True
|
||||
continue
|
||||
|
||||
if component.content:
|
||||
builder.add_text_content(component.content)
|
||||
has_content = True
|
||||
|
||||
if not has_content:
|
||||
content = get_message_text(message)
|
||||
if content:
|
||||
builder.add_text_content(content)
|
||||
return builder.build()
|
||||
|
||||
@@ -3,6 +3,7 @@ Maisaka runtime for non-CLI integrations.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import asyncio
|
||||
@@ -21,9 +22,15 @@ from .config import (
|
||||
ENABLE_COGNITION_MODULE,
|
||||
ENABLE_EMOTION_MODULE,
|
||||
ENABLE_KNOWLEDGE_MODULE,
|
||||
ENABLE_LIST_FILES,
|
||||
ENABLE_MCP,
|
||||
ENABLE_READ_FILE,
|
||||
ENABLE_WRITE_FILE,
|
||||
MERGE_USER_MESSAGES,
|
||||
)
|
||||
from .knowledge import retrieve_relevant_knowledge
|
||||
from .llm_service import MaiSakaLLMService
|
||||
from .mcp_client import MCPManager
|
||||
from .message_adapter import (
|
||||
build_message,
|
||||
build_visible_text_from_sequence,
|
||||
@@ -32,6 +39,7 @@ from .message_adapter import (
|
||||
get_message_role,
|
||||
remove_last_perception,
|
||||
)
|
||||
from .tool_handlers import handle_list_files, handle_mcp_tool, handle_read_file, handle_unknown_tool, handle_write_file
|
||||
|
||||
logger = get_logger("maisaka_runtime")
|
||||
|
||||
@@ -49,7 +57,9 @@ class MaisakaHeartFlowChatting:
|
||||
self.log_prefix = f"[{session_name}]"
|
||||
self._llm_service = MaiSakaLLMService(api_key="", base_url=None, model="")
|
||||
self._chat_history: list[MaiMessage] = []
|
||||
self._mcp_manager: Optional[MCPManager] = None
|
||||
self._pending_messages: list[SessionMessage] = []
|
||||
self._source_messages_by_id: dict[str, SessionMessage] = {}
|
||||
self._running = False
|
||||
self._loop_task: Optional[asyncio.Task] = None
|
||||
self._loop_lock = asyncio.Lock()
|
||||
@@ -66,6 +76,9 @@ class MaisakaHeartFlowChatting:
|
||||
if self._running:
|
||||
return
|
||||
|
||||
if ENABLE_MCP:
|
||||
await self._init_mcp()
|
||||
|
||||
self._running = True
|
||||
self._loop_task = asyncio.create_task(self._main_loop())
|
||||
logger.info(f"{self.log_prefix} Maisaka runtime started")
|
||||
@@ -87,6 +100,10 @@ class MaisakaHeartFlowChatting:
|
||||
finally:
|
||||
self._loop_task = None
|
||||
|
||||
if self._mcp_manager is not None:
|
||||
await self._mcp_manager.close()
|
||||
self._mcp_manager = None
|
||||
|
||||
logger.info(f"{self.log_prefix} Maisaka runtime stopped")
|
||||
|
||||
def adjust_talk_frequency(self, frequency: float) -> None:
|
||||
@@ -96,6 +113,7 @@ class MaisakaHeartFlowChatting:
|
||||
async def register_message(self, message: SessionMessage) -> None:
|
||||
"""Queue a newly received message for Maisaka processing."""
|
||||
self._pending_messages.append(message)
|
||||
self._source_messages_by_id[message.message_id] = message
|
||||
self._new_message_event.set()
|
||||
|
||||
async def _main_loop(self) -> None:
|
||||
@@ -118,32 +136,60 @@ class MaisakaHeartFlowChatting:
|
||||
self._pending_messages.clear()
|
||||
return drained_messages
|
||||
|
||||
async def _ingest_messages(self, messages: list[SessionMessage]) -> None:
|
||||
merged_sequence = await self._merge_messages(messages)
|
||||
merged_content = build_visible_text_from_sequence(merged_sequence).strip()
|
||||
if not merged_sequence.components:
|
||||
async def _init_mcp(self) -> None:
|
||||
"""Initialize MCP tools for the runtime and inject them into the planner."""
|
||||
config_path = Path(__file__).with_name("mcp_config.json")
|
||||
self._mcp_manager = await MCPManager.from_config(str(config_path))
|
||||
if self._mcp_manager is None:
|
||||
logger.info(f"{self.log_prefix} MCP not available for Maisaka runtime")
|
||||
return
|
||||
|
||||
mcp_tools = self._mcp_manager.get_openai_tools()
|
||||
if not mcp_tools:
|
||||
logger.info(f"{self.log_prefix} MCP manager initialized without exposed tools")
|
||||
return
|
||||
|
||||
self._llm_service.set_extra_tools(mcp_tools)
|
||||
logger.info(
|
||||
f"{self.log_prefix} Loaded {len(mcp_tools)} MCP tool(s) for Maisaka runtime:\n"
|
||||
f"{self._mcp_manager.get_tool_summary()}"
|
||||
)
|
||||
|
||||
async def _ingest_messages(self, messages: list[SessionMessage]) -> None:
|
||||
if self._chat_start_time is None:
|
||||
self._chat_start_time = messages[0].timestamp
|
||||
|
||||
self._last_user_input_time = messages[-1].timestamp
|
||||
self._user_input_times.extend(message.timestamp for message in messages)
|
||||
self._chat_history.append(
|
||||
build_message(
|
||||
role="user",
|
||||
content=merged_content,
|
||||
source="user",
|
||||
timestamp=messages[-1].timestamp,
|
||||
platform=messages[-1].platform,
|
||||
session_id=self.session_id,
|
||||
group_info=self._build_group_info(messages[-1]),
|
||||
user_info=self._build_runtime_user_info(),
|
||||
raw_message=merged_sequence,
|
||||
display_text=merged_content,
|
||||
if MERGE_USER_MESSAGES:
|
||||
merged_sequence = await self._merge_messages(messages)
|
||||
merged_content = build_visible_text_from_sequence(merged_sequence).strip()
|
||||
if not merged_sequence.components:
|
||||
return
|
||||
|
||||
self._chat_history.append(
|
||||
build_message(
|
||||
role="user",
|
||||
content=merged_content,
|
||||
source="user",
|
||||
timestamp=messages[-1].timestamp,
|
||||
platform=messages[-1].platform,
|
||||
session_id=self.session_id,
|
||||
group_info=self._build_group_info(messages[-1]),
|
||||
user_info=self._build_runtime_user_info(),
|
||||
raw_message=merged_sequence,
|
||||
display_text=merged_content,
|
||||
)
|
||||
)
|
||||
)
|
||||
self._trim_chat_history()
|
||||
self._trim_chat_history()
|
||||
return
|
||||
|
||||
for message in messages:
|
||||
history_message = await self._build_user_history_message(message)
|
||||
if history_message is None:
|
||||
continue
|
||||
self._chat_history.append(history_message)
|
||||
self._trim_chat_history()
|
||||
|
||||
async def _merge_messages(self, messages: list[SessionMessage]) -> MessageSequence:
|
||||
merged_sequence = MessageSequence([])
|
||||
@@ -151,7 +197,7 @@ class MaisakaHeartFlowChatting:
|
||||
for message in messages:
|
||||
user_info = message.message_info.user_info
|
||||
speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
|
||||
prefix = format_speaker_content(speaker_name, "", message.timestamp)
|
||||
prefix = format_speaker_content(speaker_name, "", message.timestamp, message.message_id)
|
||||
merged_sequence.text(prefix)
|
||||
|
||||
appended_component = False
|
||||
@@ -174,14 +220,69 @@ class MaisakaHeartFlowChatting:
|
||||
|
||||
return merged_sequence
|
||||
|
||||
async def _build_user_history_message(self, message: SessionMessage) -> Optional[MaiMessage]:
|
||||
user_sequence = await self._build_message_sequence(message)
|
||||
visible_text = build_visible_text_from_sequence(user_sequence).strip()
|
||||
if not user_sequence.components:
|
||||
return None
|
||||
|
||||
return build_message(
|
||||
role="user",
|
||||
content=visible_text,
|
||||
source="user",
|
||||
timestamp=message.timestamp,
|
||||
platform=message.platform,
|
||||
session_id=self.session_id,
|
||||
group_info=self._build_group_info(message),
|
||||
user_info=self._build_runtime_user_info(),
|
||||
raw_message=user_sequence,
|
||||
display_text=visible_text,
|
||||
)
|
||||
|
||||
async def _build_message_sequence(self, message: SessionMessage) -> MessageSequence:
|
||||
message_sequence = MessageSequence([])
|
||||
user_info = message.message_info.user_info
|
||||
speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
|
||||
message_sequence.text(format_speaker_content(speaker_name, "", message.timestamp, message.message_id))
|
||||
|
||||
appended_component = False
|
||||
if DIRECT_IMAGE_INPUT:
|
||||
source_sequence = getattr(message, "maisaka_original_raw_message", message.raw_message)
|
||||
else:
|
||||
source_sequence = message.raw_message
|
||||
|
||||
for component in clone_message_sequence(source_sequence).components:
|
||||
message_sequence.components.append(component)
|
||||
appended_component = True
|
||||
|
||||
if not appended_component:
|
||||
if not message.processed_plain_text:
|
||||
await message.process()
|
||||
content = (message.processed_plain_text or "").strip()
|
||||
if content:
|
||||
message_sequence.text(content)
|
||||
|
||||
return message_sequence
|
||||
|
||||
async def _run_internal_loop(self, anchor_message: SessionMessage) -> None:
|
||||
last_had_tool_calls = True
|
||||
|
||||
for _ in range(self._max_internal_rounds):
|
||||
for round_index in range(self._max_internal_rounds):
|
||||
logger.info(
|
||||
f"{self.log_prefix} Internal loop round {round_index + 1}/{self._max_internal_rounds} started "
|
||||
f"(history={len(self._chat_history)})"
|
||||
)
|
||||
if last_had_tool_calls:
|
||||
logger.info(f"{self.log_prefix} Building perception snapshot before planner call")
|
||||
await self._append_perception_snapshot()
|
||||
logger.info(f"{self.log_prefix} Perception snapshot step finished")
|
||||
|
||||
logger.info(f"{self.log_prefix} Calling Maisaka chat_loop_step")
|
||||
response = await self._llm_service.chat_loop_step(self._chat_history)
|
||||
logger.info(
|
||||
f"{self.log_prefix} chat_loop_step returned "
|
||||
f"(content_len={len(response.content or '')}, tool_calls={len(response.tool_calls)})"
|
||||
)
|
||||
response.raw_message.platform = anchor_message.platform
|
||||
response.raw_message.session_id = self.session_id
|
||||
response.raw_message.message_info.group_info = self._build_group_info(anchor_message)
|
||||
@@ -189,16 +290,20 @@ class MaisakaHeartFlowChatting:
|
||||
self._last_assistant_response_time = datetime.now()
|
||||
|
||||
if response.tool_calls:
|
||||
logger.info(f"{self.log_prefix} Handling {len(response.tool_calls)} tool call(s)")
|
||||
should_pause = await self._handle_tool_calls(response.tool_calls, response.content or "", anchor_message)
|
||||
logger.info(f"{self.log_prefix} Tool handling finished (should_pause={should_pause})")
|
||||
if should_pause:
|
||||
return
|
||||
last_had_tool_calls = True
|
||||
continue
|
||||
|
||||
if response.content:
|
||||
logger.info(f"{self.log_prefix} Planner returned content without tool calls; continuing inner loop")
|
||||
last_had_tool_calls = False
|
||||
continue
|
||||
|
||||
logger.info(f"{self.log_prefix} Planner returned empty content and no tool calls; leaving inner loop")
|
||||
return
|
||||
|
||||
logger.info(f"{self.log_prefix} Maisaka internal loop reached max rounds and paused")
|
||||
@@ -271,8 +376,10 @@ class MaisakaHeartFlowChatting:
|
||||
) -> bool:
|
||||
for tool_call in tool_calls:
|
||||
if tool_call.func_name == "reply":
|
||||
await self._handle_reply(tool_call, latest_thought, anchor_message)
|
||||
return True
|
||||
reply_sent = await self._handle_reply(tool_call, latest_thought, anchor_message)
|
||||
if reply_sent:
|
||||
return True
|
||||
continue
|
||||
|
||||
if tool_call.func_name == "no_reply":
|
||||
self._chat_history.append(
|
||||
@@ -302,28 +409,53 @@ class MaisakaHeartFlowChatting:
|
||||
)
|
||||
return True
|
||||
|
||||
self._chat_history.append(
|
||||
self._build_tool_message(
|
||||
tool_call,
|
||||
f"Unsupported runtime tool: {tool_call.func_name}",
|
||||
)
|
||||
)
|
||||
if tool_call.func_name == "write_file" and ENABLE_WRITE_FILE:
|
||||
await handle_write_file(tool_call, self._chat_history)
|
||||
continue
|
||||
|
||||
if tool_call.func_name == "read_file" and ENABLE_READ_FILE:
|
||||
await handle_read_file(tool_call, self._chat_history)
|
||||
continue
|
||||
|
||||
if tool_call.func_name == "list_files" and ENABLE_LIST_FILES:
|
||||
await handle_list_files(tool_call, self._chat_history)
|
||||
continue
|
||||
|
||||
if self._mcp_manager and self._mcp_manager.is_mcp_tool(tool_call.func_name):
|
||||
await handle_mcp_tool(tool_call, self._chat_history, self._mcp_manager)
|
||||
continue
|
||||
|
||||
await handle_unknown_tool(tool_call, self._chat_history)
|
||||
|
||||
return False
|
||||
|
||||
async def _handle_reply(self, tool_call: ToolCall, latest_thought: str, anchor_message: SessionMessage) -> None:
|
||||
async def _handle_reply(self, tool_call: ToolCall, latest_thought: str, anchor_message: SessionMessage) -> bool:
|
||||
target_message_id = str((tool_call.args or {}).get("message_id", "")).strip()
|
||||
if not target_message_id:
|
||||
self._chat_history.append(
|
||||
self._build_tool_message(tool_call, "reply requires a valid message_id argument.")
|
||||
)
|
||||
return False
|
||||
|
||||
target_message = self._source_messages_by_id.get(target_message_id)
|
||||
if target_message is None:
|
||||
self._chat_history.append(
|
||||
self._build_tool_message(tool_call, f"reply target message_id not found: {target_message_id}")
|
||||
)
|
||||
return False
|
||||
|
||||
reply_text = await self._llm_service.generate_reply(latest_thought, self._chat_history)
|
||||
sent = await send_service.text_to_stream(
|
||||
text=reply_text,
|
||||
stream_id=self.session_id,
|
||||
set_reply=True,
|
||||
reply_message=anchor_message,
|
||||
reply_message=target_message,
|
||||
typing=False,
|
||||
)
|
||||
tool_result = "Visible reply generated and sent." if sent else "Visible reply generation succeeded but send failed."
|
||||
self._chat_history.append(self._build_tool_message(tool_call, tool_result))
|
||||
if not sent:
|
||||
return
|
||||
return False
|
||||
|
||||
bot_name = global_config.bot.nickname.strip() or "MaiSaka"
|
||||
self._chat_history.append(
|
||||
@@ -331,12 +463,13 @@ class MaisakaHeartFlowChatting:
|
||||
role="user",
|
||||
content=format_speaker_content(bot_name, reply_text, datetime.now()),
|
||||
source="guided_reply",
|
||||
platform=anchor_message.platform,
|
||||
platform=target_message.platform or anchor_message.platform,
|
||||
session_id=self.session_id,
|
||||
group_info=self._build_group_info(anchor_message),
|
||||
group_info=self._build_group_info(target_message),
|
||||
user_info=self._build_runtime_user_info(),
|
||||
)
|
||||
)
|
||||
return True
|
||||
|
||||
def _build_tool_message(self, tool_call: ToolCall, content: str) -> MaiMessage:
|
||||
return build_message(
|
||||
|
||||
Reference in New Issue
Block a user