mai-bot/src/maisaka/builtin_tool/send_emoji.py

"""send_emoji 内置工具。"""

from datetime import datetime
from io import BytesIO
from json import dumps
from random import sample
from typing import Any, Dict, Optional

import asyncio
import math

from PIL import Image as PILImage
from PIL import ImageDraw, ImageFont
from pydantic import BaseModel, Field as PydanticField

from src.emoji_system.emoji_manager import _is_vlm_task_configured, emoji_manager
from src.emoji_system.maisaka_tool import send_emoji_for_maisaka
from src.common.data_models.image_data_model import MaiEmoji
from src.common.data_models.message_component_data_model import ImageComponent, MessageSequence, TextComponent
from src.common.logger import get_logger
from src.config.config import config_manager, global_config
from src.core.tooling import ToolExecutionContext, ToolExecutionResult, ToolInvocation, ToolSpec
from src.llm_models.payload_content.message import MessageBuilder, RoleType
from src.maisaka.context_messages import (
    LLMContextMessage,
    ReferenceMessage,
    ReferenceMessageType,
    SessionBackedMessage,
)
from src.plugin_runtime.hook_payloads import serialize_prompt_messages

from .context import BuiltinToolRuntimeContext

logger = get_logger("maisaka_builtin_send_emoji")

_EMOJI_SUB_AGENT_CONTEXT_LIMIT = 12
_EMOJI_SUB_AGENT_MAX_TOKENS = 240
_EMOJI_MAX_CANDIDATE_COUNT = 64
_EMOJI_CANDIDATE_TILE_SIZE = 256
_EMOJI_SUCCESS_MESSAGE = "表情包发送成功"
_EMOJI_VLM_NOT_CONFIGURED_MESSAGE = "错误，没有配置视觉模型，无法使用表情包功能"


class EmojiSelectionResult(BaseModel):
    """表情包子代理的结构化选择结果。"""

    emoji_index: int = PydanticField(default=1, description="选中的表情包序号，从 1 开始计数。")
    reason: str = PydanticField(default="", description="选择这张表情包的简短理由。")


def get_tool_spec() -> ToolSpec:
    """获取 send_emoji 工具声明。"""

    return ToolSpec(
        name="send_emoji",
        brief_description="发送一个合适的表情包来辅助表达情绪。",
        detailed_description="无需参数，直接发送一个合适的表情包。",
        parameters_schema={
            "type": "object",
            "properties": {},
        },
        provider_name="maisaka_builtin",
        provider_type="builtin",
    )


async def _load_emoji_bytes(emoji: MaiEmoji) -> bytes:
    """读取单个表情包图片字节。"""

    return await asyncio.to_thread(emoji.full_path.read_bytes)


def _get_emoji_candidate_count() -> int:
    """获取本次表情包候选数量配置。"""

    configured_count = int(getattr(global_config.emoji, "emoji_send_num", 25))
    return max(1, min(configured_count, _EMOJI_MAX_CANDIDATE_COUNT))


def _calculate_grid_shape(candidate_count: int) -> tuple[int, int]:
    """根据候选数量计算尽量接近矩形的拼图行列数。"""

    if candidate_count <= 0:
        return 1, 1

    best_columns = candidate_count
    best_rows = 1
    best_score: tuple[int, int] | None = None

    for columns in range(1, candidate_count + 1):
        rows = math.ceil(candidate_count / columns)
        empty_slots = rows * columns - candidate_count
        aspect_gap = abs(columns - rows)
        score = (aspect_gap, empty_slots)
        if best_score is None or score < best_score:
            best_score = score
            best_columns = columns
            best_rows = rows

    return best_rows, best_columns


def _build_placeholder_tile(label: str, tile_size: int) -> PILImage.Image:
    """构建图片读取失败时使用的占位图。"""

    tile = PILImage.new("RGB", (tile_size, tile_size), color=(245, 245, 245))
    draw = ImageDraw.Draw(tile)
    font = ImageFont.load_default()
    text_bbox = draw.textbbox((0, 0), label, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    draw.text(
        ((tile_size - text_width) / 2, (tile_size - text_height) / 2),
        label,
        fill=(80, 80, 80),
        font=font,
    )
    return tile


def _build_labeled_tile(image_bytes: bytes, index: int, tile_size: int) -> PILImage.Image:
    """构建带序号角标的候选图片块。"""

    try:
        with PILImage.open(BytesIO(image_bytes)) as raw_image:
            image = raw_image.convert("RGBA")
    except Exception:
        return _build_placeholder_tile(str(index), tile_size)

    image.thumbnail((tile_size, tile_size))
    tile = PILImage.new("RGBA", (tile_size, tile_size), color=(255, 255, 255, 255))
    offset_x = (tile_size - image.width) // 2
    offset_y = (tile_size - image.height) // 2
    tile.paste(image, (offset_x, offset_y), image)

    draw = ImageDraw.Draw(tile)
    font = ImageFont.load_default()
    badge_size = 56
    badge_margin = 14
    draw.rounded_rectangle(
        (
            badge_margin,
            badge_margin,
            badge_margin + badge_size,
            badge_margin + badge_size,
        ),
        radius=8,
        fill=(0, 0, 0, 180),
    )
    label = str(index)
    text_bbox = draw.textbbox((0, 0), label, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    draw.text(
        (
            badge_margin + (badge_size - text_width) / 2,
            badge_margin + (badge_size - text_height) / 2 - 1,
        ),
        label,
        fill=(255, 255, 255, 255),
        font=font,
    )
    return tile


def _merge_emoji_tiles(image_bytes_list: list[bytes]) -> bytes:
    """将候选表情图拼接成一张尽量接近矩形的网格图片。"""

    tile_size = _EMOJI_CANDIDATE_TILE_SIZE
    gap = 12
    candidate_count = len(image_bytes_list)
    grid_rows, grid_columns = _calculate_grid_shape(candidate_count)
    tiles = [
        _build_labeled_tile(image_bytes=image_bytes, index=index, tile_size=tile_size)
        for index, image_bytes in enumerate(image_bytes_list, start=1)
    ]
    canvas_width = tile_size * grid_columns + gap * (grid_columns - 1)
    canvas_height = tile_size * grid_rows + gap * (grid_rows - 1)
    canvas = PILImage.new("RGBA", (canvas_width, canvas_height), color=(255, 255, 255, 255))

    for index, tile in enumerate(tiles):
        row = index // grid_columns
        column = index % grid_columns
        offset_x = column * (tile_size + gap)
        offset_y = row * (tile_size + gap)
        canvas.paste(tile, (offset_x, offset_y), tile)

    output = BytesIO()
    canvas.convert("RGB").save(output, format="PNG")
    return output.getvalue()


async def _build_emoji_candidate_message(emojis: list[MaiEmoji]) -> SessionBackedMessage:
    """构建供子代理挑选的拼图候选消息。"""

    image_bytes_list = await asyncio.gather(*[_load_emoji_bytes(emoji) for emoji in emojis])
    merged_image_bytes = await asyncio.to_thread(_merge_emoji_tiles, list(image_bytes_list))
    raw_message = MessageSequence(
        [
            TextComponent("请从这张 5x5 拼图中选择一个序号。"),
            ImageComponent(binary_hash="", binary_data=merged_image_bytes),
        ]
    )
    return SessionBackedMessage(
        raw_message=raw_message,
        visible_text="[表情包拼图候选]",
        timestamp=datetime.now(),
        source_kind="emoji_candidate",
    )


def _build_send_emoji_monitor_detail(
    *,
    request_messages: Optional[list[dict[str, Any]]] = None,
    reasoning_text: str = "",
    output_text: str = "",
    metrics: Optional[Dict[str, Any]] = None,
    extra_sections: Optional[list[dict[str, str]]] = None,
) -> Dict[str, Any]:
    """构建 send_emoji 工具统一监控详情。"""

    detail: Dict[str, Any] = {}
    if isinstance(request_messages, list) and request_messages:
        detail["request_messages"] = request_messages
        detail["prompt_text"] = dumps(request_messages, ensure_ascii=False, indent=2)
    if reasoning_text.strip():
        detail["reasoning_text"] = reasoning_text.strip()
    if output_text.strip():
        detail["output_text"] = output_text.strip()
    if isinstance(metrics, dict) and metrics:
        detail["metrics"] = dict(metrics)
    normalized_sections = [
        {
            "title": str(section.get("title") or "").strip(),
            "content": str(section.get("content") or "").strip(),
        }
        for section in extra_sections or []
        if isinstance(section, dict)
        and str(section.get("title") or "").strip()
        and str(section.get("content") or "").strip()
    ]
    if normalized_sections:
        detail["extra_sections"] = normalized_sections
    return detail


def _build_send_emoji_monitor_metadata(
    selection_metadata: Dict[str, Any],
    *,
    send_result: Optional[Any] = None,
    error_message: str = "",
) -> Dict[str, Any]:
    """根据表情选择与发送结果构建统一监控 metadata。"""

    raw_detail = selection_metadata.get("monitor_detail")
    detail = dict(raw_detail) if isinstance(raw_detail, dict) else {}
    extra_sections = list(detail.get("extra_sections", [])) if isinstance(detail.get("extra_sections"), list) else []

    if send_result is not None:
        result_lines = [
            f"命中情绪：{send_result.matched_emotion or '未命中'}",
            f"表情描述：{send_result.description or '无描述'}",
            f"情绪标签：{'、'.join(send_result.emotions) if send_result.emotions else '无'}",
            f"发送结果：{send_result.message or ('成功' if send_result.success else '失败')}",
        ]
        extra_sections.append({
            "title": "表情发送结果",
            "content": "\n".join(result_lines),
        })
    elif error_message.strip():
        extra_sections.append({
            "title": "表情发送结果",
            "content": f"发送结果：{error_message.strip()}",
        })

    if extra_sections:
        detail["extra_sections"] = extra_sections

    if detail:
        return {"monitor_detail": detail}
    return {}


def _resolve_emoji_selector_model_task_name() -> str:
    """根据 planner 模型视觉能力选择表情选择子代理的模型任务。"""

    model_config = config_manager.get_model_config()
    planner_models = [
        model_name
        for model_name in model_config.model_task_config.planner.model_list
        if str(model_name).strip()
    ]
    models_by_name = {model.name: model for model in model_config.models}
    if planner_models and all(
        model_name in models_by_name and models_by_name[model_name].visual
        for model_name in planner_models
    ):
        return "planner"
    return "vlm"


def _is_missing_visual_model_error(exc: Exception) -> bool:
    """判断是否为未配置视觉模型导致的选择失败。"""

    error_text = str(exc)
    return _EMOJI_VLM_NOT_CONFIGURED_MESSAGE in error_text or "未找到名为 '' 的模型" in error_text


async def _select_emoji_with_sub_agent(
    tool_ctx: BuiltinToolRuntimeContext,
    reasoning: str,
    context_texts: list[str],
    sample_size: int,
    selection_metadata: Optional[Dict[str, Any]] = None,
) -> tuple[MaiEmoji | None, str]:
    """通过临时子代理从候选表情包中选出一个结果。"""

    del reasoning, context_texts, sample_size

    available_emojis = list(emoji_manager.emojis)
    if not available_emojis:
        return None, ""

    total_candidate_count = min(len(available_emojis), _get_emoji_candidate_count())
    sampled_emojis = sample(available_emojis, total_candidate_count)
    candidate_message = await _build_emoji_candidate_message(sampled_emojis)
    grid_rows, grid_columns = _calculate_grid_shape(len(sampled_emojis))

    system_prompt = (
        "你是 Maisaka 的临时表情包选择子代理。\n"
        f"你会收到群聊上下文，以及 1 条额外候选消息，其中包含一张 {grid_rows}x{grid_columns} 的表情包拼图，"
        f"一共 {len(sampled_emojis)} 个位置。\n"
        f"每张小图左上角都有一个较大的序号，范围是 1 到 {len(sampled_emojis)}。\n"
        f"你的任务是根据上下文和当前语气，从这 {len(sampled_emojis)} 张图里选出最合适的一张表情包。\n"
        "你必须返回一个 JSON 对象（json object），不要输出任何 JSON 之外的内容。\n"
        '返回格式固定为：{"emoji_index":1,"reason":"简短理由"}'
    )
    prompt_message = ReferenceMessage(
        content=(
            f"[选择任务]\n"
            f"候选总数: {len(sampled_emojis)}\n"
            f"拼图布局: {grid_rows}x{grid_columns}\n"
            "请只输出 JSON。"
        ),
        timestamp=datetime.now(),
        reference_type=ReferenceMessageType.TOOL_HINT,
        remaining_uses_value=1,
        display_prefix="[表情包选择任务]",
    )
    request_messages = [
        MessageBuilder().set_role(RoleType.System).add_text_content(system_prompt).build(),
    ]
    prompt_llm_message = prompt_message.to_llm_message()
    if prompt_llm_message is not None:
        request_messages.append(prompt_llm_message)
    candidate_to_llm_message = getattr(candidate_message, "to_llm_message", None)
    candidate_llm_message = candidate_to_llm_message() if callable(candidate_to_llm_message) else None
    if candidate_llm_message is not None:
        request_messages.append(candidate_llm_message)
    serialized_request_messages = serialize_prompt_messages(request_messages)

    model_task_name = _resolve_emoji_selector_model_task_name()
    if model_task_name == "vlm" and not _is_vlm_task_configured():
        raise RuntimeError(_EMOJI_VLM_NOT_CONFIGURED_MESSAGE)

    selection_started_at = datetime.now()
    response = await tool_ctx.runtime.run_sub_agent(
        context_message_limit=_EMOJI_SUB_AGENT_CONTEXT_LIMIT,
        system_prompt=system_prompt,
        extra_messages=[prompt_message, candidate_message],
        max_tokens=_EMOJI_SUB_AGENT_MAX_TOKENS,
        model_task_name=model_task_name,
    )
    selection_duration_ms = round((datetime.now() - selection_started_at).total_seconds() * 1000, 2)

    selection_metrics: Dict[str, Any] = {
        "prompt_tokens": response.prompt_tokens,
        "completion_tokens": response.completion_tokens,
        "total_tokens": response.total_tokens,
        "overall_ms": selection_duration_ms,
    }

    try:
        selection = EmojiSelectionResult.model_validate_json(response.content or "")
    except Exception as exc:
        logger.warning(f"{tool_ctx.runtime.log_prefix} 表情包子代理结果解析失败，将回退到候选首项: {exc}")
        if selection_metadata is not None:
            selection_metadata["monitor_detail"] = _build_send_emoji_monitor_detail(
                request_messages=serialized_request_messages,
                output_text=response.content or "",
                metrics=selection_metrics,
                extra_sections=[{
                    "title": "解析异常",
                    "content": str(exc),
                }],
            )
        fallback_emoji = sampled_emojis[0] if sampled_emojis else None
        return fallback_emoji, ""

    if selection_metadata is not None:
        selection_metadata["reason"] = selection.reason.strip()
        selection_metadata["monitor_detail"] = _build_send_emoji_monitor_detail(
            request_messages=serialized_request_messages,
            reasoning_text=selection.reason,
            output_text=response.content or "",
            metrics=selection_metrics,
        )

    emoji_index = int(selection.emoji_index)
    if emoji_index < 1 or emoji_index > len(sampled_emojis):
        logger.warning(
            f"{tool_ctx.runtime.log_prefix} 表情包子代理返回了无效序号: {emoji_index!r}，将回退到第 1 张"
        )
        emoji_index = 1

    return sampled_emojis[emoji_index - 1], ""


async def handle_tool(
    tool_ctx: BuiltinToolRuntimeContext,
    invocation: ToolInvocation,
    context: Optional[ToolExecutionContext] = None,
) -> ToolExecutionResult:
    """执行 send_emoji 内置工具。"""

    del context
    context_texts = [
        message.processed_plain_text.strip()
        for message in tool_ctx.runtime._chat_history[-5:]
        if isinstance(message, LLMContextMessage) and message.processed_plain_text.strip()
    ]
    structured_result: Dict[str, Any] = {
        "success": False,
        "message": "",
        "description": "",
        "emotion": [],
        "matched_emotion": "",
        "reason": "",
    }
    selection_metadata: Dict[str, Any] = {"reason": "", "monitor_detail": {}}
    requested_emotion = ""
    if isinstance(invocation.arguments, dict):
        requested_emotion = str(invocation.arguments.get("emotion") or "").strip()

    logger.info(f"{tool_ctx.runtime.log_prefix} 触发表情包发送工具")

    try:
        send_result = await send_emoji_for_maisaka(
            stream_id=tool_ctx.runtime.session_id,
            requested_emotion=requested_emotion,
            reasoning=tool_ctx.engine.last_reasoning_content,
            context_texts=context_texts,
            emoji_selector=lambda _requested_emotion, reasoning, context_texts, sample_size: _select_emoji_with_sub_agent(
                tool_ctx,
                reasoning,
                list(context_texts or []),
                sample_size,
                selection_metadata,
            ),
        )
    except Exception as exc:
        logger.exception(f"{tool_ctx.runtime.log_prefix} 发送表情包时发生异常: {exc}")
        if _is_missing_visual_model_error(exc):
            structured_result["message"] = _EMOJI_VLM_NOT_CONFIGURED_MESSAGE
        else:
            structured_result["message"] = f"发送表情包时发生异常：{exc}"
        return tool_ctx.build_failure_result(
            invocation.tool_name,
            structured_result["message"],
            structured_content=structured_result,
            metadata=_build_send_emoji_monitor_metadata(
                selection_metadata,
                error_message=structured_result["message"],
            ),
        )

    if send_result.success:
        structured_result["message"] = _EMOJI_SUCCESS_MESSAGE
        structured_result["reason"] = selection_metadata["reason"]
        logger.info(
            f"{tool_ctx.runtime.log_prefix} 表情包发送成功 "
            f"描述={send_result.description!r} 情绪标签={send_result.emotions} "
            f"命中情绪={send_result.matched_emotion!r}"
        )
        if send_result.sent_message is None:
            tool_ctx.append_sent_emoji_to_chat_history(
                emoji_base64=send_result.emoji_base64,
                success_message=_EMOJI_SUCCESS_MESSAGE,
            )
        structured_result["success"] = True
        return tool_ctx.build_success_result(
            invocation.tool_name,
            selection_metadata["reason"] or _EMOJI_SUCCESS_MESSAGE,
            structured_content=structured_result,
            metadata=_build_send_emoji_monitor_metadata(
                selection_metadata,
                send_result=send_result,
            ),
        )

    structured_result["description"] = send_result.description
    structured_result["emotion"] = list(send_result.emotions)
    structured_result["matched_emotion"] = send_result.matched_emotion
    structured_result["message"] = send_result.message

    logger.warning(
        f"{tool_ctx.runtime.log_prefix} 表情包发送失败 "
        f"错误信息={send_result.message}"
    )
    return tool_ctx.build_failure_result(
        invocation.tool_name,
        structured_result["message"],
        structured_content=structured_result,
        metadata=_build_send_emoji_monitor_metadata(
            selection_metadata,
            send_result=send_result,
        ),
    )