From fe6ccaaf865c3d7567f18d5bde9534a143978565 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Wed, 1 Apr 2026 19:56:08 +0800 Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E9=83=A8=E5=88=86=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E4=B8=8D=E6=94=AF=E6=8C=81gif?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/openai_client.py | 101 +++++++++++++++++-- src/maisaka/builtin_tools.py | 38 +++---- 2 files changed, 114 insertions(+), 25 deletions(-) diff --git a/src/llm_models/model_client/openai_client.py b/src/llm_models/model_client/openai_client.py index d7730df9..5010f1d0 100644 --- a/src/llm_models/model_client/openai_client.py +++ b/src/llm_models/model_client/openai_client.py @@ -1,12 +1,12 @@ -from collections.abc import Iterable -from dataclasses import dataclass, field -from typing import Any, Callable, Coroutine, Dict, List, Tuple, cast - import asyncio import base64 +import binascii import io import json import re +from collections.abc import Iterable +from dataclasses import dataclass, field +from typing import Any, Callable, Coroutine, Dict, List, Tuple, cast from json_repair import repair_json from openai import APIConnectionError, APIStatusError, AsyncOpenAI, AsyncStream @@ -27,6 +27,7 @@ from openai.types.chat import ( ) from openai.types.shared_params.function_definition import FunctionDefinition from openai.types.chat.chat_completion_chunk import ChoiceDelta +from PIL import Image as PILImage from src.common.logger import get_logger from src.config.model_configs import APIProvider, ReasoningParseMode, ToolArgumentParseMode @@ -62,6 +63,9 @@ from .base_client import ( logger = get_logger("llm_models") +SUPPORTED_OPENAI_IMAGE_FORMATS = {"jpeg", "png", "webp"} +"""OpenAI 兼容图片输入稳定支持的格式集合。""" + THINK_CONTENT_PATTERN = re.compile( r"(?P.*?)(?P.*)|(?P.*)|(?P.+)", re.DOTALL, @@ -149,14 +153,85 @@ def _build_image_content_part(part: ImageMessagePart) -> ChatCompletionContentPa Returns: ChatCompletionContentPartImageParam: OpenAI 兼容的图片片段。 """ + normalized_image = _normalize_image_part_for_openai(part) + if normalized_image is None: + raise ValueError("图片数据无效,无法构建图片消息片段") + + image_format, image_base64 = normalized_image return { "type": "image_url", "image_url": { - "url": f"data:image/{part.normalized_image_format};base64,{part.image_base64}", + "url": f"data:image/{image_format};base64,{image_base64}", }, } +def _normalize_image_part_for_openai(part: ImageMessagePart) -> Tuple[str, str] | None: + """将图片片段规范化为 OpenAI 兼容格式。 + + Args: + part: 内部图片片段。 + + Returns: + Tuple[str, str] | None: `(image_format, image_base64)`;无法解析时返回 `None`。 + """ + try: + image_bytes = base64.b64decode(part.image_base64, validate=True) + except (binascii.Error, ValueError) as exc: + logger.warning(f"图片 Base64 解码失败,已跳过该图片片段: {exc}") + return None + + try: + with PILImage.open(io.BytesIO(image_bytes)) as image: + image_format = (image.format or part.normalized_image_format).lower() + if image_format in {"jpg", "jpeg"}: + image_format = "jpeg" + + if image_format in SUPPORTED_OPENAI_IMAGE_FORMATS: + return image_format, part.image_base64 + + if image_format == "gif": + frame_count = getattr(image, "n_frames", 1) + frames: List[PILImage.Image] = [] + durations: List[int] = [] + + for frame_index in range(frame_count): + image.seek(frame_index) + frame = image.copy() + if frame.mode not in {"RGB", "RGBA"}: + frame = frame.convert("RGBA") + frames.append(frame) + durations.append(int(image.info.get("duration", 100) or 100)) + + output_buffer = io.BytesIO() + save_kwargs: Dict[str, Any] = { + "format": "WEBP", + "save_all": True, + "append_images": frames[1:], + "duration": durations, + "loop": int(image.info.get("loop", 0) or 0), + } + if frame_count > 1: + save_kwargs["lossless"] = True + + frames[0].save(output_buffer, **save_kwargs) + converted_base64 = base64.b64encode(output_buffer.getvalue()).decode("utf-8") + return "webp", converted_base64 + + image.seek(0) + normalized_image = image.copy() + if normalized_image.mode not in {"RGB", "RGBA"}: + normalized_image = normalized_image.convert("RGBA") + + output_buffer = io.BytesIO() + normalized_image.save(output_buffer, format="PNG") + converted_base64 = base64.b64encode(output_buffer.getvalue()).decode("utf-8") + return "png", converted_base64 + except Exception as exc: + logger.warning(f"图片内容无法被识别为有效图片,已跳过该图片片段: {exc}") + return None + + def _convert_response_format(response_format: RespFormat | None) -> Any: """将内部响应格式转换为 OpenAI 兼容结构。 @@ -222,7 +297,21 @@ def _convert_user_message_content(message: Message) -> str | List[ChatCompletion if isinstance(part, TextMessagePart): content.append(_build_text_content_part(part.text)) continue - content.append(_build_image_content_part(part)) + + normalized_image = _normalize_image_part_for_openai(part) + if normalized_image is None: + content.append(_build_text_content_part("[图片内容不可用]")) + continue + + image_format, image_base64 = normalized_image + content.append( + { + "type": "image_url", + "image_url": { + "url": f"data:image/{image_format};base64,{image_base64}", + }, + } + ) return content diff --git a/src/maisaka/builtin_tools.py b/src/maisaka/builtin_tools.py index 0222b173..14aed475 100644 --- a/src/maisaka/builtin_tools.py +++ b/src/maisaka/builtin_tools.py @@ -100,25 +100,25 @@ def create_builtin_tool_specs() -> List[ToolSpec]: "required": ["words"], }, ), - _build_tool_spec( - name="query_person_info", - brief_description="查询某个人的档案和相关记忆信息。", - parameters_schema={ - "type": "object", - "properties": { - "person_name": { - "type": "string", - "description": "人物名称、昵称或用户 ID。", - }, - "limit": { - "type": "integer", - "description": "最多返回多少条匹配记录。", - "default": 3, - }, - }, - "required": ["person_name"], - }, - ), + # _build_tool_spec( + # name="query_person_info", + # brief_description="查询某个人的档案和相关记忆信息。", + # parameters_schema={ + # "type": "object", + # "properties": { + # "person_name": { + # "type": "string", + # "description": "人物名称、昵称或用户 ID。", + # }, + # "limit": { + # "type": "integer", + # "description": "最多返回多少条匹配记录。", + # "default": 3, + # }, + # }, + # "required": ["person_name"], + # }, + # ), _build_tool_spec( name="no_reply", brief_description="本轮不进行回复,等待其他用户的新消息。",