feat:提供原生vlm支持
This commit is contained in:
@@ -2,13 +2,18 @@
|
||||
MaiSaka message adapters built on top of the main project's MaiMessage model.
|
||||
"""
|
||||
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
import re
|
||||
from io import BytesIO
|
||||
from typing import Optional
|
||||
from uuid import uuid4
|
||||
import base64
|
||||
import re
|
||||
|
||||
from src.common.data_models.mai_message_data_model import MaiMessage, MessageInfo, UserInfo
|
||||
from src.common.data_models.message_component_data_model import MessageSequence
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from src.common.data_models.mai_message_data_model import GroupInfo, MaiMessage, MessageInfo, UserInfo
|
||||
from src.common.data_models.message_component_data_model import EmojiComponent, ImageComponent, MessageSequence, TextComponent
|
||||
from src.config.config import global_config
|
||||
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
|
||||
from src.llm_models.payload_content.tool_option import ToolCall
|
||||
@@ -22,7 +27,10 @@ SOURCE_KEY = "maisaka_source"
|
||||
LLM_ROLE_KEY = "maisaka_llm_role"
|
||||
TOOL_CALL_ID_KEY = "maisaka_tool_call_id"
|
||||
TOOL_CALLS_KEY = "maisaka_tool_calls"
|
||||
SPEAKER_PREFIX_PATTERN = re.compile(r"^\[(?P<speaker>[^\]]+)\](?P<content>.*)$", re.DOTALL)
|
||||
SPEAKER_PREFIX_PATTERN = re.compile(
|
||||
r"^(?:(?P<timestamp>\d{2}:\d{2}:\d{2}))?(?:<mid:(?P<message_id>[^>]+)>)?\[(?P<speaker>[^\]]+)\](?P<content>.*)$",
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
|
||||
def _build_user_info_for_role(role: str) -> UserInfo:
|
||||
@@ -55,7 +63,7 @@ def _deserialize_tool_call(data: dict) -> ToolCall:
|
||||
|
||||
def build_message(
|
||||
role: str,
|
||||
content: str,
|
||||
content: str = "",
|
||||
*,
|
||||
message_kind: str = "normal",
|
||||
source: Optional[str] = None,
|
||||
@@ -63,6 +71,12 @@ def build_message(
|
||||
tool_calls: Optional[list[ToolCall]] = None,
|
||||
timestamp: Optional[datetime] = None,
|
||||
message_id: Optional[str] = None,
|
||||
platform: str = MAISAKA_PLATFORM,
|
||||
session_id: str = MAISAKA_SESSION_ID,
|
||||
user_info: Optional[UserInfo] = None,
|
||||
group_info: Optional[GroupInfo] = None,
|
||||
raw_message: Optional[MessageSequence] = None,
|
||||
display_text: Optional[str] = None,
|
||||
) -> MaiMessage:
|
||||
"""Build a MaiMessage for the Maisaka session history."""
|
||||
resolved_timestamp = timestamp or datetime.now()
|
||||
@@ -70,11 +84,11 @@ def build_message(
|
||||
message = MaiMessage(
|
||||
message_id=message_id or f"maisaka_{uuid4().hex}",
|
||||
timestamp=resolved_timestamp,
|
||||
platform=MAISAKA_PLATFORM,
|
||||
platform=platform,
|
||||
)
|
||||
message.message_info = MessageInfo(
|
||||
user_info=_build_user_info_for_role(resolved_role),
|
||||
group_info=None,
|
||||
user_info=user_info or _build_user_info_for_role(resolved_role),
|
||||
group_info=group_info,
|
||||
additional_config={
|
||||
LLM_ROLE_KEY: resolved_role,
|
||||
MESSAGE_KIND_KEY: message_kind,
|
||||
@@ -83,18 +97,26 @@ def build_message(
|
||||
TOOL_CALLS_KEY: [_serialize_tool_call(tool_call) for tool_call in (tool_calls or [])],
|
||||
},
|
||||
)
|
||||
message.session_id = MAISAKA_SESSION_ID
|
||||
message.raw_message = MessageSequence([])
|
||||
if content:
|
||||
message.session_id = session_id
|
||||
message.raw_message = raw_message if raw_message is not None else MessageSequence([])
|
||||
if raw_message is None and content:
|
||||
message.raw_message.text(content)
|
||||
message.processed_plain_text = content
|
||||
message.display_message = content
|
||||
visible_text = display_text if display_text is not None else content
|
||||
message.processed_plain_text = visible_text
|
||||
message.display_message = visible_text
|
||||
return message
|
||||
|
||||
|
||||
def format_speaker_content(speaker_name: str, content: str) -> str:
|
||||
def format_speaker_content(
|
||||
speaker_name: str,
|
||||
content: str,
|
||||
timestamp: Optional[datetime] = None,
|
||||
message_id: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Format visible conversation content with an explicit speaker label."""
|
||||
return f"[{speaker_name}]{content}"
|
||||
time_prefix = timestamp.strftime("%H:%M:%S") if timestamp is not None else ""
|
||||
message_id_prefix = f"<mid:{message_id}>" if message_id else ""
|
||||
return f"{time_prefix}{message_id_prefix}[{speaker_name}]{content}"
|
||||
|
||||
|
||||
def parse_speaker_content(content: str) -> tuple[Optional[str], str]:
|
||||
@@ -105,6 +127,39 @@ def parse_speaker_content(content: str) -> tuple[Optional[str], str]:
|
||||
return match.group("speaker"), match.group("content")
|
||||
|
||||
|
||||
def clone_message_sequence(message_sequence: MessageSequence) -> MessageSequence:
|
||||
"""Create a detached copy of a message sequence."""
|
||||
return MessageSequence([deepcopy(component) for component in message_sequence.components])
|
||||
|
||||
|
||||
def build_visible_text_from_sequence(message_sequence: MessageSequence) -> str:
|
||||
"""Extract visible text from a message sequence without forcing image descriptions."""
|
||||
parts: list[str] = []
|
||||
for component in message_sequence.components:
|
||||
if isinstance(component, TextComponent):
|
||||
parts.append(SPEAKER_PREFIX_PATTERN.sub(r"\g<timestamp>[\g<speaker>]\g<content>", component.text))
|
||||
continue
|
||||
|
||||
if isinstance(component, EmojiComponent):
|
||||
parts.append("[表情包]")
|
||||
continue
|
||||
|
||||
if isinstance(component, ImageComponent):
|
||||
parts.append("[图片]")
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
def _guess_image_format(image_bytes: bytes) -> Optional[str]:
|
||||
if not image_bytes:
|
||||
return None
|
||||
|
||||
try:
|
||||
with PILImage.open(BytesIO(image_bytes)) as image:
|
||||
return image.format.lower() if image.format else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def get_message_text(message: MaiMessage) -> str:
|
||||
if message.processed_plain_text is not None:
|
||||
return message.processed_plain_text
|
||||
@@ -156,7 +211,6 @@ def remove_last_perception(messages: list[MaiMessage]) -> None:
|
||||
|
||||
def to_llm_message(message: MaiMessage) -> Optional[Message]:
|
||||
role = get_message_role(message)
|
||||
content = get_message_text(message)
|
||||
tool_call_id = get_tool_call_id(message)
|
||||
tool_calls = get_tool_calls(message)
|
||||
|
||||
@@ -176,6 +230,28 @@ def to_llm_message(message: MaiMessage) -> Optional[Message]:
|
||||
builder.set_tool_calls(tool_calls)
|
||||
if role_type == RoleType.Tool and tool_call_id:
|
||||
builder.add_tool_call(tool_call_id)
|
||||
if content:
|
||||
builder.add_text_content(content)
|
||||
|
||||
has_content = False
|
||||
for component in message.raw_message.components:
|
||||
if isinstance(component, TextComponent):
|
||||
if component.text:
|
||||
builder.add_text_content(component.text)
|
||||
has_content = True
|
||||
continue
|
||||
|
||||
if isinstance(component, (ImageComponent, EmojiComponent)):
|
||||
image_format = _guess_image_format(component.binary_data)
|
||||
if image_format and component.binary_data:
|
||||
builder.add_image_content(image_format, base64.b64encode(component.binary_data).decode("utf-8"))
|
||||
has_content = True
|
||||
continue
|
||||
|
||||
if component.content:
|
||||
builder.add_text_content(component.content)
|
||||
has_content = True
|
||||
|
||||
if not has_content:
|
||||
content = get_message_text(message)
|
||||
if content:
|
||||
builder.add_text_content(content)
|
||||
return builder.build()
|
||||
|
||||
Reference in New Issue
Block a user