移除chat_message_builder

This commit is contained in:
UnCLAS-Prommer
2026-03-11 22:28:13 +08:00
committed by DrSmoothl
parent d01abd893d
commit f17b85c1bd
8 changed files with 209 additions and 1093 deletions

View File

@@ -88,7 +88,14 @@ class ExpressionLearner:
def _check_cached_jargons_in_messages(self, jargon_miner: Optional["JargonMiner"] = None):
if not jargon_miner:
return []
# TODO: 完成检测逻辑
# 获取缓存的所有jargon实例
cached_jargons = jargon_miner.get_cached_jargons()
if not cached_jargons:
return []
matched_entries: List[Tuple[str, str]] = []
for i, msg in enumerate(self._messages_cache):
if
# ====== DB 操作相关 ======
async def _upsert_expression_to_db(self, situation: str, style: str):

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,65 @@
from datetime import datetime
from typing import Optional, Dict
import json
from src.common.database.database_model import ActionRecord
from . import BaseDatabaseDataModel
class MaiActionRecord(BaseDatabaseDataModel[ActionRecord]):
    """In-memory data model mirroring an ``ActionRecord`` database row.

    ``action_data`` is kept as a dict on this object, while the database
    record carries it as a JSON string (or ``None`` when there is no data).
    """

    def __init__(
        self,
        action_id: str,
        timestamp: datetime,
        session_id: str,
        action_name: str,
        action_reasoning: Optional[str] = None,
        action_data: Optional[Dict] = None,
        action_builtin_prompt: Optional[str] = None,
        action_display_prompt: Optional[str] = None,
    ):
        self.action_id = action_id
        """Action ID."""
        self.timestamp = timestamp
        """Timestamp of the action."""
        self.session_id = session_id
        """Session ID."""
        self.action_name = action_name
        """Action name."""
        self.action_reasoning = action_reasoning
        """Reasoning behind the action."""
        # A missing (or empty) payload is normalized to an empty dict.
        self.action_data = action_data or {}
        """Action data."""
        self.action_builtin_prompt = action_builtin_prompt
        """Built-in action prompt."""
        self.action_display_prompt = action_display_prompt
        """Content that is finally fed into the prompt."""

    @classmethod
    def from_db_instance(cls, db_record: ActionRecord):
        """Build a data model object from a database record.

        The record's ``action_data`` is decoded from JSON when it is non-empty;
        otherwise ``None`` is passed on, which ``__init__`` turns into ``{}``.
        """
        stored_payload = db_record.action_data
        decoded_payload = json.loads(stored_payload) if stored_payload else None
        return cls(
            action_id=db_record.action_id,
            timestamp=db_record.timestamp,
            session_id=db_record.session_id,
            action_name=db_record.action_name,
            action_reasoning=db_record.action_reasoning,
            action_data=decoded_payload,
            action_builtin_prompt=db_record.action_builtin_prompt,
            action_display_prompt=db_record.action_display_prompt,
        )

    def to_db_instance(self):
        """Convert this data model object back into an ``ActionRecord``.

        A non-empty ``action_data`` dict is serialized to a JSON string; an
        empty one is stored as ``None``.
        """
        encoded_payload = json.dumps(self.action_data) if self.action_data else None
        return ActionRecord(
            action_id=self.action_id,
            timestamp=self.timestamp,
            session_id=self.session_id,
            action_name=self.action_name,
            action_reasoning=self.action_reasoning,
            action_data=encoded_payload,
            action_builtin_prompt=self.action_builtin_prompt,
            action_display_prompt=self.action_display_prompt,
        )

View File

@@ -0,0 +1,8 @@
# TODO: This implementation is a temporary stub. Replace it with a proper one later, e.g. read the
# bot's own ID from the configuration file, or fetch the bot's own information through an API.
def is_bot_self(user_id: str, platform: str) -> bool:
    """
    Tell whether the given user ID is the bot itself.

    Temporary placeholder: only the hard-coded pair ``"bot_self"`` on
    ``"test_platform"`` is recognized; every other combination yields False.
    """
    return (user_id, platform) == ("bot_self", "test_platform")

View File

@@ -0,0 +1,32 @@
from typing import TYPE_CHECKING, List
from src.common.utils.math_utils import translate_timestamp_to_human_readable, TimestampMode
if TYPE_CHECKING:
from src.common.data_models.action_record_data_model import MaiActionRecord
class ActionUtils:
    """Helpers for rendering action records as text."""

    @staticmethod
    def build_readable_action_records(action_records: List["MaiActionRecord"], timestamp_mode: str | TimestampMode):
        """
        Render a list of action records as human-readable text, one line per record.

        Each line has the form
        ``<time>,你使用了<action_name>,具体内容是:<action_display_prompt>``,
        where ``<time>`` is the record's timestamp formatted by
        ``translate_timestamp_to_human_readable`` with ``timestamp_mode``.
        The display prompt is interpolated as-is, so a record whose
        ``action_display_prompt`` is ``None`` renders the text ``None``.

        Args:
            action_records: Action record objects to render.
            timestamp_mode: Timestamp display mode passed to the formatter.

        Returns:
            The lines joined with newlines, or an empty string when there
            are no records.
        """
        # A falsy argument (empty list or None) produces no lines, hence "".
        rendered_lines = (
            f"{translate_timestamp_to_human_readable(entry.timestamp.timestamp(), mode=timestamp_mode)}"
            f",你使用了{entry.action_name},具体内容是:{entry.action_display_prompt}"
            for entry in action_records or ()
        )
        return "\n".join(rendered_lines)

View File

@@ -8,7 +8,7 @@ import numpy as np
from src.common.logger import get_logger
logger = get_logger("image")
logger = get_logger("image_utils")
class ImageUtils:

View File

@@ -1,5 +1,6 @@
from maim_message import MessageBase, Seg
from typing import List, Tuple, Optional, Dict, TYPE_CHECKING, Callable
from datetime import datetime
import base64
import hashlib
@@ -7,6 +8,8 @@ import msgpack
import random
import re
from sqlmodel import select, col
from src.common.data_models.message_component_data_model import (
MessageSequence,
StandardMessageComponents,
@@ -20,13 +23,17 @@ from src.common.data_models.message_component_data_model import (
UnknownUser,
ForwardNodeComponent,
)
from src.common.logger import get_logger
from src.config.config import global_config
from .math_utils import number_to_short_id, TimestampMode, translate_timestamp_to_human_readable
from .system_utils import is_bot_self
if TYPE_CHECKING:
from src.chat.message_receive.message import SessionMessage
logger = get_logger("message_utils")
class MessageUtils:
@staticmethod
@@ -156,6 +163,7 @@ class MessageUtils:
read_mark_time: Optional[float] = None,
truncate_message: bool = False,
truncate_func: Optional[Callable[[float], Tuple[Optional[int], str]]] = None,
show_actions: bool = False,
) -> Tuple[str, Dict[str, Tuple[str, str]], List[str]]:
"""
将消息构建为LLM可读的文本格式
@@ -171,6 +179,7 @@ class MessageUtils:
show_message_id_prefix (bool): 是否在每条消息前显示消息ID前缀
truncate_message (bool): 是否启用消息文本截断功能,截断过长的消息文本
truncate_func (Optional[Callable[[float], Tuple[Optional[int], str]]]) 截断函数,接受消息的百分位位置(0-1),返回一个元组(文本长度限制(可为None表不切割), 替换内容)
show_actions (bool): 是否显示Action组件内容
Returns:
return (Tuple[str, Dict[str, Tuple[str, str]], List[str]]): 构建后的消息文本,映射表 {用户ID: (匿名ID, 原始名称)},消息编号列表
"""
@@ -217,11 +226,30 @@ class MessageUtils:
processed_plain_texts.extend(f"[表情{emoji_id}: {desc}]" for emoji_id, desc in emoji_map.values())
processed_plain_texts.extend(("", "聊天记录信息:"))
# 获取动作记录文本列表
action_messages: List[Tuple[float, str]] = []
if show_actions and messages:
min_time = msg_list[0].timestamp.timestamp()
max_time = msg_list[-1].timestamp.timestamp()
session_id = msg_list[0].session_id
action_messages = MessageUtils._generate_action_readable(min_time, max_time, session_id)
msg_count = len(msg_list)
read_mark_added_flag: bool = False # 标记是否已经添加过已读标签,确保只添加一次
action_idx: int = 0 # 动作记录的索引,用于双指针遍历
for i, msg in enumerate(msg_list):
await msg.process()
plain_text: str = msg.processed_plain_text # type: ignore
msg_time = msg.timestamp.timestamp()
# 使用双指针插入动作记录
while action_idx < len(action_messages) and action_messages[action_idx][0] <= msg_time:
processed_plain_texts.append(
MessageUtils._build_action_str_single(action_messages[action_idx], timestamp_mode)
)
action_idx += 1
if truncate_message: # 消息截断逻辑
percentile = i / msg_count
if not read_mark_time: # 没有已读标签
@@ -250,6 +278,13 @@ class MessageUtils:
message_ids.append(message_id)
processed_plain_texts.append("".join([header, plain_text]))
# 处理剩余的动作记录(时间在最后一条消息之后的动作)
while action_idx < len(action_messages):
processed_plain_texts.append(
MessageUtils._build_action_str_single(action_messages[action_idx], timestamp_mode)
)
action_idx += 1
return "\n".join(processed_plain_texts), user_id_mapping, message_ids
@staticmethod
@@ -531,12 +566,64 @@ class MessageUtils:
]
return component
@staticmethod
def _generate_action_readable(min_time: float, max_time: float, session_id: str) -> List[Tuple[float, str]]:
    """
    Collect the action records around a message time window and build action text entries.

    Two groups of records are queried for the given session:
    every action whose timestamp lies within ``[min_time, max_time]``, and
    the first action recorded after ``max_time``. Records that have no
    display prompt are skipped.

    Args:
        min_time: Start of the window as a Unix timestamp (inclusive).
        max_time: End of the window as a Unix timestamp (inclusive).
        session_id: Only actions belonging to this session are returned.

    Returns:
        List[Tuple[float, str]]: Entries of ``(timestamp, action_text)`` ordered by time.
        An empty list is returned when the database query fails.
    """
    # Kept as function-local imports, as in the original.
    # NOTE(review): presumably this avoids an import cycle - confirm.
    from src.common.database.database import get_db_session
    from src.common.database.database_model import ActionRecord

    action_messages: List[Tuple[float, str]] = []
    try:
        # The timestamp conversion stays inside the try block so that an out-of-range
        # value is reported and handled the same way as a failed query.
        window_start = datetime.fromtimestamp(min_time)
        window_end = datetime.fromtimestamp(max_time)

        # Both queries share one session instead of opening two in a row.
        with get_db_session() as session:
            # Actions inside the message time window for this session.
            actions_in_range = session.exec(
                select(ActionRecord)
                .where(col(ActionRecord.timestamp) >= window_start)
                .where(col(ActionRecord.timestamp) <= window_end)
                .where(col(ActionRecord.session_id) == session_id)
                .order_by(col(ActionRecord.timestamp))
            ).all()
            # The first action that follows the latest message.
            action_after_latest = session.exec(
                select(ActionRecord)
                .where(col(ActionRecord.timestamp) > window_end)
                .where(col(ActionRecord.session_id) == session_id)
                .order_by(col(ActionRecord.timestamp))
                .limit(1)
            ).all()

            # Read the column values while the rows are still attached to the session,
            # so nothing depends on attribute access after the session has been closed.
            # In-window rows come first and are time-ordered; the trailing row is later
            # than all of them, so the combined list stays sorted.
            for action in (*actions_in_range, *action_after_latest):
                display_prompt = action.action_display_prompt
                if display_prompt:
                    action_messages.append((action.timestamp.timestamp(), display_prompt))
    except Exception as e:
        # Best effort: a failure here must not break message rendering.
        logger.error(f"查询动作记录失败: {e}")
        return []

    return action_messages
@staticmethod
def _build_action_str_single(
    action_content: Tuple[float, str], timestamp_mode: Optional[str | TimestampMode] = None
) -> str:
    """
    Format a single action entry as one line of text.

    Args:
        action_content: A ``(timestamp, action_text)`` pair.
        timestamp_mode: When truthy, the timestamp is formatted with this mode and
            prepended in square brackets; otherwise no time is shown.

    Returns:
        ``你执行了: <action_text>``, optionally prefixed with ``[<time>] ``.
    """
    action_time, action_text = action_content
    # Without a timestamp mode the line carries no time prefix.
    if not timestamp_mode:
        return f"你执行了: {action_text}"
    readable_time = translate_timestamp_to_human_readable(action_time, mode=timestamp_mode)
    return f"[{readable_time}] 你执行了: {action_text}"

View File

@@ -10,7 +10,7 @@ from src.llm_models.utils_model import LLMRequest
install(extra_lines=3)
logger = get_logger("chat_voice")
logger = get_logger("voice_utils")
# TODO: 在LLMRequest重构后修改这里
asr_model = LLMRequest(model_set=model_config.model_task_config.voice, request_type="audio")