移除chat_message_builder
This commit is contained in:
committed by
DrSmoothl
parent
d01abd893d
commit
f17b85c1bd
@@ -88,7 +88,14 @@ class ExpressionLearner:
|
||||
def _check_cached_jargons_in_messages(self, jargon_miner: Optional["JargonMiner"] = None):
|
||||
if not jargon_miner:
|
||||
return []
|
||||
# TODO: 完成检测逻辑
|
||||
# 获取缓存的所有jargon实例
|
||||
cached_jargons = jargon_miner.get_cached_jargons()
|
||||
if not cached_jargons:
|
||||
return []
|
||||
matched_entries: List[Tuple[str, str]] = []
|
||||
|
||||
for i, msg in enumerate(self._messages_cache):
|
||||
if
|
||||
|
||||
# ====== DB 操作相关 ======
|
||||
async def _upsert_expression_to_db(self, situation: str, style: str):
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
65
src/common/data_models/action_record_data_model.py
Normal file
65
src/common/data_models/action_record_data_model.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict
|
||||
|
||||
import json
|
||||
|
||||
from src.common.database.database_model import ActionRecord
|
||||
|
||||
from . import BaseDatabaseDataModel
|
||||
|
||||
|
||||
class MaiActionRecord(BaseDatabaseDataModel[ActionRecord]):
    """Data model object for a single ``ActionRecord`` database row.

    ``action_data`` is kept as a dict on this object and is converted with
    ``json.loads`` / ``json.dumps`` when moving from / to the database record.
    """

    def __init__(
        self,
        action_id: str,
        timestamp: datetime,
        session_id: str,
        action_name: str,
        action_reasoning: Optional[str] = None,
        action_data: Optional[Dict] = None,
        action_builtin_prompt: Optional[str] = None,
        action_display_prompt: Optional[str] = None,
    ):
        self.action_id = action_id
        """Action ID"""
        self.timestamp = timestamp
        """Timestamp"""
        self.session_id = session_id
        """Session ID"""
        self.action_name = action_name
        """Action name"""
        self.action_reasoning = action_reasoning
        """Reasoning behind the action"""
        # A missing (or empty) payload is normalised to an empty dict.
        self.action_data = action_data if action_data else {}
        """Action data"""
        self.action_builtin_prompt = action_builtin_prompt
        """Built-in action prompt"""
        self.action_display_prompt = action_display_prompt
        """Content that is finally fed into the prompt"""

    @classmethod
    def from_db_instance(cls, db_record: ActionRecord):
        """Create a data model object from a database record."""
        # The database stores action_data as a JSON string; a falsy value is passed on as None.
        stored_data = db_record.action_data
        decoded_data = json.loads(stored_data) if stored_data else None
        return cls(
            action_id=db_record.action_id,
            timestamp=db_record.timestamp,
            session_id=db_record.session_id,
            action_name=db_record.action_name,
            action_reasoning=db_record.action_reasoning,
            action_data=decoded_data,
            action_builtin_prompt=db_record.action_builtin_prompt,
            action_display_prompt=db_record.action_display_prompt,
        )

    def to_db_instance(self):
        """Convert the data model object back to a database instance."""
        # An empty dict is written as None rather than as the JSON text "{}".
        encoded_data = json.dumps(self.action_data) if self.action_data else None
        return ActionRecord(
            action_id=self.action_id,
            timestamp=self.timestamp,
            session_id=self.session_id,
            action_name=self.action_name,
            action_reasoning=self.action_reasoning,
            action_data=encoded_data,
            action_builtin_prompt=self.action_builtin_prompt,
            action_display_prompt=self.action_display_prompt,
        )
|
||||
8
src/common/utils/system_utils.py
Normal file
8
src/common/utils/system_utils.py
Normal file
@@ -0,0 +1,8 @@
|
||||
# TODO: 这个函数的实现非常临时,后续需要替换为更完善的实现,比如直接从配置文件中读取机器人自己的ID,或者通过API获取机器人自己的信息等
|
||||
def is_bot_self(user_id: str, platform: str) -> bool:
    """
    Return whether the given user ID on the given platform is the bot itself.

    Temporary implementation: only the hard-coded pair
    ``("bot_self", "test_platform")`` is recognised as the bot.
    """
    if user_id == "bot_self":
        return platform == "test_platform"
    return False
|
||||
32
src/common/utils/utils_action.py
Normal file
32
src/common/utils/utils_action.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from typing import TYPE_CHECKING, List
|
||||
|
||||
from src.common.utils.math_utils import translate_timestamp_to_human_readable, TimestampMode
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.common.data_models.action_record_data_model import MaiActionRecord
|
||||
|
||||
|
||||
class ActionUtils:
    """Helpers for turning action records into text."""

    @staticmethod
    def build_readable_action_records(action_records: List["MaiActionRecord"], timestamp_mode: str | TimestampMode):
        """
        Render a list of action records as readable text, one line per record.

        Each line is built from the record's formatted timestamp, its
        ``action_name`` and its ``action_display_prompt``.

        Args:
            action_records: The action records to render.
            timestamp_mode: Mode forwarded to ``translate_timestamp_to_human_readable``.

        Returns:
            The rendered lines joined by newlines, or an empty string when
            ``action_records`` is empty.
        """
        if not action_records:
            return ""

        def render(entry: "MaiActionRecord") -> str:
            # record.timestamp is a datetime; the formatter is given its float timestamp.
            readable_time = translate_timestamp_to_human_readable(entry.timestamp.timestamp(), mode=timestamp_mode)
            return f"在{readable_time},你使用了{entry.action_name},具体内容是:{entry.action_display_prompt}"

        return "\n".join(render(entry) for entry in action_records)
|
||||
@@ -8,7 +8,7 @@ import numpy as np
|
||||
|
||||
from src.common.logger import get_logger
|
||||
|
||||
logger = get_logger("image")
|
||||
logger = get_logger("image_utils")
|
||||
|
||||
|
||||
class ImageUtils:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from maim_message import MessageBase, Seg
|
||||
from typing import List, Tuple, Optional, Dict, TYPE_CHECKING, Callable
|
||||
from datetime import datetime
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
@@ -7,6 +8,8 @@ import msgpack
|
||||
import random
|
||||
import re
|
||||
|
||||
from sqlmodel import select, col
|
||||
|
||||
from src.common.data_models.message_component_data_model import (
|
||||
MessageSequence,
|
||||
StandardMessageComponents,
|
||||
@@ -20,13 +23,17 @@ from src.common.data_models.message_component_data_model import (
|
||||
UnknownUser,
|
||||
ForwardNodeComponent,
|
||||
)
|
||||
from src.common.logger import get_logger
|
||||
from src.config.config import global_config
|
||||
|
||||
from .math_utils import number_to_short_id, TimestampMode, translate_timestamp_to_human_readable
|
||||
from .system_utils import is_bot_self
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.chat.message_receive.message import SessionMessage
|
||||
|
||||
logger = get_logger("message_utils")
|
||||
|
||||
|
||||
class MessageUtils:
|
||||
@staticmethod
|
||||
@@ -156,6 +163,7 @@ class MessageUtils:
|
||||
read_mark_time: Optional[float] = None,
|
||||
truncate_message: bool = False,
|
||||
truncate_func: Optional[Callable[[float], Tuple[Optional[int], str]]] = None,
|
||||
show_actions: bool = False,
|
||||
) -> Tuple[str, Dict[str, Tuple[str, str]], List[str]]:
|
||||
"""
|
||||
将消息构建为LLM可读的文本格式
|
||||
@@ -171,6 +179,7 @@ class MessageUtils:
|
||||
show_message_id_prefix (bool): 是否在每条消息前显示消息ID前缀
|
||||
truncate_message (bool): 是否启用消息文本截断功能,截断过长的消息文本
|
||||
truncate_func (Optional[Callable[[float], Tuple[Optional[int], str]]]) 截断函数,接受消息的百分位位置(0-1),返回一个元组(文本长度限制(可为None表不切割), 替换内容)
|
||||
show_actions (bool): 是否显示Action组件内容
|
||||
Returns:
|
||||
return (Tuple[str, Dict[str, Tuple[str, str]], List[str]]): 构建后的消息文本,映射表 {用户ID: (匿名ID, 原始名称)},消息编号列表
|
||||
"""
|
||||
@@ -217,11 +226,30 @@ class MessageUtils:
|
||||
processed_plain_texts.extend(f"[表情{emoji_id}: {desc}]" for emoji_id, desc in emoji_map.values())
|
||||
processed_plain_texts.extend(("", "聊天记录信息:"))
|
||||
|
||||
# 获取动作记录文本列表
|
||||
action_messages: List[Tuple[float, str]] = []
|
||||
if show_actions and messages:
|
||||
min_time = msg_list[0].timestamp.timestamp()
|
||||
max_time = msg_list[-1].timestamp.timestamp()
|
||||
session_id = msg_list[0].session_id
|
||||
action_messages = MessageUtils._generate_action_readable(min_time, max_time, session_id)
|
||||
|
||||
msg_count = len(msg_list)
|
||||
read_mark_added_flag: bool = False # 标记是否已经添加过已读标签,确保只添加一次
|
||||
action_idx: int = 0 # 动作记录的索引,用于双指针遍历
|
||||
|
||||
for i, msg in enumerate(msg_list):
|
||||
await msg.process()
|
||||
plain_text: str = msg.processed_plain_text # type: ignore
|
||||
msg_time = msg.timestamp.timestamp()
|
||||
|
||||
# 使用双指针插入动作记录
|
||||
while action_idx < len(action_messages) and action_messages[action_idx][0] <= msg_time:
|
||||
processed_plain_texts.append(
|
||||
MessageUtils._build_action_str_single(action_messages[action_idx], timestamp_mode)
|
||||
)
|
||||
action_idx += 1
|
||||
|
||||
if truncate_message: # 消息截断逻辑
|
||||
percentile = i / msg_count
|
||||
if not read_mark_time: # 没有已读标签
|
||||
@@ -250,6 +278,13 @@ class MessageUtils:
|
||||
message_ids.append(message_id)
|
||||
processed_plain_texts.append("".join([header, plain_text]))
|
||||
|
||||
# 处理剩余的动作记录(时间在最后一条消息之后的动作)
|
||||
while action_idx < len(action_messages):
|
||||
processed_plain_texts.append(
|
||||
MessageUtils._build_action_str_single(action_messages[action_idx], timestamp_mode)
|
||||
)
|
||||
action_idx += 1
|
||||
|
||||
return "\n".join(processed_plain_texts), user_id_mapping, message_ids
|
||||
|
||||
@staticmethod
|
||||
@@ -531,12 +566,64 @@ class MessageUtils:
|
||||
]
|
||||
return component
|
||||
|
||||
@staticmethod
def _generate_action_readable(min_time: float, max_time: float, session_id: str) -> List[Tuple[float, str]]:
    """
    Fetch the action records of one session around a time range and build action text entries.

    Two groups of records are collected: every record whose timestamp lies in
    ``[min_time, max_time]``, plus the first record after ``max_time`` (if any).
    Records without a display prompt are skipped.

    Args:
        min_time: Lower bound of the range, as a float timestamp accepted by ``datetime.fromtimestamp``.
        max_time: Upper bound of the range, as a float timestamp accepted by ``datetime.fromtimestamp``.
        session_id: Only records with this session ID are considered.

    Returns:
        List[Tuple[float, str]]: ``(timestamp, action_text)`` entries sorted by time.
        An empty list is returned when querying fails.
    """
    from src.common.database.database import get_db_session
    from src.common.database.database_model import ActionRecord

    try:
        range_start = datetime.fromtimestamp(min_time)
        range_end = datetime.fromtimestamp(max_time)

        # Both reads share a single session instead of opening one per query.
        with get_db_session() as session:
            # Records inside the time range that belong to the session.
            actions_in_range = session.exec(
                select(ActionRecord)
                .where(col(ActionRecord.timestamp) >= range_start)
                .where(col(ActionRecord.timestamp) <= range_end)
                .where(col(ActionRecord.session_id) == session_id)
                .order_by(col(ActionRecord.timestamp))
            ).all()

            # The first record after the latest message.
            action_after_latest = session.exec(
                select(ActionRecord)
                .where(col(ActionRecord.timestamp) > range_end)
                .where(col(ActionRecord.session_id) == session_id)
                .order_by(col(ActionRecord.timestamp))
                .limit(1)
            ).all()
    except Exception as e:
        # Best effort: a failed lookup only means no actions are shown.
        logger.error(f"查询动作记录失败: {e}")
        return []

    # Merge both groups (already in time order) and keep only records with display text.
    return [
        (action.timestamp.timestamp(), action.action_display_prompt)
        for action in [*actions_in_range, *action_after_latest]
        if action.action_display_prompt
    ]
|
||||
|
||||
@staticmethod
def _build_action_str_single(
    action_content: Tuple[float, str], timestamp_mode: Optional[str | TimestampMode] = None
) -> str:
    """
    Render one ``(timestamp, action_text)`` entry as a single line of text.

    The line is the fixed header followed by the action text. When
    ``timestamp_mode`` is truthy, the formatted timestamp in square brackets
    is placed in front of the header.
    """
    timestamp, text = action_content
    header = "你执行了: "
    if not timestamp_mode:
        return f"{header}{text}"
    readable_time = translate_timestamp_to_human_readable(timestamp, mode=timestamp_mode)
    return f"[{readable_time}] {header}{text}"
|
||||
|
||||
@@ -10,7 +10,7 @@ from src.llm_models.utils_model import LLMRequest
|
||||
|
||||
install(extra_lines=3)
|
||||
|
||||
logger = get_logger("chat_voice")
|
||||
logger = get_logger("voice_utils")
|
||||
|
||||
# TODO: 在LLMRequest重构后修改这里
|
||||
asr_model = LLMRequest(model_set=model_config.model_task_config.voice, request_type="audio")
|
||||
|
||||
Reference in New Issue
Block a user