移除chat_message_builder
This commit is contained in:
committed by
DrSmoothl
parent
d01abd893d
commit
f17b85c1bd
@@ -88,7 +88,14 @@ class ExpressionLearner:
|
|||||||
def _check_cached_jargons_in_messages(self, jargon_miner: Optional["JargonMiner"] = None):
|
def _check_cached_jargons_in_messages(self, jargon_miner: Optional["JargonMiner"] = None):
|
||||||
if not jargon_miner:
|
if not jargon_miner:
|
||||||
return []
|
return []
|
||||||
# TODO: 完成检测逻辑
|
# 获取缓存的所有jargon实例
|
||||||
|
cached_jargons = jargon_miner.get_cached_jargons()
|
||||||
|
if not cached_jargons:
|
||||||
|
return []
|
||||||
|
matched_entries: List[Tuple[str, str]] = []
|
||||||
|
|
||||||
|
for i, msg in enumerate(self._messages_cache):
|
||||||
|
if
|
||||||
|
|
||||||
# ====== DB 操作相关 ======
|
# ====== DB 操作相关 ======
|
||||||
async def _upsert_expression_to_db(self, situation: str, style: str):
|
async def _upsert_expression_to_db(self, situation: str, style: str):
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
65
src/common/data_models/action_record_data_model.py
Normal file
65
src/common/data_models/action_record_data_model.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional, Dict
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from src.common.database.database_model import ActionRecord
|
||||||
|
|
||||||
|
from . import BaseDatabaseDataModel
|
||||||
|
|
||||||
|
|
||||||
|
class MaiActionRecord(BaseDatabaseDataModel[ActionRecord]):
    """In-memory data model that converts to and from an ``ActionRecord`` database row.

    ``action_data`` is kept as a dict on this object and is serialized to /
    parsed from a JSON string when crossing the database boundary.
    """

    def __init__(
        self,
        action_id: str,
        timestamp: datetime,
        session_id: str,
        action_name: str,
        action_reasoning: Optional[str] = None,
        action_data: Optional[Dict] = None,
        action_builtin_prompt: Optional[str] = None,
        action_display_prompt: Optional[str] = None,
    ):
        # Action ID.
        self.action_id = action_id
        # Timestamp of the action.
        self.timestamp = timestamp
        # Session ID the action belongs to.
        self.session_id = session_id
        # Action name.
        self.action_name = action_name
        # Reasoning process behind the action.
        self.action_reasoning = action_reasoning
        # Action data; any falsy value (None, empty dict) becomes a fresh empty dict.
        self.action_data = action_data if action_data else {}
        # Built-in action prompt.
        self.action_builtin_prompt = action_builtin_prompt
        # Content that is finally fed into the prompt.
        self.action_display_prompt = action_display_prompt

    @classmethod
    def from_db_instance(cls, db_record: ActionRecord):
        """Build a data model object from a database record.

        A non-empty ``action_data`` column is decoded with ``json.loads``;
        an empty one is passed on as ``None`` (and stored as ``{}``).
        """
        raw_action_data = db_record.action_data
        decoded_action_data = json.loads(raw_action_data) if raw_action_data else None
        return cls(
            action_id=db_record.action_id,
            timestamp=db_record.timestamp,
            session_id=db_record.session_id,
            action_name=db_record.action_name,
            action_reasoning=db_record.action_reasoning,
            action_data=decoded_action_data,
            action_builtin_prompt=db_record.action_builtin_prompt,
            action_display_prompt=db_record.action_display_prompt,
        )

    def to_db_instance(self):
        """Convert this data model object back into an ``ActionRecord``.

        A non-empty ``action_data`` dict is encoded with ``json.dumps``;
        an empty one is written as ``None``.
        """
        encoded_action_data = json.dumps(self.action_data) if self.action_data else None
        return ActionRecord(
            action_id=self.action_id,
            timestamp=self.timestamp,
            session_id=self.session_id,
            action_name=self.action_name,
            action_reasoning=self.action_reasoning,
            action_data=encoded_action_data,
            action_builtin_prompt=self.action_builtin_prompt,
            action_display_prompt=self.action_display_prompt,
        )
|
||||||
8
src/common/utils/system_utils.py
Normal file
8
src/common/utils/system_utils.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# TODO: This implementation is a temporary stopgap. Replace it with something more robust later,
# e.g. read the bot's own ID from the config file, or fetch the bot's own info through an API.
def is_bot_self(user_id: str, platform: str) -> bool:
    """
    Report whether the given user ID is the bot itself.

    Temporary method: it only recognizes one hard-coded (user ID, platform)
    pair and will be replaced by a more complete implementation later.
    """
    placeholder_identity = ("bot_self", "test_platform")
    return (user_id, platform) == placeholder_identity
|
||||||
32
src/common/utils/utils_action.py
Normal file
32
src/common/utils/utils_action.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
from typing import TYPE_CHECKING, List
|
||||||
|
|
||||||
|
from src.common.utils.math_utils import translate_timestamp_to_human_readable, TimestampMode
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from src.common.data_models.action_record_data_model import MaiActionRecord
|
||||||
|
|
||||||
|
|
||||||
|
class ActionUtils:
    """Helpers for rendering action records as text."""

    @staticmethod
    def build_readable_action_records(action_records: List["MaiActionRecord"], timestamp_mode: str | TimestampMode):
        """
        Render a list of action records as readable text, one line per record.

        Line format: 在`time`,你使用了`action_name`,具体内容是:`action_display_prompt`

        Args:
            action_records: Action record objects; each record's ``timestamp``,
                ``action_name`` and ``action_display_prompt`` are read.
            timestamp_mode: Timestamp mode, forwarded to
                ``translate_timestamp_to_human_readable``.

        Returns:
            The formatted lines joined with newlines, or an empty string when
            ``action_records`` is empty.
        """
        if not action_records:
            return ""

        def _render_line(entry: "MaiActionRecord") -> str:
            # The record stores a datetime; the formatter is given the float timestamp.
            readable_time = translate_timestamp_to_human_readable(entry.timestamp.timestamp(), mode=timestamp_mode)
            # NOTE(review): action_display_prompt is interpolated as-is, so a record whose
            # display prompt is None renders the literal text "None" — confirm this is intended.
            return f"在{readable_time},你使用了{entry.action_name},具体内容是:{entry.action_display_prompt}"

        return "\n".join(_render_line(entry) for entry in action_records)
|
||||||
@@ -8,7 +8,7 @@ import numpy as np
|
|||||||
|
|
||||||
from src.common.logger import get_logger
|
from src.common.logger import get_logger
|
||||||
|
|
||||||
logger = get_logger("image")
|
logger = get_logger("image_utils")
|
||||||
|
|
||||||
|
|
||||||
class ImageUtils:
|
class ImageUtils:
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from maim_message import MessageBase, Seg
|
from maim_message import MessageBase, Seg
|
||||||
from typing import List, Tuple, Optional, Dict, TYPE_CHECKING, Callable
|
from typing import List, Tuple, Optional, Dict, TYPE_CHECKING, Callable
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
@@ -7,6 +8,8 @@ import msgpack
|
|||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from sqlmodel import select, col
|
||||||
|
|
||||||
from src.common.data_models.message_component_data_model import (
|
from src.common.data_models.message_component_data_model import (
|
||||||
MessageSequence,
|
MessageSequence,
|
||||||
StandardMessageComponents,
|
StandardMessageComponents,
|
||||||
@@ -20,13 +23,17 @@ from src.common.data_models.message_component_data_model import (
|
|||||||
UnknownUser,
|
UnknownUser,
|
||||||
ForwardNodeComponent,
|
ForwardNodeComponent,
|
||||||
)
|
)
|
||||||
|
from src.common.logger import get_logger
|
||||||
from src.config.config import global_config
|
from src.config.config import global_config
|
||||||
|
|
||||||
from .math_utils import number_to_short_id, TimestampMode, translate_timestamp_to_human_readable
|
from .math_utils import number_to_short_id, TimestampMode, translate_timestamp_to_human_readable
|
||||||
|
from .system_utils import is_bot_self
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from src.chat.message_receive.message import SessionMessage
|
from src.chat.message_receive.message import SessionMessage
|
||||||
|
|
||||||
|
logger = get_logger("message_utils")
|
||||||
|
|
||||||
|
|
||||||
class MessageUtils:
|
class MessageUtils:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -156,6 +163,7 @@ class MessageUtils:
|
|||||||
read_mark_time: Optional[float] = None,
|
read_mark_time: Optional[float] = None,
|
||||||
truncate_message: bool = False,
|
truncate_message: bool = False,
|
||||||
truncate_func: Optional[Callable[[float], Tuple[Optional[int], str]]] = None,
|
truncate_func: Optional[Callable[[float], Tuple[Optional[int], str]]] = None,
|
||||||
|
show_actions: bool = False,
|
||||||
) -> Tuple[str, Dict[str, Tuple[str, str]], List[str]]:
|
) -> Tuple[str, Dict[str, Tuple[str, str]], List[str]]:
|
||||||
"""
|
"""
|
||||||
将消息构建为LLM可读的文本格式
|
将消息构建为LLM可读的文本格式
|
||||||
@@ -171,6 +179,7 @@ class MessageUtils:
|
|||||||
show_message_id_prefix (bool): 是否在每条消息前显示消息ID前缀
|
show_message_id_prefix (bool): 是否在每条消息前显示消息ID前缀
|
||||||
truncate_message (bool): 是否启用消息文本截断功能,截断过长的消息文本
|
truncate_message (bool): 是否启用消息文本截断功能,截断过长的消息文本
|
||||||
truncate_func (Optional[Callable[[float], Tuple[Optional[int], str]]]) 截断函数,接受消息的百分位位置(0-1),返回一个元组(文本长度限制(可为None表不切割), 替换内容)
|
truncate_func (Optional[Callable[[float], Tuple[Optional[int], str]]]) 截断函数,接受消息的百分位位置(0-1),返回一个元组(文本长度限制(可为None表不切割), 替换内容)
|
||||||
|
show_actions (bool): 是否显示Action组件内容
|
||||||
Returns:
|
Returns:
|
||||||
return (Tuple[str, Dict[str, Tuple[str, str]], List[str]]): 构建后的消息文本,映射表 {用户ID: (匿名ID, 原始名称)},消息编号列表
|
return (Tuple[str, Dict[str, Tuple[str, str]], List[str]]): 构建后的消息文本,映射表 {用户ID: (匿名ID, 原始名称)},消息编号列表
|
||||||
"""
|
"""
|
||||||
@@ -217,11 +226,30 @@ class MessageUtils:
|
|||||||
processed_plain_texts.extend(f"[表情{emoji_id}: {desc}]" for emoji_id, desc in emoji_map.values())
|
processed_plain_texts.extend(f"[表情{emoji_id}: {desc}]" for emoji_id, desc in emoji_map.values())
|
||||||
processed_plain_texts.extend(("", "聊天记录信息:"))
|
processed_plain_texts.extend(("", "聊天记录信息:"))
|
||||||
|
|
||||||
|
# 获取动作记录文本列表
|
||||||
|
action_messages: List[Tuple[float, str]] = []
|
||||||
|
if show_actions and messages:
|
||||||
|
min_time = msg_list[0].timestamp.timestamp()
|
||||||
|
max_time = msg_list[-1].timestamp.timestamp()
|
||||||
|
session_id = msg_list[0].session_id
|
||||||
|
action_messages = MessageUtils._generate_action_readable(min_time, max_time, session_id)
|
||||||
|
|
||||||
msg_count = len(msg_list)
|
msg_count = len(msg_list)
|
||||||
read_mark_added_flag: bool = False # 标记是否已经添加过已读标签,确保只添加一次
|
read_mark_added_flag: bool = False # 标记是否已经添加过已读标签,确保只添加一次
|
||||||
|
action_idx: int = 0 # 动作记录的索引,用于双指针遍历
|
||||||
|
|
||||||
for i, msg in enumerate(msg_list):
|
for i, msg in enumerate(msg_list):
|
||||||
await msg.process()
|
await msg.process()
|
||||||
plain_text: str = msg.processed_plain_text # type: ignore
|
plain_text: str = msg.processed_plain_text # type: ignore
|
||||||
|
msg_time = msg.timestamp.timestamp()
|
||||||
|
|
||||||
|
# 使用双指针插入动作记录
|
||||||
|
while action_idx < len(action_messages) and action_messages[action_idx][0] <= msg_time:
|
||||||
|
processed_plain_texts.append(
|
||||||
|
MessageUtils._build_action_str_single(action_messages[action_idx], timestamp_mode)
|
||||||
|
)
|
||||||
|
action_idx += 1
|
||||||
|
|
||||||
if truncate_message: # 消息截断逻辑
|
if truncate_message: # 消息截断逻辑
|
||||||
percentile = i / msg_count
|
percentile = i / msg_count
|
||||||
if not read_mark_time: # 没有已读标签
|
if not read_mark_time: # 没有已读标签
|
||||||
@@ -250,6 +278,13 @@ class MessageUtils:
|
|||||||
message_ids.append(message_id)
|
message_ids.append(message_id)
|
||||||
processed_plain_texts.append("".join([header, plain_text]))
|
processed_plain_texts.append("".join([header, plain_text]))
|
||||||
|
|
||||||
|
# 处理剩余的动作记录(时间在最后一条消息之后的动作)
|
||||||
|
while action_idx < len(action_messages):
|
||||||
|
processed_plain_texts.append(
|
||||||
|
MessageUtils._build_action_str_single(action_messages[action_idx], timestamp_mode)
|
||||||
|
)
|
||||||
|
action_idx += 1
|
||||||
|
|
||||||
return "\n".join(processed_plain_texts), user_id_mapping, message_ids
|
return "\n".join(processed_plain_texts), user_id_mapping, message_ids
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -531,12 +566,64 @@ class MessageUtils:
|
|||||||
]
|
]
|
||||||
return component
|
return component
|
||||||
|
|
||||||
|
@staticmethod
def _generate_action_readable(min_time: float, max_time: float, session_id: str) -> List[Tuple[float, str]]:
    """
    Fetch a session's action records around a message time window and build the action text list.

    Two groups of records are collected, in this order:
    every record whose timestamp lies within ``[min_time, max_time]``, followed by
    the first record (if any) whose timestamp is later than ``max_time``.
    Records whose ``action_display_prompt`` is empty are skipped.

    Args:
        min_time: Start of the window as a POSIX timestamp (converted with ``datetime.fromtimestamp``).
        max_time: End of the window as a POSIX timestamp.
        session_id: Only records with this ``session_id`` are considered.

    Returns:
        List[Tuple[float, str]]: Action texts ordered by time, each element being
        ``(timestamp, action_text)``. An empty list is returned when the query fails.
    """
    from src.common.database.database import get_db_session
    from src.common.database.database_model import ActionRecord

    action_messages: List[Tuple[float, str]] = []
    try:
        window_start = datetime.fromtimestamp(min_time)
        window_end = datetime.fromtimestamp(max_time)

        # Both lookups are read-only and run back to back, so they share one session.
        with get_db_session() as session:
            # Action records inside the time window that match session_id.
            actions_in_range = session.exec(
                select(ActionRecord)
                .where(col(ActionRecord.timestamp) >= window_start)
                .where(col(ActionRecord.timestamp) <= window_end)
                .where(col(ActionRecord.session_id) == session_id)
                .order_by(col(ActionRecord.timestamp))
            ).all()

            # The first action record after the latest message.
            action_after_latest = session.exec(
                select(ActionRecord)
                .where(col(ActionRecord.timestamp) > window_end)
                .where(col(ActionRecord.session_id) == session_id)
                .order_by(col(ActionRecord.timestamp))
                .limit(1)
            ).all()

            # Copy plain values out while the session is still open, so the result does not
            # rely on ORM instances remaining readable after the session closes (whether
            # they do depends on how get_db_session is configured).
            for action in [*actions_in_range, *action_after_latest]:
                if action_display_prompt := action.action_display_prompt or "":
                    action_messages.append((action.timestamp.timestamp(), action_display_prompt))
    except Exception as e:
        logger.error(f"查询动作记录失败: {e}")
        return []

    return action_messages
|
||||||
|
|
||||||
|
@staticmethod
def _build_action_str_single(
    action_content: Tuple[float, str], timestamp_mode: Optional[str | TimestampMode] = None
) -> str:
    """
    Format one action entry as a single text line.

    Args:
        action_content: A ``(timestamp, action_text)`` pair.
        timestamp_mode: When truthy, the timestamp is rendered with
            ``translate_timestamp_to_human_readable`` and prepended in square brackets;
            when falsy, no timestamp is shown.

    Returns:
        The action text prefixed with "你执行了: ", optionally preceded by "[<time>] ".
    """
    occurred_at, text = action_content
    described_action = f"你执行了: {text}"
    if not timestamp_mode:
        return described_action

    readable_time = translate_timestamp_to_human_readable(occurred_at, mode=timestamp_mode)
    return f"[{readable_time}] {described_action}"
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from src.llm_models.utils_model import LLMRequest
|
|||||||
|
|
||||||
install(extra_lines=3)
|
install(extra_lines=3)
|
||||||
|
|
||||||
logger = get_logger("chat_voice")
|
logger = get_logger("voice_utils")
|
||||||
|
|
||||||
# TODO: 在LLMRequest重构后修改这里
|
# TODO: 在LLMRequest重构后修改这里
|
||||||
asr_model = LLMRequest(model_set=model_config.model_task_config.voice, request_type="audio")
|
asr_model = LLMRequest(model_set=model_config.model_task_config.voice, request_type="audio")
|
||||||
|
|||||||
Reference in New Issue
Block a user