diff --git a/src/bw_learner/expression_learner.py b/src/bw_learner/expression_learner.py index 0dc4c726..54e413ad 100644 --- a/src/bw_learner/expression_learner.py +++ b/src/bw_learner/expression_learner.py @@ -19,6 +19,7 @@ from .expression_utils import check_expression_suitability, parse_expression_res if TYPE_CHECKING: from src.chat.message_receive.message import SessionMessage + from .jargon_miner import JargonMiner logger = get_logger("expressor") @@ -39,11 +40,15 @@ class ExpressionLearner: # 消息缓存 self._messages_cache: List["SessionMessage"] = [] - async def add_messages(self, messages: List["SessionMessage"]) -> None: + def add_messages(self, messages: List["SessionMessage"]) -> None: """添加消息到缓存""" self._messages_cache.extend(messages) - async def learn(self): + def get_cache_size(self) -> int: + """获取当前消息缓存的大小""" + return len(self._messages_cache) + + async def learn(self, jargon_miner: Optional["JargonMiner"] = None): """学习主流程""" if not self._messages_cache: logger.debug("没有消息可供学习,跳过学习过程") @@ -73,6 +78,15 @@ class ExpressionLearner: expressions, jargon_entries = parse_expression_response(response) # TODO: 完成学习 + # 从缓存检查 jargon 是否出现在 message 中 + + # ====== 黑话相关 ====== + def _check_cached_jargons_in_messages(self, jargon_miner: Optional["JargonMiner"] = None): + if not jargon_miner: + return [] + # TODO: 完成检测逻辑 + + # ====== DB 操作相关 ====== async def _upsert_expression_to_db(self, situation: str, style: str): expr, similarity = self._find_similar_expression(situation) or (None, 0) if expr: @@ -132,6 +146,7 @@ class ExpressionLearner: # count 增加后,立即进行一次检查 await self._check_expression(expr) + # ====== 概括方法 ====== async def _compose_situation_text(self, content_list: List[str]) -> Optional[str]: texts = [c.strip() for c in content_list if c.strip()] if not texts: @@ -142,7 +157,6 @@ class ExpressionLearner: f"{description}\n" "只输出概括内容。" ) - try: summary, _ = await summary_model.generate_response_async(prompt, temperature=0.2) if summary := summary.strip(): diff --git a/src/chat/heart_flow/heartFC_chat.py b/src/chat/heart_flow/heartFC_chat.py index 23211925..0e1d364b 100644 --- a/src/chat/heart_flow/heartFC_chat.py +++ b/src/chat/heart_flow/heartFC_chat.py @@ -241,18 +241,17 @@ class HeartFChatting: return if self._expression_learner.get_cache_size() < self._min_messages_for_extraction: return + if not self._enable_expression_learning: + return extraction_end_time = time.time() logger.info( f"聊天流 {self.session_name} 提取到 {len(messages)} 条消息," f"时间窗口: {self._last_extraction_time:.2f} - {extraction_end_time:.2f}" ) self._last_extraction_time = extraction_end_time - if self._enable_expression_learning: - asyncio.create_task(self._expression_learning()) - - async def _expression_learning(self): try: - learnt_style = await self._expression_learner.learn() + jargon_miner = self._jargon_miner if self._enable_jargon_learning else None + learnt_style = await self._expression_learner.learn(jargon_miner) if learnt_style: logger.info(f"{self.log_prefix} 表达学习完成") else: diff --git a/src/common/utils/math_utils.py b/src/common/utils/math_utils.py index d37ba793..9b70296c 100644 --- a/src/common/utils/math_utils.py +++ b/src/common/utils/math_utils.py @@ -1,4 +1,17 @@ +from enum import Enum + import hashlib +import time + + +class TimestampMode(Enum): + NORMAL = "%Y-%m-%d %H:%M:%S" + """标准格式,例如 2024-01-01 12:00:00""" + NORMAL_NO_YMD = "%H:%M:%S" + """仅显示时间不显示年月日,例如 12:00:00""" + RELATIVE = "relative" + """相对时间,例如 5分钟前、2小时前等""" + def number_to_short_id(original_id: int, salt: str, length: int = 6) -> str: """ @@ -32,3 +45,33 @@ def number_to_short_id(original_id: int, salt: str, length: int = 6) -> str: temp_num //= base return short_id + + +def translate_timestamp_to_human_readable(timestamp: float, mode: TimestampMode) -> str: + """将时间戳按照指定模式转换为人类可读的格式 + + Args: + timestamp (float): 需要转换的时间戳 + mode (TimestampMode): 时间戳转换模式,支持NORMAL、NORMAL_NO_YMD和RELATIVE三种模式 + Returns: + str: 转换后的时间字符串 + """ + if mode in [TimestampMode.NORMAL, TimestampMode.NORMAL_NO_YMD]: + return time.strftime(mode.value, time.localtime(timestamp)) + elif mode == TimestampMode.RELATIVE: + time_diff = time.time() - timestamp + + if time_diff < 20: + return "刚刚" + elif time_diff < 60: + return f"{int(time_diff)}秒前" + elif time_diff < 3600: + return f"{int(time_diff // 60)}分钟前" + elif time_diff < 86400: + return f"{int(time_diff // 3600)}小时前" + elif time_diff < 2592000: + return f"{int(time_diff // 86400)}天前" + else: + return time.strftime(TimestampMode.NORMAL.value, time.localtime(timestamp)) + else: + raise ValueError(f"不支持的时间戳转换模式: {mode}") diff --git a/src/common/utils/utils_message.py b/src/common/utils/utils_message.py index c6844f1f..03939e8a 100644 --- a/src/common/utils/utils_message.py +++ b/src/common/utils/utils_message.py @@ -1,3 +1,4 @@ +from enum import Enum from maim_message import MessageBase, Seg from typing import List, Tuple, Optional, Dict, TYPE_CHECKING @@ -22,7 +23,7 @@ from src.common.data_models.message_component_data_model import ( ) from src.config.config import global_config -from .math_utils import number_to_short_id +from .math_utils import number_to_short_id, TimestampMode, translate_timestamp_to_human_readable if TYPE_CHECKING: from src.chat.message_receive.message import SessionMessage @@ -151,6 +152,9 @@ class MessageUtils: extract_pictures: bool = False, replace_bot_name: bool = False, target_bot_name: Optional[str] = None, + timestamp_mode: Optional[TimestampMode] = None, + show_message_id_prefix: bool = False, + truncate_message: bool = False, ) -> Tuple[str, Dict[str, Tuple[str, str]]]: """ 将消息构建为LLM可读的文本格式 @@ -161,7 +165,10 @@ class MessageUtils: show_lineno (bool): 是否在每条消息前显示行号 extract_pictures (bool): 是否提取图片信息并在文本中显示占位符 replace_bot_name (bool): 是否将消息中的机器人名称替换为统一的占位符 - target_bot_name (Optional[str]): 如果replace_bot_name为True,指定要替换的机器人名称 + target_bot_name (Optional[str]): 如果replace_bot_name为True,指定要替换的机器人名称,比如可以把机器人名称替换为“你” + timestamp_mode (Optional[TimestampMode]): 时间戳显示模式,默认为None表示不显示时间戳 + show_message_id_prefix (bool): 是否在每条消息前显示消息ID前缀 + truncate_message (bool): 是否截断过长的消息文本,避免生成过长的输入给LLM Returns: return (Tuple[str, Dict[str, Tuple[str, str]]]): 构建后的消息文本,以及映射表(匿名ID, 原始名称) """