TempUpdate

This commit is contained in:
UnCLAS-Prommer
2026-03-10 02:01:49 +08:00
committed by SengokuCola
parent 46cb0278d7
commit 9e2afaf6bc
4 changed files with 73 additions and 10 deletions

View File

@@ -19,6 +19,7 @@ from .expression_utils import check_expression_suitability, parse_expression_res
if TYPE_CHECKING:
from src.chat.message_receive.message import SessionMessage
from .jargon_miner import JargonMiner
logger = get_logger("expressor")
@@ -39,11 +40,15 @@ class ExpressionLearner:
# 消息缓存
self._messages_cache: List["SessionMessage"] = []
async def add_messages(self, messages: List["SessionMessage"]) -> None:
def add_messages(self, messages: List["SessionMessage"]) -> None:
    """Append the given messages to the end of the message cache."""
    cache = self._messages_cache
    cache.extend(messages)
async def learn(self):
def get_cache_size(self) -> int:
    """Return how many messages are currently held in the message cache."""
    cached_count = len(self._messages_cache)
    return cached_count
async def learn(self, jargon_miner: Optional["JargonMiner"] = None):
"""学习主流程"""
if not self._messages_cache:
logger.debug("没有消息可供学习,跳过学习过程")
@@ -73,6 +78,15 @@ class ExpressionLearner:
expressions, jargon_entries = parse_expression_response(response)
# TODO: 完成学习
# 从缓存检查 jargon 是否出现在 message 中
# ====== 黑话相关 ======
def _check_cached_jargons_in_messages(self, jargon_miner: Optional["JargonMiner"] = None):
if not jargon_miner:
return []
# TODO: 完成检测逻辑
# ====== DB 操作相关 ======
async def _upsert_expression_to_db(self, situation: str, style: str):
expr, similarity = self._find_similar_expression(situation) or (None, 0)
if expr:
@@ -132,6 +146,7 @@ class ExpressionLearner:
# count 增加后,立即进行一次检查
await self._check_expression(expr)
# ====== 概括方法 ======
async def _compose_situation_text(self, content_list: List[str]) -> Optional[str]:
texts = [c.strip() for c in content_list if c.strip()]
if not texts:
@@ -142,7 +157,6 @@ class ExpressionLearner:
f"{description}\n"
"只输出概括内容。"
)
try:
summary, _ = await summary_model.generate_response_async(prompt, temperature=0.2)
if summary := summary.strip():

View File

@@ -241,18 +241,17 @@ class HeartFChatting:
return
if self._expression_learner.get_cache_size() < self._min_messages_for_extraction:
return
if not self._enable_expression_learning:
return
extraction_end_time = time.time()
logger.info(
f"聊天流 {self.session_name} 提取到 {len(messages)} 条消息,"
f"时间窗口: {self._last_extraction_time:.2f} - {extraction_end_time:.2f}"
)
self._last_extraction_time = extraction_end_time
if self._enable_expression_learning:
asyncio.create_task(self._expression_learning())
async def _expression_learning(self):
try:
learnt_style = await self._expression_learner.learn()
jargon_miner = self._jargon_miner if self._enable_jargon_learning else None
learnt_style = await self._expression_learner.learn(jargon_miner)
if learnt_style:
logger.info(f"{self.log_prefix} 表达学习完成")
else:

View File

@@ -1,4 +1,17 @@
from enum import Enum
import hashlib
import time
class TimestampMode(Enum):
    """Modes for rendering a timestamp as text."""

    # Standard format with date and time, e.g. 2024-01-01 12:00:00.
    NORMAL = "%Y-%m-%d %H:%M:%S"
    # Time of day only, without year/month/day, e.g. 12:00:00.
    NORMAL_NO_YMD = "%H:%M:%S"
    # Relative time, e.g. "5 minutes ago" or "2 hours ago".
    RELATIVE = "relative"
def number_to_short_id(original_id: int, salt: str, length: int = 6) -> str:
"""
@@ -32,3 +45,33 @@ def number_to_short_id(original_id: int, salt: str, length: int = 6) -> str:
temp_num //= base
return short_id
def translate_timestamp_to_human_readable(timestamp: float, mode: TimestampMode) -> str:
    """Render a timestamp as human-readable text in the requested mode.

    Args:
        timestamp (float): The timestamp to convert.
        mode (TimestampMode): Conversion mode; NORMAL, NORMAL_NO_YMD and
            RELATIVE are supported.

    Returns:
        str: The converted time string.

    Raises:
        ValueError: If ``mode`` is not one of the supported modes.
    """
    if mode in (TimestampMode.NORMAL, TimestampMode.NORMAL_NO_YMD):
        # For these two modes the enum value is itself the strftime pattern.
        return time.strftime(mode.value, time.localtime(timestamp))

    if mode == TimestampMode.RELATIVE:
        elapsed = time.time() - timestamp
        # Anything under 20 seconds (negative differences included) reads as
        # "just now".
        if elapsed < 20:
            return "刚刚"
        if elapsed < 60:
            return f"{int(elapsed)}秒前"
        if elapsed < 3600:
            return f"{int(elapsed // 60)}分钟前"
        if elapsed < 86400:
            return f"{int(elapsed // 3600)}小时前"
        if elapsed < 2592000:  # 30 days, in seconds
            return f"{int(elapsed // 86400)}天前"
        # Older than that: fall back to the absolute date and time.
        return time.strftime(TimestampMode.NORMAL.value, time.localtime(timestamp))

    raise ValueError(f"不支持的时间戳转换模式: {mode}")

View File

@@ -1,3 +1,4 @@
from enum import Enum
from maim_message import MessageBase, Seg
from typing import List, Tuple, Optional, Dict, TYPE_CHECKING
@@ -22,7 +23,7 @@ from src.common.data_models.message_component_data_model import (
)
from src.config.config import global_config
from .math_utils import number_to_short_id
from .math_utils import number_to_short_id, TimestampMode, translate_timestamp_to_human_readable
if TYPE_CHECKING:
from src.chat.message_receive.message import SessionMessage
@@ -151,6 +152,9 @@ class MessageUtils:
extract_pictures: bool = False,
replace_bot_name: bool = False,
target_bot_name: Optional[str] = None,
timestamp_mode: Optional[TimestampMode] = None,
show_message_id_prefix: bool = False,
truncate_message: bool = False,
) -> Tuple[str, Dict[str, Tuple[str, str]]]:
"""
将消息构建为LLM可读的文本格式
@@ -161,7 +165,10 @@ class MessageUtils:
show_lineno (bool): 是否在每条消息前显示行号
extract_pictures (bool): 是否提取图片信息并在文本中显示占位符
replace_bot_name (bool): 是否将消息中的机器人名称替换为统一的占位符
target_bot_name (Optional[str]): 如果replace_bot_name为True指定要替换的机器人名称
target_bot_name (Optional[str]): 如果replace_bot_name为True指定要替换的机器人名称,比如可以把机器人名称替换为“你”
timestamp_mode (Optional[TimestampMode]): 时间戳显示模式默认为None表示不显示时间戳
show_message_id_prefix (bool): 是否在每条消息前显示消息ID前缀
truncate_message (bool): 是否截断过长的消息文本避免生成过长的输入给LLM
Returns:
return (Tuple[str, Dict[str, Tuple[str, str]]]): 构建后的消息文本以及映射表匿名ID, 原始名称)
"""