TempUpdate
This commit is contained in:
committed by
SengokuCola
parent
46cb0278d7
commit
9e2afaf6bc
@@ -19,6 +19,7 @@ from .expression_utils import check_expression_suitability, parse_expression_res
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.chat.message_receive.message import SessionMessage
|
||||
from .jargon_miner import JargonMiner
|
||||
|
||||
|
||||
logger = get_logger("expressor")
|
||||
@@ -39,11 +40,15 @@ class ExpressionLearner:
|
||||
# 消息缓存
|
||||
self._messages_cache: List["SessionMessage"] = []
|
||||
|
||||
async def add_messages(self, messages: List["SessionMessage"]) -> None:
|
||||
def add_messages(self, messages: List["SessionMessage"]) -> None:
    """Append messages to the in-memory message cache.

    Args:
        messages: Messages to add. They are appended to
            ``self._messages_cache`` in the order given.
    """
    self._messages_cache.extend(messages)
|
||||
|
||||
async def learn(self):
|
||||
def get_cache_size(self) -> int:
    """Return the number of messages currently held in the message cache."""
    return len(self._messages_cache)
|
||||
|
||||
async def learn(self, jargon_miner: Optional["JargonMiner"] = None):
|
||||
"""学习主流程"""
|
||||
if not self._messages_cache:
|
||||
logger.debug("没有消息可供学习,跳过学习过程")
|
||||
@@ -73,6 +78,15 @@ class ExpressionLearner:
|
||||
expressions, jargon_entries = parse_expression_response(response)
|
||||
# TODO: 完成学习
|
||||
|
||||
# 从缓存检查 jargon 是否出现在 message 中
|
||||
|
||||
# ====== 黑话相关 ======
|
||||
def _check_cached_jargons_in_messages(self, jargon_miner: Optional["JargonMiner"] = None):
|
||||
if not jargon_miner:
|
||||
return []
|
||||
# TODO: 完成检测逻辑
|
||||
|
||||
# ====== DB 操作相关 ======
|
||||
async def _upsert_expression_to_db(self, situation: str, style: str):
|
||||
expr, similarity = self._find_similar_expression(situation) or (None, 0)
|
||||
if expr:
|
||||
@@ -132,6 +146,7 @@ class ExpressionLearner:
|
||||
# count 增加后,立即进行一次检查
|
||||
await self._check_expression(expr)
|
||||
|
||||
# ====== 概括方法 ======
|
||||
async def _compose_situation_text(self, content_list: List[str]) -> Optional[str]:
|
||||
texts = [c.strip() for c in content_list if c.strip()]
|
||||
if not texts:
|
||||
@@ -142,7 +157,6 @@ class ExpressionLearner:
|
||||
f"{description}\n"
|
||||
"只输出概括内容。"
|
||||
)
|
||||
|
||||
try:
|
||||
summary, _ = await summary_model.generate_response_async(prompt, temperature=0.2)
|
||||
if summary := summary.strip():
|
||||
|
||||
@@ -241,18 +241,17 @@ class HeartFChatting:
|
||||
return
|
||||
if self._expression_learner.get_cache_size() < self._min_messages_for_extraction:
|
||||
return
|
||||
if not self._enable_expression_learning:
|
||||
return
|
||||
extraction_end_time = time.time()
|
||||
logger.info(
|
||||
f"聊天流 {self.session_name} 提取到 {len(messages)} 条消息,"
|
||||
f"时间窗口: {self._last_extraction_time:.2f} - {extraction_end_time:.2f}"
|
||||
)
|
||||
self._last_extraction_time = extraction_end_time
|
||||
if self._enable_expression_learning:
|
||||
asyncio.create_task(self._expression_learning())
|
||||
|
||||
async def _expression_learning(self):
|
||||
try:
|
||||
learnt_style = await self._expression_learner.learn()
|
||||
jargon_miner = self._jargon_miner if self._enable_jargon_learning else None
|
||||
learnt_style = await self._expression_learner.learn(jargon_miner)
|
||||
if learnt_style:
|
||||
logger.info(f"{self.log_prefix} 表达学习完成")
|
||||
else:
|
||||
|
||||
@@ -1,4 +1,17 @@
|
||||
from enum import Enum
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
|
||||
|
||||
class TimestampMode(Enum):
    """Modes for rendering a timestamp as text.

    The values of ``NORMAL`` and ``NORMAL_NO_YMD`` are ``strftime`` format
    strings; ``RELATIVE`` is a marker value rather than a format string.
    """

    # Standard format, e.g. 2024-01-01 12:00:00
    NORMAL = "%Y-%m-%d %H:%M:%S"
    # Time of day only, without year/month/day, e.g. 12:00:00
    NORMAL_NO_YMD = "%H:%M:%S"
    # Relative time, e.g. "5 minutes ago", "2 hours ago"
    RELATIVE = "relative"
|
||||
|
||||
|
||||
def number_to_short_id(original_id: int, salt: str, length: int = 6) -> str:
|
||||
"""
|
||||
@@ -32,3 +45,33 @@ def number_to_short_id(original_id: int, salt: str, length: int = 6) -> str:
|
||||
temp_num //= base
|
||||
|
||||
return short_id
|
||||
|
||||
|
||||
def translate_timestamp_to_human_readable(timestamp: float, mode: TimestampMode) -> str:
    """Convert a timestamp to a human-readable string using the given mode.

    Args:
        timestamp: The timestamp to convert, in seconds (it is passed to
            ``time.localtime`` and compared against ``time.time()``).
        mode: The conversion mode. ``NORMAL`` and ``NORMAL_NO_YMD`` format
            the local time with the mode's ``strftime`` pattern; ``RELATIVE``
            describes how long ago the timestamp was.

    Returns:
        The formatted time string.

    Raises:
        ValueError: If ``mode`` is not one of the supported modes.
    """
    # Absolute modes: the enum value is itself the strftime pattern.
    if mode in (TimestampMode.NORMAL, TimestampMode.NORMAL_NO_YMD):
        return time.strftime(mode.value, time.localtime(timestamp))

    if mode != TimestampMode.RELATIVE:
        raise ValueError(f"不支持的时间戳转换模式: {mode}")

    # Elapsed seconds; a timestamp in the future yields a negative value,
    # which lands in the first ("just now") bucket below.
    time_diff = time.time() - timestamp

    if time_diff < 20:
        return "刚刚"
    if time_diff < 60:
        return f"{int(time_diff)}秒前"
    if time_diff < 3600:
        return f"{int(time_diff // 60)}分钟前"
    if time_diff < 86400:
        return f"{int(time_diff // 3600)}小时前"
    if time_diff < 2592000:  # 30 days
        return f"{int(time_diff // 86400)}天前"

    # Older than 30 days: fall back to the full absolute date/time.
    return time.strftime(TimestampMode.NORMAL.value, time.localtime(timestamp))
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from enum import Enum
|
||||
from maim_message import MessageBase, Seg
|
||||
from typing import List, Tuple, Optional, Dict, TYPE_CHECKING
|
||||
|
||||
@@ -22,7 +23,7 @@ from src.common.data_models.message_component_data_model import (
|
||||
)
|
||||
from src.config.config import global_config
|
||||
|
||||
from .math_utils import number_to_short_id
|
||||
from .math_utils import number_to_short_id, TimestampMode, translate_timestamp_to_human_readable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.chat.message_receive.message import SessionMessage
|
||||
@@ -151,6 +152,9 @@ class MessageUtils:
|
||||
extract_pictures: bool = False,
|
||||
replace_bot_name: bool = False,
|
||||
target_bot_name: Optional[str] = None,
|
||||
timestamp_mode: Optional[TimestampMode] = None,
|
||||
show_message_id_prefix: bool = False,
|
||||
truncate_message: bool = False,
|
||||
) -> Tuple[str, Dict[str, Tuple[str, str]]]:
|
||||
"""
|
||||
将消息构建为LLM可读的文本格式
|
||||
@@ -161,7 +165,10 @@ class MessageUtils:
|
||||
show_lineno (bool): 是否在每条消息前显示行号
|
||||
extract_pictures (bool): 是否提取图片信息并在文本中显示占位符
|
||||
replace_bot_name (bool): 是否将消息中的机器人名称替换为统一的占位符
|
||||
target_bot_name (Optional[str]): 如果replace_bot_name为True,指定要替换的机器人名称
|
||||
target_bot_name (Optional[str]): 如果replace_bot_name为True,指定要替换的机器人名称,比如可以把机器人名称替换为“你”
|
||||
timestamp_mode (Optional[TimestampMode]): 时间戳显示模式,默认为None表示不显示时间戳
|
||||
show_message_id_prefix (bool): 是否在每条消息前显示消息ID前缀
|
||||
truncate_message (bool): 是否截断过长的消息文本,避免生成过长的输入给LLM
|
||||
Returns:
|
||||
return (Tuple[str, Dict[str, Tuple[str, str]]]): 构建后的消息文本,以及映射表(匿名ID, 原始名称)
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user