Merge branch 'Mai-with-u:main' into feat-lpmm知识库加强

This commit is contained in:
Dawn ARC
2025-12-10 18:12:42 +08:00
committed by GitHub
89 changed files with 7173 additions and 2706 deletions

View File

@@ -235,13 +235,13 @@ class BrainChatting:
if recent_messages_list is None:
recent_messages_list = []
_reply_text = "" # 初始化reply_text变量避免UnboundLocalError
# -------------------------------------------------------------------------
# ReflectTracker Check
# 在每次回复前检查一次上下文,看是否有反思问题得到了解答
# -------------------------------------------------------------------------
from src.express.reflect_tracker import reflect_tracker_manager
tracker = reflect_tracker_manager.get_tracker(self.stream_id)
if tracker:
resolved = await tracker.trigger_tracker()
@@ -254,6 +254,7 @@ class BrainChatting:
# 检查是否需要提问表达反思
# -------------------------------------------------------------------------
from src.express.expression_reflector import expression_reflector_manager
reflector = expression_reflector_manager.get_or_create_reflector(self.stream_id)
asyncio.create_task(reflector.check_and_ask())

View File

@@ -356,7 +356,7 @@ async def clean_unused_emojis(emoji_dir: str, emoji_objects: List["MaiEmoji"], r
if cleaned_count > 0:
logger.info(f"[清理] 在目录 {emoji_dir} 中清理了 {cleaned_count} 个破损表情包。")
else:
logger.info(f"[清理] 目录 {emoji_dir} 中没有需要清理的。")
logger.debug(f"[清理] 目录 {emoji_dir} 中没有需要清理的。")
except Exception as e:
logger.error(f"[错误] 清理未使用表情包文件时出错 ({emoji_dir}): {str(e)}")

View File

@@ -1,5 +1,6 @@
from datetime import datetime
import time
import asyncio
from typing import Dict
from src.chat.utils.chat_message_builder import (
@@ -46,6 +47,8 @@ class FrequencyControl:
self.frequency_model = LLMRequest(
model_set=model_config.model_task_config.utils_small, request_type="frequency.adjust"
)
# 频率调整锁,防止并发执行
self._adjust_lock = asyncio.Lock()
def get_talk_frequency_adjust(self) -> float:
"""获取发言频率调整值"""
@@ -56,68 +59,78 @@ class FrequencyControl:
self.talk_frequency_adjust = max(0.1, min(5.0, value))
async def trigger_frequency_adjust(self) -> None:
msg_list = get_raw_msg_by_timestamp_with_chat(
chat_id=self.chat_id,
timestamp_start=self.last_frequency_adjust_time,
timestamp_end=time.time(),
)
if time.time() - self.last_frequency_adjust_time < 160 or len(msg_list) <= 20:
return
else:
new_msg_list = get_raw_msg_by_timestamp_with_chat(
# 使用异步锁防止并发执行
async with self._adjust_lock:
# 在锁内检查,避免并发触发
current_time = time.time()
previous_adjust_time = self.last_frequency_adjust_time
msg_list = get_raw_msg_by_timestamp_with_chat(
chat_id=self.chat_id,
timestamp_start=self.last_frequency_adjust_time,
timestamp_end=time.time(),
limit=20,
limit_mode="latest",
timestamp_start=previous_adjust_time,
timestamp_end=current_time,
)
message_str = build_readable_messages(
new_msg_list,
replace_bot_name=True,
timestamp_mode="relative",
read_mark=0.0,
show_actions=False,
)
time_block = f"当前时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
bot_name = global_config.bot.nickname
bot_nickname = (
f",也有人叫你{','.join(global_config.bot.alias_names)}" if global_config.bot.alias_names else ""
)
name_block = f"你的名字是{bot_name}{bot_nickname},请注意哪些是你自己的发言。"
if current_time - previous_adjust_time < 160 or len(msg_list) <= 20:
return
prompt = await global_prompt_manager.format_prompt(
"frequency_adjust_prompt",
name_block=name_block,
time_block=time_block,
message_str=message_str,
)
response, (reasoning_content, _, _) = await self.frequency_model.generate_response_async(
prompt,
)
# 立即更新调整时间,防止并发触发
self.last_frequency_adjust_time = current_time
# logger.info(f"频率调整 prompt: {prompt}")
# logger.info(f"频率调整 response: {response}")
try:
new_msg_list = get_raw_msg_by_timestamp_with_chat(
chat_id=self.chat_id,
timestamp_start=previous_adjust_time,
timestamp_end=current_time,
limit=20,
limit_mode="latest",
)
if global_config.debug.show_prompt:
logger.info(f"频率调整 prompt: {prompt}")
logger.info(f"频率调整 response: {response}")
logger.info(f"频率调整 reasoning_content: {reasoning_content}")
message_str = build_readable_messages(
new_msg_list,
replace_bot_name=True,
timestamp_mode="relative",
read_mark=0.0,
show_actions=False,
)
time_block = f"当前时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
bot_name = global_config.bot.nickname
bot_nickname = (
f",也有人叫你{','.join(global_config.bot.alias_names)}" if global_config.bot.alias_names else ""
)
name_block = f"你的名字是{bot_name}{bot_nickname},请注意哪些是你自己的发言。"
final_value_by_api = frequency_api.get_current_talk_value(self.chat_id)
prompt = await global_prompt_manager.format_prompt(
"frequency_adjust_prompt",
name_block=name_block,
time_block=time_block,
message_str=message_str,
)
response, (reasoning_content, _, _) = await self.frequency_model.generate_response_async(
prompt,
)
# LLM依然输出过多内容时取消本次调整。合法最多4个字但有的模型可能会输出一些markdown换行符等需要长度宽限
if len(response) < 20:
if "过于频繁" in response:
logger.info(f"频率调整: 过于频繁,调整值到{final_value_by_api}")
self.talk_frequency_adjust = max(0.1, min(1.5, self.talk_frequency_adjust * 0.8))
elif "过少" in response:
logger.info(f"频率调整: 过少,调整值到{final_value_by_api}")
self.talk_frequency_adjust = max(0.1, min(1.5, self.talk_frequency_adjust * 1.2))
self.last_frequency_adjust_time = time.time()
else:
logger.info("频率调整response不符合要求取消本次调整")
# logger.info(f"频率调整 prompt: {prompt}")
# logger.info(f"频率调整 response: {response}")
if global_config.debug.show_prompt:
logger.info(f"频率调整 prompt: {prompt}")
logger.info(f"频率调整 response: {response}")
logger.info(f"频率调整 reasoning_content: {reasoning_content}")
final_value_by_api = frequency_api.get_current_talk_value(self.chat_id)
# LLM依然输出过多内容时取消本次调整。合法最多4个字但有的模型可能会输出一些markdown换行符等需要长度宽限
if len(response) < 20:
if "过于频繁" in response:
logger.info(f"频率调整: 过于频繁,调整值到{final_value_by_api}")
self.talk_frequency_adjust = max(0.1, min(1.5, self.talk_frequency_adjust * 0.8))
elif "过少" in response:
logger.info(f"频率调整: 过少,调整值到{final_value_by_api}")
self.talk_frequency_adjust = max(0.1, min(1.5, self.talk_frequency_adjust * 1.2))
except Exception as e:
logger.error(f"频率调整失败: {e}")
# 即使失败也保持时间戳更新,避免频繁重试
class FrequencyControlManager:

View File

@@ -29,7 +29,7 @@ from src.chat.utils.chat_message_builder import (
build_readable_messages_with_id,
get_raw_msg_before_timestamp_with_chat,
)
from src.chat.utils.chat_history_summarizer import ChatHistorySummarizer
from src.hippo_memorizer.chat_history_summarizer import ChatHistorySummarizer
if TYPE_CHECKING:
from src.common.data_models.database_data_model import DatabaseMessages
@@ -400,7 +400,7 @@ class HeartFChatting:
# ReflectTracker Check
# 在每次回复前检查一次上下文,看是否有反思问题得到了解答
# -------------------------------------------------------------------------
reflector = expression_reflector_manager.get_or_create_reflector(self.stream_id)
await reflector.check_and_ask()
tracker = reflect_tracker_manager.get_tracker(self.stream_id)
@@ -410,7 +410,6 @@ class HeartFChatting:
reflect_tracker_manager.remove_tracker(self.stream_id)
logger.info(f"{self.log_prefix} ReflectTracker resolved and removed.")
start_time = time.time()
async with global_prompt_manager.async_message_scope(self.chat_stream.context.get_template_name()):
asyncio.create_task(self.expression_learner.trigger_learning_for_chat())
@@ -427,7 +426,9 @@ class HeartFChatting:
# asyncio.create_task(self.chat_history_summarizer.process())
cycle_timers, thinking_id = self.start_cycle()
logger.info(f"{self.log_prefix} 开始第{self._cycle_counter}次思考(频率: {global_config.chat.get_talk_value(self.stream_id)})")
logger.info(
f"{self.log_prefix} 开始第{self._cycle_counter}次思考(频率: {global_config.chat.get_talk_value(self.stream_id)})"
)
# 第一步:动作检查
available_actions: Dict[str, ActionInfo] = {}

View File

@@ -39,6 +39,11 @@ class HeartFCMessageReceiver:
message_data: 原始消息字符串
"""
try:
# 通知消息不处理
if message.is_notify:
logger.debug("通知消息,跳过处理")
return
# 1. 消息解析与初始化
userinfo = message.message_info.user_info
chat = message.chat_stream

View File

@@ -33,6 +33,11 @@ class MessageStorage:
async def store_message(message: Union[MessageSending, MessageRecv], chat_stream: ChatStream) -> None:
"""存储消息到数据库"""
try:
# 通知消息不存储
if isinstance(message, MessageRecv) and message.is_notify:
logger.debug("通知消息,跳过存储")
return
pattern = r"<MainRule>.*?</MainRule>|<schedule>.*?</schedule>|<UserMessage>.*?</UserMessage>"
# print(message)

View File

@@ -15,12 +15,72 @@ install(extra_lines=3)
logger = get_logger("sender")
# Cached broadcaster for the WebUI chat room (imported lazily to avoid a circular dependency).
# Stays None until get_webui_chat_broadcaster() fills it in on first use.
_webui_chat_broadcaster = None
# Prefix of virtual group IDs (the original note says to keep it in sync with chat_routes.py).
VIRTUAL_GROUP_ID_PREFIX = "webui_virtual_group_"
def get_webui_chat_broadcaster():
    """Return the cached ``(chat_manager, platform)`` pair used for WebUI broadcasting.

    The pair is resolved on the first call by importing ``src.webui.chat_routes``
    lazily and is stored in the module-level ``_webui_chat_broadcaster``. When
    that import fails, ``(None, None)`` is cached instead, so later calls do not
    retry the import.

    Returns:
        A 2-tuple ``(chat_manager, WEBUI_CHAT_PLATFORM)``, or ``(None, None)``
        when the WebUI routes module cannot be imported.
    """
    global _webui_chat_broadcaster
    if _webui_chat_broadcaster is not None:
        # Already resolved (successfully or not) by an earlier call.
        return _webui_chat_broadcaster
    try:
        from src.webui.chat_routes import chat_manager, WEBUI_CHAT_PLATFORM
    except ImportError:
        _webui_chat_broadcaster = (None, None)
    else:
        _webui_chat_broadcaster = (chat_manager, WEBUI_CHAT_PLATFORM)
    return _webui_chat_broadcaster
def is_webui_virtual_group(group_id: str) -> bool:
    """Tell whether ``group_id`` belongs to a WebUI virtual group.

    Args:
        group_id: Group identifier to test. Falsy values (``None`` or ``""``,
            as passed for messages without group info) are accepted.

    Returns:
        ``True`` when ``group_id`` starts with ``VIRTUAL_GROUP_ID_PREFIX``,
        otherwise ``False``. The result is always a real ``bool``.
    """
    # A bare ``group_id and ...`` would hand ``None`` / ``""`` back to callers
    # for falsy ids; coerce first so the declared ``bool`` return type holds.
    return bool(group_id) and group_id.startswith(VIRTUAL_GROUP_ID_PREFIX)
async def _send_message(message: MessageSending, show_log=True) -> bool:
"""合并后的消息发送函数包含WS发送和日志记录"""
message_preview = truncate_message(message.processed_plain_text, max_length=200)
platform = message.message_info.platform
group_id = message.message_info.group_info.group_id if message.message_info.group_info else None
try:
# 检查是否是 WebUI 平台的消息,或者是 WebUI 虚拟群的消息
chat_manager, webui_platform = get_webui_chat_broadcaster()
is_webui_message = (platform == webui_platform) or is_webui_virtual_group(group_id)
if is_webui_message and chat_manager is not None:
# WebUI 聊天室消息(包括虚拟身份模式),通过 WebSocket 广播
import time
from src.config.config import global_config
await chat_manager.broadcast(
{
"type": "bot_message",
"content": message.processed_plain_text,
"message_type": "text",
"timestamp": time.time(),
"group_id": group_id, # 包含群 ID 以便前端区分不同的聊天标签
"sender": {
"name": global_config.bot.nickname,
"avatar": None,
"is_bot": True,
},
}
)
# 注意:机器人消息会由 MessageStorage.store_message 自动保存到数据库
# 无需手动保存
if show_log:
if is_webui_virtual_group(group_id):
logger.info(f"已将消息 '{message_preview}' 发往 WebUI 虚拟群 (平台: {platform})")
else:
logger.info(f"已将消息 '{message_preview}' 发往 WebUI 聊天室")
return True
# 直接调用API发送消息
await get_global_api().send_message(message)
if show_log:

View File

@@ -181,8 +181,12 @@ class ActionPlanner:
found_ids = set(matches)
missing_ids = found_ids - available_ids
if missing_ids:
logger.info(f"{self.log_prefix}planner理由中引用的消息ID不在当前上下文中: {missing_ids}, 可用ID: {list(available_ids)[:10]}...")
logger.info(f"{self.log_prefix}planner理由替换: 找到{len(matches)}个消息ID引用其中{len(found_ids & available_ids)}个在上下文中")
logger.info(
f"{self.log_prefix}planner理由中引用的消息ID不在当前上下文中: {missing_ids}, 可用ID: {list(available_ids)[:10]}..."
)
logger.info(
f"{self.log_prefix}planner理由替换: 找到{len(matches)}个消息ID引用其中{len(found_ids & available_ids)}个在上下文中"
)
def _replace(match: re.Match[str]) -> str:
msg_id = match.group(0)
@@ -222,7 +226,8 @@ class ActionPlanner:
# 非no_reply动作需要target_message_id
target_message = None
if target_message_id := action_json.get("target_message_id"):
target_message_id = action_json.get("target_message_id")
if target_message_id:
# 根据target_message_id查找原始消息
target_message = self.find_message_by_id(target_message_id, message_id_list)
if target_message is None:
@@ -233,6 +238,14 @@ class ActionPlanner:
target_message = message_id_list[-1][1]
logger.debug(f"{self.log_prefix}动作'{action}'缺少target_message_id使用最新消息作为target_message")
if action != "no_reply" and target_message is not None and self._is_message_from_self(target_message):
logger.info(
f"{self.log_prefix}Planner选择了自己的消息 {target_message_id or target_message.message_id} 作为目标,强制使用 no_reply"
)
reasoning = f"目标消息 {target_message_id or target_message.message_id} 来自机器人自身,违反不回复自身消息规则。原始理由: {reasoning}"
action = "no_reply"
target_message = None
# 验证action是否可用
available_action_names = [action_name for action_name, _ in current_available_actions]
internal_action_names = ["no_reply", "reply", "wait_time", "no_reply_until_call"]
@@ -277,6 +290,16 @@ class ActionPlanner:
return action_planner_infos
def _is_message_from_self(self, message: "DatabaseMessages") -> bool:
    """Report whether ``message`` was sent by the bot itself.

    A message counts as the bot's own when its sender id equals the configured
    ``global_config.bot.qq_account`` (both compared as strings) and its
    platform equals ``global_config.bot.platform`` (missing values on either
    side are treated as an empty string).

    Args:
        message: The message whose sender is inspected.

    Returns:
        ``True`` when both id and platform match; ``False`` otherwise, and also
        when a required attribute is missing (a warning is logged then).
    """
    try:
        sender = message.user_info
        if str(sender.user_id) != str(global_config.bot.qq_account):
            return False
        sender_platform = sender.platform or ""
        bot_platform = global_config.bot.platform or ""
        return sender_platform == bot_platform
    except AttributeError:
        logger.warning(f"{self.log_prefix}检测消息发送者失败,缺少必要字段")
        return False
async def plan(
self,
available_actions: Dict[str, ActionInfo],
@@ -754,20 +777,20 @@ class ActionPlanner:
json_content_start = json_start_pos + 7 # ```json的长度
# 提取从```json之后到内容结尾的所有内容
incomplete_json_str = content[json_content_start:].strip()
# 提取JSON之前的内容作为推理文本
if json_start_pos > 0:
reasoning_content = content[:json_start_pos].strip()
reasoning_content = re.sub(r"^//\s*", "", reasoning_content, flags=re.MULTILINE)
reasoning_content = reasoning_content.strip()
if incomplete_json_str:
try:
# 清理可能的注释和格式问题
json_str = re.sub(r"//.*?\n", "\n", incomplete_json_str)
json_str = re.sub(r"/\*.*?\*/", "", json_str, flags=re.DOTALL)
json_str = json_str.strip()
if json_str:
# 尝试按行分割每行可能是一个JSON对象
lines = [line.strip() for line in json_str.split("\n") if line.strip()]
@@ -782,7 +805,7 @@ class ActionPlanner:
json_objects.append(item)
except json.JSONDecodeError:
pass
# 如果按行解析没有成功尝试将整个块作为一个JSON对象或数组
if not json_objects:
try:

View File

@@ -839,8 +839,6 @@ class DefaultReplyer:
continue
timing_logs.append(f"{chinese_name}: {duration:.1f}s")
if duration > 12:
logger.warning(f"回复生成前信息获取耗时过长: {chinese_name} 耗时: {duration:.1f}s请使用更快的模型")
logger.info(f"回复准备: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s")
expression_habits_block, selected_expressions = results_dict["expression_habits"]

View File

@@ -760,8 +760,6 @@ class PrivateReplyer:
continue
timing_logs.append(f"{chinese_name}: {duration:.1f}s")
if duration > 12:
logger.warning(f"回复生成前信息获取耗时过长: {chinese_name} 耗时: {duration:.1f}s请使用更快的模型")
logger.info(f"回复准备: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s")
expression_habits_block, selected_expressions = results_dict["expression_habits"]

View File

@@ -1,493 +0,0 @@
"""
聊天内容概括器
用于累积、打包和压缩聊天记录
"""
import asyncio
import json
import time
from typing import List, Optional, Set
from dataclasses import dataclass
from src.common.logger import get_logger
from src.common.data_models.database_data_model import DatabaseMessages
from src.config.config import global_config, model_config
from src.llm_models.utils_model import LLMRequest
from src.plugin_system.apis import message_api
from src.chat.utils.chat_message_builder import build_readable_messages
from src.person_info.person_info import Person
from src.chat.message_receive.chat_stream import get_chat_manager
logger = get_logger("chat_history_summarizer")
@dataclass
class MessageBatch:
    """A run of chat messages being accumulated before it is summarized."""

    # Messages collected so far.
    messages: List[DatabaseMessages]
    # Timestamp at which the batch begins.
    start_time: float
    # Timestamp of the most recent update to the batch.
    end_time: float
    # True once the batch has entered "preparing to finish" mode.
    is_preparing: bool = False
class ChatHistorySummarizer:
"""聊天内容概括器"""
def __init__(self, chat_id: str, check_interval: int = 60):
    """Create a summarizer bound to one chat.

    Args:
        chat_id: Identifier of the chat whose history is summarized.
        check_interval: Seconds between two background checks (default 60).
    """
    self.chat_id = chat_id

    # Background-loop state; the task itself is created in start().
    self.check_interval = check_interval
    self._running = False
    self._periodic_task: Optional[asyncio.Task] = None

    # Batch currently being accumulated, if any.
    self.current_batch: Optional[MessageBatch] = None

    # The display name lookup reads self.chat_id, so it must come after it.
    self._chat_display_name = self._get_chat_display_name()
    self.log_prefix = f"[{self._chat_display_name}]"

    # Only messages newer than this timestamp are fetched by process().
    self.last_check_time = time.time()

    # LLM client used to compress a batch of chat text.
    self.summarizer_llm = LLMRequest(
        model_set=model_config.model_task_config.utils, request_type="chat_history_summarizer"
    )
def _get_chat_display_name(self) -> str:
"""获取聊天显示名称"""
try:
chat_name = get_chat_manager().get_stream_name(self.chat_id)
if chat_name:
return chat_name
# 如果获取失败使用简化的chat_id显示
if len(self.chat_id) > 20:
return f"{self.chat_id[:8]}..."
return self.chat_id
except Exception:
# 如果获取失败使用简化的chat_id显示
if len(self.chat_id) > 20:
return f"{self.chat_id[:8]}..."
return self.chat_id
async def process(self, current_time: Optional[float] = None):
    """Pull new messages into the current batch and package it when due.

    Messages between ``self.last_check_time`` and ``current_time`` are fetched;
    they either extend the existing batch or start a new one, after which the
    packaging conditions are evaluated. Any exception is logged and swallowed.

    Args:
        current_time: Timestamp to treat as "now"; ``time.time()`` when ``None``.
    """
    now = time.time() if current_time is None else current_time

    try:
        # Bot messages are kept (filter_mai=False) because packaging later
        # checks whether the bot spoke.
        fetched = message_api.get_messages_by_time_in_chat(
            chat_id=self.chat_id,
            start_time=self.last_check_time,
            end_time=now,
            limit=0,
            limit_mode="latest",
            filter_mai=False,
            filter_command=False,
        )

        if fetched:
            logger.debug(f"{self.log_prefix} 开始处理聊天概括,时间窗口: {self.last_check_time:.2f} -> {now:.2f}")
            self.last_check_time = now

            batch = self.current_batch
            if batch:
                previous_size = len(batch.messages)
                batch.messages.extend(fetched)
                batch.end_time = now
                logger.info(f"{self.log_prefix} 更新聊天话题: {previous_size} -> {len(batch.messages)} 条消息")
            else:
                # `fetched` is non-empty here, so its first element always exists.
                self.current_batch = MessageBatch(
                    messages=fetched,
                    start_time=fetched[0].time,
                    end_time=now,
                )
                logger.info(f"{self.log_prefix} 新建聊天话题: {len(fetched)} 条消息")

            await self._check_and_package(now)
            return

        # Nothing new arrived: a pending batch may still have become due.
        if self.current_batch and self.current_batch.messages:
            await self._check_and_package(now)
        self.last_check_time = now

    except Exception as exc:
        logger.error(f"{self.log_prefix} 处理聊天内容概括时出错: {exc}")
        import traceback

        traceback.print_exc()
async def _check_and_package(self, current_time: float):
    """Decide whether the current batch is due and package it if so.

    A batch is packaged when any of the following holds:

    1. it contains at least 120 messages;
    2. more than 600 seconds have passed since its newest message;
    3. it contains more than 100 messages (which switches it into
       "preparing to finish" mode) and more than 10 seconds have passed
       since its newest message.

    Args:
        current_time: Timestamp to treat as "now" when measuring idle time.
    """
    batch = self.current_batch
    if not batch or not batch.messages:
        return

    messages = batch.messages
    message_count = len(messages)
    # Idle time is measured from the newest accumulated message.
    time_since_last_message = current_time - messages[-1].time

    # Human-readable idle time; every branch carries its unit so the log
    # line is unambiguous.
    if time_since_last_message < 60:
        time_str = f"{time_since_last_message:.1f}秒"
    elif time_since_last_message < 3600:
        time_str = f"{time_since_last_message / 60:.1f}分钟"
    else:
        time_str = f"{time_since_last_message / 3600:.1f}小时"

    # Both branches used to be the empty string, leaving this log field blank.
    preparing_status = "是" if batch.is_preparing else "否"

    logger.info(
        f"{self.log_prefix} 批次状态检查 | 消息数: {message_count} | 距最后消息: {time_str} | 准备结束模式: {preparing_status}"
    )

    should_package = False

    if message_count >= 120:
        # Condition 1: hard cap on batch size.
        should_package = True
        logger.info(f"{self.log_prefix} 触发打包条件: 消息数量达到 {message_count} 条(阈值: 120条")
    elif time_since_last_message > 600:
        # Condition 2: the conversation has been idle for over 10 minutes.
        should_package = True
        logger.info(f"{self.log_prefix} 触发打包条件: 距最后消息 {time_str}(阈值: 10分钟")
    elif message_count > 100:
        # Condition 3: large batch, so wait only for a short pause.
        if not batch.is_preparing:
            batch.is_preparing = True
            logger.info(f"{self.log_prefix} 消息数量 {message_count} 条超过阈值100条进入准备结束模式")

        if time_since_last_message > 10:
            should_package = True
            logger.info(f"{self.log_prefix} 触发打包条件: 准备结束模式下,距最后消息 {time_str}(阈值: 10秒")

    if should_package:
        await self._package_and_store()
async def _package_and_store(self):
    """Summarize the current batch with the LLM and persist the result.

    The batch is dropped without storing when no message from the bot is found
    between 600 seconds before the batch start and the batch end, or when the
    LLM compression fails. In every handled outcome (stored, dropped, or an
    exception during packaging) ``self.current_batch`` is reset to ``None`` so
    the same messages are not processed twice.
    """
    batch = self.current_batch
    if not batch or not batch.messages:
        return

    messages = batch.messages
    start_time, end_time = batch.start_time, batch.end_time

    logger.info(
        f"{self.log_prefix} 开始打包批次 | 消息数: {len(messages)} | 时间范围: {start_time:.2f} - {end_time:.2f}"
    )

    # Look for bot activity from 600 s before the batch up to its end
    # (boundary-inclusive query).
    check_start_time = max(start_time - 600, 0)
    check_end_time = end_time
    bot_messages = message_api.get_messages_by_time_in_chat_inclusive(
        chat_id=self.chat_id,
        start_time=check_start_time,
        end_time=check_end_time,
        limit=0,
        limit_mode="latest",
        filter_mai=False,
        filter_command=False,
    )

    bot_user_id = str(global_config.bot.qq_account)
    has_bot_message = any(msg.user_info.user_id == bot_user_id for msg in bot_messages)

    if not has_bot_message:
        logger.info(
            f"{self.log_prefix} 批次内无Bot发言丢弃批次 | 检查时间范围: {check_start_time:.2f} - {check_end_time:.2f}"
        )
        self.current_batch = None
        return

    try:
        # Plain-text transcript handed to the LLM and stored alongside the summary.
        original_text = build_readable_messages(
            messages=messages,
            replace_bot_name=True,
            timestamp_mode="normal_no_YMD",
            read_mark=0.0,
            truncate=False,
            show_actions=False,
        )

        # Collect the distinct display names of everyone who spoke.
        participants_set: Set[str] = set()
        for msg in messages:
            # Platform fallback chain: flattened field, then user_info, then chat_info.
            platform = (
                getattr(msg, "user_platform", None)
                or (msg.user_info.platform if msg.user_info else None)
                or msg.chat_info.platform
            )
            speaker_name = Person(platform=platform, user_id=msg.user_info.user_id).person_name
            if speaker_name:
                participants_set.add(speaker_name)
        participants = list(participants_set)
        logger.info(f"{self.log_prefix} 批次参与者: {', '.join(participants) if participants else '未知'}")

        success, theme, keywords, summary = await self._compress_with_llm(original_text)

        if not success:
            logger.warning(f"{self.log_prefix} LLM压缩失败不存储到数据库 | 消息数: {len(messages)}")
            # Drop the batch so the failed content is not retried.
            self.current_batch = None
            return

        logger.info(
            f"{self.log_prefix} LLM压缩完成 | 主题: {theme} | 关键词数: {len(keywords)} | 概括长度: {len(summary)}"
        )

        await self._store_to_database(
            start_time=start_time,
            end_time=end_time,
            original_text=original_text,
            participants=participants,
            theme=theme,
            keywords=keywords,
            summary=summary,
        )

        logger.info(f"{self.log_prefix} 成功打包并存储聊天记录 | 消息数: {len(messages)} | 主题: {theme}")

        self.current_batch = None

    except Exception as exc:
        logger.error(f"{self.log_prefix} 打包和存储聊天记录时出错: {exc}")
        import traceback

        traceback.print_exc()
        # Drop the batch on failure too, to avoid reprocessing it.
        self.current_batch = None
async def _compress_with_llm(self, original_text: str) -> tuple[bool, str, List[str], str]:
"""
使用LLM压缩聊天内容
Returns:
tuple[bool, str, List[str], str]: (是否成功, 主题, 关键词列表, 概括)
"""
prompt = f"""请对以下聊天记录进行概括,提取以下信息:
1. 主题这段对话的主要内容一个简短的标题不超过20字
2. 关键词这段对话的关键词用列表形式返回3-10个关键词
3. 概括对这段话的平文本概括50-200字
请以JSON格式返回格式如下
{{
"theme": "主题",
"keywords": ["关键词1", "关键词2", ...],
"summary": "概括内容"
}}
聊天记录:
{original_text}
请直接返回JSON不要包含其他内容。"""
try:
response, _ = await self.summarizer_llm.generate_response_async(
prompt=prompt,
temperature=0.3,
max_tokens=500,
)
# 解析JSON响应
import re
# 移除可能的markdown代码块标记
json_str = response.strip()
json_str = re.sub(r"^```json\s*", "", json_str, flags=re.MULTILINE)
json_str = re.sub(r"^```\s*", "", json_str, flags=re.MULTILINE)
json_str = json_str.strip()
# 尝试找到JSON对象的开始和结束位置
# 查找第一个 { 和最后一个匹配的 }
start_idx = json_str.find("{")
if start_idx == -1:
raise ValueError("未找到JSON对象开始标记")
# 从后往前查找最后一个 }
end_idx = json_str.rfind("}")
if end_idx == -1 or end_idx <= start_idx:
raise ValueError("未找到JSON对象结束标记")
# 提取JSON字符串
json_str = json_str[start_idx : end_idx + 1]
# 尝试解析JSON
try:
result = json.loads(json_str)
except json.JSONDecodeError:
# 如果解析失败,尝试修复字符串值中的中文引号
# 简单方法:将字符串值中的中文引号替换为转义的英文引号
# 使用状态机方法:遍历字符串,在字符串值内部替换中文引号
fixed_chars = []
in_string = False
escape_next = False
i = 0
while i < len(json_str):
char = json_str[i]
if escape_next:
fixed_chars.append(char)
escape_next = False
elif char == "\\":
fixed_chars.append(char)
escape_next = True
elif char == '"' and not escape_next:
fixed_chars.append(char)
in_string = not in_string
elif in_string and (char == '"' or char == '"'):
# 在字符串值内部,将中文引号替换为转义的英文引号
fixed_chars.append('\\"')
else:
fixed_chars.append(char)
i += 1
json_str = "".join(fixed_chars)
# 再次尝试解析
result = json.loads(json_str)
theme = result.get("theme", "未命名对话")
keywords = result.get("keywords", [])
summary = result.get("summary", "无概括")
# 确保keywords是列表
if isinstance(keywords, str):
keywords = [keywords]
return True, theme, keywords, summary
except Exception as e:
logger.error(f"{self.log_prefix} LLM压缩聊天内容时出错: {e}")
logger.error(f"{self.log_prefix} LLM响应: {response if 'response' in locals() else 'N/A'}")
# 返回失败标志和默认值
return False, "未命名对话", [], "压缩失败,无法生成概括"
async def _store_to_database(
    self,
    start_time: float,
    end_time: float,
    original_text: str,
    participants: List[str],
    theme: str,
    keywords: List[str],
    summary: str,
):
    """Insert one summarized batch as a new ``ChatHistory`` record.

    Args:
        start_time: Start timestamp of the batch.
        end_time: End timestamp of the batch.
        original_text: Readable transcript of the batch.
        participants: Names of the people who spoke; stored as a JSON string.
        theme: Short title produced by the LLM.
        keywords: Keywords produced by the LLM; stored as a JSON string.
        summary: Summary text produced by the LLM.

    Raises:
        Exception: Any error raised while importing or saving is logged and
            then re-raised to the caller.
    """
    try:
        from src.common.database.database_model import ChatHistory
        from src.plugin_system.apis import database_api

        participants_json = json.dumps(participants, ensure_ascii=False)
        keywords_json = json.dumps(keywords, ensure_ascii=False)
        record = {
            "chat_id": self.chat_id,
            "start_time": start_time,
            "end_time": end_time,
            "original_text": original_text,
            "participants": participants_json,
            "theme": theme,
            "keywords": keywords_json,
            "summary": summary,
            "count": 0,
        }

        # No key_field / key_value is passed: per the original note, each batch
        # is written as a fresh record rather than matched on a composite key.
        saved = await database_api.db_save(
            ChatHistory,
            data=record,
        )

        if not saved:
            logger.warning(f"{self.log_prefix} 存储聊天历史记录到数据库失败")
        else:
            logger.debug(f"{self.log_prefix} 成功存储聊天历史记录到数据库")

    except Exception as exc:
        logger.error(f"{self.log_prefix} 存储到数据库时出错: {exc}")
        import traceback

        traceback.print_exc()
        raise
async def start(self):
    """Launch the background periodic-check loop unless it is already running.

    When the loop is already marked as running, a warning is logged and no
    second task is created.
    """
    if not self._running:
        self._running = True
        self._periodic_task = asyncio.create_task(self._periodic_check_loop())
        logger.info(f"{self.log_prefix} 已启动后台定期检查循环 | 检查间隔: {self.check_interval}")
        return

    logger.warning(f"{self.log_prefix} 后台循环已在运行,无需重复启动")
async def stop(self):
"""Stop the background periodic-check loop.

Clears the running flag, then cancels the periodic task (if one exists)
and awaits it so the cancellation has completed before the reference
is dropped.
"""
# Clear the flag first so the loop's `while self._running` test fails.
self._running = False
if self._periodic_task:
self._periodic_task.cancel()
try:
# Await the cancelled task; the resulting CancelledError is expected.
await self._periodic_task
except asyncio.CancelledError:
pass
self._periodic_task = None
# NOTE(review): indentation is lost in this view, so whether this log line
# sits inside the `if` above cannot be determined here; confirm.
logger.info(f"{self.log_prefix} 已停止后台定期检查循环")
async def _periodic_check_loop(self):
"""后台定期检查循环"""
try:
while self._running:
# 执行一次检查
await self.process()
# 等待指定间隔后再次检查
await asyncio.sleep(self.check_interval)
except asyncio.CancelledError:
logger.info(f"{self.log_prefix} 后台检查循环被取消")
raise
except Exception as e:
logger.error(f"{self.log_prefix} 后台检查循环出错: {e}")
import traceback
traceback.print_exc()
self._running = False

View File

@@ -959,7 +959,7 @@ async def build_anonymous_messages(messages: List[DatabaseMessages], show_ids: b
header = f"[{i + 1}] {anon_name}"
else:
header = f"{anon_name}"
output_lines.append(header)
stripped_line = content.strip()
if stripped_line:

View File

@@ -25,7 +25,7 @@ class MemoryForgetTask(AsyncTask):
"""执行遗忘检查"""
try:
current_time = time.time()
logger.info("[记忆遗忘] 开始遗忘检查...")
# logger.info("[记忆遗忘] 开始遗忘检查...")
# 执行4个阶段的遗忘检查
await self._forget_stage_1(current_time)
@@ -33,7 +33,7 @@ class MemoryForgetTask(AsyncTask):
await self._forget_stage_3(current_time)
await self._forget_stage_4(current_time)
logger.info("[记忆遗忘] 遗忘检查完成")
# logger.info("[记忆遗忘] 遗忘检查完成")
except Exception as e:
logger.error(f"[记忆遗忘] 执行遗忘检查时出错: {e}", exc_info=True)

View File

@@ -227,6 +227,8 @@ class StatisticOutputTask(AsyncTask):
"",
self._format_model_classified_stat(stats["last_hour"]),
"",
self._format_module_classified_stat(stats["last_hour"]),
"",
self._format_chat_stat(stats["last_hour"]),
self.SEP_LINE,
"",
@@ -737,11 +739,13 @@ class StatisticOutputTask(AsyncTask):
"""
if stats[TOTAL_REQ_CNT] <= 0:
return ""
data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.2f}¥ {:>10.1f} {:>10.1f}"
data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.2f}¥ {:>10.1f} {:>10.1f} {:>12} {:>12}"
total_replies = stats.get(TOTAL_REPLY_CNT, 0)
output = [
"按模型分类统计:",
" 模型名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒)",
" 模型名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒) 每次回复平均调用次数 每次回复平均Token数",
]
for model_name, count in sorted(stats[REQ_CNT_BY_MODEL].items()):
name = f"{model_name[:29]}..." if len(model_name) > 32 else model_name
@@ -751,11 +755,19 @@ class StatisticOutputTask(AsyncTask):
cost = stats[COST_BY_MODEL][model_name]
avg_time_cost = stats[AVG_TIME_COST_BY_MODEL][model_name]
std_time_cost = stats[STD_TIME_COST_BY_MODEL][model_name]
# 计算每次回复平均值
avg_count_per_reply = count / total_replies if total_replies > 0 else 0.0
avg_tokens_per_reply = tokens / total_replies if total_replies > 0 else 0.0
# 格式化大数字
formatted_count = _format_large_number(count)
formatted_in_tokens = _format_large_number(in_tokens)
formatted_out_tokens = _format_large_number(out_tokens)
formatted_tokens = _format_large_number(tokens)
formatted_avg_count = _format_large_number(avg_count_per_reply) if total_replies > 0 else "N/A"
formatted_avg_tokens = _format_large_number(avg_tokens_per_reply) if total_replies > 0 else "N/A"
output.append(
data_fmt.format(
name,
@@ -766,6 +778,62 @@ class StatisticOutputTask(AsyncTask):
cost,
avg_time_cost,
std_time_cost,
formatted_avg_count,
formatted_avg_tokens,
)
)
output.append("")
return "\n".join(output)
@staticmethod
def _format_module_classified_stat(stats: Dict[str, Any]) -> str:
"""
格式化按模块分类的统计数据
"""
if stats[TOTAL_REQ_CNT] <= 0:
return ""
data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.2f}¥ {:>10.1f} {:>10.1f} {:>12} {:>12}"
total_replies = stats.get(TOTAL_REPLY_CNT, 0)
output = [
"按模块分类统计:",
" 模块名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒) 每次回复平均调用次数 每次回复平均Token数",
]
for module_name, count in sorted(stats[REQ_CNT_BY_MODULE].items()):
name = f"{module_name[:29]}..." if len(module_name) > 32 else module_name
in_tokens = stats[IN_TOK_BY_MODULE][module_name]
out_tokens = stats[OUT_TOK_BY_MODULE][module_name]
tokens = stats[TOTAL_TOK_BY_MODULE][module_name]
cost = stats[COST_BY_MODULE][module_name]
avg_time_cost = stats[AVG_TIME_COST_BY_MODULE][module_name]
std_time_cost = stats[STD_TIME_COST_BY_MODULE][module_name]
# 计算每次回复平均值
avg_count_per_reply = count / total_replies if total_replies > 0 else 0.0
avg_tokens_per_reply = tokens / total_replies if total_replies > 0 else 0.0
# 格式化大数字
formatted_count = _format_large_number(count)
formatted_in_tokens = _format_large_number(in_tokens)
formatted_out_tokens = _format_large_number(out_tokens)
formatted_tokens = _format_large_number(tokens)
formatted_avg_count = _format_large_number(avg_count_per_reply) if total_replies > 0 else "N/A"
formatted_avg_tokens = _format_large_number(avg_tokens_per_reply) if total_replies > 0 else "N/A"
output.append(
data_fmt.format(
name,
formatted_count,
formatted_in_tokens,
formatted_out_tokens,
formatted_tokens,
cost,
avg_time_cost,
std_time_cost,
formatted_avg_count,
formatted_avg_tokens,
)
)
@@ -849,6 +917,7 @@ class StatisticOutputTask(AsyncTask):
# format总在线时间
# 按模型分类统计
total_replies = stat_data.get(TOTAL_REPLY_CNT, 0)
model_rows = "\n".join(
[
f"<tr>"
@@ -860,11 +929,13 @@ class StatisticOutputTask(AsyncTask):
f"<td>{stat_data[COST_BY_MODEL][model_name]:.2f} ¥</td>"
f"<td>{stat_data[AVG_TIME_COST_BY_MODEL][model_name]:.1f} 秒</td>"
f"<td>{stat_data[STD_TIME_COST_BY_MODEL][model_name]:.1f} 秒</td>"
f"<td>{_format_large_number(count / total_replies, html=True) if total_replies > 0 else 'N/A'}</td>"
f"<td>{_format_large_number(stat_data[TOTAL_TOK_BY_MODEL][model_name] / total_replies, html=True) if total_replies > 0 else 'N/A'}</td>"
f"</tr>"
for model_name, count in sorted(stat_data[REQ_CNT_BY_MODEL].items())
]
if stat_data[REQ_CNT_BY_MODEL]
else ["<tr><td colspan='8' style='text-align: center; color: #999;'>暂无数据</td></tr>"]
else ["<tr><td colspan='10' style='text-align: center; color: #999;'>暂无数据</td></tr>"]
)
# 按请求类型分类统计
type_rows = "\n".join(
@@ -878,11 +949,13 @@ class StatisticOutputTask(AsyncTask):
f"<td>{stat_data[COST_BY_TYPE][req_type]:.2f} ¥</td>"
f"<td>{stat_data[AVG_TIME_COST_BY_TYPE][req_type]:.1f} 秒</td>"
f"<td>{stat_data[STD_TIME_COST_BY_TYPE][req_type]:.1f} 秒</td>"
f"<td>{_format_large_number(count / total_replies, html=True) if total_replies > 0 else 'N/A'}</td>"
f"<td>{_format_large_number(stat_data[TOTAL_TOK_BY_TYPE][req_type] / total_replies, html=True) if total_replies > 0 else 'N/A'}</td>"
f"</tr>"
for req_type, count in sorted(stat_data[REQ_CNT_BY_TYPE].items())
]
if stat_data[REQ_CNT_BY_TYPE]
else ["<tr><td colspan='8' style='text-align: center; color: #999;'>暂无数据</td></tr>"]
else ["<tr><td colspan='10' style='text-align: center; color: #999;'>暂无数据</td></tr>"]
)
# 按模块分类统计
module_rows = "\n".join(
@@ -896,11 +969,13 @@ class StatisticOutputTask(AsyncTask):
f"<td>{stat_data[COST_BY_MODULE][module_name]:.2f} ¥</td>"
f"<td>{stat_data[AVG_TIME_COST_BY_MODULE][module_name]:.1f} 秒</td>"
f"<td>{stat_data[STD_TIME_COST_BY_MODULE][module_name]:.1f} 秒</td>"
f"<td>{_format_large_number(count / total_replies, html=True) if total_replies > 0 else 'N/A'}</td>"
f"<td>{_format_large_number(stat_data[TOTAL_TOK_BY_MODULE][module_name] / total_replies, html=True) if total_replies > 0 else 'N/A'}</td>"
f"</tr>"
for module_name, count in sorted(stat_data[REQ_CNT_BY_MODULE].items())
]
if stat_data[REQ_CNT_BY_MODULE]
else ["<tr><td colspan='8' style='text-align: center; color: #999;'>暂无数据</td></tr>"]
else ["<tr><td colspan='10' style='text-align: center; color: #999;'>暂无数据</td></tr>"]
)
# 聊天消息统计
@@ -975,7 +1050,7 @@ class StatisticOutputTask(AsyncTask):
<h2>按模型分类统计</h2>
<div class=\"table-wrap\">
<table>
<thead><tr><th>模型名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th></tr></thead>
<thead><tr><th>模型名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th><th>每次回复平均调用次数</th><th>每次回复平均Token数</th></tr></thead>
<tbody>
{model_rows}
</tbody>
@@ -986,7 +1061,7 @@ class StatisticOutputTask(AsyncTask):
<div class=\"table-wrap\">
<table>
<thead>
<tr><th>模块名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th></tr>
<tr><th>模块名称</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th><th>每次回复平均调用次数</th><th>每次回复平均Token数</th></tr>
</thead>
<tbody>
{module_rows}
@@ -998,7 +1073,7 @@ class StatisticOutputTask(AsyncTask):
<div class=\"table-wrap\">
<table>
<thead>
<tr><th>请求类型</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th></tr>
<tr><th>请求类型</th><th>调用次数</th><th>输入Token</th><th>输出Token</th><th>Token总量</th><th>累计花费</th><th>平均耗时(秒)</th><th>标准差(秒)</th><th>每次回复平均调用次数</th><th>每次回复平均Token数</th></tr>
</thead>
<tbody>
{type_rows}

View File

@@ -164,6 +164,47 @@ class ImageManager:
tag_str = ",".join(emotion_list)
return f"[表情包:{tag_str}]"
async def _save_emoji_file_if_needed(self, image_base64: str, image_hash: str, image_format: str) -> None:
    """Save an emoji image into EMOJI_DIR when emoji stealing is enabled.

    Nothing happens unless ``global_config.emoji.steal_emoji`` is truthy.
    The image is skipped when the emoji manager already returns an emoji
    for ``image_hash`` or when the target file is already on disk.
    Exceptions raised while saving are logged as warnings and are not
    propagated to the caller.

    Args:
        image_base64: Base64-encoded image data.
        image_hash: MD5 hash of the image; its first 8 characters become
            the file name.
        image_format: Image format, used as the file extension.
    """
    if not global_config.emoji.steal_emoji:
        return

    try:
        from src.chat.emoji_system.emoji_manager import EMOJI_DIR, get_emoji_manager

        # Make sure the destination directory exists.
        os.makedirs(EMOJI_DIR, exist_ok=True)

        # Look the emoji up by hash; nothing to do if the manager returns one.
        if await get_emoji_manager().get_emoji_from_manager(image_hash):
            logger.debug(f"[自动保存] 表情包已注册,跳过保存: {image_hash[:8]}...")
            return

        # File name: first 8 characters of the hash plus the format extension.
        file_path = os.path.join(EMOJI_DIR, f"{image_hash[:8]}.{image_format}")

        if os.path.exists(file_path):
            # The file may have been saved earlier without being registered.
            logger.debug(f"[自动保存] 表情包文件已存在,跳过: {file_path}")
        elif base64_to_image(image_base64, file_path):
            logger.info(f"[自动保存] 表情包已保存到 {file_path} (Hash: {image_hash[:8]}...)")
        else:
            logger.warning(f"[自动保存] 保存表情包文件失败: {file_path}")
    except Exception as save_error:
        logger.warning(f"[自动保存] 保存表情包文件时出错: {save_error}")
async def get_emoji_description(self, image_base64: str) -> str:
"""获取表情包描述优先使用EmojiDescriptionCache表中的缓存数据"""
try:
@@ -193,12 +234,18 @@ class ImageManager:
cache_record = EmojiDescriptionCache.get_or_none(EmojiDescriptionCache.emoji_hash == image_hash)
if cache_record:
# 优先使用情感标签,如果没有则使用详细描述
result_text = ""
if cache_record.emotion_tags:
logger.info(f"[缓存命中] 使用EmojiDescriptionCache表中的情感标签: {cache_record.emotion_tags[:50]}...")
return f"[表情包:{cache_record.emotion_tags}]"
result_text = f"[表情包:{cache_record.emotion_tags}]"
elif cache_record.description:
logger.info(f"[缓存命中] 使用EmojiDescriptionCache表中的描述: {cache_record.description[:50]}...")
return f"[表情包:{cache_record.description}]"
result_text = f"[表情包:{cache_record.description}]"
# 即使缓存命中如果启用了steal_emoji也检查是否需要保存文件
if result_text:
await self._save_emoji_file_if_needed(image_base64, image_hash, image_format)
return result_text
except Exception as e:
logger.debug(f"查询EmojiDescriptionCache时出错: {e}")
@@ -290,6 +337,9 @@ class ImageManager:
except Exception as e:
logger.error(f"保存表情包描述和情感标签缓存失败: {str(e)}")
# 如果启用了steal_emoji自动保存表情包文件到data/emoji目录
await self._save_emoji_file_if_needed(image_base64, image_hash, image_format)
return f"[表情包:{final_emotion}]"
except Exception as e: