ref:记忆检索不再基于问题,而是基于上下文;添加一些调试用配置

This commit is contained in:
SengokuCola
2026-01-12 19:05:07 +08:00
parent 87d4c7c38a
commit ccc9af57b8
8 changed files with 354 additions and 361 deletions

View File

@@ -15,7 +15,7 @@ from json_repair import repair_json
from src.common.logger import get_logger
from src.common.data_models.database_data_model import DatabaseMessages
from src.config.config import model_config
from src.config.config import model_config, global_config
from src.llm_models.utils_model import LLMRequest
from src.plugin_system.apis import message_api
from src.chat.utils.chat_message_builder import build_readable_messages
@@ -370,18 +370,24 @@ class ChatHistorySummarizer:
logger.debug(f"{self.log_prefix} 批次状态检查 | 消息数: {message_count} | 距上次检查: {time_str}")
# 检查话题检查触发条件
# 检查"话题检查"触发条件
should_check = False
# 条件1: 消息数量 >= 100触发一次检查
if message_count >= 80:
should_check = True
logger.info(f"{self.log_prefix} 触发检查条件: 消息数量达到 {message_count} 条(阈值: 100条")
# 从配置中获取阈值
message_threshold = global_config.memory.chat_history_topic_check_message_threshold
time_threshold_hours = global_config.memory.chat_history_topic_check_time_hours
min_messages = global_config.memory.chat_history_topic_check_min_messages
time_threshold_seconds = time_threshold_hours * 3600
# 条件2: 距离上一次检查 > 3600 * 8 秒8小时且消息数量 >= 20 条,触发一次检查
elif time_since_last_check > 3600 * 8 and message_count >= 20:
# 条件1: 消息数量达到阈值,触发一次检查
if message_count >= message_threshold:
should_check = True
logger.info(f"{self.log_prefix} 触发检查条件: 距上次检查 {time_str}(阈值: 8小时消息数量达到 {message_count} 条(阈值: 20条)")
logger.info(f"{self.log_prefix} 触发检查条件: 消息数量达到 {message_count} 条(阈值: {message_threshold}条)")
# 条件2: 距离上一次检查超过时间阈值且消息数量达到最小阈值,触发一次检查
elif time_since_last_check > time_threshold_seconds and message_count >= min_messages:
should_check = True
logger.info(f"{self.log_prefix} 触发检查条件: 距上次检查 {time_str}(阈值: {time_threshold_hours}小时)且消息数量达到 {message_count} 条(阈值: {min_messages}条)")
if should_check:
await self._run_topic_check_and_update_cache(messages)
@@ -528,14 +534,18 @@ class ChatHistorySummarizer:
item.no_update_checks += 1
# 6. 检查是否有话题需要打包存储
# 从配置中获取阈值
no_update_checks_threshold = global_config.memory.chat_history_finalize_no_update_checks
message_count_threshold = global_config.memory.chat_history_finalize_message_count
topics_to_finalize: List[str] = []
for topic, item in self.topic_cache.items():
if item.no_update_checks >= 3:
logger.info(f"{self.log_prefix} 话题[{topic}] 连续 3 次检查无新增内容,触发打包存储")
if item.no_update_checks >= no_update_checks_threshold:
logger.info(f"{self.log_prefix} 话题[{topic}] 连续 {no_update_checks_threshold} 次检查无新增内容,触发打包存储")
topics_to_finalize.append(topic)
continue
if len(item.messages) > 5:
logger.info(f"{self.log_prefix} 话题[{topic}] 消息条数超过 4,触发打包存储")
if len(item.messages) > message_count_threshold:
logger.info(f"{self.log_prefix} 话题[{topic}] 消息条数超过 {message_count_threshold},触发打包存储")
topics_to_finalize.append(topic)
for topic in topics_to_finalize:
@@ -976,6 +986,16 @@ class ChatHistorySummarizer:
else:
logger.warning(f"{self.log_prefix} 存储聊天历史记录到数据库失败")
# 如果配置开启同时导入到LPMM知识库
if global_config.lpmm_knowledge.enable and global_config.experimental.lpmm_memory:
await self._import_to_lpmm_knowledge(
theme=theme,
summary=summary,
key_point=key_point,
participants=participants,
original_text=original_text,
)
except Exception as e:
logger.error(f"{self.log_prefix} 存储到数据库时出错: {e}")
import traceback
@@ -983,6 +1003,82 @@ class ChatHistorySummarizer:
traceback.print_exc()
raise
async def _import_to_lpmm_knowledge(
self,
theme: str,
summary: str,
key_point: Optional[List[str]],
participants: List[str],
original_text: str,
):
"""
将聊天历史总结导入到LPMM知识库
Args:
theme: 话题主题
summary: 概括内容
key_point: 关键信息点列表
participants: 参与者列表
original_text: 原始文本(可能很长,需要截断)
"""
try:
from src.chat.knowledge.lpmm_ops import lpmm_ops
# 构造要导入的文本内容
# 格式:主题 + 概括 + 关键信息点 + 参与者信息
content_parts = []
# 1. 话题主题
if theme:
content_parts.append(f"话题:{theme}")
# 2. 概括内容
if summary:
content_parts.append(f"概括:{summary}")
# 3. 关键信息点
if key_point:
key_points_text = "".join(key_point)
content_parts.append(f"关键信息:{key_points_text}")
# 4. 参与者信息
if participants:
participants_text = "".join(participants)
content_parts.append(f"参与者:{participants_text}")
# 5. 原始文本摘要如果原始文本太长只取前500字
if original_text:
# 截断原始文本,避免过长
max_original_length = 500
if len(original_text) > max_original_length:
truncated_text = original_text[:max_original_length] + "..."
content_parts.append(f"原始内容摘要:{truncated_text}")
else:
content_parts.append(f"原始内容:{original_text}")
# 将所有部分合并为一个段落用双换行分隔符合lpmm_ops.add_content的格式要求
content_to_import = "\n\n".join(content_parts)
if not content_to_import.strip():
logger.warning(f"{self.log_prefix} 聊天历史总结内容为空,跳过导入知识库")
return
# 调用lpmm_ops导入
result = await lpmm_ops.add_content(content_to_import)
if result["status"] == "success":
logger.info(
f"{self.log_prefix} 成功将聊天历史总结导入到LPMM知识库 | 话题: {theme} | 新增段落数: {result.get('count', 0)}"
)
else:
logger.warning(
f"{self.log_prefix} 将聊天历史总结导入到LPMM知识库失败 | 话题: {theme} | 错误: {result.get('message', '未知错误')}"
)
except Exception as e:
# 导入失败不应该影响数据库存储,只记录错误
logger.error(f"{self.log_prefix} 导入聊天历史总结到LPMM知识库时出错: {e}", exc_info=True)
async def start(self):
"""启动后台定期检查循环"""
if self._running: