fix:修复并发导致的重复表达学习问题

This commit is contained in:
SengokuCola
2025-12-02 12:47:54 +08:00
parent ec90951539
commit c562ebe97a
3 changed files with 364 additions and 323 deletions

View File

@@ -1,5 +1,6 @@
from datetime import datetime
import time
import asyncio
from typing import Dict
from src.chat.utils.chat_message_builder import (
@@ -46,6 +47,8 @@ class FrequencyControl:
self.frequency_model = LLMRequest(
model_set=model_config.model_task_config.utils_small, request_type="frequency.adjust"
)
# 频率调整锁,防止并发执行
self._adjust_lock = asyncio.Lock()
def get_talk_frequency_adjust(self) -> float:
"""获取发言频率调整值"""
@@ -56,19 +59,29 @@ class FrequencyControl:
self.talk_frequency_adjust = max(0.1, min(5.0, value))
async def trigger_frequency_adjust(self) -> None:
# 使用异步锁防止并发执行
async with self._adjust_lock:
# 在锁内检查,避免并发触发
current_time = time.time()
previous_adjust_time = self.last_frequency_adjust_time
msg_list = get_raw_msg_by_timestamp_with_chat(
chat_id=self.chat_id,
timestamp_start=self.last_frequency_adjust_time,
timestamp_end=time.time(),
timestamp_start=previous_adjust_time,
timestamp_end=current_time,
)
if time.time() - self.last_frequency_adjust_time < 160 or len(msg_list) <= 20:
if current_time - previous_adjust_time < 160 or len(msg_list) <= 20:
return
else:
# 立即更新调整时间,防止并发触发
self.last_frequency_adjust_time = current_time
try:
new_msg_list = get_raw_msg_by_timestamp_with_chat(
chat_id=self.chat_id,
timestamp_start=self.last_frequency_adjust_time,
timestamp_end=time.time(),
timestamp_start=previous_adjust_time,
timestamp_end=current_time,
limit=20,
limit_mode="latest",
)
@@ -115,9 +128,9 @@ class FrequencyControl:
elif "过少" in response:
logger.info(f"频率调整: 过少,调整值到{final_value_by_api}")
self.talk_frequency_adjust = max(0.1, min(1.5, self.talk_frequency_adjust * 1.2))
self.last_frequency_adjust_time = time.time()
else:
logger.info("频率调整response不符合要求取消本次调整")
except Exception as e:
logger.error(f"频率调整失败: {e}")
# 即使失败也保持时间戳更新,避免频繁重试
class FrequencyControlManager:

View File

@@ -2,6 +2,7 @@ import time
import json
import os
import re
import asyncio
from typing import List, Optional, Tuple
import traceback
from src.common.logger import get_logger
@@ -91,6 +92,9 @@ class ExpressionLearner:
# 维护每个chat的上次学习时间
self.last_learning_time: float = time.time()
# 学习锁,防止并发执行学习任务
self._learning_lock = asyncio.Lock()
# 学习参数
_, self.enable_learning, self.learning_intensity = global_config.expression.get_expression_config_for_chat(
self.chat_id
@@ -139,16 +143,24 @@ class ExpressionLearner:
Returns:
bool: 是否成功触发学习
"""
# 使用异步锁防止并发执行
async with self._learning_lock:
# 在锁内检查,避免并发触发
# 如果锁被持有,其他协程会等待,但等待期间条件可能已变化,所以需要再次检查
if not self.should_trigger_learning():
return
# 保存学习开始前的时间戳,用于获取消息范围
learning_start_timestamp = time.time()
previous_learning_time = self.last_learning_time
# 立即更新学习时间,防止并发触发
self.last_learning_time = learning_start_timestamp
try:
logger.info(f"在聊天流 {self.chat_name} 学习表达方式")
# 学习语言风格
learnt_style = await self.learn_and_store(num=25)
# 更新学习时间
self.last_learning_time = time.time()
# 学习语言风格,传递学习开始前的时间戳
learnt_style = await self.learn_and_store(num=25, timestamp_start=previous_learning_time)
if learnt_style:
logger.info(f"聊天流 {self.chat_name} 表达学习完成")
@@ -158,13 +170,18 @@ class ExpressionLearner:
except Exception as e:
logger.error(f"为聊天流 {self.chat_name} 触发学习失败: {e}")
traceback.print_exc()
# 即使失败也保持时间戳更新,避免频繁重试
return
async def learn_and_store(self, num: int = 10) -> List[Tuple[str, str, str]]:
async def learn_and_store(self, num: int = 10, timestamp_start: Optional[float] = None) -> List[Tuple[str, str, str]]:
"""
学习并存储表达方式
Args:
num: 学习数量
timestamp_start: 学习开始的时间戳,如果为None则使用self.last_learning_time
"""
learnt_expressions = await self.learn_expression(num)
learnt_expressions = await self.learn_expression(num, timestamp_start=timestamp_start)
if learnt_expressions is None:
logger.info("没有学习到表达风格")
@@ -374,18 +391,22 @@ class ExpressionLearner:
return matched_expressions
async def learn_expression(self, num: int = 10) -> Optional[List[Tuple[str, str, str, str]]]:
async def learn_expression(self, num: int = 10, timestamp_start: Optional[float] = None) -> Optional[List[Tuple[str, str, str, str]]]:
"""从指定聊天流学习表达方式
Args:
num: 学习数量
timestamp_start: 学习开始的时间戳,如果为None则使用self.last_learning_time
"""
current_time = time.time()
# 使用传入的时间戳,如果没有则使用self.last_learning_time
start_timestamp = timestamp_start if timestamp_start is not None else self.last_learning_time
# 获取上次学习之后的消息
random_msg = get_raw_msg_by_timestamp_with_chat_inclusive(
chat_id=self.chat_id,
timestamp_start=self.last_learning_time,
timestamp_start=start_timestamp,
timestamp_end=current_time,
limit=num,
)

View File

@@ -183,6 +183,9 @@ class JargonMiner:
self.cache_limit = 100
self.cache: OrderedDict[str, None] = OrderedDict()
# 黑话提取锁,防止并发执行
self._extraction_lock = asyncio.Lock()
def _add_to_cache(self, content: str) -> None:
"""将提取到的黑话加入缓存保持LRU语义"""
if not content:
@@ -436,7 +439,10 @@ class JargonMiner:
return bool(recent_messages and len(recent_messages) >= self.min_messages_for_learning)
async def run_once(self) -> None:
# 使用异步锁防止并发执行
async with self._extraction_lock:
try:
# 在锁内检查,避免并发触发
if not self.should_trigger():
return
@@ -448,6 +454,9 @@ class JargonMiner:
extraction_start_time = self.last_learning_time
extraction_end_time = time.time()
# 立即更新学习时间,防止并发触发
self.last_learning_time = extraction_end_time
# 拉取学习窗口内的消息
messages = get_raw_msg_by_timestamp_with_chat_inclusive(
chat_id=self.chat_id,
@@ -684,13 +693,11 @@ class JargonMiner:
# 输出格式化的结果,使用logger.info会自动应用jargon模块的颜色
logger.info(f"[{self.stream_name}]疑似黑话: {jargon_str}")
# 更新为本次提取的结束时间,确保不会重复提取相同的消息窗口
self.last_learning_time = extraction_end_time
if saved or updated:
logger.info(f"jargon写入: 新增 {saved} 条,更新 {updated}chat_id={self.chat_id}")
except Exception as e:
logger.error(f"JargonMiner 运行失败: {e}")
# 即使失败也保持时间戳更新,避免频繁重试
class JargonMinerManager: