fix:裁切缓存,删除超时聊天流,修复内存溢出问题

This commit is contained in:
SengokuCola
2026-05-09 17:26:17 +08:00
parent c99363680d
commit 697147844a
4 changed files with 223 additions and 16 deletions

View File

@@ -0,0 +1,105 @@
from types import SimpleNamespace
import pytest
import time
from src.chat.heart_flow import heartflow_manager as heartflow_manager_module
from src.chat.heart_flow.heartflow_manager import HEARTFLOW_ACTIVE_RETENTION_SECONDS, HeartflowManager
from src.learners.expression_learner import ExpressionLearner
from src.maisaka.runtime import MAX_RETAINED_MESSAGE_CACHE_SIZE, MaisakaHeartFlowChatting
def _build_runtime_with_messages(message_count: int) -> MaisakaHeartFlowChatting:
runtime = object.__new__(MaisakaHeartFlowChatting)
runtime.log_prefix = "[test]"
runtime.message_cache = [SimpleNamespace(message_id=f"msg-{index}") for index in range(message_count)]
runtime._last_processed_index = message_count
runtime._expression_learner = ExpressionLearner("session-1")
runtime._expression_learner.mark_all_processed(runtime.message_cache)
return runtime
def test_prune_processed_message_cache_keeps_bounded_recent_window() -> None:
runtime = _build_runtime_with_messages(MAX_RETAINED_MESSAGE_CACHE_SIZE + 25)
runtime._prune_processed_message_cache()
assert len(runtime.message_cache) == MAX_RETAINED_MESSAGE_CACHE_SIZE
assert runtime.message_cache[0].message_id == "msg-25"
assert runtime._last_processed_index == MAX_RETAINED_MESSAGE_CACHE_SIZE
assert runtime._expression_learner.last_processed_index == MAX_RETAINED_MESSAGE_CACHE_SIZE
def test_prune_processed_message_cache_keeps_unlearned_messages() -> None:
runtime = _build_runtime_with_messages(MAX_RETAINED_MESSAGE_CACHE_SIZE + 25)
runtime._expression_learner.discard_processed_prefix(MAX_RETAINED_MESSAGE_CACHE_SIZE + 5)
runtime._prune_processed_message_cache()
assert len(runtime.message_cache) == MAX_RETAINED_MESSAGE_CACHE_SIZE + 5
assert runtime.message_cache[0].message_id == "msg-20"
assert runtime._expression_learner.last_processed_index == 0
def test_collect_pending_messages_uses_single_pending_received_time() -> None:
runtime = _build_runtime_with_messages(2)
runtime._last_processed_index = 0
runtime._oldest_pending_message_received_at = 123.0
runtime._last_message_received_at = 456.0
runtime._reply_latency_measurement_started_at = None
pending_messages = runtime._collect_pending_messages()
assert [message.message_id for message in pending_messages] == ["msg-0", "msg-1"]
assert runtime._reply_latency_measurement_started_at == 123.0
assert runtime._oldest_pending_message_received_at is None
@pytest.mark.asyncio
async def test_heartflow_manager_evicts_lru_chat_over_limit(monkeypatch: pytest.MonkeyPatch) -> None:
manager = HeartflowManager()
stopped_session_ids: list[str] = []
old_active_at = time.time() - HEARTFLOW_ACTIVE_RETENTION_SECONDS - 1
class FakeChat:
def __init__(self, session_id: str) -> None:
self.session_id = session_id
async def stop(self) -> None:
stopped_session_ids.append(self.session_id)
monkeypatch.setattr(heartflow_manager_module, "HEARTFLOW_MAX_ACTIVE_CHATS", 2)
manager.heartflow_chat_list["session-1"] = FakeChat("session-1")
manager.heartflow_chat_list["session-2"] = FakeChat("session-2")
manager.heartflow_chat_list["session-3"] = FakeChat("session-3")
manager._chat_last_active_at["session-1"] = old_active_at
manager._chat_last_active_at["session-2"] = old_active_at
manager._chat_last_active_at["session-3"] = time.time()
await manager._evict_over_limit_chats(protected_session_id="session-3")
assert stopped_session_ids == ["session-1"]
assert list(manager.heartflow_chat_list) == ["session-2", "session-3"]
@pytest.mark.asyncio
async def test_heartflow_manager_keeps_recent_chats_even_over_limit(monkeypatch: pytest.MonkeyPatch) -> None:
manager = HeartflowManager()
stopped_session_ids: list[str] = []
class FakeChat:
def __init__(self, session_id: str) -> None:
self.session_id = session_id
async def stop(self) -> None:
stopped_session_ids.append(self.session_id)
monkeypatch.setattr(heartflow_manager_module, "HEARTFLOW_MAX_ACTIVE_CHATS", 2)
for session_id in ("session-1", "session-2", "session-3"):
manager.heartflow_chat_list[session_id] = FakeChat(session_id)
manager._chat_last_active_at[session_id] = time.time()
await manager._evict_over_limit_chats(protected_session_id="session-3")
assert stopped_session_ids == []
assert list(manager.heartflow_chat_list) == ["session-1", "session-2", "session-3"]

View File

@@ -1,31 +1,39 @@
import asyncio
import traceback
from collections import OrderedDict
from typing import Dict
import asyncio
import time
import traceback
from src.chat.message_receive.chat_manager import chat_manager
from src.common.logger import get_logger
from src.maisaka.runtime import MaisakaHeartFlowChatting
logger = get_logger("heartflow")
HEARTFLOW_ACTIVE_RETENTION_SECONDS = 24 * 60 * 60
HEARTFLOW_MAX_ACTIVE_CHATS = 100
class HeartflowManager:
"""管理 session 级别的 Maisaka 心流实例。"""
def __init__(self) -> None:
self.heartflow_chat_list: Dict[str, MaisakaHeartFlowChatting] = {}
self.heartflow_chat_list: OrderedDict[str, MaisakaHeartFlowChatting] = OrderedDict()
self._chat_create_locks: Dict[str, asyncio.Lock] = {}
self._chat_last_active_at: Dict[str, float] = {}
async def get_or_create_heartflow_chat(self, session_id: str) -> MaisakaHeartFlowChatting:
"""获取或创建指定会话对应的 Maisaka runtime。"""
try:
if chat := self.heartflow_chat_list.get(session_id):
self._touch_chat(session_id)
return chat
create_lock = self._chat_create_locks.setdefault(session_id, asyncio.Lock())
async with create_lock:
if chat := self.heartflow_chat_list.get(session_id):
self._touch_chat(session_id)
return chat
chat_session = chat_manager.get_session_by_session_id(session_id)
@@ -35,16 +43,59 @@ class HeartflowManager:
new_chat = MaisakaHeartFlowChatting(session_id=session_id)
await new_chat.start()
self.heartflow_chat_list[session_id] = new_chat
self._touch_chat(session_id)
await self._evict_over_limit_chats(protected_session_id=session_id)
return new_chat
except Exception as exc:
logger.error(f"创建心流聊天 {session_id} 失败: {exc}", exc_info=True)
traceback.print_exc()
raise
def _touch_chat(self, session_id: str) -> None:
"""记录会话最近活跃时间,并维护心流实例的 LRU 顺序。"""
self._chat_last_active_at[session_id] = time.time()
self.heartflow_chat_list.move_to_end(session_id)
async def _evict_over_limit_chats(self, *, protected_session_id: str) -> None:
"""当实例数量超过上限时,仅淘汰 24 小时内无消息的旧会话。"""
while len(self.heartflow_chat_list) > HEARTFLOW_MAX_ACTIVE_CHATS:
session_id = self._find_evictable_session_id(protected_session_id=protected_session_id)
if session_id is None:
return
await self._evict_chat(session_id, reason="cache_limit")
def _find_evictable_session_id(self, *, protected_session_id: str) -> str | None:
"""按 LRU 查找超过活跃保护窗口的可淘汰会话。"""
expire_before = time.time() - HEARTFLOW_ACTIVE_RETENTION_SECONDS
for session_id in self.heartflow_chat_list:
if session_id == protected_session_id:
continue
last_active_at = self._chat_last_active_at.get(session_id, 0.0)
if last_active_at <= expire_before:
return session_id
return None
async def _evict_chat(self, session_id: str, *, reason: str) -> None:
"""停止并移除指定会话的心流实例。"""
chat = self.heartflow_chat_list.pop(session_id, None)
self._chat_last_active_at.pop(session_id, None)
lock = self._chat_create_locks.get(session_id)
if lock is not None and not lock.locked():
self._chat_create_locks.pop(session_id, None)
if chat is None:
return
try:
await chat.stop()
logger.info(f"已淘汰心流聊天 {session_id}: reason={reason}")
except Exception as exc:
logger.warning(f"淘汰心流聊天 {session_id} 失败: {exc}", exc_info=True)
def adjust_talk_frequency(self, session_id: str, frequency: float) -> None:
"""调整指定聊天流的说话频率。"""
chat = self.heartflow_chat_list.get(session_id)
if chat:
self._touch_chat(session_id)
chat.adjust_talk_frequency(frequency)
logger.info(f"已调整聊天 {session_id} 的说话频率为 {frequency}")
else:

View File

@@ -160,6 +160,23 @@ class ExpressionLearner:
self._last_processed_index = 0
self.min_messages_for_extraction = 10
@property
def last_processed_index(self) -> int:
"""返回表达学习已经消费到的消息缓存下标。"""
return self._last_processed_index
def mark_all_processed(self, message_cache: List["SessionMessage"]) -> None:
"""在跳过表达学习时,将现有消息标记为已处理,避免阻塞缓存裁剪。"""
self._last_processed_index = len(message_cache)
def mark_processed_until(self, processed_end_index: int) -> None:
"""将指定缓存下标之前的消息标记为已处理。"""
self._last_processed_index = max(self._last_processed_index, processed_end_index)
def discard_processed_prefix(self, removed_count: int) -> None:
"""同步 runtime 对消息缓存前缀的裁剪。"""
self._last_processed_index = max(0, self._last_processed_index - removed_count)
@staticmethod
def _get_runtime_manager() -> Any:
"""获取插件运行时管理器。
@@ -274,7 +291,8 @@ class ExpressionLearner:
jargon_miner: Optional["JargonMiner"] = None,
) -> bool:
"""学习表达方式"""
pending_messages = message_cache[self._last_processed_index :]
processed_end_index = len(message_cache)
pending_messages = message_cache[self._last_processed_index : processed_end_index]
if not pending_messages:
logger.debug("没有待处理消息")
return False
@@ -303,6 +321,7 @@ class ExpressionLearner:
response = generation_result.response
except Exception as e:
logger.error(f"学习表达方式失败: {e}")
self._last_processed_index = processed_end_index
return False
expressions: List[Tuple[str, str, str]]
@@ -336,7 +355,7 @@ class ExpressionLearner:
)
if after_extract_result.aborted:
logger.info(f"{self.session_id} 表达方式选择 Hook 中止")
self._last_processed_index = len(message_cache)
self._last_processed_index = processed_end_index
return False
after_extract_kwargs = after_extract_result.kwargs
@@ -352,7 +371,7 @@ class ExpressionLearner:
if not expressions:
logger.info("没有可学习的表达方式")
self._last_processed_index = len(message_cache)
self._last_processed_index = processed_end_index
return False
logger.info(f"可学习的表达方式: {expressions}")
@@ -361,7 +380,7 @@ class ExpressionLearner:
learnt_expressions = self._filter_expressions(expressions, pending_messages)
if not learnt_expressions:
logger.info("没有可学习的表达方式通过过滤")
self._last_processed_index = len(message_cache)
self._last_processed_index = processed_end_index
return False
learnt_expressions_str = "\n".join(f"{situation}->{style}" for situation, style in learnt_expressions)
@@ -386,7 +405,7 @@ class ExpressionLearner:
continue
await self._upsert_expression_to_db(situation, style)
self._last_processed_index = len(message_cache)
self._last_processed_index = processed_end_index
return True
def _check_cached_jargons_in_messages(

View File

@@ -56,6 +56,7 @@ from .tool_provider import MaisakaBuiltinToolProvider
logger = get_logger("maisaka_runtime")
MAX_INTERNAL_ROUNDS = 10
MAX_RETAINED_MESSAGE_CACHE_SIZE = 200
class MaisakaHeartFlowChatting:
@@ -97,7 +98,7 @@ class MaisakaHeartFlowChatting:
self._deferred_message_turn_task: Optional[asyncio.Task[None]] = None
self._message_debounce_seconds = 1.0
self._message_debounce_required = False
self._message_received_at_by_id: dict[str, float] = {}
self._oldest_pending_message_received_at: Optional[float] = None
self._last_message_received_at = 0.0
self._talk_frequency_adjust = 1.0
self._reply_latency_measurement_started_at: Optional[float] = None
@@ -311,9 +312,11 @@ class MaisakaHeartFlowChatting:
self._ensure_background_tasks_running()
received_at = time.time()
self._last_message_received_at = received_at
if self._oldest_pending_message_received_at is None:
self._oldest_pending_message_received_at = received_at
self._update_message_trigger_state(message)
self.message_cache.append(message)
self._message_received_at_by_id[message.message_id] = received_at
self._prune_processed_message_cache()
if self._is_reply_effect_tracking_enabled():
asyncio.create_task(self._reply_effect_tracker.observe_user_message(message))
if self._agent_state == self._STATE_RUNNING:
@@ -502,6 +505,28 @@ class MaisakaHeartFlowChatting:
return None
def _prune_processed_message_cache(self) -> None:
"""裁剪 runtime 与表达学习器都已经消费过的旧消息。"""
excess_count = len(self.message_cache) - MAX_RETAINED_MESSAGE_CACHE_SIZE
if excess_count <= 0:
return
removable_count = min(
excess_count,
self._last_processed_index,
self._expression_learner.last_processed_index,
)
if removable_count <= 0:
return
del self.message_cache[:removable_count]
self._last_processed_index = max(0, self._last_processed_index - removable_count)
self._expression_learner.discard_processed_prefix(removable_count)
logger.debug(
f"{self.log_prefix} 已清理 Maisaka 旧消息缓存: "
f"清理数量={removable_count} 保留数量={len(self.message_cache)}"
)
def _should_trigger_message_turn_by_idle_compensation(
self,
*,
@@ -1016,12 +1041,10 @@ class MaisakaHeartFlowChatting:
# f"收集 {len(unique_messages)} 条新消息"
# )
if unique_messages and self._reply_latency_measurement_started_at is None:
self._reply_latency_measurement_started_at = min(
self._message_received_at_by_id.get(message.message_id, self._last_message_received_at)
for message in unique_messages
self._reply_latency_measurement_started_at = (
self._oldest_pending_message_received_at or self._last_message_received_at
)
for message in unique_messages:
self._message_received_at_by_id.pop(message.message_id, None)
self._oldest_pending_message_received_at = None
return unique_messages
async def _wait_for_message_quiet_period(self) -> None:
@@ -1090,10 +1113,19 @@ class MaisakaHeartFlowChatting:
async def _trigger_batch_learning(self, messages: list[SessionMessage]) -> None:
"""按同一批消息触发表达方式和黑话学习。"""
processed_end_index = len(self.message_cache)
if not self._enable_expression_learning:
self._expression_learner.mark_all_processed(self.message_cache)
self._prune_processed_message_cache()
return
try:
await self._trigger_expression_learning(messages)
except Exception as exc:
logger.error(f"{self.log_prefix} 表达学习任务异常退出: {exc}")
self._expression_learner.mark_processed_until(processed_end_index)
finally:
self._prune_processed_message_cache()
def _should_trigger_learning(
self,