feat:进一步优化缓存

This commit is contained in:
SengokuCola
2026-04-25 00:26:32 +08:00
parent 11f423b851
commit 705452793d
6 changed files with 69 additions and 88 deletions

View File

@@ -244,7 +244,10 @@ class BuiltinToolRuntimeContext:
history_message = SessionBackedMessage.from_session_message(
message,
raw_message=build_prefixed_message_sequence(message.raw_message, planner_prefix),
visible_text=build_session_message_visible_text(message),
visible_text=build_session_message_visible_text(
message,
include_reply_components=source_kind != "guided_reply",
),
source_kind=source_kind,
)
self.runtime._chat_history.append(history_message)

View File

@@ -650,7 +650,6 @@ class MaisakaChatLoopService:
selected_indices.reverse()
selected_history = [filtered_history[index] for index in selected_indices]
selected_history, _ = MaisakaChatLoopService._hide_early_assistant_messages(selected_history)
selected_history, _ = drop_orphan_tool_results(selected_history)
selected_history, _ = normalize_tool_result_order(selected_history)
tool_message_count = sum(1 for message in selected_history if isinstance(message, ToolResultMessage))
@@ -709,38 +708,3 @@ class MaisakaChatLoopService:
return resolve_enable_visual_planner()
return True
@staticmethod
def _hide_early_assistant_messages(
    selected_history: List[LLMContextMessage],
) -> tuple[List[LLMContextMessage], int]:
    """Hide the earliest 50% of assistant messages while keeping tool-call chains.

    Assistant messages in the older half are dropped entirely when they carry
    no tool calls; otherwise they are replaced by an empty-content copy that
    preserves the tool-call linkage required downstream.

    Args:
        selected_history: Context messages in chronological order.

    Returns:
        A tuple of (filtered history, number of assistant messages hidden).
    """
    # Positions of every assistant message, oldest first.
    assistant_positions = [
        position
        for position, entry in enumerate(selected_history)
        if isinstance(entry, AssistantMessage)
    ]
    hide_count = len(assistant_positions) // 2
    if hide_count <= 0:
        return selected_history, 0
    positions_to_hide = set(assistant_positions[:hide_count])
    result: List[LLMContextMessage] = []
    for position, entry in enumerate(selected_history):
        if position not in positions_to_hide:
            result.append(entry)
            continue
        # Hidden messages without tool calls vanish entirely; those with
        # tool calls keep only the call chain so the protocol stays valid.
        if entry.tool_calls:
            result.append(
                AssistantMessage(
                    content="",
                    timestamp=entry.timestamp,
                    tool_calls=list(entry.tool_calls),
                    source_kind=entry.source_kind,
                )
            )
    return result, hide_count

View File

@@ -1,11 +1,13 @@
"""Maisaka 历史消息轮次结束后处理。"""
from dataclasses import dataclass
from math import ceil
from .context_messages import AssistantMessage, LLMContextMessage
from .history_utils import drop_leading_orphan_tool_results, drop_orphan_tool_results, normalize_tool_result_order
EARLY_TRIM_RATIO = 0.2
EARLY_TRIM_RATIO = 0.3
TRIM_THRESHOLD_RATIO = 1.2
@dataclass(slots=True)
@@ -26,27 +28,32 @@ def process_chat_history_after_cycle(
"""在每轮结束后统一执行历史裁切与清理。"""
processed_history = list(chat_history)
removed_assistant_thought_count = _remove_early_assistant_thoughts(processed_history)
processed_history, orphan_removed_count = drop_orphan_tool_results(processed_history)
processed_history, moved_tool_result_count = normalize_tool_result_order(processed_history)
remaining_context_count = sum(1 for message in processed_history if message.count_in_context)
removed_overflow_count = 0
while remaining_context_count > max_context_size and processed_history:
removed_message = processed_history.pop(0)
removed_overflow_count += 1
if removed_message.count_in_context:
remaining_context_count -= 1
processed_history, leading_orphan_removed_count = drop_leading_orphan_tool_results(processed_history)
removed_overflow_count += leading_orphan_removed_count
remaining_context_count = sum(1 for message in processed_history if message.count_in_context)
removed_count = (
removed_assistant_thought_count
+ orphan_removed_count
+ removed_overflow_count
processed_history, normalized_removed_count, moved_tool_result_count = _normalize_history_structure(
processed_history
)
remaining_context_count = sum(1 for message in processed_history if message.count_in_context)
compact_removed_count = 0
trim_threshold = ceil(max_context_size * TRIM_THRESHOLD_RATIO)
if remaining_context_count > trim_threshold:
removed_early_message_count = _remove_early_history_messages(processed_history)
processed_history, removed_after_message_trim_count, moved_after_message_trim_count = (
_normalize_history_structure(processed_history)
)
removed_assistant_thought_count = _remove_early_assistant_thoughts(processed_history)
processed_history, removed_after_thought_trim_count, moved_after_thought_trim_count = (
_normalize_history_structure(processed_history)
)
compact_removed_count = (
removed_early_message_count
+ removed_after_message_trim_count
+ removed_assistant_thought_count
+ removed_after_thought_trim_count
)
moved_tool_result_count += moved_after_message_trim_count + moved_after_thought_trim_count
remaining_context_count = sum(1 for message in processed_history if message.count_in_context)
removed_count = normalized_removed_count + compact_removed_count
changed_count = removed_count + moved_tool_result_count
return HistoryPostProcessResult(
history=processed_history,
@@ -56,8 +63,34 @@ def process_chat_history_after_cycle(
)
def _normalize_history_structure(
    chat_history: list[LLMContextMessage],
) -> tuple[list[LLMContextMessage], int, int]:
    """Normalize message structure so tool-call chains obey the LLM protocol.

    Runs three passes: drop orphan tool results, reorder out-of-place tool
    results, then drop any orphan tool results left at the head of history.

    Args:
        chat_history: Messages to normalize; the input list is not mutated.

    Returns:
        A tuple of (normalized history, total messages removed,
        tool results moved).
    """
    history, dropped_orphans = drop_orphan_tool_results(chat_history)
    history, reordered_count = normalize_tool_result_order(history)
    history, dropped_leading = drop_leading_orphan_tool_results(history)
    total_dropped = dropped_orphans + dropped_leading
    return history, total_dropped, reordered_count
def _remove_early_history_messages(chat_history: list[LLMContextMessage]) -> int:
"""移除最早 30% 的全部历史消息。"""
remove_count = int(len(chat_history) * EARLY_TRIM_RATIO)
if remove_count <= 0:
return 0
del chat_history[:remove_count]
return remove_count
def _remove_early_assistant_thoughts(chat_history: list[LLMContextMessage]) -> int:
"""移除最早 20% 的非工具 assistant 思考内容。"""
"""移除最早 30% 的非工具 assistant 思考内容。"""
candidate_indexes = [
index

View File

@@ -2,7 +2,7 @@
from typing import TYPE_CHECKING
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
from src.common.data_models.message_component_data_model import MessageSequence, ReplyComponent, TextComponent
from .context_messages import AssistantMessage, LLMContextMessage, ToolResultMessage
from .message_adapter import build_visible_text_from_sequence, clone_message_sequence, format_speaker_content
@@ -28,6 +28,8 @@ def build_prefixed_message_sequence(
def build_session_message_visible_text(
message: "SessionMessage",
source_sequence: MessageSequence | None = None,
*,
include_reply_components: bool = True,
) -> str:
"""将真实会话消息转换为 Maisaka 可见文本。"""
@@ -46,6 +48,8 @@ def build_session_message_visible_text(
)
)
for component in clone_message_sequence(normalized_sequence).components:
if not include_reply_components and isinstance(component, ReplyComponent):
continue
visible_sequence.components.append(component)
return build_visible_text_from_sequence(visible_sequence).strip()

View File

@@ -233,7 +233,10 @@ class MaisakaHeartFlowChatting:
history_message = SessionBackedMessage.from_session_message(
message,
raw_message=build_prefixed_message_sequence(message.raw_message, planner_prefix),
visible_text=build_session_message_visible_text(message),
visible_text=build_session_message_visible_text(
message,
include_reply_components=source_kind != "guided_reply",
),
source_kind=source_kind,
)
self._chat_history.append(history_message)