diff --git a/src/maisaka/chat_loop_service.py b/src/maisaka/chat_loop_service.py
index 81d66129..63ec38e8 100644
--- a/src/maisaka/chat_loop_service.py
+++ b/src/maisaka/chat_loop_service.py
@@ -41,6 +41,11 @@ from .display.prompt_cli_renderer import PromptCLIVisualizer
 from .visual_mode_utils import resolve_enable_visual_planner
 
 TIMING_GATE_TOOL_NAMES = {"continue", "no_reply", "wait"}
+REQUEST_TYPE_BY_REQUEST_KIND = {
+    "planner": "maisaka_planner",
+    "timing_gate": "maisaka_timing_gate",
+}
+CONTEXT_SELECTION_CACHE_STABILITY_RATIO = 2.0
 
 
 @dataclass(slots=True)
@@ -212,7 +217,7 @@ class MaisakaChatLoopService:
             self._chat_system_prompt = f"{self._personality_prompt}\n\nYou are a helpful AI assistant."
         else:
             self._chat_system_prompt = chat_system_prompt
-        self._llm_chat = LLMServiceClient(task_name="planner", request_type="maisaka_planner")
+        self._llm_chat_clients: dict[str, LLMServiceClient] = {}
 
     @property
     def personality_prompt(self) -> str:
@@ -220,6 +225,30 @@ class MaisakaChatLoopService:
 
         return self._personality_prompt
 
+    @staticmethod
+    def _resolve_llm_request_type(request_kind: str) -> str:
+        """Resolve the LLM accounting request type from the Maisaka request kind."""
+
+        normalized_request_kind = str(request_kind or "").strip()
+        return REQUEST_TYPE_BY_REQUEST_KIND.get(
+            normalized_request_kind,
+            f"maisaka_{normalized_request_kind}" if normalized_request_kind else "maisaka_planner",
+        )
+
+    def _get_llm_chat_client(self, request_kind: str) -> LLMServiceClient:
+        """Get the planner LLM client for the current request kind."""
+
+        request_type = self._resolve_llm_request_type(request_kind)
+        llm_client = self._llm_chat_clients.get(request_type)
+        if llm_client is None:
+            llm_client = LLMServiceClient(
+                task_name="planner",
+                request_type=request_type,
+                session_id=self._session_id,
+            )
+            self._llm_chat_clients[request_type] = llm_client
+        return llm_client
+
     @staticmethod
     def _get_runtime_manager() -> Any:
         """Get the plugin runtime manager.
@@ -321,7 +350,13 @@ class MaisakaChatLoopService:
 
     @staticmethod
     def _build_time_block() -> str:
-        """Build the current-time prompt block."""
+        """Build the static time prompt block."""
+
+        return "当前时间会在每次请求末尾以用户消息形式提供。"
+
+    @staticmethod
+    def _build_current_time_user_message() -> str:
+        """Build the current-time message appended at the end of the request."""
 
         return f"当前时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
 
@@ -446,7 +481,11 @@ class MaisakaChatLoopService:
             messages.append(llm_message)
 
         normalized_injected_messages: List[Message] = []
-        for injected_message in injected_user_messages or []:
+        final_user_messages = [
+            *(injected_user_messages or []),
+            self._build_current_time_user_message(),
+        ]
+        for injected_message in final_user_messages:
             normalized_message = str(injected_message or "").strip()
             if not normalized_message:
                 continue
@@ -458,31 +497,10 @@ class MaisakaChatLoopService:
             )
 
         if normalized_injected_messages:
-            insertion_index = self._resolve_injected_user_messages_insertion_index(messages)
-            messages[insertion_index:insertion_index] = normalized_injected_messages
+            messages.extend(normalized_injected_messages)
 
         return messages
 
-    @staticmethod
-    def _resolve_injected_user_messages_insertion_index(messages: Sequence[Message]) -> int:
-        """Compute the insertion position of injected meta user messages in the request.
-
-        The rule mirrors deferred attachments:
-        - scan backwards from the tail for the nearest stopping point;
-        - a stopping point is an assistant message or a tool result message;
-        - once found, insert right after it;
-        - if no stopping point exists, fall back to just after the system message.
-        """
-
-        for index in range(len(messages) - 1, -1, -1):
-            message = messages[index]
-            if message.role in {RoleType.Assistant, RoleType.Tool}:
-                return index + 1
-
-        if messages and messages[0].role == RoleType.System:
-            return 1
-        return 0
-
     async def chat_loop_step(
         self,
         chat_history: List[LLMContextMessage],
@@ -575,7 +593,8 @@ class MaisakaChatLoopService:
             tool_definitions=list(all_tools),
         )
 
-        generation_result = await self._llm_chat.generate_response_with_messages(
+        llm_chat = self._get_llm_chat_client(request_kind)
+        generation_result = await llm_chat.generate_response_with_messages(
             message_factory=message_factory,
             options=LLMGenerationOptions(
                 tool_options=all_tools if all_tools else None,
@@ -654,7 +673,11 @@ class MaisakaChatLoopService:
             chat_history,
             request_kind=request_kind,
         )
-        effective_context_size = max(1, int(max_context_size or global_config.chat.max_context_size))
+        base_context_size = max(1, int(max_context_size or global_config.chat.max_context_size))
+        effective_context_size = max(
+            base_context_size,
+            int(base_context_size * CONTEXT_SELECTION_CACHE_STABILITY_RATIO),
+        )
 
         selected_indices: List[int] = []
         counted_message_count = 0
@@ -690,9 +713,11 @@ class MaisakaChatLoopService:
         selected_history, _ = normalize_tool_result_order(selected_history)
         tool_message_count = sum(1 for message in selected_history if isinstance(message, ToolResultMessage))
        normal_message_count = len(selected_history) - tool_message_count
+        stability_text = f"|cache_window {base_context_size}->{effective_context_size}"
         selection_reason = (
             f"实际发送 {len(selected_history)} 条消息"
             f"|消息 {normal_message_count} 条|tool {tool_message_count} 条"
+            f"{stability_text}"
         )
         return (
             selected_history,
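Note: all three chat_loop_service.py changes push dynamic bytes toward the tail of the request: the current time moves out of the prompt block into a final user message, injected meta messages are appended instead of spliced into the middle, and the 2.0x cache-stability window over-selects history so the kept prefix shifts less often. That is the shape provider-side prompt caching rewards, a byte-stable prefix with the volatile content at the end. The per-request-kind client map is plain lazy memoization; a minimal standalone sketch of the same pattern (FakeClient is a hypothetical stand-in for LLMServiceClient, session_id omitted):

    from dataclasses import dataclass

    REQUEST_TYPE_BY_REQUEST_KIND = {
        "planner": "maisaka_planner",
        "timing_gate": "maisaka_timing_gate",
    }


    @dataclass
    class FakeClient:
        task_name: str
        request_type: str


    _clients: dict[str, FakeClient] = {}


    def get_client(request_kind: str) -> FakeClient:
        # Unknown kinds map to "maisaka_<kind>"; an empty kind falls back to planner.
        kind = str(request_kind or "").strip()
        request_type = REQUEST_TYPE_BY_REQUEST_KIND.get(
            kind, f"maisaka_{kind}" if kind else "maisaka_planner"
        )
        if request_type not in _clients:
            _clients[request_type] = FakeClient("planner", request_type)
        return _clients[request_type]


    assert get_client("timing_gate") is get_client("timing_gate")  # memoized per type
    assert get_client("").request_type == "maisaka_planner"        # empty-kind fallback

One client per resolved request_type likely also keeps timing-gate traffic in its own maisaka_timing_gate statistics bucket instead of being lumped in with planner calls.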
diff --git a/src/maisaka/history_post_processor.py b/src/maisaka/history_post_processor.py
index 5b3a125d..aa038f08 100644
--- a/src/maisaka/history_post_processor.py
+++ b/src/maisaka/history_post_processor.py
@@ -3,11 +3,11 @@
 from dataclasses import dataclass
 from math import ceil
 
-from .context_messages import AssistantMessage, LLMContextMessage
+from .context_messages import LLMContextMessage
 from .history_utils import drop_leading_orphan_tool_results, drop_orphan_tool_results, normalize_tool_result_order
 
-EARLY_TRIM_RATIO = 0.3
-TRIM_THRESHOLD_RATIO = 1.2
+TRIM_TARGET_RATIO = 1.0
+TRIM_THRESHOLD_RATIO = 2.0
 
 
 @dataclass(slots=True)
@@ -36,21 +36,16 @@ def process_chat_history_after_cycle(
     compact_removed_count = 0
     trim_threshold = ceil(max_context_size * TRIM_THRESHOLD_RATIO)
     if remaining_context_count > trim_threshold:
-        removed_early_message_count = _remove_early_history_messages(processed_history)
-        processed_history, removed_after_message_trim_count, moved_after_message_trim_count = (
-            _normalize_history_structure(processed_history)
+        target_context_count = max(1, int(max_context_size * TRIM_TARGET_RATIO))
+        removed_early_message_count = _trim_history_to_context_target(
+            processed_history,
+            target_context_count=target_context_count,
         )
-        removed_assistant_thought_count = _remove_early_assistant_thoughts(processed_history)
-        processed_history, removed_after_thought_trim_count, moved_after_thought_trim_count = (
-            _normalize_history_structure(processed_history)
+        processed_history, removed_after_trim_count, moved_after_trim_count = _normalize_history_structure(
+            processed_history
         )
-        compact_removed_count = (
-            removed_early_message_count
-            + removed_after_message_trim_count
-            + removed_assistant_thought_count
-            + removed_after_thought_trim_count
-        )
-        moved_tool_result_count += moved_after_message_trim_count + moved_after_thought_trim_count
+        compact_removed_count = removed_early_message_count + removed_after_trim_count
+        moved_tool_result_count += moved_after_trim_count
 
     remaining_context_count = sum(1 for message in processed_history if message.count_in_context)
     removed_count = normalized_removed_count + compact_removed_count
@@ -78,42 +73,27 @@
 )
 
 
-def _remove_early_history_messages(chat_history: list[LLMContextMessage]) -> int:
-    """Remove the earliest 30% of all history messages."""
+def _trim_history_to_context_target(
+    chat_history: list[LLMContextMessage],
+    *,
+    target_context_count: int,
+) -> int:
+    """Remove the earliest slice of history until the counted context size drops to the target."""
+
+    remaining_context_count = sum(1 for message in chat_history if message.count_in_context)
+    if remaining_context_count <= target_context_count:
+        return 0
+
+    remove_count = 0
+    for message in chat_history:
+        remove_count += 1
+        if message.count_in_context:
+            remaining_context_count -= 1
+            if remaining_context_count <= target_context_count:
+                break
 
-    remove_count = int(len(chat_history) * EARLY_TRIM_RATIO)
     if remove_count <= 0:
         return 0
 
     del chat_history[:remove_count]
     return remove_count
-
-
-def _remove_early_assistant_thoughts(chat_history: list[LLMContextMessage]) -> int:
-    """Remove the earliest 30% of non-tool assistant thought content."""
-
-    candidate_indexes = [
-        index
-        for index, message in enumerate(chat_history)
-        if isinstance(message, AssistantMessage)
-        and not message.tool_calls
-        and message.source_kind != "perception"
-        and bool(message.content.strip())
-    ]
-    remove_count = int(len(candidate_indexes) * EARLY_TRIM_RATIO)
-    if remove_count <= 0:
-        return 0
-
-    removed_indexes = set(candidate_indexes[:remove_count])
-    filtered_history: list[LLMContextMessage] = []
-    removed_total = 0
-    for index, message in enumerate(chat_history):
-        if index in removed_indexes:
-            removed_total += 1
-            continue
-        filtered_history.append(message)
-
-    chat_history[:] = filtered_history
-    return removed_total
-
-
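Note: the post-processor replaces the old two-pass 30% trim with hysteresis: nothing is touched until the counted context exceeds TRIM_THRESHOLD_RATIO (2.0x) of max_context_size, and a single cut then brings it back to TRIM_TARGET_RATIO (1.0x). Between cuts the surviving head stays byte-identical, at the cost of carrying up to 2x context. A counts-only toy model of the trigger/target interplay (the real code trims by count_in_context messages and re-normalizes tool results afterwards):

    from math import ceil

    MAX_CONTEXT_SIZE = 10          # assumed config value, for illustration
    TRIM_THRESHOLD_RATIO = 2.0     # trigger: context grows past 2.0x
    TRIM_TARGET_RATIO = 1.0        # target: cut back down to 1.0x

    history: list[int] = []
    trim_turns: list[int] = []
    for turn in range(1, 51):      # one new message per turn
        history.append(turn)
        if len(history) > ceil(MAX_CONTEXT_SIZE * TRIM_THRESHOLD_RATIO):
            target = max(1, int(MAX_CONTEXT_SIZE * TRIM_TARGET_RATIO))
            del history[: len(history) - target]  # drop the oldest slice in one cut
            trim_turns.append(turn)

    print(trim_turns)  # [21, 32, 43]: one deep trim every 11 turns, not every turn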
diff --git a/src/maisaka/reasoning_engine.py b/src/maisaka/reasoning_engine.py
index ab7484a2..037f6618 100644
--- a/src/maisaka/reasoning_engine.py
+++ b/src/maisaka/reasoning_engine.py
@@ -52,7 +52,7 @@ if TYPE_CHECKING:
 
 logger = get_logger("maisaka_reasoning_engine")
 
-TIMING_GATE_CONTEXT_LIMIT = 24
+TIMING_GATE_CONTEXT_DROP_HEAD_RATIO = 0.7
 TIMING_GATE_MAX_TOKENS = 384
 TIMING_GATE_MAX_ATTEMPTS = 3
 TIMING_GATE_TOOL_NAMES = {"continue", "no_reply", "wait"}
@@ -124,7 +124,6 @@ class MaisakaReasoningEngine:
     async def _run_timing_gate_sub_agent(
         self,
         *,
-        context_message_limit: int,
         system_prompt: str,
         tool_definitions: list[dict[str, Any]],
     ) -> Any:
@@ -134,7 +133,10 @@ class MaisakaReasoningEngine:
         """
 
         return await self._runtime.run_sub_agent(
-            context_message_limit=context_message_limit,
+            context_message_limit=self._runtime._max_context_size,
+            drop_head_context_count=int(
+                self._runtime._max_context_size * TIMING_GATE_CONTEXT_DROP_HEAD_RATIO,
+            ),
             system_prompt=system_prompt,
             request_kind="timing_gate",
             interrupt_flag=None,
@@ -255,7 +257,6 @@ class MaisakaReasoningEngine:
         invalid_tool_text = ""
         for attempt_index in range(TIMING_GATE_MAX_ATTEMPTS):
             response = await self._run_timing_gate_sub_agent(
-                context_message_limit=TIMING_GATE_CONTEXT_LIMIT,
                 system_prompt=self._build_timing_gate_system_prompt(),
                 tool_definitions=get_timing_tools(),
             )
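Note: the timing gate no longer uses a fixed 24-message window; it takes the planner's full selection and drops the oldest 70% of counted messages, so its view scales with the configured context size and shares its suffix with the planner prompt. The arithmetic, sketched with a toy message type (Msg stands in for LLMContextMessage; the real path also strips leading orphan tool results afterwards, and note that int() truncates):

    from dataclasses import dataclass

    TIMING_GATE_CONTEXT_DROP_HEAD_RATIO = 0.7


    @dataclass
    class Msg:
        text: str
        count_in_context: bool = True


    def drop_head(history: list[Msg], drop_context_count: int) -> list[Msg]:
        # Skip counted messages from the head until the quota is met; uncounted
        # messages interleaved with them are dropped along the way.
        if drop_context_count <= 0:
            return list(history)
        first_kept = 0
        dropped = 0
        while first_kept < len(history) and dropped < drop_context_count:
            if history[first_kept].count_in_context:
                dropped += 1
            first_kept += 1
        return list(history[first_kept:])


    max_context_size = 40
    history = [Msg(f"m{i}") for i in range(max_context_size)]
    gate_view = drop_head(history, int(max_context_size * TIMING_GATE_CONTEXT_DROP_HEAD_RATIO))
    print(len(gate_view))  # 12: only the newest 30% of counted messages survive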
diff --git a/src/maisaka/runtime.py b/src/maisaka/runtime.py
index a3db92bf..a96eaf11 100644
--- a/src/maisaka/runtime.py
+++ b/src/maisaka/runtime.py
@@ -45,6 +45,7 @@ from .context_messages import (
 from .display.display_utils import build_tool_call_summary_lines, format_token_count
 from .display.prompt_cli_renderer import PromptCLIVisualizer
 from .display.stage_status_board import remove_stage_status, update_stage_status
+from .history_utils import drop_leading_orphan_tool_results
 from .reasoning_engine import MaisakaReasoningEngine
 from .reply_effect import ReplyEffectTracker
 from .reply_effect.image_utils import extract_visual_attachments_from_sequence
@@ -583,6 +584,7 @@ class MaisakaHeartFlowChatting:
         self,
         *,
         context_message_limit: int,
+        drop_head_context_count: int = 0,
         system_prompt: str,
         request_kind: str = "sub_agent",
         extra_messages: Optional[Sequence[LLMContextMessage]] = None,
@@ -598,7 +600,10 @@ class MaisakaHeartFlowChatting:
             request_kind=request_kind,
             max_context_size=context_message_limit,
         )
-        sub_agent_history = list(selected_history)
+        sub_agent_history = self._drop_head_context_messages(
+            selected_history,
+            drop_head_context_count,
+        )
         if extra_messages:
             sub_agent_history.extend(list(extra_messages))
 
@@ -616,6 +621,31 @@ class MaisakaHeartFlowChatting:
             tool_definitions=[] if tool_definitions is None else tool_definitions,
         )
 
+    @staticmethod
+    def _drop_head_context_messages(
+        chat_history: Sequence[LLMContextMessage],
+        drop_context_count: int,
+    ) -> list[LLMContextMessage]:
+        """Drop the given number of counted context messages from the head of the selected context."""
+
+        if drop_context_count <= 0:
+            return list(chat_history)
+
+        first_kept_index = 0
+        dropped_context_count = 0
+        while (
+            first_kept_index < len(chat_history)
+            and dropped_context_count < drop_context_count
+        ):
+            message = chat_history[first_kept_index]
+            if message.count_in_context:
+                dropped_context_count += 1
+            first_kept_index += 1
+
+        trimmed_history = list(chat_history[first_kept_index:])
+        trimmed_history, _ = drop_leading_orphan_tool_results(trimmed_history)
+        return trimmed_history
+
     async def _run_reply_effect_judge(self, prompt: str) -> str:
         """Run the ad-hoc LLM judge used by the reply effect observer."""
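Note: the head cut in _drop_head_context_messages can land between an assistant tool call and its tool result, leaving a history that opens with a result whose call is gone, a shape chat-completion APIs typically reject. That is what the trailing drop_leading_orphan_tool_results call guards against. A sketch with toy message classes (the (history, count) return shape mirrors how the diff unpacks the real helper, but its actual signature is an assumption):

    from dataclasses import dataclass, field


    @dataclass
    class Assistant:
        tool_calls: list[str] = field(default_factory=list)
        count_in_context: bool = True


    @dataclass
    class ToolResult:
        call_id: str
        count_in_context: bool = False  # assumption: results are not counted


    def drop_leading_orphan_tool_results(history):
        # Strip tool results from the head until a non-result message appears;
        # return the cleaned history and how many results were dropped.
        index = 0
        while index < len(history) and isinstance(history[index], ToolResult):
            index += 1
        return list(history[index:]), index


    # After a head cut, the first surviving message is the result of a call
    # that was dropped along with the head:
    history = [ToolResult("call_1"), Assistant(tool_calls=["call_2"]), ToolResult("call_2")]
    cleaned, dropped = drop_leading_orphan_tool_results(history)
    print(dropped, len(cleaned))  # 1 2: the orphaned result for call_1 is removed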