diff --git a/src/maisaka/chat_loop_service.py b/src/maisaka/chat_loop_service.py
index 81d66129..63ec38e8 100644
--- a/src/maisaka/chat_loop_service.py
+++ b/src/maisaka/chat_loop_service.py
@@ -41,6 +41,11 @@ from .display.prompt_cli_renderer import PromptCLIVisualizer
 from .visual_mode_utils import resolve_enable_visual_planner
 
 TIMING_GATE_TOOL_NAMES = {"continue", "no_reply", "wait"}
+REQUEST_TYPE_BY_REQUEST_KIND = {
+    "planner": "maisaka_planner",
+    "timing_gate": "maisaka_timing_gate",
+}
+CONTEXT_SELECTION_CACHE_STABILITY_RATIO = 2.0
 
 
 @dataclass(slots=True)
@@ -212,7 +217,7 @@ class MaisakaChatLoopService:
             self._chat_system_prompt = f"{self._personality_prompt}\n\nYou are a helpful AI assistant."
         else:
             self._chat_system_prompt = chat_system_prompt
-        self._llm_chat = LLMServiceClient(task_name="planner", request_type="maisaka_planner")
+        self._llm_chat_clients: dict[str, LLMServiceClient] = {}
 
     @property
     def personality_prompt(self) -> str:
@@ -220,6 +225,30 @@ class MaisakaChatLoopService:
 
         return self._personality_prompt
 
+    @staticmethod
+    def _resolve_llm_request_type(request_kind: str) -> str:
+        """Resolve the LLM accounting request type from the Maisaka request kind."""
+
+        normalized_request_kind = str(request_kind or "").strip()
+        return REQUEST_TYPE_BY_REQUEST_KIND.get(
+            normalized_request_kind,
+            f"maisaka_{normalized_request_kind}" if normalized_request_kind else "maisaka_planner",
+        )
+
+    def _get_llm_chat_client(self, request_kind: str) -> LLMServiceClient:
+        """Get the planner LLM client for the current request kind."""
+
+        request_type = self._resolve_llm_request_type(request_kind)
+        llm_client = self._llm_chat_clients.get(request_type)
+        if llm_client is None:
+            llm_client = LLMServiceClient(
+                task_name="planner",
+                request_type=request_type,
+                session_id=self._session_id,
+            )
+            self._llm_chat_clients[request_type] = llm_client
+        return llm_client
+
     @staticmethod
     def _get_runtime_manager() -> Any:
         """Get the plugin runtime manager.
@@ -321,7 +350,13 @@ class MaisakaChatLoopService:
 
     @staticmethod
     def _build_time_block() -> str:
-        """Build the current-time prompt block."""
+        """Build the static time prompt block."""
+
+        return "当前时间会在每次请求末尾以用户消息形式提供。"
+
+    @staticmethod
+    def _build_current_time_user_message() -> str:
+        """Build the current-time message appended at the end of the request."""
 
         return f"当前时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
 
@@ -446,7 +481,11 @@ class MaisakaChatLoopService:
             messages.append(llm_message)
 
         normalized_injected_messages: List[Message] = []
-        for injected_message in injected_user_messages or []:
+        final_user_messages = [
+            *(injected_user_messages or []),
+            self._build_current_time_user_message(),
+        ]
+        for injected_message in final_user_messages:
             normalized_message = str(injected_message or "").strip()
             if not normalized_message:
                 continue
@@ -458,31 +497,10 @@ class MaisakaChatLoopService:
             )
 
         if normalized_injected_messages:
-            insertion_index = self._resolve_injected_user_messages_insertion_index(messages)
-            messages[insertion_index:insertion_index] = normalized_injected_messages
+            messages.extend(normalized_injected_messages)
 
         return messages
 
-    @staticmethod
-    def _resolve_injected_user_messages_insertion_index(messages: Sequence[Message]) -> int:
-        """Compute the insertion position of injected meta user messages in the request.
-
-        The rule mirrors deferred attachments:
-        - scan backwards from the tail for the nearest stopping point;
-        - a stopping point is an assistant message or a tool result message;
-        - once found, insert right after it;
-        - if no stopping point exists, fall back to just after the system message.
-        """
-
-        for index in range(len(messages) - 1, -1, -1):
-            message = messages[index]
-            if message.role in {RoleType.Assistant, RoleType.Tool}:
-                return index + 1
-
-        if messages and messages[0].role == RoleType.System:
-            return 1
-        return 0
-
     async def chat_loop_step(
         self,
         chat_history: List[LLMContextMessage],
@@ -575,7 +593,8 @@ class MaisakaChatLoopService:
             tool_definitions=list(all_tools),
         )
 
-        generation_result = await self._llm_chat.generate_response_with_messages(
+        llm_chat = self._get_llm_chat_client(request_kind)
+        generation_result = await llm_chat.generate_response_with_messages(
             message_factory=message_factory,
             options=LLMGenerationOptions(
                 tool_options=all_tools if all_tools else None,
@@ -654,7 +673,11 @@ class MaisakaChatLoopService:
             chat_history,
             request_kind=request_kind,
         )
-        effective_context_size = max(1, int(max_context_size or global_config.chat.max_context_size))
+        base_context_size = max(1, int(max_context_size or global_config.chat.max_context_size))
+        effective_context_size = max(
+            base_context_size,
+            int(base_context_size * CONTEXT_SELECTION_CACHE_STABILITY_RATIO),
+        )
 
         selected_indices: List[int] = []
         counted_message_count = 0
@@ -690,9 +713,11 @@ class MaisakaChatLoopService:
         selected_history, _ = normalize_tool_result_order(selected_history)
         tool_message_count = sum(1 for message in selected_history if isinstance(message, ToolResultMessage))
        normal_message_count = len(selected_history) - tool_message_count
+        stability_text = f"|cache_window {base_context_size}->{effective_context_size}"
         selection_reason = (
             f"实际发送 {len(selected_history)} 条消息"
             f"|消息 {normal_message_count} 条|tool {tool_message_count} 条"
+            f"{stability_text}"
         )
         return (
             selected_history,
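Note: all three chat_loop_service.py changes push dynamic bytes toward the tail of the request: the current time moves out of the prompt block into a final user message, injected meta messages are appended instead of spliced into the middle, and the 2.0x cache-stability window over-selects history so the kept prefix shifts less often. That is the shape provider-side prompt caching rewards, a byte-stable prefix with the volatile content at the end. The per-request-kind client map is plain lazy memoization; a minimal standalone sketch of the same pattern (FakeClient is a hypothetical stand-in for LLMServiceClient, session_id omitted):

    from dataclasses import dataclass

    REQUEST_TYPE_BY_REQUEST_KIND = {
        "planner": "maisaka_planner",
        "timing_gate": "maisaka_timing_gate",
    }


    @dataclass
    class FakeClient:
        task_name: str
        request_type: str


    _clients: dict[str, FakeClient] = {}


    def get_client(request_kind: str) -> FakeClient:
        # Unknown kinds map to "maisaka_<kind>"; an empty kind falls back to planner.
        kind = str(request_kind or "").strip()
        request_type = REQUEST_TYPE_BY_REQUEST_KIND.get(
            kind, f"maisaka_{kind}" if kind else "maisaka_planner"
        )
        if request_type not in _clients:
            _clients[request_type] = FakeClient("planner", request_type)
        return _clients[request_type]


    assert get_client("timing_gate") is get_client("timing_gate")  # memoized per type
    assert get_client("").request_type == "maisaka_planner"        # empty-kind fallback

One client per resolved request_type likely also keeps timing-gate traffic in its own maisaka_timing_gate statistics bucket instead of being lumped in with planner calls.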
diff --git a/src/maisaka/history_post_processor.py b/src/maisaka/history_post_processor.py
index 5b3a125d..aa038f08 100644
--- a/src/maisaka/history_post_processor.py
+++ b/src/maisaka/history_post_processor.py
@@ -3,11 +3,11 @@
 from dataclasses import dataclass
 from math import ceil
 
-from .context_messages import AssistantMessage, LLMContextMessage
+from .context_messages import LLMContextMessage
 from .history_utils import drop_leading_orphan_tool_results, drop_orphan_tool_results, normalize_tool_result_order
 
-EARLY_TRIM_RATIO = 0.3
-TRIM_THRESHOLD_RATIO = 1.2
+TRIM_TARGET_RATIO = 1.0
+TRIM_THRESHOLD_RATIO = 2.0
 
 
 @dataclass(slots=True)
@@ -36,21 +36,16 @@ def process_chat_history_after_cycle(
     compact_removed_count = 0
     trim_threshold = ceil(max_context_size * TRIM_THRESHOLD_RATIO)
     if remaining_context_count > trim_threshold:
-        removed_early_message_count = _remove_early_history_messages(processed_history)
-        processed_history, removed_after_message_trim_count, moved_after_message_trim_count = (
-            _normalize_history_structure(processed_history)
+        target_context_count = max(1, int(max_context_size * TRIM_TARGET_RATIO))
+        removed_early_message_count = _trim_history_to_context_target(
+            processed_history,
+            target_context_count=target_context_count,
         )
-        removed_assistant_thought_count = _remove_early_assistant_thoughts(processed_history)
-        processed_history, removed_after_thought_trim_count, moved_after_thought_trim_count = (
-            _normalize_history_structure(processed_history)
+        processed_history, removed_after_trim_count, moved_after_trim_count = _normalize_history_structure(
+            processed_history
         )
-        compact_removed_count = (
-            removed_early_message_count
-            + removed_after_message_trim_count
-            + removed_assistant_thought_count
-            + removed_after_thought_trim_count
-        )
-        moved_tool_result_count += moved_after_message_trim_count + moved_after_thought_trim_count
+        compact_removed_count = removed_early_message_count + removed_after_trim_count
+        moved_tool_result_count += moved_after_trim_count
 
     remaining_context_count = sum(1 for message in processed_history if message.count_in_context)
     removed_count = normalized_removed_count + compact_removed_count
@@ -78,42 +73,27 @@
 )
 
 
-def _remove_early_history_messages(chat_history: list[LLMContextMessage]) -> int:
-    """Remove the earliest 30% of all history messages."""
+def _trim_history_to_context_target(
+    chat_history: list[LLMContextMessage],
+    *,
+    target_context_count: int,
+) -> int:
+    """Remove the earliest slice of history until the counted context size drops to the target."""
+
+    remaining_context_count = sum(1 for message in chat_history if message.count_in_context)
+    if remaining_context_count <= target_context_count:
+        return 0
+
+    remove_count = 0
+    for message in chat_history:
+        remove_count += 1
+        if message.count_in_context:
+            remaining_context_count -= 1
+            if remaining_context_count <= target_context_count:
+                break
 
-    remove_count = int(len(chat_history) * EARLY_TRIM_RATIO)
     if remove_count <= 0:
         return 0
 
     del chat_history[:remove_count]
     return remove_count
-
-
-def _remove_early_assistant_thoughts(chat_history: list[LLMContextMessage]) -> int:
-    """Remove the earliest 30% of non-tool assistant thought content."""
-
-    candidate_indexes = [
-        index
-        for index, message in enumerate(chat_history)
-        if isinstance(message, AssistantMessage)
-        and not message.tool_calls
-        and message.source_kind != "perception"
-        and bool(message.content.strip())
-    ]
-    remove_count = int(len(candidate_indexes) * EARLY_TRIM_RATIO)
-    if remove_count <= 0:
-        return 0
-
-    removed_indexes = set(candidate_indexes[:remove_count])
-    filtered_history: list[LLMContextMessage] = []
-    removed_total = 0
-    for index, message in enumerate(chat_history):
-        if index in removed_indexes:
-            removed_total += 1
-            continue
-        filtered_history.append(message)
-
-    chat_history[:] = filtered_history
-    return removed_total
-
-
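Note: the post-processor replaces the old two-pass 30% trim with hysteresis: nothing is touched until the counted context exceeds TRIM_THRESHOLD_RATIO (2.0x) of max_context_size, and a single cut then brings it back to TRIM_TARGET_RATIO (1.0x). Between cuts the surviving head stays byte-identical, at the cost of carrying up to 2x context. A counts-only toy model of the trigger/target interplay (the real code trims by count_in_context messages and re-normalizes tool results afterwards):

    from math import ceil

    MAX_CONTEXT_SIZE = 10          # assumed config value, for illustration
    TRIM_THRESHOLD_RATIO = 2.0     # trigger: context grows past 2.0x
    TRIM_TARGET_RATIO = 1.0        # target: cut back down to 1.0x

    history: list[int] = []
    trim_turns: list[int] = []
    for turn in range(1, 51):      # one new message per turn
        history.append(turn)
        if len(history) > ceil(MAX_CONTEXT_SIZE * TRIM_THRESHOLD_RATIO):
            target = max(1, int(MAX_CONTEXT_SIZE * TRIM_TARGET_RATIO))
            del history[: len(history) - target]  # drop the oldest slice in one cut
            trim_turns.append(turn)

    print(trim_turns)  # [21, 32, 43]: one deep trim every 11 turns, not every turn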
diff --git a/src/maisaka/reasoning_engine.py b/src/maisaka/reasoning_engine.py
index ab7484a2..037f6618 100644
--- a/src/maisaka/reasoning_engine.py
+++ b/src/maisaka/reasoning_engine.py
@@ -52,7 +52,7 @@ if TYPE_CHECKING:
 
 logger = get_logger("maisaka_reasoning_engine")
 
-TIMING_GATE_CONTEXT_LIMIT = 24
+TIMING_GATE_CONTEXT_DROP_HEAD_RATIO = 0.7
 TIMING_GATE_MAX_TOKENS = 384
 TIMING_GATE_MAX_ATTEMPTS = 3
 TIMING_GATE_TOOL_NAMES = {"continue", "no_reply", "wait"}
@@ -124,7 +124,6 @@ class MaisakaReasoningEngine:
     async def _run_timing_gate_sub_agent(
         self,
         *,
-        context_message_limit: int,
         system_prompt: str,
         tool_definitions: list[dict[str, Any]],
     ) -> Any:
@@ -134,7 +133,10 @@ class MaisakaReasoningEngine:
         """
 
         return await self._runtime.run_sub_agent(
-            context_message_limit=context_message_limit,
+            context_message_limit=self._runtime._max_context_size,
+            drop_head_context_count=int(
+                self._runtime._max_context_size * TIMING_GATE_CONTEXT_DROP_HEAD_RATIO,
+            ),
             system_prompt=system_prompt,
             request_kind="timing_gate",
             interrupt_flag=None,
@@ -255,7 +257,6 @@ class MaisakaReasoningEngine:
         invalid_tool_text = ""
         for attempt_index in range(TIMING_GATE_MAX_ATTEMPTS):
             response = await self._run_timing_gate_sub_agent(
-                context_message_limit=TIMING_GATE_CONTEXT_LIMIT,
                 system_prompt=self._build_timing_gate_system_prompt(),
                 tool_definitions=get_timing_tools(),
             )
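Note: the timing gate no longer uses a fixed 24-message window; it takes the planner's full selection and drops the oldest 70% of counted messages, so its view scales with the configured context size and shares its suffix with the planner prompt. The arithmetic, sketched with a toy message type (Msg stands in for LLMContextMessage; the real path also strips leading orphan tool results afterwards, and note that int() truncates):

    from dataclasses import dataclass

    TIMING_GATE_CONTEXT_DROP_HEAD_RATIO = 0.7


    @dataclass
    class Msg:
        text: str
        count_in_context: bool = True


    def drop_head(history: list[Msg], drop_context_count: int) -> list[Msg]:
        # Skip counted messages from the head until the quota is met; uncounted
        # messages interleaved with them are dropped along the way.
        if drop_context_count <= 0:
            return list(history)
        first_kept = 0
        dropped = 0
        while first_kept < len(history) and dropped < drop_context_count:
            if history[first_kept].count_in_context:
                dropped += 1
            first_kept += 1
        return list(history[first_kept:])


    max_context_size = 40
    history = [Msg(f"m{i}") for i in range(max_context_size)]
    gate_view = drop_head(history, int(max_context_size * TIMING_GATE_CONTEXT_DROP_HEAD_RATIO))
    print(len(gate_view))  # 12: only the newest 30% of counted messages survive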
diff --git a/src/maisaka/runtime.py b/src/maisaka/runtime.py
index a3db92bf..a96eaf11 100644
--- a/src/maisaka/runtime.py
+++ b/src/maisaka/runtime.py
@@ -45,6 +45,7 @@ from .context_messages import (
 from .display.display_utils import build_tool_call_summary_lines, format_token_count
 from .display.prompt_cli_renderer import PromptCLIVisualizer
 from .display.stage_status_board import remove_stage_status, update_stage_status
+from .history_utils import drop_leading_orphan_tool_results
 from .reasoning_engine import MaisakaReasoningEngine
 from .reply_effect import ReplyEffectTracker
 from .reply_effect.image_utils import extract_visual_attachments_from_sequence
@@ -583,6 +584,7 @@ class MaisakaHeartFlowChatting:
         self,
         *,
         context_message_limit: int,
+        drop_head_context_count: int = 0,
         system_prompt: str,
         request_kind: str = "sub_agent",
         extra_messages: Optional[Sequence[LLMContextMessage]] = None,
@@ -598,7 +600,10 @@ class MaisakaHeartFlowChatting:
             request_kind=request_kind,
             max_context_size=context_message_limit,
         )
-        sub_agent_history = list(selected_history)
+        sub_agent_history = self._drop_head_context_messages(
+            selected_history,
+            drop_head_context_count,
+        )
         if extra_messages:
             sub_agent_history.extend(list(extra_messages))
 
@@ -616,6 +621,31 @@ class MaisakaHeartFlowChatting:
             tool_definitions=[] if tool_definitions is None else tool_definitions,
         )
 
+    @staticmethod
+    def _drop_head_context_messages(
+        chat_history: Sequence[LLMContextMessage],
+        drop_context_count: int,
+    ) -> list[LLMContextMessage]:
+        """Drop the given number of counted context messages from the head of the selected context."""
+
+        if drop_context_count <= 0:
+            return list(chat_history)
+
+        first_kept_index = 0
+        dropped_context_count = 0
+        while (
+            first_kept_index < len(chat_history)
+            and dropped_context_count < drop_context_count
+        ):
+            message = chat_history[first_kept_index]
+            if message.count_in_context:
+                dropped_context_count += 1
+            first_kept_index += 1
+
+        trimmed_history = list(chat_history[first_kept_index:])
+        trimmed_history, _ = drop_leading_orphan_tool_results(trimmed_history)
+        return trimmed_history
+
     async def _run_reply_effect_judge(self, prompt: str) -> str:
         """Run the ad-hoc LLM judge used by the reply effect observer."""
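Note: the head cut in _drop_head_context_messages can land between an assistant tool call and its tool result, leaving a history that opens with a result whose call is gone, a shape chat-completion APIs typically reject. That is what the trailing drop_leading_orphan_tool_results call guards against. A sketch with toy message classes (the (history, count) return shape mirrors how the diff unpacks the real helper, but its actual signature is an assumption):

    from dataclasses import dataclass, field


    @dataclass
    class Assistant:
        tool_calls: list[str] = field(default_factory=list)
        count_in_context: bool = True


    @dataclass
    class ToolResult:
        call_id: str
        count_in_context: bool = False  # assumption: results are not counted


    def drop_leading_orphan_tool_results(history):
        # Strip tool results from the head until a non-result message appears;
        # return the cleaned history and how many results were dropped.
        index = 0
        while index < len(history) and isinstance(history[index], ToolResult):
            index += 1
        return list(history[index:]), index


    # After a head cut, the first surviving message is the result of a call
    # that was dropped along with the head:
    history = [ToolResult("call_1"), Assistant(tool_calls=["call_2"]), ToolResult("call_2")]
    cleaned, dropped = drop_leading_orphan_tool_results(history)
    print(dropped, len(cleaned))  # 1 2: the orphaned result for call_1 is removed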