feat：显示实时占用上下文，移除旧记忆系统

2026-04-01 13:18:17 +08:00
parent 503a257d66
commit d713aa9576
11 changed files with 55 additions and 2663 deletions
--- a/src/maisaka/chat_loop_service.py
+++ b/src/maisaka/chat_loop_service.py
@@ -10,7 +10,6 @@ from typing import Any, Dict, List, Optional, Sequence
 import asyncio
 import json
 import random
-import re

 from PIL import Image as PILImage
 from pydantic import BaseModel, Field as PydanticField
@@ -28,7 +27,7 @@ from src.config.config import global_config
 from src.core.tooling import ToolRegistry, ToolSpec
 from src.know_u.knowledge import extract_category_ids_from_result
 from src.llm_models.model_client.base_client import BaseClient
-from src.llm_models.payload_content.message import ImageMessagePart, Message, MessageBuilder, RoleType, TextMessagePart
+from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
 from src.llm_models.payload_content.resp_format import RespFormat, RespFormatType
 from src.llm_models.payload_content.tool_option import ToolCall, ToolDefinitionInput, ToolOption, normalize_tool_options
 from src.services.llm_service import LLMServiceClient
@@ -697,58 +696,29 @@ class MaisakaChatLoopService:
        )

    @staticmethod
-    def _estimate_text_tokens(text: str) -> int:
-        """估算单段文本的输入 token 数。"""
-        normalized_text = text.strip()
-        if not normalized_text:
-            return 0
-
-        cjk_char_count = sum(1 for char in normalized_text if "\u4e00" <= char <= "\u9fff")
-        latin_chunks = re.findall(r"[A-Za-z0-9_]+", normalized_text)
-        latin_token_count = sum(max(1, (len(chunk) + 3) // 4) for chunk in latin_chunks)
-        punctuation_count = len(re.findall(r"[^\w\s]", normalized_text))
-        whitespace_bonus = max(1, normalized_text.count("\n"))
-        return cjk_char_count + latin_token_count + punctuation_count + whitespace_bonus
+    def _format_token_count(token_count: int) -> str:
+        """格式化 token 数量展示文本。"""
+        if token_count >= 10_000:
+            return f"{token_count / 1000:.1f}k"
+        return str(token_count)

    @classmethod
-    def _estimate_request_tokens(cls, messages: Sequence[Message]) -> int:
-        """估算本轮请求消息的总输入 token 数。"""
-        total_tokens = 0
-        for message in messages:
-            total_tokens += 4
-            total_tokens += cls._estimate_text_tokens(str(message.role.value))
-            if message.tool_call_id:
-                total_tokens += cls._estimate_text_tokens(message.tool_call_id)
-            if message.tool_calls:
-                for tool_call in message.tool_calls:
-                    total_tokens += cls._estimate_text_tokens(getattr(tool_call, "func_name", "") or "")
-                    total_tokens += cls._estimate_text_tokens(
-                        json.dumps(getattr(tool_call, "args", {}) or {}, ensure_ascii=False)
-                    )
-            for part in message.parts:
-                if isinstance(part, TextMessagePart):
-                    total_tokens += cls._estimate_text_tokens(part.text)
-                    continue
-                if isinstance(part, ImageMessagePart):
-                    total_tokens += max(256, len(part.image_base64) // 12)
-        return total_tokens
-
-    @staticmethod
    def _build_prompt_stats_text(
+        cls,
        *,
        selected_history_count: int,
        built_message_count: int,
-        input_token_count: int,
+        prompt_tokens: int,
+        completion_tokens: int,
+        total_tokens: int,
    ) -> str:
        """构造本轮 prompt 的统计信息文本。"""
-        if input_token_count >= 10_000:
-            input_token_text = f"{input_token_count / 1000:.1f}k"
-        else:
-            input_token_text = str(input_token_count)
        return (
            f"已选上下文消息数={selected_history_count} "
            f"大模型消息数={built_message_count} "
-            f"估算输入Token={input_token_text}"
+            f"实际输入Token={cls._format_token_count(prompt_tokens)} "
+            f"输出Token={cls._format_token_count(completion_tokens)} "
+            f"总Token={cls._format_token_count(total_tokens)}"
        )

    async def chat_loop_step(self, chat_history: List[LLMContextMessage]) -> ChatResponse:
@@ -764,13 +734,6 @@ class MaisakaChatLoopService:
        await self.ensure_chat_prompt_loaded()
        selected_history, selection_reason = self._select_llm_context_messages(chat_history)
        built_messages = self._build_request_messages(selected_history)
-        input_token_count = self._estimate_request_tokens(built_messages)
-        prompt_stats_text = self._build_prompt_stats_text(
-            selected_history_count=len(selected_history),
-            built_message_count=len(built_messages),
-            input_token_count=input_token_count,
-        )
-        display_subtitle = f"{selection_reason} | {prompt_stats_text}"

        def message_factory(_client: BaseClient) -> List[Message]:
            """返回当前轮次已经构建好的请求消息。
@@ -806,7 +769,7 @@ class MaisakaChatLoopService:
                Panel(
                    Group(*ordered_panels),
                    title="MaiSaka 大模型请求 - 对话单步",
-                    subtitle=display_subtitle,
+                    subtitle=selection_reason,
                    border_style="cyan",
                    padding=(0, 1),
                )
@@ -820,7 +783,6 @@ class MaisakaChatLoopService:
            f"工具数={len(all_tools)} "
            f"启用打断={self._interrupt_flag is not None}"
        )
-        logger.info(f"??Prompt??: {prompt_stats_text}")
        generation_result = await self._llm_chat.generate_response_with_messages(
            message_factory=message_factory,
            options=LLMGenerationOptions(
@@ -833,6 +795,15 @@ class MaisakaChatLoopService:
        request_elapsed = perf_counter() - request_started_at
        logger.info(f"规划器请求完成，耗时={request_elapsed:.3f} 秒")

+        prompt_stats_text = self._build_prompt_stats_text(
+            selected_history_count=len(selected_history),
+            built_message_count=len(built_messages),
+            prompt_tokens=generation_result.prompt_tokens,
+            completion_tokens=generation_result.completion_tokens,
+            total_tokens=generation_result.total_tokens,
+        )
+        logger.info(f"本轮Prompt统计: {prompt_stats_text}")
+
        tool_call_summaries = [
            {
                "调用编号": getattr(tool_call, "call_id", getattr(tool_call, "id", None)),