feat:为日志添加上限和配置防止膨胀

This commit is contained in:
SengokuCola
2026-04-25 14:45:35 +08:00
parent 8168fe0d8a
commit be2248b283
8 changed files with 232 additions and 12 deletions

View File

@@ -421,6 +421,9 @@ class BaseMaisakaReplyGenerator:
model_name: str,
messages: List[Message],
) -> None:
if not global_config.debug.record_reply_request:
return
try:
DEBUG_REPLY_CACHE_DIR.mkdir(parents=True, exist_ok=True)
request_body = {

View File

@@ -46,8 +46,8 @@ def get_file_handler():
# 使用基于时间戳的handler简单的轮转份数限制
_file_handler = TimestampedFileHandler(
log_dir=LOG_DIR,
max_bytes=5 * 1024 * 1024, # 5MB
backup_count=30,
max_bytes=max(1024, int(LOG_CONFIG.get("log_file_max_bytes", 5 * 1024 * 1024) or 5 * 1024 * 1024)),
backup_count=max(1, int(LOG_CONFIG.get("max_log_files", 30) or 30)),
encoding="utf-8",
)
# 设置文件handler的日志级别
@@ -305,6 +305,9 @@ def load_log_config(): # sourcery skip: use-contextlib-suppress
"log_level": "INFO", # 全局日志级别(向下兼容)
"console_log_level": "INFO", # 控制台日志级别
"file_log_level": "DEBUG", # 文件日志级别
"log_file_max_bytes": 5 * 1024 * 1024, # 单个日志文件最大大小
"max_log_files": 30, # 最多保留的日志文件数量
"log_cleanup_days": 30, # 日志保留天数
"suppress_libraries": [
"faiss",
"httpx",
@@ -837,13 +840,15 @@ def initialize_logging(verbose: bool = True):
logger.info("日志系统已初始化:")
logger.info(f" - 控制台级别: {console_level}")
logger.info(f" - 文件级别: {file_level}")
logger.info(" - 轮转份数: 30个文件|自动清理: 30天前的日志")
max_log_files = max(1, int(LOG_CONFIG.get("max_log_files", 30) or 30))
log_cleanup_days = max(1, int(LOG_CONFIG.get("log_cleanup_days", 30) or 30))
logger.info(f" - 轮转份数: {max_log_files}个文件|自动清理: {log_cleanup_days}天前的日志")
def cleanup_old_logs():
"""清理过期的日志文件"""
try:
cleanup_days = 30 # 硬编码30天
cleanup_days = max(1, int(LOG_CONFIG.get("log_cleanup_days", 30) or 30))
cutoff_date = datetime.now() - timedelta(days=cleanup_days)
deleted_count = 0
deleted_size = 0
@@ -894,7 +899,9 @@ def start_log_cleanup_task(verbose: bool = True):
if verbose:
logger = get_logger("logger")
logger.info("已启动日志清理任务将自动清理30天前的日志文件轮转份数限制: 30个文件")
max_log_files = max(1, int(LOG_CONFIG.get("max_log_files", 30) or 30))
log_cleanup_days = max(1, int(LOG_CONFIG.get("log_cleanup_days", 30) or 30))
logger.info(f"已启动日志清理任务,将自动清理{log_cleanup_days}天前的日志文件(轮转份数限制: {max_log_files}个文件)")
def shutdown_logging():

View File

@@ -24,6 +24,7 @@ from .official_configs import (
EmojiConfig,
ExpressionConfig,
KeywordReactionConfig,
LogConfig,
MaimMessageConfig,
MCPConfig,
MemoryConfig,
@@ -55,7 +56,7 @@ BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute()
MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute()
LEGACY_ENV_PATH: Path = (PROJECT_ROOT / ".env").resolve().absolute()
MMC_VERSION: str = "1.0.0"
CONFIG_VERSION: str = "8.9.11"
CONFIG_VERSION: str = "8.9.17"
MODEL_CONFIG_VERSION: str = "1.14.2"
logger = get_logger("config")
@@ -109,6 +110,9 @@ class Config(ConfigBase):
telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig)
"""遥测配置类"""
log: LogConfig = Field(default_factory=LogConfig)
"""日志配置类"""
debug: DebugConfig = Field(default_factory=DebugConfig)
"""调试配置类"""

View File

@@ -1074,6 +1074,151 @@ class ResponseSplitterConfig(ConfigBase):
"""是否在句子数量超出回复允许的最大句子数时一次性返回全部内容"""
class LogConfig(ConfigBase):
    """Logging configuration: console/file levels, rotation caps and retention limits."""

    # UI metadata — presumably rendered by the config front-end as the section
    # label and icon; verify against the schema consumer.
    __ui_label__ = "日志"
    __ui_icon__ = "file-text"

    # Timestamp format for log lines (token style, e.g. "m-d H:i:s") —
    # assumes a PHP-date-like token syntax; TODO confirm against the formatter.
    date_style: str = Field(
        default="m-d H:i:s",
        json_schema_extra={
            "x-widget": "input",
            "x-icon": "clock",
        },
    )
    """日期格式"""

    # How verbosely the level name is rendered in each log line.
    log_level_style: Literal["lite", "compact", "full"] = Field(
        default="lite",
        json_schema_extra={
            "x-widget": "select",
            "x-icon": "list",
        },
    )
    """日志等级显示样式"""

    # Console color mode: no color, colored title only, or fully colored text.
    color_text: Literal["none", "title", "full"] = Field(
        default="full",
        json_schema_extra={
            "x-widget": "select",
            "x-icon": "palette",
        },
    )
    """控制台日志颜色模式"""

    # Global level kept for backward compatibility; the console/file levels
    # below take precedence for their respective handlers.
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(
        default="INFO",
        json_schema_extra={
            "x-widget": "select",
            "x-icon": "list-filter",
        },
    )
    """全局日志级别"""

    # Minimum level emitted to the console handler.
    console_log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(
        default="INFO",
        json_schema_extra={
            "x-widget": "select",
            "x-icon": "terminal",
        },
    )
    """控制台日志级别"""

    # Minimum level written to the log file (more verbose than console by default).
    file_log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(
        default="DEBUG",
        json_schema_extra={
            "x-widget": "select",
            "x-icon": "file-json",
        },
    )
    """文件日志级别"""

    # Size threshold (bytes) at which a single log file is rotated; default 5 MiB.
    log_file_max_bytes: int = Field(
        default=5 * 1024 * 1024,
        json_schema_extra={
            "x-widget": "input",
            "x-icon": "hard-drive",
        },
    )
    """单个日志文件最大字节数"""

    # Maximum number of rotated main log files kept on disk.
    max_log_files: int = Field(
        default=30,
        json_schema_extra={
            "x-widget": "input",
            "x-icon": "files",
        },
    )
    """最多保留的主日志文件数量"""

    # Age (days) after which main log files are removed by the cleanup task.
    log_cleanup_days: int = Field(
        default=30,
        json_schema_extra={
            "x-widget": "input",
            "x-icon": "calendar-days",
        },
    )
    """主日志文件保留天数"""

    # Cap on stored failed-LLM-request snapshot files (oldest trimmed first).
    llm_request_snapshot_limit: int = Field(
        default=128,
        json_schema_extra={
            "x-widget": "input",
            "x-icon": "archive",
        },
    )
    """失败请求快照最多保留数量"""

    # Per-chat cap on Maisaka prompt-preview file groups.
    maisaka_prompt_preview_limit: int = Field(
        default=256,
        json_schema_extra={
            "x-widget": "input",
            "x-icon": "panel-top",
        },
    )
    """每个会话最多保留的 Maisaka Prompt 预览组数"""

    # Per-chat cap on Maisaka reply-effect record files.
    maisaka_reply_effect_limit: int = Field(
        default=256,
        json_schema_extra={
            "x-widget": "input",
            "x-icon": "clipboard-check",
        },
    )
    """每个会话最多保留的 Maisaka 回复效果记录数"""

    # Third-party libraries whose log output is suppressed entirely.
    suppress_libraries: list[str] = Field(
        default_factory=lambda: [
            "faiss",
            "httpx",
            "urllib3",
            "asyncio",
            "websockets",
            "httpcore",
            "requests",
            "sqlalchemy",
            "openai",
            "uvicorn",
            "jieba",
        ],
        json_schema_extra={
            "x-widget": "custom",
            "x-icon": "volume-x",
        },
    )
    """完全屏蔽日志的第三方库列表"""

    # Per-library level overrides (library name -> level name).
    library_log_levels: dict[str, str] = Field(
        default_factory=lambda: {"aiohttp": "WARNING"},
        json_schema_extra={
            "x-widget": "custom",
            "x-icon": "sliders-horizontal",
        },
    )
    """特定第三方库的日志级别"""
class TelemetryConfig(ConfigBase):
"""遥测配置类"""
@@ -1149,6 +1294,15 @@ class DebugConfig(ConfigBase):
)
"""是否开启回复效果评分追踪,默认关闭,需要手动打开"""
record_reply_request: bool = Field(
default=False,
json_schema_extra={
"x-widget": "switch",
"x-icon": "file-json",
},
)
"""是否记录 Replyer 请求体,默认关闭"""
class ExtraPromptItem(ConfigBase):
platform: str = Field(

View File

@@ -134,6 +134,11 @@ def _save_debug_provider_request_payload(model_name: str, request_payload: Dict[
if model_name != "deepseek-v4p":
return
from src.config.config import global_config
if not global_config.debug.record_reply_request:
return
try:
DEBUG_REPLY_CACHE_DIR.mkdir(parents=True, exist_ok=True)
file_path = DEBUG_REPLY_CACHE_DIR / _build_debug_provider_request_filename(model_name)

View File

@@ -20,6 +20,7 @@ REPLAY_SCRIPT_RELATIVE_PATH = Path("scripts") / "replay_llm_request.py"
REPLAY_SCRIPT_PATH = PROJECT_ROOT / REPLAY_SCRIPT_RELATIVE_PATH
FILENAME_SAFE_PATTERN = re.compile(r"[^A-Za-z0-9._-]+")
SNAPSHOT_VERSION = 1
DEFAULT_LLM_REQUEST_SNAPSHOT_LIMIT = 128
logger = get_logger("llm_request_snapshot")
@@ -385,6 +386,29 @@ def build_replay_command(snapshot_path: Path) -> str:
return f'uv run python {REPLAY_SCRIPT_RELATIVE_PATH.as_posix()} "{snapshot_path.resolve()}"'
def _get_llm_request_snapshot_limit() -> int:
    """Return the cap on retained LLM request snapshot files.

    Reads ``log.llm_request_snapshot_limit`` from the global config; any
    failure (config unavailable, falsy or non-numeric value) falls back to
    ``DEFAULT_LLM_REQUEST_SNAPSHOT_LIMIT``. The result is always >= 1.
    """
    try:
        from src.config.config import global_config

        configured = int(
            global_config.log.llm_request_snapshot_limit
            or DEFAULT_LLM_REQUEST_SNAPSHOT_LIMIT
        )
    except Exception:
        # Best-effort fallback: never let a config problem break snapshotting.
        return DEFAULT_LLM_REQUEST_SNAPSHOT_LIMIT
    return configured if configured >= 1 else 1
def _trim_llm_request_snapshots() -> None:
    """Delete the oldest snapshot files once the configured cap is exceeded.

    Keeps at most ``_get_llm_request_snapshot_limit()`` ``*.json`` files in
    ``LLM_REQUEST_LOG_DIR``, removing the oldest (by mtime) first.

    Fix: a file may be deleted concurrently between ``glob`` and ``stat``
    (concurrent removal is clearly anticipated — ``unlink`` below already
    tolerates ``FileNotFoundError``), so the sort key must not crash on a
    vanished file.
    """
    limit = _get_llm_request_snapshot_limit()
    snapshot_files = [file_path for file_path in LLM_REQUEST_LOG_DIR.glob("*.json") if file_path.is_file()]
    if len(snapshot_files) <= limit:
        return

    def _mtime_or_zero(file_path: Path) -> float:
        # A vanished file sorts as oldest; the subsequent unlink then
        # silently skips it.
        try:
            return file_path.stat().st_mtime
        except FileNotFoundError:
            return 0.0

    sorted_files = sorted(snapshot_files, key=_mtime_or_zero)
    for old_file in sorted_files[: len(snapshot_files) - limit]:
        try:
            old_file.unlink()
        except FileNotFoundError:
            continue
def save_failed_request_snapshot(
*,
api_provider: APIProvider,
@@ -438,6 +462,7 @@ def save_failed_request_snapshot(
json.dumps(snapshot_payload, ensure_ascii=False, indent=2),
encoding="utf-8",
)
_trim_llm_request_snapshots()
return snapshot_path
except Exception:
logger.exception("保存 LLM 失败请求快照时发生异常")

View File

@@ -13,7 +13,7 @@ class PromptPreviewLogger:
"""负责保存 Maisaka Prompt 预览文件并控制目录容量。"""
_BASE_DIR = Path("logs") / "maisaka_prompt"
_MAX_PREVIEW_GROUPS_PER_CHAT = 1024
_DEFAULT_MAX_PREVIEW_GROUPS_PER_CHAT = 256
_TRIM_COUNT = 100
@classmethod
@@ -54,20 +54,21 @@ class PromptPreviewLogger:
@classmethod
def _trim_overflow(cls, chat_dir: Path) -> None:
    """Trim the oldest preview-file groups once the per-chat cap is exceeded.

    Files sharing a stem form one group. When the group count exceeds the
    configured cap, at least ``_TRIM_COUNT`` of the oldest groups (ordered
    by the earliest mtime among their members) are deleted in one batch.
    """
    cap = cls._get_max_preview_groups_per_chat()
    groups: dict[str, list[Path]] = {}
    for entry in chat_dir.iterdir():
        if entry.is_file():
            groups.setdefault(entry.stem, []).append(entry)
    if len(groups) <= cap:
        return
    oldest_first = sorted(
        groups.items(),
        key=lambda pair: min(member.stat().st_mtime for member in pair[1]),
    )
    # Delete in batches so the trim does not run on every single save.
    removal_count = min(len(oldest_first), max(cls._TRIM_COUNT, len(groups) - cap))
    for _, members in oldest_first[:removal_count]:
        for stale_file in members:
            try:
                stale_file.unlink()
            except FileNotFoundError:
                continue
@classmethod
def _get_max_preview_groups_per_chat(cls) -> int:
    """Return the per-chat cap on preview groups from config (always >= 1).

    Falls back to ``_DEFAULT_MAX_PREVIEW_GROUPS_PER_CHAT`` when the global
    config is unavailable or the configured value is falsy/invalid.
    """
    try:
        from src.config.config import global_config

        value = int(
            global_config.log.maisaka_prompt_preview_limit
            or cls._DEFAULT_MAX_PREVIEW_GROUPS_PER_CHAT
        )
    except Exception:
        return cls._DEFAULT_MAX_PREVIEW_GROUPS_PER_CHAT
    return value if value >= 1 else 1

View File

@@ -13,7 +13,7 @@ from .path_utils import BASE_DIR, build_reply_effect_chat_dir, normalize_preview
class ReplyEffectStorage:
"""负责回复效果记录的独立 JSON 文件存储。"""
_MAX_RECORDS_PER_CHAT = 1024
_DEFAULT_MAX_RECORDS_PER_CHAT = 256
_TRIM_COUNT = 100
def __init__(self, base_dir: Path | None = None) -> None:
@@ -61,15 +61,26 @@ class ReplyEffectStorage:
def _trim_overflow(self, chat_dir: Path) -> None:
    """Delete the oldest reply-effect records once the per-chat cap is exceeded.

    At least ``_TRIM_COUNT`` of the oldest ``*.json`` records (by mtime) are
    removed per batch so the trim does not run on every single save.
    """
    cap = self._get_max_records_per_chat()
    records = [entry for entry in chat_dir.glob("*.json") if entry.is_file()]
    if len(records) <= cap:
        return
    oldest_first = sorted(records, key=lambda entry: entry.stat().st_mtime)
    removal_count = min(len(oldest_first), max(self._TRIM_COUNT, len(records) - cap))
    for stale_file in oldest_first[:removal_count]:
        try:
            stale_file.unlink()
        except FileNotFoundError:
            continue
@classmethod
def _get_max_records_per_chat(cls) -> int:
    """Return the per-chat cap on reply-effect records from config (always >= 1).

    Falls back to ``_DEFAULT_MAX_RECORDS_PER_CHAT`` when the global config
    is unavailable or the configured value is falsy/invalid.
    """
    try:
        from src.config.config import global_config

        value = int(
            global_config.log.maisaka_reply_effect_limit
            or cls._DEFAULT_MAX_RECORDS_PER_CHAT
        )
    except Exception:
        return cls._DEFAULT_MAX_RECORDS_PER_CHAT
    return value if value >= 1 else 1