perf:优化麦麦观察体验,优化推理检索体验

This commit is contained in:
SengokuCola
2026-05-07 20:15:14 +08:00
parent 2a7722f84e
commit 827cdbd441
23 changed files with 1206 additions and 376 deletions

View File

@@ -37,6 +37,7 @@ class ReasoningPromptListResponse(BaseModel):
page_size: int
stages: list[str] = Field(default_factory=list)
sessions: list[str] = Field(default_factory=list)
selected_session: str = ""
class ReasoningPromptContentResponse(BaseModel):
@@ -76,15 +77,54 @@ def _relative_posix_path(path: Path) -> str:
return path.relative_to(PROMPT_LOG_ROOT).as_posix()
def _collect_prompt_files() -> tuple[list[ReasoningPromptFile], list[str], list[str]]:
def _is_safe_name(name: str) -> bool:
path = Path(name)
return bool(name) and not path.is_absolute() and ".." not in path.parts and len(path.parts) == 1
def _list_stage_names() -> list[str]:
    """Return the safe stage directory names under PROMPT_LOG_ROOT, sorted.

    Fix: the source contained a stale leftover line ``return [], [], []`` from the
    old tuple-returning implementation interleaved with the new ``return []``;
    this function's declared return type is ``list[str]``, so only the empty
    list return is kept.
    """
    if not PROMPT_LOG_ROOT.is_dir():
        # No prompt-log root yet — nothing has been recorded.
        return []
    return sorted(path.name for path in PROMPT_LOG_ROOT.iterdir() if path.is_dir() and _is_safe_name(path.name))
def _resolve_stage_name(stage: str) -> str:
normalized_stage = str(stage or "").strip()
if not normalized_stage or normalized_stage == "all":
return "planner"
if not _is_safe_name(normalized_stage):
raise HTTPException(status_code=400, detail="阶段名称不合法")
return normalized_stage
def _list_session_names(stage: str) -> list[str]:
    """List session directory names under *stage*, most recently modified first."""
    stage_dir = PROMPT_LOG_ROOT / stage
    if not stage_dir.is_dir():
        return []
    candidates = [
        entry for entry in stage_dir.iterdir()
        if entry.is_dir() and _is_safe_name(entry.name)
    ]
    # Newest sessions first, by directory mtime.
    candidates.sort(key=lambda entry: entry.stat().st_mtime, reverse=True)
    return [entry.name for entry in candidates]
def _resolve_session_name(session: str, sessions: list[str]) -> str:
normalized_session = str(session or "").strip()
if not normalized_session or normalized_session in {"all", "auto"}:
return sessions[0] if sessions else ""
if not _is_safe_name(normalized_session):
raise HTTPException(status_code=400, detail="会话名称不合法")
return normalized_session if normalized_session in sessions else ""
def _collect_prompt_files(stage: str, session: str) -> list[ReasoningPromptFile]:
session_dir = PROMPT_LOG_ROOT / stage / session
if not session or not session_dir.is_dir():
return []
records: dict[tuple[str, str, str], dict[str, object]] = {}
stages: set[str] = set()
sessions: set[str] = set()
for file_path in PROMPT_LOG_ROOT.rglob("*"):
for file_path in session_dir.iterdir():
if not file_path.is_file() or file_path.suffix.lower() not in ALLOWED_SUFFIXES:
continue
@@ -97,17 +137,15 @@ def _collect_prompt_files() -> tuple[list[ReasoningPromptFile], list[str], list[
if len(parts) < 3:
continue
stage, session_id = parts[0], parts[1]
stage_name, session_id = parts[0], parts[1]
stem = file_path.stem
key = (stage, session_id, stem)
key = (stage_name, session_id, stem)
stat = file_path.stat()
stages.add(stage)
sessions.add(session_id)
record = records.setdefault(
key,
{
"stage": stage,
"stage": stage_name,
"session_id": session_id,
"stem": stem,
"timestamp": int(stem) if stem.isdigit() else None,
@@ -127,26 +165,26 @@ def _collect_prompt_files() -> tuple[list[ReasoningPromptFile], list[str], list[
items = [ReasoningPromptFile(**record) for record in records.values()]
items.sort(key=lambda item: (item.modified_at, item.timestamp or 0), reverse=True)
return items, sorted(stages), sorted(sessions)
return items
@router.get("/files", response_model=ReasoningPromptListResponse)
async def list_reasoning_prompt_files(
stage: str = Query("all"),
session: str = Query("all"),
stage: str = Query("planner"),
session: str = Query("auto"),
search: str = Query(""),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=10, le=200),
):
"""列出 logs/maisaka_prompt 下的推理过程日志。"""
items, stages, sessions = _collect_prompt_files()
stages = _list_stage_names()
selected_stage = _resolve_stage_name(stage)
sessions = _list_session_names(selected_stage)
selected_session = _resolve_session_name(session, sessions)
items = _collect_prompt_files(selected_stage, selected_session)
normalized_search = search.strip().lower()
if stage != "all":
items = [item for item in items if item.stage == stage]
if session != "all":
items = [item for item in items if item.session_id == session]
if normalized_search:
items = [
item
@@ -167,6 +205,7 @@ async def list_reasoning_prompt_files(
page_size=page_size,
stages=stages,
sessions=sessions,
selected_session=selected_session,
)

View File

@@ -5,14 +5,19 @@
"""
from datetime import datetime
from typing import Optional
from pathlib import Path
from typing import Literal, Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from pydantic import BaseModel, Field
from sqlalchemy import func, inspect, text
from sqlmodel import col, select
import os
import time
from src.common.database.database import engine, get_db_session
from src.common.database.database_model import Images, ImageType
from src.common.logger import get_logger
from src.config.config import MMC_VERSION
from src.webui.dashboard_update import (
@@ -27,6 +32,14 @@ router = APIRouter(prefix="/system", tags=["system"], dependencies=[Depends(requ
logger = get_logger("webui_system")
# Process start timestamp; presumably the uptime baseline — its use is not shown in this chunk.
_start_time = time.time()
# Repo root; assumes this file sits three directory levels below it — TODO confirm layout.
_PROJECT_ROOT = Path(__file__).resolve().parents[3]
_DATA_DIR = _PROJECT_ROOT / "data"
_IMAGE_DIR = _DATA_DIR / "images"  # cached chat images
_EMOJI_DIR = _DATA_DIR / "emoji"  # cached emoji/sticker files
_EMOJI_THUMBNAIL_DIR = _DATA_DIR / "emoji_thumbnails"  # thumbnails derived from emoji
_LOG_DIR = _PROJECT_ROOT / "logs"
_DATABASE_FILE = _DATA_DIR / "MaiBot.db"
# SQLite side files (write-ahead log / shared memory) counted alongside the main DB file.
_DATABASE_AUXILIARY_SUFFIXES = ("-wal", "-shm")
class RestartResponse(BaseModel):
@@ -56,6 +69,211 @@ class DashboardVersionResponse(BaseModel):
pypi_url: str = PYPI_PROJECT_URL
class CacheDirectoryStats(BaseModel):
    """Statistics for one local cache directory."""
    key: str  # stable identifier, e.g. "images" / "emoji" / "logs"
    label: str  # human-readable display name (Chinese)
    path: str  # filesystem path of the directory
    exists: bool  # whether the directory currently exists on disk
    file_count: int  # number of regular files found (recursive)
    total_size: int  # summed file sizes in bytes
    db_records: int = 0  # matching Images DB rows; 0 when no DB counterpart applies
class DatabaseFileStats(BaseModel):
    """Statistics for a single database file (main DB or -wal/-shm companion)."""
    path: str  # filesystem path of the file
    exists: bool  # whether the file exists on disk
    size: int  # file size in bytes (0 when missing or unreadable)
class DatabaseTableStats(BaseModel):
    """Row count for a single database table."""
    name: str  # table name as reported by the SQLAlchemy inspector
    rows: int  # SELECT COUNT(*) result for the table
class DatabaseStorageStats(BaseModel):
    """Aggregate database storage statistics."""
    files: list[DatabaseFileStats]  # main DB file plus auxiliary files
    tables: list[DatabaseTableStats]  # per-table row counts, sorted by name
    total_size: int  # sum of all file sizes in bytes
class LocalCacheStatsResponse(BaseModel):
    """Response payload for the local cache statistics endpoint."""
    directories: list[CacheDirectoryStats]  # images, emoji, thumbnails, logs
    database: DatabaseStorageStats  # SQLite file + table statistics
class LocalCacheCleanupRequest(BaseModel):
    """Request body for clearing a local cache area."""
    target: Literal["images", "emoji", "logs"]  # which cache area to clear
    # Log tables to purge; only consulted when target == "logs".
    tables: list[Literal["llm_usage", "tool_records", "mai_messages"]] = Field(default_factory=list)
class LocalCacheCleanupResponse(BaseModel):
    """Response payload describing what a cleanup removed."""
    success: bool  # True when the requested cleanup completed
    message: str  # human-readable result summary (Chinese)
    target: str  # echoes the requested cleanup target
    removed_files: int = 0  # number of files deleted from disk
    removed_bytes: int = 0  # total bytes freed on disk
    removed_records: int = 0  # number of DB rows deleted
def _parse_version_parts(version: str | None) -> Optional[list[int]]:
"""将版本号转换为可比较的整数列表。"""
if not version:
return None
parts: list[int] = []
for raw_part in version.split("."):
if not raw_part.isdigit():
return None
parts.append(int(raw_part))
return parts
def _is_newer_version(latest: str | None, current: str | None) -> bool:
    """Return True when *latest* is a strictly newer numeric version than *current*.

    Unparseable versions never compare as newer.
    """
    latest_nums = _parse_version_parts(latest)
    current_nums = _parse_version_parts(current)
    if latest_nums is None or current_nums is None:
        return False
    # Zero-pad the shorter list so "1.2" compares as "1.2.0" against "1.2.1".
    width = max(len(latest_nums), len(current_nums))
    latest_nums = latest_nums + [0] * (width - len(latest_nums))
    current_nums = current_nums + [0] * (width - len(current_nums))
    return latest_nums > current_nums
def _iter_files(directory: Path) -> list[Path]:
if not directory.exists() or not directory.is_dir():
return []
return [path for path in directory.rglob("*") if path.is_file()]
def _get_directory_size(directory: Path) -> tuple[int, int]:
    """Return (file_count, total_bytes) over every file under *directory*.

    Files whose size cannot be read are logged and still counted, contributing
    zero bytes.
    """
    entries = _iter_files(directory)
    byte_total = 0
    for file_path in entries:
        try:
            byte_total += file_path.stat().st_size
        except OSError:
            logger.warning(f"读取缓存文件大小失败: {file_path}")
    return len(entries), byte_total
def _get_image_record_count(image_type: ImageType) -> int:
    """Count Images rows of the given *image_type*."""
    count_stmt = select(func.count()).select_from(Images).where(col(Images.image_type) == image_type)
    with get_db_session() as session:
        return int(session.exec(count_stmt).one())
def _build_directory_stats(key: str, label: str, path: Path, image_type: ImageType | None = None) -> CacheDirectoryStats:
    """Assemble CacheDirectoryStats for *path*, optionally counting matching DB rows."""
    count, size = _get_directory_size(path)
    # Only image-backed caches have a DB counterpart to count.
    records = _get_image_record_count(image_type) if image_type is not None else 0
    return CacheDirectoryStats(
        key=key,
        label=label,
        path=str(path),
        exists=path.exists(),
        file_count=count,
        total_size=size,
        db_records=records,
    )
def _get_database_files() -> list[DatabaseFileStats]:
    """Stat the main SQLite file plus its -wal/-shm companion files."""
    candidates = [_DATABASE_FILE]
    candidates += [Path(f"{_DATABASE_FILE}{suffix}") for suffix in _DATABASE_AUXILIARY_SUFFIXES]
    stats: list[DatabaseFileStats] = []
    for db_path in candidates:
        exists = db_path.exists()
        size = 0
        if exists:
            try:
                size = db_path.stat().st_size
            except OSError:
                # Missing/unreadable sizes are reported as 0 rather than failing the endpoint.
                logger.warning(f"读取数据库文件大小失败: {db_path}")
        stats.append(DatabaseFileStats(path=str(db_path), exists=exists, size=size))
    return stats
def _get_database_table_stats() -> list[DatabaseTableStats]:
    """Count the rows of every table in the database, sorted by table name."""
    results: list[DatabaseTableStats] = []
    with engine.connect() as connection:
        for table_name in inspect(engine).get_table_names():
            # Escape embedded double quotes so the identifier stays safely quoted.
            quoted_table_name = table_name.replace('"', '""')
            row_count = connection.execute(text(f'SELECT COUNT(*) FROM "{quoted_table_name}"')).scalar_one()
            results.append(DatabaseTableStats(name=table_name, rows=int(row_count)))
    results.sort(key=lambda item: item.name)
    return results
def _build_database_stats() -> DatabaseStorageStats:
    """Combine per-file and per-table database statistics into one payload."""
    file_stats = _get_database_files()
    total = sum(entry.size for entry in file_stats)
    return DatabaseStorageStats(
        files=file_stats,
        tables=_get_database_table_stats(),
        total_size=total,
    )
def _remove_directory_contents(directory: Path) -> tuple[int, int]:
    """Delete every file under *directory*, then prune now-empty subdirectories.

    Returns (files_removed, bytes_removed). The directory itself is kept;
    failed deletions are logged and skipped.
    """
    # is_dir() is False for nonexistent paths, covering both original checks.
    if not directory.is_dir():
        return 0, 0
    files_removed = 0
    bytes_removed = 0
    for file_path in _iter_files(directory):
        try:
            size = file_path.stat().st_size
            file_path.unlink()
        except OSError as exc:
            logger.warning(f"删除缓存文件失败: {file_path}, error={exc}")
        else:
            files_removed += 1
            bytes_removed += size
    # Deepest paths first, so parents empty out before we try to remove them.
    for child in sorted(directory.rglob("*"), key=lambda item: len(item.parts), reverse=True):
        if child.is_dir():
            try:
                child.rmdir()
            except OSError:
                pass  # non-empty or busy — leave it in place
    return files_removed, bytes_removed
def _delete_image_records(image_type: ImageType) -> int:
    """Delete all Images rows of *image_type*; return the number removed."""
    deleted = 0
    with get_db_session() as session:
        matching = session.exec(select(Images).where(col(Images.image_type) == image_type)).all()
        for record in matching:
            session.delete(record)
            deleted += 1
    return deleted
def _delete_log_records(table_names: list[str]) -> int:
allowed_tables = {"llm_usage", "tool_records", "mai_messages"}
invalid_tables = set(table_names) - allowed_tables
if invalid_tables:
raise ValueError(f"不支持清理这些表: {', '.join(sorted(invalid_tables))}")
removed_records = 0
with engine.begin() as connection:
for table_name in table_names:
quoted_table_name = table_name.replace('"', '""')
result = connection.execute(text(f'DELETE FROM "{quoted_table_name}"'))
removed_records += int(result.rowcount or 0)
return removed_records
@router.post("/restart", response_model=RestartResponse)
async def restart_maibot():
"""
@@ -120,6 +338,70 @@ async def get_dashboard_version(current_version: Optional[str] = None):
)
@router.get("/local-cache", response_model=LocalCacheStatsResponse)
async def get_local_cache_stats():
"""获取 data 目录下图片、表情包和数据库的本地存储情况。"""
try:
return LocalCacheStatsResponse(
directories=[
_build_directory_stats("images", "图片缓存", _IMAGE_DIR, ImageType.IMAGE),
_build_directory_stats("emoji", "表情包缓存", _EMOJI_DIR, ImageType.EMOJI),
_build_directory_stats("emoji_thumbnails", "表情包缩略图缓存", _EMOJI_THUMBNAIL_DIR),
_build_directory_stats("logs", "日志文件", _LOG_DIR),
],
database=_build_database_stats(),
)
except Exception as e:
logger.exception(f"获取本地缓存统计失败: {e}")
raise HTTPException(status_code=500, detail=f"获取本地缓存统计失败: {str(e)}") from e
@router.post("/local-cache/cleanup", response_model=LocalCacheCleanupResponse)
async def cleanup_local_cache(request: LocalCacheCleanupRequest):
"""清理指定的本地缓存区域。"""
try:
if request.target == "images":
removed_files, removed_bytes = _remove_directory_contents(_IMAGE_DIR)
removed_records = _delete_image_records(ImageType.IMAGE)
return LocalCacheCleanupResponse(
success=True,
message="图片缓存已清理",
target=request.target,
removed_files=removed_files,
removed_bytes=removed_bytes,
removed_records=removed_records,
)
if request.target == "emoji":
emoji_files, emoji_bytes = _remove_directory_contents(_EMOJI_DIR)
thumbnail_files, thumbnail_bytes = _remove_directory_contents(_EMOJI_THUMBNAIL_DIR)
removed_records = _delete_image_records(ImageType.EMOJI)
return LocalCacheCleanupResponse(
success=True,
message="表情包缓存已清理",
target=request.target,
removed_files=emoji_files + thumbnail_files,
removed_bytes=emoji_bytes + thumbnail_bytes,
removed_records=removed_records,
)
if not request.tables:
raise HTTPException(status_code=400, detail="请至少选择一个要清理的日志表")
removed_records = _delete_log_records(list(request.tables))
return LocalCacheCleanupResponse(
success=True,
message="日志记录已清理",
target=request.target,
removed_records=removed_records,
)
except HTTPException:
raise
except Exception as e:
logger.exception(f"清理本地缓存失败: {e}")
raise HTTPException(status_code=500, detail=f"清理本地缓存失败: {str(e)}") from e
# 可选:添加更多系统控制功能

View File

@@ -159,6 +159,15 @@ async def _handle_maisaka_monitor_subscribe(connection_id: str, request_id: Opti
ok=True,
data={"domain": "maisaka_monitor", "topic": "main"},
)
from src.maisaka.display.stage_status_board import get_stage_status_snapshot
await websocket_manager.send_event(
connection_id,
domain="maisaka_monitor",
event="stage.snapshot",
topic="main",
data={"entries": get_stage_status_snapshot(), "timestamp": time.time()},
)
async def _handle_subscribe(connection_id: str, message: Dict[str, Any]) -> None: