diff --git a/saka.py b/saka.py
index 77e8ac08..9d5b06e8 100644
--- a/saka.py
+++ b/saka.py
@@ -22,11 +22,15 @@ if str(_root) not in sys.path:
 if str(_maisaka_path) not in sys.path:
     sys.path.insert(0, str(_maisaka_path))
 
+from src.prompt.prompt_manager import prompt_manager
 from config import console
 from cli import BufferCLI
 
 
 def main():
+    # 加载所有提示词文件
+    prompt_manager.load_prompts()
+
     cli = BufferCLI()
     try:
         asyncio.run(cli.run())
diff --git a/src/maisaka/builtin_tools.py b/src/maisaka/builtin_tools.py
new file mode 100644
index 00000000..8642a7e5
--- /dev/null
+++ b/src/maisaka/builtin_tools.py
@@ -0,0 +1,126 @@
+"""
+MaiSaka - 内置工具定义
+定义 say, wait, stop, store_context 等内置工具
+使用主项目的工具格式（ToolOption + ToolParamType）
+"""
+
+from typing import List, Dict, Any
+from src.llm_models.payload_content.tool_option import ToolOption, ToolParamType
+
+# 内置工具定义
+def create_builtin_tools() -> List[ToolOption]:
+    """Build and return the built-in tools, in order: say, wait, stop, store_context."""
+    from src.llm_models.payload_content.tool_option import ToolOptionBuilder
+
+    tools = []
+
+    # say tool: one required string parameter, "reason"
+    say_builder = ToolOptionBuilder()
+    say_builder.set_name("say")
+    say_builder.set_description("对用户说话。你所有想让用户看到的正式发言都必须通过此工具输出。直接输出的文本会被视为你的内心思考，用户无法阅读。reason 参数描述你想要回复的方式、想法和内容，系统会根据你的想法和对话上下文生成具体的回复。")
+    say_builder.add_param(
+        name="reason",
+        param_type=ToolParamType.STRING,
+        description="描述你想要回复的方式、想法和内容。例如：'同意对方的看法，并分享自己的经历' 或 '礼貌地拒绝，表示现在不方便聊天'",
+        required=True,
+        enum_values=None
+    )
+    tools.append(say_builder.build())
+
+    # wait tool: one required integer parameter, "seconds"
+    wait_builder = ToolOptionBuilder()
+    wait_builder.set_name("wait")
+    wait_builder.set_description("暂时结束你的发言，把话语权交给用户，等待对方说话。这就像现实对话中你说完一句话后停下来等对方回应。如果用户在等待期间说了话，你会通过工具返回结果收到内容。如果超时没有回复，你也会收到超时通知。")
+    wait_builder.add_param(
+        name="seconds",
+        param_type=ToolParamType.INTEGER,
+        description="等待的秒数。建议 3-10 秒。超过这个时间用户没有回复会显示超时提示。",
+        required=True,
+        enum_values=None
+    )
+    tools.append(wait_builder.build())
+
+    # stop tool: declared without any parameters
+    stop_builder = ToolOptionBuilder()
+    stop_builder.set_name("stop")
+    stop_builder.set_description("结束当前对话循环，进入待机状态，直到用户下次输入新内容时再唤醒你。当对话自然结束、用户表示不想继续聊、或连续多次等待超时用户没有回复时使用。")
+    tools.append(stop_builder.build())
+
+    # store_context tool: required integer "count" and required string "reason"
+    store_context_builder = ToolOptionBuilder()
+    store_context_builder.set_name("store_context")
+    store_context_builder.set_description("将指定范围的对话上下文存入记忆系统，然后从当前对话中移除这些内容。适合在对话上下文过长、话题转换、或遇到重要内容需要保存时使用。")
+    store_context_builder.add_param(
+        name="count",
+        param_type=ToolParamType.INTEGER,
+        description="要保存的消息条数（从最早的对话开始计数）。建议 5-20 条。",
+        required=True,
+        enum_values=None
+    )
+    store_context_builder.add_param(
+        name="reason",
+        param_type=ToolParamType.STRING,
+        description="保存原因，用于后续检索。例如：'讨论了用户的工作情况' 或 '用户分享了对电影的看法'",
+        required=True,
+        enum_values=None
+    )
+    tools.append(store_context_builder.build())
+
+    return tools
+
+# 为了兼容性，创建一个函数来将工具转换为 dict 格式（用于调试显示）
+def builtin_tools_as_dicts() -> List[Dict[str, Any]]:
+    """Return a hand-written dict description of the built-in tools (for debugging); descriptions are shortened."""
+    return [
+        {
+            "name": "say",
+            "description": "对用户说话。你所有想让用户看到的正式发言都必须通过此工具输出。",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "reason": {"type": "string", "description": "回复的想法和内容"}
+                },
+                "required": ["reason"]
+            }
+        },
+        {
+            "name": "wait",
+            "description": "暂时结束发言，等待用户回应",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "seconds": {"type": "number", "description": "等待秒数"}  # NOTE(review): create_builtin_tools() declares this as ToolParamType.INTEGER — confirm "number" is intended
+                },
+                "required": ["seconds"]
+            }
+        },
+        {
+            "name": "stop",
+            "description": "结束对话循环",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        },
+        {
+            "name": "store_context",
+            "description": "保存对话上下文到记忆系统",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "count": {"type": "number", "description": "保存的消息条数"},  # NOTE(review): ToolParamType.INTEGER in create_builtin_tools() — confirm
+                    "reason": {"type": "string", "description": "保存原因"}
+                },
+                "required": ["count", "reason"]
+            }
+        }
+    ]
+
+# 导出工具创建函数和列表
+def get_builtin_tools() -> List[ToolOption]:
+    """Return the built-in tool list; every call builds a fresh list via create_builtin_tools()."""
+    return create_builtin_tools()
+
+# For backward compatibility, also export the dict form (computed once, at import time)
+BUILTIN_TOOLS_DICTS = builtin_tools_as_dicts()
diff --git a/src/maisaka/cli.py b/src/maisaka/cli.py
index f633f201..1edf1483 100644
--- a/src/maisaka/cli.py
+++ b/src/maisaka/cli.py
@@ -15,12 +15,10 @@ from rich import box
 
 from config import console, ENABLE_EMOTION_MODULE, ENABLE_COGNITION_MODULE, ENABLE_TIMING_MODULE, ENABLE_KNOWLEDGE_MODULE, ENABLE_MCP
 from input_reader import InputReader
-from debug_client import DebugViewer
 from timing import build_timing_info
 from knowledge import store_knowledge_from_context, retrieve_relevant_knowledge, build_knowledge_summary
 from knowledge_store import get_knowledge_store
-from llm_service import BaseLLMService, OpenAILLMService
-from llm_service.utils import build_message, remove_last_perception
+from llm_service import MaiSakaLLMService, build_message, remove_last_perception
 from mcp_client import MCPManager
 from tool_handlers import (
     ToolHandlerContext,
@@ -43,7 +41,7 @@ class BufferCLI:
     """命令行交互界面"""
 
     def __init__(self):
-        self.llm_service: Optional[BaseLLMService] = None
+        self.llm_service: Optional[MaiSakaLLMService] = None
         self._reader = InputReader()
         self._chat_history: Optional[list] = None  # 持久化的对话历史
         self._knowledge_store = get_knowledge_store()  # 了解存储实例
@@ -51,9 +49,9 @@ class BufferCLI:
         # 显示了解存储统计
         knowledge_stats = self._knowledge_store.get_stats()
         if knowledge_stats["total_items"] > 0:
-            console.print(f"[success]✓ 了解系统: {knowledge_stats['total_items']}条特征信息[/success]")
+            console.print(f"[success][OK] 了解系统: {knowledge_stats['total_items']}条特征信息[/success]")
         else:
-            console.print("[muted]✓ 了解系统: 已初始化 (暂无数据)[/muted]")
+            console.print("[muted][OK] 了解系统: 已初始化 (暂无数据)[/muted]")
         # Timing 模块时间戳跟踪
         self._chat_start_time: Optional[datetime] = None
         self._last_user_input_time: Optional[datetime] = None
@@ -61,15 +59,10 @@ class BufferCLI:
         self._user_input_times: list[datetime] = []  # 所有用户输入时间戳
         # MCP 管理器（异步初始化，在 run() 中完成）
         self._mcp_manager: Optional[MCPManager] = None
-        # Debug Viewer
-        self._debug_viewer = DebugViewer()
         self._init_llm()
 
     def _init_llm(self):
-        """初始化 LLM 服务"""
-        api_key = os.getenv("OPENAI_API_KEY", "")
-        base_url = os.getenv("OPENAI_BASE_URL", "")
-        model = os.getenv("OPENAI_MODEL", "gpt-4o")
+        """初始化 LLM 服务 - 使用主项目配置系统"""
         thinking_env = os.getenv("ENABLE_THINKING", "").strip().lower()
         enable_thinking: Optional[bool] = (
             True if thinking_env == "true"
@@ -77,30 +70,18 @@ class BufferCLI:
             else None
         )
 
-        if not api_key:
-            console.print(
-                Panel(
-                    "[warning]未检测到 OPENAI_API_KEY 环境变量！[/warning]\n\n"
-                    "请设置以下环境变量（或在 .env 文件中配置）：\n"
-                    "  • OPENAI_API_KEY   - 必填，API 密钥\n"
-                    "  • OPENAI_BASE_URL  - 可选，API 基地址\n"
-                    "  • OPENAI_MODEL     - 可选，模型名称（默认 gpt-4o）\n\n"
-                    "[muted]程序无法运行，请配置后重试。[/muted]",
-                    title="⚠️ 配置提示",
-                    border_style="yellow",
-                )
-            )
-            return
-
-        self.llm_service = OpenAILLMService(
-            api_key=api_key,
-            base_url=base_url if base_url else None,
-            model=model,
+        # MaiSakaLLMService 现在使用主项目的配置系统
+        # 参数仅为兼容性保留，实际从 config_manager 读取配置
+        self.llm_service = MaiSakaLLMService(
+            api_key="",
+            base_url=None,
+            model="",
             enable_thinking=enable_thinking,
         )
-        # 绑定 debug 回调
-        self.llm_service.set_debug_callback(self._debug_viewer.send)
-        console.print(f"[success]✓ LLM 服务已初始化[/success] [muted](模型: {model})[/muted]")
+
+        # 获取实际使用的模型名称
+        model_name = self.llm_service._model_name
+        console.print(f"[success][OK] LLM 服务已初始化[/success] [muted](模型: {model_name})[/muted]")
 
     def _build_tool_context(self) -> ToolHandlerContext:
         """构建工具处理器所需的上下文。"""
@@ -228,11 +209,11 @@ class BufferCLI:
                                     self.llm_service,
                                     to_compress,
                                     store_result_callback=lambda cat_id, cat_name, content: console.print(
-                                        f"[muted]  ✓ 存储了解信息: {cat_name}[/muted]"
+                                        f"[muted]  [OK] 存储了解信息: {cat_name}[/muted]"
                                     )
                                 )
                                 if knowledge_count > 0:
-                                    console.print(f"[success]✓ 了解模块: 存储{knowledge_count}条特征信息[/success]")
+                                    console.print(f"[success][OK] 了解模块: 存储{knowledge_count}条特征信息[/success]")
                             except Exception as e:
                                 console.print(f"[warning]了解存储失败: {e}[/warning]")
                         if summary:
@@ -579,9 +560,6 @@ class BufferCLI:
 
     async def run(self):
         """主循环：直接输入文本即可对话"""
-        # 启动调试窗口
-        self._debug_viewer.start()
-
         # 根据配置决定是否初始化 MCP 服务器
         if ENABLE_MCP:
             await self._init_mcp()
@@ -608,6 +586,5 @@ class BufferCLI:
 
                 await self._start_chat(raw_input)
         finally:
-            self._debug_viewer.close()
             if self._mcp_manager:
                 await self._mcp_manager.close()
diff --git a/src/maisaka/llm_service.py b/src/maisaka/llm_service.py
new file mode 100644
index 00000000..c2c183bb
--- /dev/null
+++ b/src/maisaka/llm_service.py
@@ -0,0 +1,568 @@
+"""
+MaiSaka LLM 服务 - 使用主项目 LLM 系统
+将主项目的 LLMRequest 适配为 MaiSaka 需要的接口
+"""
+
+import json
+import os
+from dataclasses import dataclass
+from typing import List, Optional, Literal
+
+from src.common.logger import get_logger
+from src.config.config import config_manager
+from src.llm_models.utils_model import LLMRequest
+from src.prompt.prompt_manager import prompt_manager
+from src.llm_models.payload_content.message import MessageBuilder, RoleType
+from src.llm_models.payload_content.tool_option import ToolCall as ToolCallOption, ToolOption
+from builtin_tools import get_builtin_tools
+
+import config
+
+logger = get_logger("maisaka_llm")
+
+# ──────────────────── Message types ────────────────────
+
+MessageType = Literal["user", "assistant", "system", "perception"]
+
+# Prefix of internal-only fields, marking metadata that should not be sent to the API
+INTERNAL_FIELD_PREFIX = "_"
+
+# Name of the field that carries a message's type
+MSG_TYPE_FIELD = "_type"
+
+
+@dataclass
+class ToolCall:
+    """Tool call information: call identifier, tool name and argument dict."""
+    id: str
+    name: str
+    arguments: dict
+
+
+@dataclass
+class ChatResponse:
+    """Response of a single step of the LLM conversation loop."""
+    content: Optional[str]
+    tool_calls: List[ToolCall]
+    raw_message: dict  # message dict that can be appended to the chat history as-is
+
+
+# ──────────────────── 工具函数 ────────────────────
+
+def build_message(role: str, content: str, msg_type: MessageType = "user", **kwargs) -> dict:
+    """Return a message dict tagged with its message type; extra keyword fields are merged in last."""
+    tagged = {"role": role, "content": content, MSG_TYPE_FIELD: msg_type}
+    return tagged | kwargs
+
+
+def remove_last_perception(messages: list[dict]) -> None:
+    """Delete the last perception-typed message from the list, in place (no-op when there is none)."""
+    for position in reversed(range(len(messages))):
+        if messages[position].get(MSG_TYPE_FIELD) == "perception":
+            del messages[position]
+            break
+
+
+class MaiSakaLLMService:
+    """MaiSaka LLM 服务 - 适配主项目 LLM 系统"""
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        model: Optional[str] = None,
+        chat_system_prompt: Optional[str] = None,
+        temperature: float = 0.5,
+        max_tokens: int = 2048,
+        enable_thinking: Optional[bool] = None,
+    ):
+        """
+        Initialise the LLM service.
+
+        api_key, base_url and model are kept for compatibility only and are not read here; the main project's configuration is used.
+        """
+        self._temperature = temperature
+        self._max_tokens = max_tokens
+        self._enable_thinking = enable_thinking
+        self._extra_tools: List[dict] = []
+
+        # Fetch the main project's model configuration
+        try:
+            model_config = config_manager.get_model_config()
+            self._model_configs = model_config.model_task_config
+        except Exception:
+            # If loading the configuration fails, fall back to a default ModelTaskConfig
+            from src.config.model_configs import ModelTaskConfig
+            self._model_configs = ModelTaskConfig()
+            logger.warning("无法加载主项目模型配置，使用默认配置")
+
+        # Create the LLMRequest instances (only tool_use and replyer are used)
+        self._llm_tool_use = LLMRequest(
+            model_set=self._model_configs.tool_use,
+            request_type="maisaka_tool_use"
+        )
+        # The main chat also uses the tool_use model (it needs tool-calling support)
+        self._llm_chat = self._llm_tool_use
+        # The analysis modules use the tool_use model as well (same object, not a copy)
+        self._llm_utils = self._llm_tool_use
+        # Reply generation uses the replyer model
+        self._llm_replyer = LLMRequest(
+            model_set=self._model_configs.replyer,
+            request_type="maisaka_replyer"
+        )
+
+        # Try to repair the database schema (errors are ignored)
+        self._try_fix_database_schema()
+
+        # Load the system prompt (only when the caller did not pass one)
+        if chat_system_prompt is None:
+            try:
+                chat_prompt = prompt_manager.get_prompt("maidairy_chat")
+                logger.info("成功加载 maidairy_chat 提示词模板")
+                tools_section = ""
+                if config.ENABLE_WRITE_FILE:
+                    tools_section += "\n• write_file(filename, content) — 在 mai_files 目录下写入文件。"
+                if config.ENABLE_READ_FILE:
+                    tools_section += "\n• read_file(filename) — 读取 mai_files 目录下的文件内容。"
+                if config.ENABLE_LIST_FILES:
+                    tools_section += "\n• list_files() — 获取 mai_files 目录下所有文件的元信息列表。"
+                if config.ENABLE_QQ_TOOLS:
+                    tools_section += "\n• get_qq_chat_info(chat, limit) — 获取指定 QQ 聊天的聊天记录。"
+                    tools_section += "\n• send_info(chat, message) — 发送消息到指定的 QQ 聊天。"
+                    tools_section += "\n• list_qq_chats() — 获取所有可用的 QQ 聊天列表。"
+
+                chat_prompt.add_context("file_tools_section", tools_section if tools_section else "")
+                import asyncio
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)  # NOTE(review): this loop is closed below but stays installed as the current loop
+                try:
+                    self._chat_system_prompt = loop.run_until_complete(prompt_manager.render_prompt(chat_prompt))
+                    logger.info(f"系统提示词已渲染，长度: {len(self._chat_system_prompt)}")
+                finally:
+                    loop.close()
+            except Exception as e:
+                logger.error(f"加载系统提示词失败: {e}")
+                self._chat_system_prompt = "你是一个友好的 AI 助手。"
+        else:
+            self._chat_system_prompt = chat_system_prompt
+
+        # Model name used for display: first entry of tool_use.model_list, or a placeholder when the list is empty
+        self._model_name = self._model_configs.tool_use.model_list[0] if self._model_configs.tool_use.model_list else "未配置"
+
+        # Load the sub-module prompts; each stays None when rendering fails
+        self._emotion_prompt: Optional[str] = None
+        self._cognition_prompt: Optional[str] = None
+        self._timing_prompt: Optional[str] = None
+        self._context_summarize_prompt: Optional[str] = None
+
+        try:
+            import asyncio
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            try:
+                self._emotion_prompt = loop.run_until_complete(prompt_manager.render_prompt(
+                    prompt_manager.get_prompt("maidairy_emotion")
+                ))
+                self._cognition_prompt = loop.run_until_complete(prompt_manager.render_prompt(
+                    prompt_manager.get_prompt("maidairy_cognition")
+                ))
+                self._timing_prompt = loop.run_until_complete(prompt_manager.render_prompt(
+                    prompt_manager.get_prompt("maidairy_timing")
+                ))
+                self._context_summarize_prompt = loop.run_until_complete(prompt_manager.render_prompt(
+                    prompt_manager.get_prompt("maidairy_context_summarize")
+                ))
+                logger.info("成功加载 MaiSaka 子模块提示词")
+            finally:
+                loop.close()
+        except Exception as e:
+            logger.warning(f"加载子模块提示词失败，将使用默认提示词: {e}")
+
+    def _try_fix_database_schema(self) -> None:
+        """Best-effort repair: add the model_api_provider_name column to llm_usage when missing; never raises."""
+        try:
+            from src.common.database.database_client import get_db_session
+            from sqlalchemy import text
+
+            with get_db_session() as session:
+                # PRAGMA table_info is SQLite syntax; index 1 of each returned row is the column name
+                result = session.execute(text("PRAGMA table_info(llm_usage)"))
+                columns = [row[1] for row in result.fetchall()]
+
+                if "model_api_provider_name" not in columns:
+                    # Add the missing column
+                    session.execute(text(
+                        "ALTER TABLE llm_usage ADD COLUMN model_api_provider_name VARCHAR(255)"
+                    ))
+                    session.commit()
+                    logger.info("数据库 schema 已修复：添加 model_api_provider_name 列")
+        except Exception as e:
+            # Startup must not depend on this repair, so nothing is re-raised; the cause is kept at debug level
+            logger.debug(f"跳过数据库 schema 修复: {e}")
+
+    def set_extra_tools(self, tools: List[dict]) -> None:
+        """Replace the extra tool definitions (e.g. MCP tools) with a shallow copy of ``tools``."""
+        self._extra_tools = [*tools]
+
+    @staticmethod
+    def _tool_option_to_dict(tool: 'ToolOption') -> dict:
+        """Flatten a ToolOption into the dict form passed on as a tool definition.
+
+        Result layout (per the original author, what the main project's
+        _build_tool_options() expects):
+        {
+            "name": str,
+            "description": str,
+            "parameters": List[Tuple[name, ToolParamType, description, required, enum_values]]
+        }
+        """
+        # A falsy ``params`` (None or empty) yields an empty parameter list.
+        declared = tool.params or []
+        as_tuples = [
+            (
+                spec.name,
+                spec.param_type,
+                spec.description,
+                spec.required,
+                spec.enum_values,
+            )
+            for spec in declared
+        ]
+        tool_name, tool_description = tool.name, tool.description
+        return {"name": tool_name, "description": tool_description, "parameters": as_tuples}
+
+    async def chat_loop_step(self, chat_history: List[dict]) -> ChatResponse:
+        """Run one step of the conversation loop through self._llm_chat and return content, tool calls and a history-ready message."""
+
+        def message_factory(client) -> List:
+            """Convert MaiSaka's chat_history into the main project's Message objects (``client`` is not used)."""
+            messages = []
+
+            # The system prompt always goes first
+            system_msg = MessageBuilder().set_role(RoleType.System)
+            system_msg.add_text_content(self._chat_system_prompt)
+            messages.append(system_msg.build())
+
+            # Then the conversation history
+            for msg in chat_history:
+                role = msg.get("role", "")
+                content = msg.get("content", "")
+
+                # Skip messages whose role is "perception" or "system" (the system prompt was already added above)
+                if role in ("perception", "system"):
+                    continue
+
+                # Map the role string to a RoleType; any other role is dropped
+                if role == "user":
+                    role_type = RoleType.User
+                elif role == "assistant":
+                    role_type = RoleType.Assistant
+                elif role == "tool":
+                    role_type = RoleType.Tool
+                else:
+                    continue
+
+                builder = MessageBuilder().set_role(role_type)
+
+                # Tool calls
+                if role == "assistant" and "tool_calls" in msg:
+                    # Convert tool_calls from the MaiSaka format to the main project's format
+                    tool_calls_list = []
+                    for tc in msg["tool_calls"]:
+                        tc_func = tc.get("function", {})
+                        # Main project's ToolCall takes call_id, func_name, args
+                        tool_calls_list.append(ToolCallOption(
+                            call_id=tc.get("id", ""),
+                            func_name=tc_func.get("name", ""),
+                            args=json.loads(tc_func.get("arguments", "{}")) if tc_func.get("arguments") else {}
+                        ))
+                    builder.set_tool_calls(tool_calls_list)
+                elif role == "tool" and "tool_call_id" in msg:
+                    builder.add_tool_call(msg["tool_call_id"])
+
+                # Text content is only added when it is non-empty
+                if content:
+                    builder.add_text_content(content)
+
+                messages.append(builder.build())
+
+            return messages
+
+        # Call the LLM (using the message-based interface)
+        # Merge built-in tools (ToolOption objects converted to dicts) with the extra tools
+        all_tools = [self._tool_option_to_dict(t) for t in get_builtin_tools()] + (self._extra_tools if self._extra_tools else [])
+
+        # Print the message list to stdout; the factory is run once here and is also passed to the request below
+        built_messages = message_factory(None)
+        print("\n" + "="*60)
+        print("MaiSaka LLM Request - chat_loop_step:")
+        for msg in built_messages:
+            print(f"  {msg}")
+        print("="*60 + "\n")
+
+        response, (reasoning, model, tool_calls) = await self._llm_chat.generate_response_with_message_async(
+            message_factory=message_factory,
+            tools=all_tools if all_tools else None,
+            temperature=self._temperature,
+            max_tokens=self._max_tokens,
+        )
+
+        # Convert tool_calls from the main project's format to the MaiSaka format
+        converted_tool_calls = []
+        if tool_calls:
+            for tc in tool_calls:
+                # Main project's ToolCall has call_id, func_name, args; missing attributes fall back to empty values
+                call_id = tc.call_id if hasattr(tc, 'call_id') else ""
+                func_name = tc.func_name if hasattr(tc, 'func_name') else ""
+                args = tc.args if hasattr(tc, 'args') else {}
+
+                converted_tool_calls.append(ToolCall(
+                    id=call_id,
+                    name=func_name,
+                    arguments=args,
+                ))
+
+        # Build the raw message (MaiSaka style); arguments are stored as a JSON string
+        raw_message = {"role": "assistant", "content": response}
+        if converted_tool_calls:
+            raw_message["tool_calls"] = [
+                {
+                    "id": tc.id,
+                    "type": "function",
+                    "function": {
+                        "name": tc.name,
+                        "arguments": json.dumps(tc.arguments),
+                    },
+                }
+                for tc in converted_tool_calls
+            ]
+
+        return ChatResponse(
+            content=response,
+            tool_calls=converted_tool_calls,
+            raw_message=raw_message,
+        )
+
+    def _filter_for_api(self, chat_history: List[dict]) -> str:
+        """Flatten a chat history into a plain-text transcript, one role-prefixed entry per message."""
+        parts = []
+        for msg in chat_history:
+            role = msg.get("role", "")
+            content = msg.get("content", "")
+
+            # Messages whose role is "perception" or "tool" are left out of the transcript
+            if role in ("perception", "tool"):
+                continue
+
+            if role == "system":
+                parts.append(f"System: {content}")
+            elif role == "user":
+                parts.append(f"User: {content}")
+            elif role == "assistant":
+                # Tool calls are stored as {"id", "type", "function": {"name", ...}}; a flat "name" is accepted as fallback
+                if "tool_calls" in msg:
+                    tool_desc = ", ".join((tc.get("function") or {}).get("name") or tc.get("name", "") for tc in msg["tool_calls"])
+                    parts.append(f"Assistant (called tools: {tool_desc})")
+                else:
+                    parts.append(f"Assistant: {content}")
+
+        return "\n\n".join(parts)
+
+    def build_chat_context(self, user_text: str) -> List[dict]:
+        """Return the initial context: the system prompt message followed by the user's message."""
+        system_entry = {"role": "system", "content": self._chat_system_prompt}
+        user_entry = {"role": "user", "content": user_text}
+        initial_context = [system_entry, user_entry]
+        return initial_context
+
+    # ──────── 分析模块（使用 utils 模型） ────────
+
+    async def analyze_emotion(self, chat_history: List[dict]) -> str:
+        """Emotion analysis over the last 10 non-perception messages, via self._llm_utils; returns "" on error."""
+        filtered = [m for m in chat_history if m.get("_type") != "perception"]
+        recent = filtered[-10:] if len(filtered) > 10 else filtered
+
+        # Use the prompt rendered in __init__, or the built-in fallback when it is None/empty
+        system_prompt = self._emotion_prompt or "请分析以下对话中用户的情绪状态和言语态度："
+
+        prompt_parts = [f"{system_prompt}\n\n【对话内容】\n"]
+        for msg in recent:
+            if msg.get("role") == "user":
+                prompt_parts.append(f"用户: {msg.get('content', '')}")
+            elif msg.get("role") == "assistant":
+                prompt_parts.append(f"助手: {msg.get('content', '')}")
+
+        prompt = "\n".join(prompt_parts)
+
+        print("\n" + "="*60)
+        print("MaiSaka LLM Request - analyze_emotion:")
+        print(f"  {prompt}")
+        print("="*60 + "\n")
+
+        try:
+            response, _ = await self._llm_utils.generate_response_async(
+                prompt=prompt,
+                temperature=0.3,
+                max_tokens=512,
+            )
+
+            return response
+        except Exception as e:
+            logger.error(f"情绪分析 LLM 调用出错: {e}")
+            return ""
+
+    async def analyze_cognition(self, chat_history: List[dict]) -> str:
+        """Cognition analysis over the last 10 non-perception messages, via self._llm_utils; returns "" on error."""
+        filtered = [m for m in chat_history if m.get("_type") != "perception"]
+        recent = filtered[-10:] if len(filtered) > 10 else filtered
+
+        # Use the prompt rendered in __init__, or the built-in fallback when it is None/empty
+        system_prompt = self._cognition_prompt or "请分析以下对话中用户的意图、认知状态和目的："
+
+        prompt_parts = [f"{system_prompt}\n\n【对话内容】\n"]
+        for msg in recent:
+            if msg.get("role") == "user":
+                prompt_parts.append(f"用户: {msg.get('content', '')}")
+            elif msg.get("role") == "assistant":
+                prompt_parts.append(f"助手: {msg.get('content', '')}")
+
+        prompt = "\n".join(prompt_parts)
+
+        print("\n" + "="*60)
+        print("MaiSaka LLM Request - analyze_cognition:")
+        print(f"  {prompt}")
+        print("="*60 + "\n")
+
+        try:
+            response, _ = await self._llm_utils.generate_response_async(
+                prompt=prompt,
+                temperature=0.3,
+                max_tokens=512,
+            )
+
+            return response
+        except Exception as e:
+            logger.error(f"认知分析 LLM 调用出错: {e}")
+            return ""
+
+    async def analyze_timing(self, chat_history: List[dict], timing_info: str) -> str:
+        """Timing analysis over the whole history plus the given timestamp text, via self._llm_utils; returns "" on error."""
+        filtered = [m for m in chat_history if m.get("_type") not in ("perception", "system")]
+
+        # Use the prompt rendered in __init__, or the built-in fallback when it is None/empty
+        system_prompt = self._timing_prompt or "请分析以下对话的时间节奏和用户状态："
+
+        prompt_parts = [f"{system_prompt}\n\n【系统时间戳信息】\n{timing_info}\n\n【当前对话记录】\n"]
+        for msg in filtered:
+            role = msg.get("role", "")
+            content = msg.get("content", "")
+            if role == "user":
+                prompt_parts.append(f"用户: {content}")
+            elif role == "assistant":
+                prompt_parts.append(f"助手: {content}")
+
+        prompt = "\n".join(prompt_parts)
+
+        print("\n" + "="*60)
+        print("MaiSaka LLM Request - analyze_timing:")
+        print(f"  {prompt}")
+        print("="*60 + "\n")
+
+        try:
+            response, _ = await self._llm_utils.generate_response_async(
+                prompt=prompt,
+                temperature=0.3,
+                max_tokens=512,
+            )
+
+            return response
+        except Exception as e:
+            logger.error(f"时间分析 LLM 调用出错: {e}")
+            return ""
+
+    async def summarize_context(self, context_messages: List[dict]) -> str:
+        """Summarise the user/assistant messages of the given context, via self._llm_utils; returns "" on error."""
+        filtered = [m for m in context_messages if m.get("role") != "system"]
+
+        # Use the prompt rendered in __init__, or the built-in fallback when it is None/empty
+        system_prompt = self._context_summarize_prompt or "请对以下对话内容进行总结："
+
+        prompt_parts = [f"{system_prompt}\n\n【对话内容】\n"]
+        for msg in filtered:
+            role = msg.get("role", "")
+            content = msg.get("content", "")
+            if role == "user":
+                prompt_parts.append(f"用户: {content}")
+            elif role == "assistant":
+                prompt_parts.append(f"助手: {content}")
+
+        prompt = "\n".join(prompt_parts)
+
+        print("\n" + "="*60)
+        print("MaiSaka LLM Request - summarize_context:")
+        print(f"  {prompt}")
+        print("="*60 + "\n")
+
+        try:
+            response, _ = await self._llm_utils.generate_response_async(
+                prompt=prompt,
+                temperature=0.3,
+                max_tokens=1024,
+            )
+
+            return response
+        except Exception as e:
+            logger.error(f"上下文总结 LLM 调用出错: {e}")
+            return ""
+
+    # ──────── 回复生成（使用 replyer 模型） ────────
+
+    async def generate_reply(self, reason: str, chat_history: List[dict]) -> str:
+        """
+        Generate the reply text from ``reason`` and the chat history, using the replyer model.
+        Can be called directly by the Replyer class; returns "..." on an empty response or on error.
+        """
+        from datetime import datetime
+        from replyer import format_chat_history
+
+        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        # Format the chat history, leaving out system-role and perception-typed messages
+        filtered_history = [
+            msg for msg in chat_history
+            if msg.get("role") != "system" and msg.get("_type") != "perception"
+        ]
+        formatted_history = format_chat_history(filtered_history)
+
+        # Fetch and render the reply prompt; any failure falls back to a built-in prompt
+        try:
+            replyer_prompt = prompt_manager.get_prompt("maidairy_replyer")
+            system_prompt = await prompt_manager.render_prompt(replyer_prompt)
+        except Exception:
+            system_prompt = "你是一个友好的 AI 助手，请根据用户的想法生成自然的回复。"
+
+        user_prompt = (
+            f"当前时间：{current_time}\n\n"
+            f"【聊天记录】\n{formatted_history}\n\n"
+            f"【你的想法】\n{reason}\n\n"
+            f"现在，你说："
+        )
+
+        messages = f"System: {system_prompt}\n\nUser: {user_prompt}"
+
+        print("\n" + "="*60)
+        print("MaiSaka LLM Request - generate_reply:")
+        print(f"  {messages}")
+        print("="*60 + "\n")
+
+        try:
+            response, _ = await self._llm_replyer.generate_response_async(
+                prompt=messages,
+                temperature=0.8,
+                max_tokens=512,
+            )
+
+            return response.strip() if response else "..."
+        except Exception as e:
+            logger.error(f"回复生成 LLM 调用出错: {e}")
+            return "..."
diff --git a/src/maisaka/llm_service/__init__.py b/src/maisaka/llm_service/__init__.py
deleted file mode 100644
index 9f503675..00000000
--- a/src/maisaka/llm_service/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-"""
-MaiSaka - LLM 服务包
-提供抽象接口 (BaseLLMService) 和 OpenAI 兼容实现 (OpenAILLMService)。
-"""
-
-from .base import BaseLLMService, ChatResponse, ModelInfo, ToolCall
-from .openai_impl import OpenAILLMService
-from .utils import format_chat_history
-
-__all__ = [
-    "BaseLLMService",
-    "ChatResponse",
-    "ModelInfo",
-    "ToolCall",
-    "OpenAILLMService",
-    "format_chat_history",
-]
diff --git a/src/maisaka/llm_service/base.py b/src/maisaka/llm_service/base.py
deleted file mode 100644
index b8a04c67..00000000
--- a/src/maisaka/llm_service/base.py
+++ /dev/null
@@ -1,200 +0,0 @@
-"""
-MaiSaka - LLM 服务数据结构与抽象接口
-"""
-
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from typing import List, Optional
-
-
-# ──────────────────── 数据结构 ────────────────────
-
-@dataclass
-class ModelInfo:
-    """模型描述信息"""
-    model_name: str
-    base_url: str
-
-
-@dataclass
-class ToolCall:
-    """工具调用信息"""
-    id: str
-    name: str
-    arguments: dict
-
-
-@dataclass
-class ChatResponse:
-    """LLM 对话循环单步响应"""
-    content: Optional[str]
-    tool_calls: List[ToolCall]
-    raw_message: dict  # 可直接追加到对话历史的消息字典
-
-
-# ──────────────────── 抽象接口 ────────────────────
-
-class BaseLLMService(ABC):
-    """
-    LLM 服务抽象基类。
-    所有 LLM 后端实现都应继承此类，并实现以下方法。
-    """
-
-    def set_extra_tools(self, tools: List[dict]) -> None:
-        """
-        设置额外的工具定义（如 MCP 工具），将与内置工具合并使用。
-
-        Args:
-            tools: OpenAI function calling 格式的工具定义列表
-        """
-        # 默认空实现，子类可覆盖
-        pass
-
-    @abstractmethod
-    async def chat_loop_step(self, chat_history: List[dict]) -> ChatResponse:
-        """
-        执行对话循环的一步。
-
-        发送当前对话历史，获取 LLM 响应（可能包含文本和/或工具调用）。
-        调用方需要将 raw_message 追加到 chat_history，并根据 tool_calls 执行工具、
-        将工具结果追加到 chat_history 后再次调用本方法。
-
-        Args:
-            chat_history: 对话历史（含 system / user / assistant / tool 消息）
-
-        Returns:
-            ChatResponse
-        """
-        ...
-
-    @abstractmethod
-    def build_chat_context(self, user_text: str) -> List[dict]:
-        """根据用户初始输入，构建对话循环的初始上下文（system + user）。"""
-        ...
-
-    @abstractmethod
-    async def analyze_timing(
-        self, chat_history: List[dict], timing_info: str,
-    ) -> str:
-        """
-        Timing 模块（含自我反思功能）：分析对话的时间维度信息和进行自我反思。
-
-        评估对话已经持续多久、上次回复距今多长时间、建议等待时长、
-        以及其他与时间节奏相关的考量。同时反思自己的回复逻辑，
-        检查人设一致性、回复合理性和认知局限性。
-
-        Args:
-            chat_history: 当前对话历史（与主 Agent 完全一致的上下文）
-            timing_info:  系统提供的精确时间戳信息（对话开始时间、各消息时间等）
-
-        Returns:
-            时间维度分析和自我反思的综合文本
-        """
-        ...
-
-    @abstractmethod
-    async def analyze_emotion(self, chat_history: List[dict]) -> str:
-        """
-        情商模块：分析对话对方（用户）的情绪状态和言语态度。
-
-        接收与主 Agent 相同的上下文，返回一段简洁的情绪分析文本。
-        该文本将被注入主 Agent 上下文，帮助主 Agent 更好地理解用户状态。
-
-        Args:
-            chat_history: 当前对话历史（与主 Agent 完全一致的上下文）
-
-        Returns:
-            情绪分析文本
-        """
-        ...
-
-    @abstractmethod
-    async def analyze_cognition(self, chat_history: List[dict]) -> str:
-        """
-        认知模块：分析对话对方（用户）的意图、认知状态和目的。
-
-        接收与主 Agent 相同的上下文，返回一段简洁的认知分析文本。
-        该文本将被注入主 Agent 上下文，帮助主 Agent 更好地理解用户意图。
-
-        Args:
-            chat_history: 当前对话历史（与主 Agent 完全一致的上下文）
-
-        Returns:
-            认知分析文本
-        """
-        ...
-
-    @abstractmethod
-    def get_model_info(self) -> ModelInfo:
-        """返回当前使用的模型信息。"""
-        ...
-
-    @abstractmethod
-    async def summarize_context(self, context_messages: List[dict]) -> str:
-        """
-        上下文总结模块：对需要压缩的上下文进行总结。
-
-        当对话历史过长时，对早期的对话内容进行总结。
-
-        Args:
-            context_messages: 需要总结的上下文消息列表
-
-        Returns:
-            总结后的文本内容
-        """
-        ...
-
-    @abstractmethod
-    async def analyze_knowledge_categories(
-        self, context_messages: List[dict], categories_summary: str
-    ) -> List[str]:
-        """
-        了解模块-分类分析：分析对话内容涉及哪些个人特征分类。
-
-        在上下文裁切时触发，分析需要提取哪些分类的个人特征信息。
-
-        Args:
-            context_messages: 需要分析的上下文消息
-            categories_summary: 所有分类的摘要信息
-
-        Returns:
-            涉及的分类编号列表
-        """
-        ...
-
-    @abstractmethod
-    async def extract_knowledge_for_category(
-        self, context_messages: List[dict], category_id: str, category_name: str
-    ) -> str:
-        """
-        了解模块-内容提取：从对话中提取指定分类的个人特征信息。
-
-        为每个分类创建 subAgent，提取相关的个人特征内容。
-
-        Args:
-            context_messages: 需要分析的上下文消息
-            category_id: 分类编号
-            category_name: 分类名称
-
-        Returns:
-            提取的个人特征内容
-        """
-        ...
-
-    @abstractmethod
-    async def analyze_knowledge_need(
-        self, chat_history: List[dict], categories_summary: str
-    ) -> List[str]:
-        """
-        了解模块-需求分析：分析当前对话需要哪些个人特征信息。
-
-        在每次对话前触发，分析需要检索哪些分类的了解内容。
-
-        Args:
-            chat_history: 当前对话历史
-            categories_summary: 所有分类的摘要信息
-
-        Returns:
-            需要的分类编号列表
-        """
-        ...
diff --git a/src/maisaka/llm_service/openai_impl.py b/src/maisaka/llm_service/openai_impl.py
deleted file mode 100644
index d3e3d444..00000000
--- a/src/maisaka/llm_service/openai_impl.py
+++ /dev/null
@@ -1,515 +0,0 @@
-"""
-MaiSaka - OpenAI 兼容 LLM 服务实现
-支持所有兼容 OpenAI Chat Completions 接口的服务商。
-"""
-
-import json
-from typing import Callable, List, Optional
-
-from openai import AsyncOpenAI
-
-import asyncio
-
-from .base import BaseLLMService, ChatResponse, ModelInfo, ToolCall
-from .prompts import get_enabled_chat_tools
-from .utils import format_chat_history, format_chat_history_for_eq, filter_for_api
-from src.prompt.prompt_manager import prompt_manager
-from knowledge import extract_category_ids_from_result
-
-
-def _load_prompt_sync(name: str, **kwargs) -> str:
-    """同步加载并渲染 prompt（用于非异步上下文）"""
-    prompt = prompt_manager.get_prompt(name)
-    for key, value in kwargs.items():
-        prompt.add_context(key, value)
-    # 在新事件循环中运行异步渲染
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    try:
-        return loop.run_until_complete(prompt_manager.render_prompt(prompt))
-    finally:
-        loop.close()
-
-
-class OpenAILLMService(BaseLLMService):
-    """
-    基于 OpenAI 兼容 API 的 LLM 服务实现。
-    支持所有兼容 OpenAI Chat Completions 接口的服务商。
-    """
-
-    def __init__(
-        self,
-        api_key: str,
-        base_url: Optional[str] = None,
-        model: str = "gpt-4o",
-        chat_system_prompt: Optional[str] = None,
-        temperature: float = 0.5,
-        max_tokens: int = 2048,
-        enable_thinking: Optional[bool] = None,
-    ):
-        """
-        Args:
-            api_key:              API 密钥
-            base_url:             API 基地址 (默认 OpenAI 官方)
-            model:                模型名称
-            chat_system_prompt:   自定义对话系统提示词 (为 None 则使用默认)
-            temperature:          生成温度
-            max_tokens:           最大输出 token 数
-            enable_thinking:      是否启用思考模式 (True/False/None)
-        """
-        self._base_url = base_url or "https://api.openai.com/v1"
-        self._model = model
-        self._temperature = temperature
-        self._max_tokens = max_tokens
-        self._enable_thinking = enable_thinking
-
-        # 如果没有提供自定义提示词，则根据配置动态构建
-        if chat_system_prompt is None:
-            from config import ENABLE_WRITE_FILE, ENABLE_READ_FILE, ENABLE_LIST_FILES, ENABLE_QQ_TOOLS
-
-            # 构建文件工具说明
-            file_tools_parts = []
-            if ENABLE_WRITE_FILE:
-                file_tools_parts.append("• write_file(filename, content) — 在 mai_files 目录下写入文件，支持任意格式。")
-            if ENABLE_READ_FILE:
-                file_tools_parts.append("• read_file(filename) — 读取 mai_files 目录下的文件内容。")
-            if ENABLE_LIST_FILES:
-                file_tools_parts.append("• list_files() — 获取 mai_files 目录下所有文件的元信息列表。")
-
-            # 构建QQ工具说明
-            qq_tools_parts = []
-            if ENABLE_QQ_TOOLS:
-                qq_tools_parts.append("• get_qq_chat_info(chat, limit) — 获取指定 QQ 聊天的聊天记录。")
-                qq_tools_parts.append("• send_info(chat, message) — 发送消息到指定的 QQ 聊天。")
-                qq_tools_parts.append("• list_qq_chats() — 获取所有可用的 QQ 聊天列表。")
-
-            # 合并所有工具说明
-            tools_parts = []
-            if file_tools_parts:
-                tools_parts.extend(file_tools_parts)
-            if qq_tools_parts:
-                tools_parts.extend(qq_tools_parts)
-
-            # 如果有任何工具启用，添加前缀空行
-            if tools_parts:
-                tools_section = "\n" + "\n".join(tools_parts) + "\n"
-            else:
-                tools_section = ""
-
-            # 加载提示词模板并注入工具部分
-            self._chat_system_prompt = _load_prompt_sync("maidairy_chat", file_tools_section=tools_section)
-        else:
-            self._chat_system_prompt = chat_system_prompt
-
-        self._client = AsyncOpenAI(
-            api_key=api_key,
-            base_url=self._base_url,
-        )
-        self._debug_callback: Optional[Callable] = None
-        self._extra_tools: List[dict] = []  # MCP 等外部工具
-
-    def set_extra_tools(self, tools: List[dict]) -> None:
-        """设置额外的工具定义（如 MCP 工具），与内置工具合并使用。"""
-        self._extra_tools = list(tools)
-
-    def set_debug_callback(self, callback: Callable[[str, list, Optional[list], Optional[dict]], None]):
-        """
-        设置调试回调，每次 LLM 调用时触发（调用前和响应后）。
-
-        callback(label, messages, tools, response) — tools 和 response 可为 None。
-        """
-        self._debug_callback = callback
-
-    async def _call_llm(self, label: str, messages: list, tools: Optional[list] = None, **kwargs):
-        """统一 LLM 调用入口：触发 debug 回调后调用 API。"""
-        if self._debug_callback:
-            try:
-                self._debug_callback(label, messages, tools)
-            except Exception:
-                pass
-
-        create_kwargs = {"model": self._model, "messages": messages, **kwargs}
-        if tools:
-            create_kwargs["tools"] = tools
-
-        response = await self._client.chat.completions.create(**create_kwargs)
-
-        # 发送响应结果到调试窗口
-        if self._debug_callback:
-            try:
-                # 转换 tool_calls 为可序列化的格式
-                tool_calls_list = []
-                if response.choices[0].message.tool_calls:
-                    for tc in response.choices[0].message.tool_calls:
-                        tool_calls_list.append({
-                            "id": tc.id,
-                            "type": tc.type,
-                            "function": {
-                                "name": tc.function.name,
-                                "arguments": tc.function.arguments,
-                            },
-                        })
-
-                resp_dict = {
-                    "content": response.choices[0].message.content,
-                    "tool_calls": tool_calls_list,
-                }
-                self._debug_callback(label, messages, tools, resp_dict)
-            except Exception:
-                pass
-
-        return response
-
-    def _build_extra_body(self) -> dict:
-        """构建 extra_body 参数（如 enable_thinking）。"""
-        extra_body = {}
-        if self._enable_thinking is not None:
-            extra_body["enable_thinking"] = self._enable_thinking
-        return extra_body
-
-    def _parse_tool_calls(self, msg) -> List[ToolCall]:
-        """从 API 响应消息中解析工具调用列表。"""
-        tool_calls: List[ToolCall] = []
-        if msg.tool_calls:
-            for tc in msg.tool_calls:
-                try:
-                    args = json.loads(tc.function.arguments) if tc.function.arguments else {}
-                except json.JSONDecodeError:
-                    args = {}
-                tool_calls.append(ToolCall(
-                    id=tc.id,
-                    name=tc.function.name,
-                    arguments=args,
-                ))
-        return tool_calls
-
-    def _build_raw_message(self, msg) -> dict:
-        """从 API 响应消息构建可追加到对话历史的消息字典。"""
-        raw_message: dict = {"role": "assistant", "content": msg.content}
-        if msg.tool_calls:
-            raw_message["tool_calls"] = [
-                {
-                    "id": tc.id,
-                    "type": "function",
-                    "function": {
-                        "name": tc.function.name,
-                        # 确保 arguments 是有效的 JSON 字符串，空参数用 "{}"
-                        "arguments": tc.function.arguments or "{}",
-                    },
-                }
-                for tc in msg.tool_calls
-            ]
-        return raw_message
-
-    # ──────── 接口实现 ────────
-
-    async def chat_loop_step(self, chat_history: List[dict]) -> ChatResponse:
-        """执行对话循环的一步，返回包含文本和/或工具调用的响应。"""
-        extra_body = self._build_extra_body()
-
-        # 延迟导入配置以避免循环导入
-        from config import ENABLE_WRITE_FILE, ENABLE_READ_FILE, ENABLE_LIST_FILES, ENABLE_QQ_TOOLS
-
-        # 获取根据配置启用的内置工具
-        enabled_tools = get_enabled_chat_tools(
-            enable_write_file=ENABLE_WRITE_FILE,
-            enable_read_file=ENABLE_READ_FILE,
-            enable_list_files=ENABLE_LIST_FILES,
-            enable_qq_tools=ENABLE_QQ_TOOLS,
-        )
-
-        # 合并内置工具与 MCP 等外部工具
-        all_tools = enabled_tools + self._extra_tools
-
-        # 过滤内部字段（如 _type），只保留 API 需要的字段
-        api_messages = filter_for_api(chat_history)
-
-        response = await self._call_llm(
-            "主 Agent 对话",
-            api_messages,
-            tools=all_tools,
-            temperature=self._temperature,
-            max_tokens=self._max_tokens,
-            **({"extra_body": extra_body} if extra_body else {}),
-        )
-
-        msg = response.choices[0].message
-        return ChatResponse(
-            content=msg.content,
-            tool_calls=self._parse_tool_calls(msg),
-            raw_message=self._build_raw_message(msg),
-        )
-
-    def get_model_info(self) -> ModelInfo:
-        return ModelInfo(model_name=self._model, base_url=self._base_url)
-
-    # ──────── Timing 模块（含自我反思功能） ────────
-
-    async def analyze_timing(
-        self, chat_history: List[dict], timing_info: str,
-    ) -> str:
-        """Timing 模块（含自我反思功能）：分析对话的时间维度信息和进行自我反思。"""
-        # 过滤掉感知消息和 system 消息
-        filtered_history = [
-            msg for msg in chat_history
-            if msg.get("_type") != "perception" and msg.get("role") != "system"
-        ]
-        formatted = format_chat_history(filtered_history)
-        timing_prompt = prompt_manager.get_prompt("maidairy_timing")
-        timing_messages = [
-            {"role": "system", "content": await prompt_manager.render_prompt(timing_prompt)},
-            {
-                "role": "user",
-                "content": (
-                    f"【系统时间戳信息】\n{timing_info}\n\n"
-                    f"【当前对话记录】\n{formatted}"
-                ),
-            },
-        ]
-        extra_body = self._build_extra_body()
-
-        response = await self._call_llm(
-            "Timing 模块",
-            timing_messages,
-            temperature=0.3,
-            max_tokens=512,
-            **({"extra_body": extra_body} if extra_body else {}),
-        )
-
-        return response.choices[0].message.content or ""
-
-    # ──────── 情商模块 (EQ Module) ────────
-
-    async def analyze_emotion(self, chat_history: List[dict]) -> str:
-        """情商模块：分析用户的情绪状态和言语态度。"""
-        # 过滤掉感知消息（AI 的内部感知不需要再分析）
-        filtered_history = [msg for msg in chat_history if msg.get("_type") != "perception"]
-        # 获取最近几轮对话（约 8-10 条消息，约 3-5 轮）
-        recent_messages = filtered_history[-10:] if len(filtered_history) > 10 else filtered_history
-        # 使用情商模块专用格式化函数：只包含用户回复、助手思考、助手说
-        formatted = format_chat_history_for_eq(recent_messages)
-
-        emotion_prompt = prompt_manager.get_prompt("maidairy_emotion")
-        eq_messages = [
-            {"role": "system", "content": await prompt_manager.render_prompt(emotion_prompt)},
-            {
-                "role": "user",
-                "content": f"以下是最近几轮对话记录，请分析其中用户的情绪状态和言语态度：\n\n{formatted}",
-            },
-        ]
-        extra_body = self._build_extra_body()
-
-        response = await self._call_llm(
-            "情商模块 (EQ)",
-            eq_messages,
-            temperature=0.3,
-            max_tokens=512,
-            **({"extra_body": extra_body} if extra_body else {}),
-        )
-
-        return response.choices[0].message.content or ""
-
-    # ──────── 认知模块 (Cognition Module) ────────
-
-    async def analyze_cognition(self, chat_history: List[dict]) -> str:
-        """认知模块：分析用户的意图、认知状态和目的。"""
-        # 过滤掉感知消息（AI 的内部感知不需要再分析）
-        filtered_history = [msg for msg in chat_history if msg.get("_type") != "perception"]
-        # 获取最近几轮对话（约 8-10 条消息，约 3-5 轮）
-        recent_messages = filtered_history[-10:] if len(filtered_history) > 10 else filtered_history
-        # 使用情商模块专用格式化函数：只包含用户回复、助手思考、助手说
-        formatted = format_chat_history_for_eq(recent_messages)
-
-        cognition_prompt = prompt_manager.get_prompt("maidairy_cognition")
-        cognition_messages = [
-            {"role": "system", "content": await prompt_manager.render_prompt(cognition_prompt)},
-            {
-                "role": "user",
-                "content": f"以下是最近几轮对话记录，请分析其中用户的意图、认知状态和目的：\n\n{formatted}",
-            },
-        ]
-        extra_body = self._build_extra_body()
-
-        response = await self._call_llm(
-            "认知模块 (Cognition)",
-            cognition_messages,
-            temperature=0.3,
-            max_tokens=512,
-            **({"extra_body": extra_body} if extra_body else {}),
-        )
-
-        return response.choices[0].message.content or ""
-
-    # ──────── 上下文总结模块 ────────
-
-    async def summarize_context(self, context_messages: List[dict]) -> str:
-        """上下文总结模块：对需要压缩的上下文进行总结。"""
-        # 过滤掉 system 消息
-        filtered_messages = [msg for msg in context_messages if msg.get("role") != "system"]
-        formatted = format_chat_history(filtered_messages)
-
-        summarize_prompt = prompt_manager.get_prompt("maidairy_context_summarize")
-        summarize_messages = [
-            {"role": "system", "content": await prompt_manager.render_prompt(summarize_prompt)},
-            {
-                "role": "user",
-                "content": f"请对以下对话内容进行总结，以便存入记忆系统：\n\n{formatted}",
-            },
-        ]
-        extra_body = self._build_extra_body()
-
-        try:
-            response = await self._call_llm(
-                "上下文总结",
-                summarize_messages,
-                temperature=0.3,
-                max_tokens=1024,
-                **({"extra_body": extra_body} if extra_body else {}),
-            )
-            return response.choices[0].message.content or ""
-        except Exception:
-            # 总结失败时返回空字符串
-            return ""
-
-    # ──────── 了解模块 (Knowledge Module) ────────
-
-    async def analyze_knowledge_categories(
-        self, context_messages: List[dict], categories_summary: str
-    ) -> List[str]:
-        """
-        了解模块-分类分析：分析对话内容涉及哪些个人特征分类。
-
-        在上下文裁切时触发，分析需要提取哪些分类的个人特征信息。
-        """
-        from knowledge import format_context_for_memory
-
-        context_text = format_context_for_memory(context_messages)
-        if not context_text:
-            return []
-
-        # 加载分类分析 prompt
-        category_prompt = prompt_manager.get_prompt("maidairy_knowledge_category")
-        category_prompt.add_context("categories_summary", categories_summary)
-        prompt = await prompt_manager.render_prompt(category_prompt)
-
-        category_messages = [
-            {"role": "system", "content": prompt},
-            {
-                "role": "user",
-                "content": f"请分析以下对话内容涉及哪些个人特征分类：\n\n{context_text}",
-            },
-        ]
-        extra_body = self._build_extra_body()
-
-        try:
-            response = await self._call_llm(
-                "了解模块-分类分析",
-                category_messages,
-                temperature=0.3,
-                max_tokens=256,
-                **({"extra_body": extra_body} if extra_body else {}),
-            )
-            result = response.choices[0].message.content or ""
-            return extract_category_ids_from_result(result)
-        except Exception:
-            return []
-
-    async def extract_knowledge_for_category(
-        self, context_messages: List[dict], category_id: str, category_name: str
-    ) -> str:
-        """
-        了解模块-内容提取：从对话中提取指定分类的个人特征信息。
-
-        为每个分类创建 subAgent，提取相关的个人特征内容。
-        """
-        from knowledge import format_context_for_memory
-
-        context_text = format_context_for_memory(context_messages)
-        if not context_text:
-            return ""
-
-        # 加载内容提取 prompt
-        extract_prompt = prompt_manager.get_prompt("maidairy_knowledge_extract")
-        extract_prompt.add_context("category_name", category_name)
-        prompt = await prompt_manager.render_prompt(extract_prompt)
-
-        extract_messages = [
-            {"role": "system", "content": prompt},
-            {
-                "role": "user",
-                "content": f"请从以下对话内容中提取与「{category_name}」相关的信息：\n\n{context_text}",
-            },
-        ]
-        extra_body = self._build_extra_body()
-
-        try:
-            response = await self._call_llm(
-                f"了解模块-{category_name}提取",
-                extract_messages,
-                temperature=0.3,
-                max_tokens=512,
-                **({"extra_body": extra_body} if extra_body else {}),
-            )
-            result = response.choices[0].message.content or ""
-
-            # 检查是否表示"无"
-            if "无" in result or not result.strip():
-                return ""
-
-            return result
-        except Exception:
-            return ""
-
-    async def analyze_knowledge_need(
-        self, chat_history: List[dict], categories_summary: str
-    ) -> List[str]:
-        """
-        了解模块-需求分析：分析当前对话需要哪些个人特征信息。
-
-        在每次对话前触发，分析需要检索哪些分类的了解内容。
-        """
-        # 过滤掉感知消息和 system 消息
-        filtered_history = [
-            msg for msg in chat_history
-            if msg.get("_type") != "perception" and msg.get("role") != "system"
-        ]
-        # 获取最近几轮对话用于分析
-        recent_messages = filtered_history[-10:] if len(filtered_history) > 10 else filtered_history
-        formatted = format_chat_history(recent_messages)
-
-        # 加载需求分析 prompt
-        retrieve_prompt = prompt_manager.get_prompt("maidairy_knowledge_retrieve")
-        retrieve_prompt.add_context("chat_context", formatted)
-        retrieve_prompt.add_context("categories_summary", categories_summary)
-        prompt = await prompt_manager.render_prompt(retrieve_prompt)
-
-        need_messages = [
-            {"role": "system", "content": prompt},
-            {
-                "role": "user",
-                "content": "请分析当前对话需要哪些个人特征信息。",
-            },
-        ]
-        extra_body = self._build_extra_body()
-
-        try:
-            response = await self._call_llm(
-                "了解模块-需求分析",
-                need_messages,
-                temperature=0.3,
-                max_tokens=256,
-                **({"extra_body": extra_body} if extra_body else {}),
-            )
-            result = response.choices[0].message.content or ""
-            return extract_category_ids_from_result(result)
-        except Exception:
-            return []
-
-    # ──────── 对话上下文构建 ────────
-
-    def build_chat_context(self, user_text: str) -> List[dict]:
-        """根据用户初始输入构建对话循环的初始上下文。"""
-        return [
-            {"role": "system", "content": self._chat_system_prompt},
-            {"role": "user", "content": user_text},
-        ]
diff --git a/src/maisaka/llm_service/prompts.py b/src/maisaka/llm_service/prompts.py
deleted file mode 100644
index c678ac59..00000000
--- a/src/maisaka/llm_service/prompts.py
+++ /dev/null
@@ -1,273 +0,0 @@
-"""
-MaiSaka - LLM 工具定义
-所有 Tool Schema 集中管理。
-
-注意：所有 Prompt 模板已迁移至主项目 prompts/ 目录，使用 .prompt 文件存储。
-使用 prompt_manager.get_prompt("maidairy_xxx") 加载模板。
-"""
-
-# ──────────────────── 工具定义 ────────────────────
-
-# 核心工具（始终启用）
-CORE_TOOLS = [
-    {
-        "type": "function",
-        "function": {
-            "name": "say",
-            "description": (
-                "对用户说话。你的所有正式发言都必须通过此工具输出。"
-                "直接输出的 content 文本会被视为你的内心思考，用户无法看到。"
-                "请描述你想要回复的方式、想法和内容，系统会根据你的想法和对话上下文生成具体的回复。"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "reason": {
-                        "type": "string",
-                        "description": "你想要回复的方式、想法、内容（例如：'我觉得他说得对，表示认同' 或 '这个观点太离谱了，想质疑一下'）",
-                    }
-                },
-                "required": ["reason"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "wait",
-            "description": (
-                "暂时结束你的发言，把话语权交给用户，等待对方说话。"
-                "指定等待的最大秒数。"
-                "如果用户在等待期间说了话，你会通过工具结果收到内容；"
-                "如果超时对方没有说话，你会收到超时通知。"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "seconds": {
-                        "type": "integer",
-                        "description": "等待的秒数（1-24*3600",
-                    }
-                },
-                "required": ["seconds"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "stop",
-            "description": (
-                "结束当前对话循环，进入待机状态。"
-                "调用后主循环会停止，直到用户下次输入新内容时重新唤醒。"
-                "适合在对话自然结束、用户不再回复、或深夜等不适合继续聊天时使用。"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {},
-            },
-        },
-    },
-]
-
-# 可选工具（可通过配置启用/禁用）
-OPTIONAL_TOOLS = {
-    "get_qq_chat_info": {
-        "type": "function",
-        "function": {
-            "name": "get_qq_chat_info",
-            "description": (
-                "获取指定 QQ 聊天的聊天记录。"
-                "通过 HTTP 请求获取另一个程序的 QQ 聊天内容，返回最近的聊天消息（纯文本格式）。"
-                "可用于查看用户在 QQ 上的对话，了解用户当前的聊天状态。"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "chat": {
-                        "type": "string",
-                        "description": "QQ 聊天标识符，格式如 'qq:群号:group' 或 'qq:QQ号:private'",
-                    },
-                    "limit": {
-                        "type": "integer",
-                        "description": "获取的聊天消息数量限制，默认 20 条",
-                    },
-                },
-                "required": ["chat"],
-            },
-        },
-    },
-    "send_info": {
-        "type": "function",
-        "function": {
-            "name": "send_info",
-            "description": (
-                "发送消息到指定的 QQ 聊天。"
-                "通过 HTTP 请求将消息发送到 QQ，可以发送到群聊或私聊。"
-                "适合在需要主动向 QQ 发送通知、回复或消息时使用。"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "chat": {
-                        "type": "string",
-                        "description": "目标 QQ 聊天标识符，格式如 'qq:群号:group' 或 'qq:QQ号:private'",
-                    },
-                    "message": {
-                        "type": "string",
-                        "description": "要发送的消息内容",
-                    },
-                },
-                "required": ["chat", "message"],
-            },
-        },
-    },
-    "list_qq_chats": {
-        "type": "function",
-        "function": {
-            "name": "list_qq_chats",
-            "description": (
-                "获取所有可用的 QQ 群聊列表。"
-                "返回当前可访问的所有 QQ 群聊信息（包括群名、群号、聊天标识符等）。"
-                "可用于查看有哪些 QQ 群聊可以获取消息或发送消息。"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {},
-            },
-        },
-    },
-    "write_file": {
-        "type": "function",
-        "function": {
-            "name": "write_file",
-            "description": (
-                "在 mai_files 目录下写入文件，支持任意格式（文本、代码、Markdown等）。"
-                "如果文件已存在，会覆盖原有内容。可用于保存笔记、代码片段、配置等。"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "filename": {
-                        "type": "string",
-                        "description": "文件名，可包含路径，如 'notes.txt' 或 'diary/2024-03-09.md'",
-                    },
-                    "content": {
-                        "type": "string",
-                        "description": "要写入的文件内容",
-                    },
-                },
-                "required": ["filename", "content"],
-            },
-        },
-    },
-    "read_file": {
-        "type": "function",
-        "function": {
-            "name": "read_file",
-            "description": (
-                "读取 mai_files 目录下的文件内容。"
-                "返回文件的完整文本内容。"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "filename": {
-                        "type": "string",
-                        "description": "要读取的文件名，可包含路径",
-                    },
-                },
-                "required": ["filename"],
-            },
-        },
-    },
-    "list_files": {
-        "type": "function",
-        "function": {
-            "name": "list_files",
-            "description": (
-                "获取 mai_files 目录下所有文件的元信息列表。"
-                "返回每个文件的名称、大小、修改时间等信息，帮助你了解有哪些文件可用。"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {},
-            },
-        },
-    },
-}
-
-# 始终启用的工具
-ALWAYS_ENABLED_TOOLS = [
-    {
-        "type": "function",
-        "function": {
-            "name": "store_context",
-            "description": (
-                "将指定范围的对话上下文存入记忆系统，然后从当前对话中移除这些内容。"
-                "适合在以下情况使用："
-                "1. 对话上下文过长，需要压缩以保持效率"
-                "2. 对话话题已经转换，旧话题的内容可以归档"
-                "3. 遇到重要的对话内容，需要保存到长期记忆中"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "count": {
-                        "type": "integer",
-                        "description": "要存入记忆的消息数量，从最早的消息开始计算。例如传入10会将最早的10条消息存入记忆并移除。",
-                    },
-                    "reason": {
-                        "type": "string",
-                        "description": "说明为什么要存入这段上下文，帮助记忆系统更好地组织信息。例如：「话题从游戏转换到了工作」或「上下文过长需要压缩」。",
-                    },
-                },
-                "required": ["count", "reason"],
-            },
-        },
-    },
-]
-
-# ──────────────────── 主 Agent 工具定义 ────────────────────
-
-# 保持原有的 CHAT_TOOLS 用于向后兼容
-CHAT_TOOLS = CORE_TOOLS + [
-    OPTIONAL_TOOLS["write_file"],
-    OPTIONAL_TOOLS["read_file"],
-    OPTIONAL_TOOLS["list_files"],
-    ALWAYS_ENABLED_TOOLS[0],
-]
-
-
-def get_enabled_chat_tools(
-    enable_write_file: bool = True,
-    enable_read_file: bool = True,
-    enable_list_files: bool = True,
-    enable_qq_tools: bool = False,
-) -> list:
-    """
-    根据配置获取启用的工具列表。
-
-    Args:
-        enable_write_file: 是否启用 write_file 工具
-        enable_read_file: 是否启用 read_file 工具
-        enable_list_files: 是否启用 list_files 工具
-        enable_qq_tools: 是否启用 QQ 工具 (get_qq_chat_info, send_info, list_qq_chats)
-
-    Returns:
-        启用的工具列表
-    """
-    tools = CORE_TOOLS + ALWAYS_ENABLED_TOOLS
-
-    if enable_qq_tools:
-        tools.append(OPTIONAL_TOOLS["get_qq_chat_info"])
-        tools.append(OPTIONAL_TOOLS["send_info"])
-        tools.append(OPTIONAL_TOOLS["list_qq_chats"])
-    if enable_write_file:
-        tools.append(OPTIONAL_TOOLS["write_file"])
-    if enable_read_file:
-        tools.append(OPTIONAL_TOOLS["read_file"])
-    if enable_list_files:
-        tools.append(OPTIONAL_TOOLS["list_files"])
-
-    return tools
diff --git a/src/maisaka/llm_service/utils.py b/src/maisaka/llm_service/utils.py
deleted file mode 100644
index a8f232ad..00000000
--- a/src/maisaka/llm_service/utils.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""
-MaiSaka - LLM 服务工具函数
-"""
-
-from typing import Literal
-
-# ──────────────────── 消息类型 ────────────────────
-
-MessageType = Literal["user", "assistant", "system", "perception"]
-
-# 内部使用的字段前缀，用于标记不应发送给 API 的元数据
-INTERNAL_FIELD_PREFIX = "_"
-
-# 消息类型字段名
-MSG_TYPE_FIELD = "_type"
-
-
-# ──────────────────── 消息构建 ────────────────────
-
-def build_message(role: str, content: str, msg_type: MessageType = "user", **kwargs) -> dict:
-    """
-    构建消息字典，包含消息类型标记。
-
-    Args:
-        role: 消息角色 (user/assistant/system)
-        content: 消息内容
-        msg_type: 消息类型 (user/assistant/system/perception)
-        **kwargs: 其他字段（如 tool_calls）
-
-    Returns:
-        消息字典
-    """
-    msg = {"role": role, "content": content, MSG_TYPE_FIELD: msg_type, **kwargs}
-    return msg
-
-
-def filter_for_api(messages: list[dict]) -> list[dict]:
-    """
-    过滤消息列表，移除内部字段，用于发送给 API。
-
-    Args:
-        messages: 原始消息列表
-
-    Returns:
-        过滤后的消息列表（移除所有以 _ 开头的字段）
-    """
-    return [
-        {k: v for k, v in msg.items() if not k.startswith(INTERNAL_FIELD_PREFIX)}
-        for msg in messages
-    ]
-
-
-def filter_by_type(messages: list[dict], msg_type: MessageType) -> list[dict]:
-    """
-    按消息类型过滤消息列表。
-
-    Args:
-        messages: 原始消息列表
-        msg_type: 要保留的消息类型
-
-    Returns:
-        只包含指定类型的消息列表
-    """
-    return [msg for msg in messages if msg.get(MSG_TYPE_FIELD) == msg_type]
-
-
-def remove_last_perception(messages: list[dict]) -> None:
-    """
-    移除最后一条感知消息（直接修改原列表）。
-
-    Args:
-        messages: 消息列表（会被原地修改）
-    """
-    for i in range(len(messages) - 1, -1, -1):
-        if messages[i].get(MSG_TYPE_FIELD) == "perception":
-            messages.pop(i)
-            break
-
-
-def format_chat_history(messages: list) -> str:
-    """将聊天消息列表格式化为可读文本，用于子代理上下文构建。"""
-    parts: list[str] = []
-    for msg in messages:
-        role = msg.get("role", "?")
-        content = msg.get("content", "") or ""
-        if role == "system":
-            parts.append(f"[系统] {content[:500]}")
-        elif role == "user":
-            parts.append(f"[用户] {content[:500]}")
-        elif role == "assistant":
-            if content:
-                parts.append(f"[助手思考] {content[:500]}")
-            for tc in msg.get("tool_calls", []):
-                func = tc.get("function", {})
-                name = func.get("name", "?")
-                args = func.get("arguments", "")
-                if isinstance(args, str) and len(args) > 200:
-                    args = args[:200] + "..."
-                parts.append(f"[助手调用 {name}] {args}")
-        elif role == "tool":
-            parts.append(f"[工具结果] {content[:300]}")
-    return "\n".join(parts)
-
-
-def format_chat_history_for_eq(messages: list) -> str:
-    """
-    将聊天消息列表格式化为可读文本，专门用于情商模块。
-
-    只包含三种内容：
-    1. 模型自身思考内容（assistant 的 content）
-    2. 模型 say 的结果内容（say 工具的结果）
-    3. 用户回复内容（user 消息）
-
-    不包含：工具调用本身、其他工具的结果
-    """
-    parts: list[str] = []
-    say_tool_call_ids = set()
-
-    # 第一遍：收集所有 say 工具的 tool_call_id
-    for msg in messages:
-        if msg.get("role") == "assistant" and "tool_calls" in msg:
-            for tc in msg.get("tool_calls", []):
-                func = tc.get("function", {})
-                if func.get("name") == "say":
-                    say_tool_call_ids.add(tc.get("id", ""))
-
-    # 第二遍：格式化消息
-    for msg in messages:
-        role = msg.get("role", "?")
-        content = msg.get("content", "") or ""
-
-        if role == "user":
-            parts.append(f"[用户] {content[:500]}")
-        elif role == "assistant":
-            # 只包含助手思考内容，不包含工具调用本身
-            if content:
-                parts.append(f"[助手思考] {content[:500]}")
-        elif role == "tool":
-            # 只包含 say 工具的结果
-            tool_call_id = msg.get("tool_call_id", "")
-            if tool_call_id in say_tool_call_ids:
-                parts.append(f"[助手说] {content[:500]}")
-
-    return "\n".join(parts)
diff --git a/src/maisaka/replyer.py b/src/maisaka/replyer.py
index 543ad5c5..2546ae82 100644
--- a/src/maisaka/replyer.py
+++ b/src/maisaka/replyer.py
@@ -4,10 +4,32 @@ MaiSaka - Reply 回复生成器
 """
 
 from typing import Optional
-from datetime import datetime
-from src.prompt.prompt_manager import prompt_manager
-from llm_service import BaseLLMService
-from llm_service.utils import format_chat_history
+from llm_service import MaiSakaLLMService
+
+
+def format_chat_history(messages: list) -> str:
+    """Render chat messages (role/content dicts) as one labelled line per entry, joined by newlines."""
+    parts: list[str] = []
+    for msg in messages:
+        role = msg.get("role", "?")
+        content = msg.get("content", "") or ""  # a None content is treated as empty text
+        if role == "system":
+            parts.append(f"[系统] {content[:500]}")  # body capped at 500 characters
+        elif role == "user":
+            parts.append(f"[用户] {content[:500]}")
+        elif role == "assistant":
+            if content:  # assistant text is labelled as thinking; skipped when empty
+                parts.append(f"[助手思考] {content[:500]}")
+            for tc in msg.get("tool_calls") or []:  # key may be present with a None value
+                func = tc.get("function") or {}
+                name = func.get("name", "?")
+                args = func.get("arguments", "")
+                if isinstance(args, str) and len(args) > 200:
+                    args = args[:200] + "..."  # keep long argument payloads readable
+                parts.append(f"[助手调用 {name}] {args}")
+        elif role == "tool":
+            parts.append(f"[工具结果] {content[:300]}")  # tool output capped at 300 characters
+    return "\n".join(parts)  # messages with any other role are omitted
 
 
 class Replyer:
@@ -17,7 +39,7 @@ class Replyer:
     根据给定的想法（reason）和对话上下文，生成符合人设的口语化回复。
     """
 
-    def __init__(self, llm_service: Optional[BaseLLMService] = None):
+    def __init__(self, llm_service: Optional[MaiSakaLLMService] = None):
         """
         初始化回复器。
 
@@ -27,7 +49,7 @@ class Replyer:
         self._llm_service = llm_service
         self._enabled = True
 
-    def set_llm_service(self, llm_service: BaseLLMService) -> None:
+    def set_llm_service(self, llm_service: MaiSakaLLMService) -> None:
         """设置 LLM 服务"""
         self._llm_service = llm_service
 
@@ -49,48 +71,6 @@ class Replyer:
         if not self._enabled or not reason or self._llm_service is None:
             return "..."
 
-        # 获取当前时间
-        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-        # 格式化对话历史（过滤掉 system 消息，保留其他内容）
-        filtered_history = [
-            msg for msg in chat_history
-            if msg.get("role") != "system" and msg.get("_type") != "perception"
-        ]
-        formatted_history = format_chat_history(filtered_history)
-
-        # 构建回复消息
-        replyer_prompt = prompt_manager.get_prompt("maidairy_replyer")
-        system_prompt = await prompt_manager.render_prompt(replyer_prompt)
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {
-                "role": "user",
-                "content": (
-                    f"当前时间：{current_time}\n\n"
-                    f"【聊天记录】\n{formatted_history}\n\n"
-                    f"【你的想法】\n{reason}\n\n"
-                    f"现在，你说："
-                ),
-            },
-        ]
-
-        try:
-            # 调用 LLM 生成回复
-            from llm_service.openai_impl import OpenAILLMService
-            if isinstance(self._llm_service, OpenAILLMService):
-                extra_body = self._llm_service._build_extra_body()
-                response = await self._llm_service._call_llm(
-                    "回复生成",
-                    messages,
-                    temperature=0.8,
-                    max_tokens=512,
-                    **({"extra_body": extra_body} if extra_body else {}),
-                )
-                result = response.choices[0].message.content or "..."
-                return result.strip()
-        except Exception:
-            pass
-
-        # 生成失败时返回默认回复
-        return "..."
+        # Delegate directly to the LLM service's generate_reply method,
+        # which uses the main project's replyer model configuration.
+        return await self._llm_service.generate_reply(reason, chat_history)
diff --git a/src/maisaka/tool_handlers.py b/src/maisaka/tool_handlers.py
index c00624aa..ff4220d7 100644
--- a/src/maisaka/tool_handlers.py
+++ b/src/maisaka/tool_handlers.py
@@ -21,7 +21,7 @@ from rich.markdown import Markdown
 
 from config import console
 from input_reader import InputReader
-from llm_service import BaseLLMService
+from llm_service import MaiSakaLLMService
 from replyer import Replyer
 
 if TYPE_CHECKING:
@@ -35,7 +35,7 @@ MAI_FILES_DIR = Path(os.path.join(os.path.dirname(os.path.abspath(__file__)), "m
 _replyer: Optional[Replyer] = None
 
 
-def get_replyer(llm_service: BaseLLMService) -> Replyer:
+def get_replyer(llm_service: MaiSakaLLMService) -> Replyer:
     """获取回复器实例（单例模式）"""
     global _replyer
     if _replyer is None:
@@ -50,7 +50,7 @@ class ToolHandlerContext:
 
     def __init__(
         self,
-        llm_service: BaseLLMService,
+        llm_service: MaiSakaLLMService,
         reader: InputReader,
         user_input_times: list[datetime],
     ):