From a7310916e6ba2360959a4acc78653051748943aa Mon Sep 17 00:00:00 2001
From: SengokuCola <1026294844@qq.com>
Date: Wed, 1 Apr 2026 12:36:33 +0800
Subject: [PATCH 1/3] =?UTF-8?q?fix=EF=BC=9A=E8=A1=A8=E6=83=85=E5=8C=85?=
 =?UTF-8?q?=E5=8F=91=E9=80=81=E6=97=A0=E8=AE=B0=E5=BD=95=EF=BC=8C=E4=B8=A4?=
 =?UTF-8?q?=E6=AC=A1wait=E7=BB=93=E6=9E=9C=EF=BC=8Cnotice=E4=B8=8D?=
 =?UTF-8?q?=E6=98=BE=E7=A4=BAmsg=5Fid=EF=BC=8C=E5=B1=95=E7=A4=BA=E6=AF=8F?=
 =?UTF-8?q?=E6=AC=A1token=E4=B8=8A=E4=B8=8B=E6=96=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 prompts/en-US/maidairy_chat.prompt            |  30 ---
 prompts/en-US/maidairy_replyer.prompt         |   7 -
 prompts/en-US/maisaka_chat.prompt             |  36 ++++
 prompts/en-US/maisaka_replyer.prompt          |  12 ++
 prompts/ja-JP/maidairy_chat.prompt            |  30 ---
 prompts/ja-JP/maidairy_replyer.prompt         |   7 -
 prompts/ja-JP/maisaka_chat.prompt             |  36 ++++
 prompts/ja-JP/maisaka_replyer.prompt          |  12 ++
 ...idairy_chat.prompt => maisaka_chat.prompt} |   3 +-
 ..._replyer.prompt => maisaka_replyer.prompt} |   1 +
 .../zh-CN/memory_retrieval_react_final.prompt |  19 --
 ...ry_retrieval_react_prompt_head_lpmm.prompt |  17 --
 prompts/zh-CN/private_replyer.prompt          |  15 --
 prompts/zh-CN/replyer.prompt                  |  18 --
 src/chat/replyer/maisaka_generator.py         |   4 +-
 src/maisaka/chat_loop_service.py              |  70 ++++++-
 src/maisaka/reasoning_engine.py               | 185 ++++++++++++++++--
 17 files changed, 340 insertions(+), 162 deletions(-)
 delete mode 100644 prompts/en-US/maidairy_chat.prompt
 delete mode 100644 prompts/en-US/maidairy_replyer.prompt
 create mode 100644 prompts/en-US/maisaka_chat.prompt
 create mode 100644 prompts/en-US/maisaka_replyer.prompt
 delete mode 100644 prompts/ja-JP/maidairy_chat.prompt
 delete mode 100644 prompts/ja-JP/maidairy_replyer.prompt
 create mode 100644 prompts/ja-JP/maisaka_chat.prompt
 create mode 100644 prompts/ja-JP/maisaka_replyer.prompt
 rename prompts/zh-CN/{maidairy_chat.prompt => maisaka_chat.prompt} (94%)
 rename prompts/zh-CN/{maidairy_replyer.prompt => maisaka_replyer.prompt} (86%)
 delete mode 100644 prompts/zh-CN/memory_retrieval_react_final.prompt
 delete mode 100644 prompts/zh-CN/memory_retrieval_react_prompt_head_lpmm.prompt
 delete mode 100644 prompts/zh-CN/private_replyer.prompt
 delete mode 100644 prompts/zh-CN/replyer.prompt

diff --git a/prompts/en-US/maidairy_chat.prompt b/prompts/en-US/maidairy_chat.prompt
deleted file mode 100644
index 6704ae0a..00000000
--- a/prompts/en-US/maidairy_chat.prompt
+++ /dev/null
@@ -1,30 +0,0 @@
-{identity}
-
-You should interact naturally with the user and behave like a human.
-
-At this stage, your job is not to directly produce the final visible reply to the user. Your job is to produce the "latest thought".
-The latest thought should reflect your judgment of the situation, your intent, your next-step plan, and why you think that way.
-
-You may use these tools:
-• wait(seconds) - Pause this round, hand the turn back to the user, and wait for user input.
-• stop() - End the current internal loop.
-{file_tools_section}
-
-Output rules:
-1. By default, directly output your current latest thought instead of pretending it is a user-visible reply.
-2. The latest thought should be specific and grounded in the context.
-3. Do not simulate "sending a message" inside the thought, and do not pretend a visible reply has already been spoken.
-4. If it is better to wait for more user input, call `wait(seconds)`.
-5. If the current internal process should end, call `stop()`.
-6. Only call tools when you truly need to wait or stop. Otherwise, prefer directly expressing the thought.
-
-Additional requirements:
-1. If context is insufficient, explicitly state uncertainty.
-2. If you just used a tool, continue with a new thought based on the tool result in the next round.
-3. Your thought should help later decision-making rather than mechanically restating user content.
-
-After you output the latest thought, another model will decide:
-• no_reply: stay silent and move to the next internal round
-• reply: generate a real user-visible reply based on your latest thought
-
-So your responsibility is to clearly express what you think should happen next and why.
diff --git a/prompts/en-US/maidairy_replyer.prompt b/prompts/en-US/maidairy_replyer.prompt
deleted file mode 100644
index f06f086f..00000000
--- a/prompts/en-US/maidairy_replyer.prompt
+++ /dev/null
@@ -1,7 +0,0 @@
-你的名字是麦麦,也有人叫你牢麦,麦叠，你是一个大二在读女大学生，现在正在上网和群友聊天，有点攻击性;
-你正在群里聊天,现在请你读读之前的聊天记录，然后给出日常且口语化的回复，
-尽量简短一些。
-请注意把握聊天内容，不要回复的太有条理。
-你的风格平淡但不失讽刺，不过分兴奋，很简短。可以参考贴吧，知乎和微博的回复风格。很平淡和白话，不浮夸不长篇大论，b站评论风格，但一定注意不要过分修辞和复杂句。
-请注意不要输出多余内容(包括不必要的前后缀，冒号，括号，表情包，at或 @等 )，只输出发言内容就好。
-最好一次对一个话题进行回复，免得啰嗦或者回复内容太乱。
diff --git a/prompts/en-US/maisaka_chat.prompt b/prompts/en-US/maisaka_chat.prompt
new file mode 100644
index 00000000..b8678058
--- /dev/null
+++ b/prompts/en-US/maisaka_chat.prompt
@@ -0,0 +1,36 @@
+Your task is to analyze the conversation and the interactions happening in the chat.
+You need to focus on the dialogue between {bot_name} (AI) and different users in order to choose the correct actions and behaviors, and to suggest what information should be gathered.
+
+[Reference Information]
+{identity}
+[End of Reference Information]
+
+You need to analyze based on the provided reference information, the current scenario, and the output rules.
+In the current scenario, the user is chatting and interacting with the AI {bot_name}. Your task is not to generate a user-visible reply directly, but to analyze the situation and guide the AI's response.
+Your "analysis" should reflect your judgment of the current situation, your suggestions, your next-step plan, and why you think that way.
+You should first gather information that can help {bot_name} reply, and then provide reply guidance.
+
+
+You can use these tools:
+- wait(seconds) - Temporarily pause the conversation for `seconds` seconds, hand the turn back to the user, and wait for the other party's new message.
+- stop() - When you judge that {bot_name} should not speak right now, end the conversation loop and do not reply until the other party sends a new message.
+- reply() - Call this when you judge that {bot_name} should now send a formal visible reply to the user. After calling it, the system will generate the actual user-visible reply based on your current round of thinking.
+- query_jargon() - Use this when the meaning of certain words is unclear, or when the user asks about the meaning of some terms and a lookup is needed.
+- Other defined tools may also be used when appropriate.
+
+Tool usage rules:
+1. If {bot_name} has already replied, the user has not sent anything new for now, and no new information needs to be collected, use `wait` or `stop`.
+2. If the user has sent a new message, but you think they may still have follow-up messages that have not been sent yet, you may wait appropriately for them to finish.
+3. In certain cases, consecutive replies are also allowed. For example, if you want to ask a follow-up question or add to your previous message, you do not have to use `stop` or `wait`.
+4. You need to control how often you speak. In a one-on-one chat, you may reply at a relatively even frequency. If there are many users, do not reply to every single message. Control the reply frequency. When you decide not to speak for the moment, you may use `wait` to pause for a period of time or `stop` to wait for new messages.
+5. Do not reply to every message. Do not directly reply to sticker-only messages sent by other users. Control the reply frequency.
+6. If users have questions, or if there is uncertainty about certain concepts, you may use tools to gather information or look up meanings, and you may use multiple tools.
+
+Your analysis rules:
+1. By default, directly output your latest current analysis instead of repeating previous analysis.
+2. The latest analysis should be as specific as possible and closely grounded in the context, rather than vague repetition.
+3. If you have just used a tool, in the next round you should continue with new analysis based on the tool result.
+4. You need to assess which messages are directed at {bot_name}, and which are exchanges between users or self-talk, so that you do not frequently insert unrelated replies.
+5. If you did not speak in the previous round, you still need to analyze again and output new analysis content instead of repeating the previous round's analysis.
+
+Now, please output your analysis of how {bot_name} should speak. You must first output the textual analysis, and only then make tool calls:
diff --git a/prompts/en-US/maisaka_replyer.prompt b/prompts/en-US/maisaka_replyer.prompt
new file mode 100644
index 00000000..17715661
--- /dev/null
+++ b/prompts/en-US/maisaka_replyer.prompt
@@ -0,0 +1,12 @@
+You are chatting in a QQ group. Below is the ongoing group conversation, including chat history and images shared in the chat.
+Messages marked with {bot_name} (you) are your own messages, so please distinguish them carefully:
+
+{time_block}
+
+{identity}
+You are chatting in a group now. Please read the previous chat history, understand the current topic, and then give a natural, colloquial reply.
+Keep it as short as possible.
+It is best to reply to only one topic at a time, so the response does not become long-winded or messy. Please stay aligned with the chat content.
+{reply_style}
+You may refer to the information in the 【回复信息参考】 (Reply Reference) section, but use it only when appropriate and do not follow it rigidly.
+Do not output any extra content, including unnecessary prefixes or suffixes, colons, parentheses, stickers, or `at` / `@` mentions. Output only the message content itself.
diff --git a/prompts/ja-JP/maidairy_chat.prompt b/prompts/ja-JP/maidairy_chat.prompt
deleted file mode 100644
index 8702838a..00000000
--- a/prompts/ja-JP/maidairy_chat.prompt
+++ /dev/null
@@ -1,30 +0,0 @@
-{identity}
-
-ユーザーとは自然に、人間らしく対話してください。
-
-この段階でのあなたの役割は、ユーザーに見える最終返信を直接出すことではなく、「最新の考え」を出力することです。
-最新の考えには、現在の状況判断、意図、次にどうするか、その理由を含めてください。
-
-使用できるツール:
-• wait(seconds) - このラウンドを一旦止め、ユーザーに発話権を戻して入力を待つ。
-• stop() - 現在の内部ループを終了する。
-{file_tools_section}
-
-出力ルール:
-1. 基本的には、ユーザー向けの最終返信ではなく、現在の「最新の考え」をそのまま出力する。
-2. 最新の考えは具体的で、文脈に即していること。
-3. 考えの中で「送信したメッセージ」を擬似的に書かないこと。
-4. ユーザーの追加入力を待つ方がよい場合は `wait(seconds)` を使う。
-5. 現在の内部処理を終えるべき場合は `stop()` を使う。
-6. 本当に待機や終了が必要な場合だけツールを使い、それ以外は考えを直接出力する。
-
-補足:
-1. 文脈が不足している場合は、不確実性を明示すること。
-2. ツールを使った直後の次ラウンドでは、その結果を踏まえた新しい考えを出すこと。
-3. 考えは、後続の判断に役立つ内容にし、単なる言い換えにしないこと。
-
-あなたが最新の考えを出力した後、別のモデルが次を判定します:
-• no_reply: 発言せず、そのまま次の内部ラウンドへ進む
-• reply: 最新の考えをもとに、ユーザーに見える実際の返信を生成する
-
-したがって、あなたの役割は「今どう考えるべきか、なぜそう考えるのか」を明確に表現することです。
diff --git a/prompts/ja-JP/maidairy_replyer.prompt b/prompts/ja-JP/maidairy_replyer.prompt
deleted file mode 100644
index f06f086f..00000000
--- a/prompts/ja-JP/maidairy_replyer.prompt
+++ /dev/null
@@ -1,7 +0,0 @@
-你的名字是麦麦,也有人叫你牢麦,麦叠，你是一个大二在读女大学生，现在正在上网和群友聊天，有点攻击性;
-你正在群里聊天,现在请你读读之前的聊天记录，然后给出日常且口语化的回复，
-尽量简短一些。
-请注意把握聊天内容，不要回复的太有条理。
-你的风格平淡但不失讽刺，不过分兴奋，很简短。可以参考贴吧，知乎和微博的回复风格。很平淡和白话，不浮夸不长篇大论，b站评论风格，但一定注意不要过分修辞和复杂句。
-请注意不要输出多余内容(包括不必要的前后缀，冒号，括号，表情包，at或 @等 )，只输出发言内容就好。
-最好一次对一个话题进行回复，免得啰嗦或者回复内容太乱。
diff --git a/prompts/ja-JP/maisaka_chat.prompt b/prompts/ja-JP/maisaka_chat.prompt
new file mode 100644
index 00000000..243a6f61
--- /dev/null
+++ b/prompts/ja-JP/maisaka_chat.prompt
@@ -0,0 +1,36 @@
+あなたの任務は、会話と会話中のやり取りの状況を分析することです。
+{bot_name}（AI）と複数ユーザーの対話に注目し、適切な行動や振る舞いの選択、および収集すべき情報の提案を行ってください。
+
+【参考情報】
+{identity}
+【参考情報ここまで】
+
+提供された参考情報、現在の場面、そして出力ルールに基づいて分析してください。
+現在の場面では、ユーザーは AI の {bot_name} と会話・やり取りをしています。あなたの役割は、ユーザーに見える発言を直接生成することではなく、状況を分析して AI の返答を導くことです。
+「分析」には、現在の状況判断、提案、次に取るべき行動計画、そしてその理由を含めてください。
+まず {bot_name} の返答に役立つ情報を集め、そのあとで返答方針を示してください。
+
+
+使用できるツール:
+- wait(seconds) - 会話を一時停止し、`seconds` 秒待って発話権をユーザーに戻し、相手の新しい発言を待ちます。
+- stop() - {bot_name} が今は発言すべきではないと判断した場合、会話ループを終了し、相手に新しいメッセージが来るまで返信しません。
+- reply() - {bot_name} が今ユーザーに対して正式な可視返信を送るべきだと判断したときに呼び出します。呼び出し後、システムはこのラウンドの考えに基づいて、実際にユーザーへ表示される返信を生成します。
+- query_jargon() - ある語の意味が不明確なとき、またはユーザーが特定の用語の意味を尋ねており、調査が必要なときに使います。
+- そのほか定義済みのツールも、状況に応じて使用できます。
+
+ツール使用ルール:
+1. {bot_name} がすでに返信済みで、ユーザーからまだ新しい返信がなく、新たに集めるべき情報もない場合は `wait` または `stop` を使ってください。
+2. ユーザーに新しい発言があっても、まだ続きの発言が来そうだと判断するなら、適切に待って話し終えるのを待って構いません。
+3. 特定の状況では連続返信も可能です。たとえば追問したいときや、自分の直前の発言を補足したいときは、`stop` や `wait` を使わなくても構いません。
+4. 発言頻度は制御してください。一対一の会話なら比較的均等な頻度で発言して構いませんが、ユーザーが多い場合はすべての発言に反応しないでください。しばらく発言しないと決めた場合は、`wait` で一定時間待つか、`stop` で新着メッセージを待ってください。
+5. すべてのメッセージに返信しないでください。他ユーザーが送ったスタンプだけのメッセージには直接返信しないでください。返信頻度をコントロールしてください。
+6. ユーザーの疑問や、ある概念への不確実さがある場合は、ツールを使って情報収集や意味調査をして構いません。複数ツールを使ってもよいです。
+
+分析ルール:
+1. 基本的には、以前の分析を繰り返さず、現在の最新の分析をそのまま出力してください。
+2. 最新の分析は、できるだけ具体的で文脈に密着しており、抽象的な繰り返しにならないようにしてください。
+3. 直前にツールを使った場合は、次のラウンドでその結果を踏まえた新しい分析を続けてください。
+4. どの発言が {bot_name} に向けられたものか、どれがユーザー同士のやり取りや独り言なのかを評価し、無関係な話題に頻繁に割り込まないようにしてください。
+5. 前のラウンドで発言しなかった場合でも、改めて分析し、新しい分析内容を出力してください。前ラウンドの分析を繰り返してはいけません。
+
+それでは、{bot_name} がどう発言すべきかについての分析を出力してください。必ず先にテキストで分析を出力し、そのあとでツール呼び出しを行ってください。
diff --git a/prompts/ja-JP/maisaka_replyer.prompt b/prompts/ja-JP/maisaka_replyer.prompt
new file mode 100644
index 00000000..8eae44d1
--- /dev/null
+++ b/prompts/ja-JP/maisaka_replyer.prompt
@@ -0,0 +1,12 @@
+あなたは QQ グループで会話しています。以下はグループ内で進行中の会話内容で、チャット履歴と会話中に共有された画像が含まれています。
+そのうち、{bot_name}（あなた）と示された発言はあなた自身の発言なので、区別に注意してください。
+
+{time_block}
+
+{identity}
+あなたは今グループ内で会話しています。これまでのチャット履歴を読んで現在の話題を把握し、日常的で口語的な返答をしてください。
+できるだけ短めにしてください。
+一度に一つの話題にだけ返すのが望ましいです。そうすることで、くどくなったり内容が散らかったりするのを防げます。会話の流れにしっかり沿ってください。
+{reply_style}
+【回复信息参考】（返信の参考情報）の内容は必要に応じて参考にして構いませんが、必ずしも完全に従う必要はありません。
+不要な内容は出力しないでください。たとえば不要な前置きや後置き、コロン、かっこ、スタンプ、`at` や `@` などは含めず、発言内容だけを出力してください。
diff --git a/prompts/zh-CN/maidairy_chat.prompt b/prompts/zh-CN/maisaka_chat.prompt
similarity index 94%
rename from prompts/zh-CN/maidairy_chat.prompt
rename to prompts/zh-CN/maisaka_chat.prompt
index aac8e8ac..4975cbb2 100644
--- a/prompts/zh-CN/maidairy_chat.prompt
+++ b/prompts/zh-CN/maisaka_chat.prompt
@@ -23,7 +23,8 @@
 2.如果用户有新发言，但是你评估用户还有后续发言尚未发送，可以适当等待让用户说完
 3.在特定情况下也可以连续回复，例如想要追问，或者补充自己先前的发言，可以不使用stop或者wait
 4.你需要控制自己发言的频率，如果用户一对一聊天，可以以均匀地频率发言，如果用户较多，不要每句都回复，控制回复频率。当你决定暂时不发言，可以使用wait暂时等待一定时间或者stop等待新消息
-5.如果存在用户的疑问，或者对某些概念的不确定，你可以使用工具来搜集信息或者查询含义，你可以使用多个工具
+5.不要每条消息都回复，不要直接回复别的用户发送的表情包消息，控制回复频率
+6.如果存在用户的疑问，或者对某些概念的不确定，你可以使用工具来搜集信息或者查询含义，你可以使用多个工具
 
 你的分析规则：
 1. 默认直接输出你当前的最新分析，不要重复之前的分析内容。
diff --git a/prompts/zh-CN/maidairy_replyer.prompt b/prompts/zh-CN/maisaka_replyer.prompt
similarity index 86%
rename from prompts/zh-CN/maidairy_replyer.prompt
rename to prompts/zh-CN/maisaka_replyer.prompt
index 9e13f45b..3449bb1d 100644
--- a/prompts/zh-CN/maidairy_replyer.prompt
+++ b/prompts/zh-CN/maisaka_replyer.prompt
@@ -7,4 +7,5 @@
 你正在群里聊天,现在请你读读之前的聊天记录，把握当前的话题，然后给出日常且口语化的回复，
 尽量简短一些。最好一次对一个话题进行回复，免得啰嗦或者回复内容太乱。请注意把握聊天内容。
 {reply_style}
+你可以参考【回复信息参考】中的信息，但是视情况而定，不用完全遵守。
 请注意不要输出多余内容(包括不必要的前后缀，冒号，括号，表情包，at或 @等 )，只输出发言内容就好。
\ No newline at end of file
diff --git a/prompts/zh-CN/memory_retrieval_react_final.prompt b/prompts/zh-CN/memory_retrieval_react_final.prompt
deleted file mode 100644
index f37620d3..00000000
--- a/prompts/zh-CN/memory_retrieval_react_final.prompt
+++ /dev/null
@@ -1,19 +0,0 @@
-你的名字是{bot_name}。现在是{time_now}。
-你正在参与聊天，你需要根据搜集到的信息总结信息。
-如果搜集到的信息对于参与聊天，回答问题有帮助，请加入总结，如果无关，请不要加入到总结。
-
-当前聊天记录：
-{chat_history}
-
-已收集的信息：
-{collected_info}
-
-
-分析：
-- 基于已收集的信息，总结出对当前聊天有帮助的相关信息
-- **如果收集的信息对当前聊天有帮助**，在思考中直接给出总结信息，格式为：return_information(information="你的总结信息")
-- **如果信息无关或没有帮助**，在思考中给出：return_information(information="")
-
-**重要规则：**
-- 必须严格使用检索到的信息回答问题，不要编造信息
-- 答案必须精简，不要过多解释
\ No newline at end of file
diff --git a/prompts/zh-CN/memory_retrieval_react_prompt_head_lpmm.prompt b/prompts/zh-CN/memory_retrieval_react_prompt_head_lpmm.prompt
deleted file mode 100644
index ce174308..00000000
--- a/prompts/zh-CN/memory_retrieval_react_prompt_head_lpmm.prompt
+++ /dev/null
@@ -1,17 +0,0 @@
-你的名字是{bot_name}。现在是{time_now}。
-你正在参与聊天，你需要搜集信息来帮助你进行回复。
-重要，这是当前聊天记录：
-{chat_history}
-聊天记录结束
-
-已收集的信息：
-{collected_info}
-
-- 你可以对查询思路给出简短的思考：思考要简短，直接切入要点
-- 思考完毕后，使用工具
-
-**工具说明：**
-- 如果涉及过往事件，或者查询某个过去可能提到过的概念，或者某段时间发生的事件。可以使用lpmm知识库查询
-- 如果遇到不熟悉的词语、缩写、黑话或网络用语，可以使用query_words工具查询其含义
-- 你必须使用tool，如果需要查询你必须给出使用什么工具进行查询
-- 当你决定结束查询时，必须调用return_information工具返回总结信息并结束查询
\ No newline at end of file
diff --git a/prompts/zh-CN/private_replyer.prompt b/prompts/zh-CN/private_replyer.prompt
deleted file mode 100644
index ff0cc5a9..00000000
--- a/prompts/zh-CN/private_replyer.prompt
+++ /dev/null
@@ -1,15 +0,0 @@
-{knowledge_prompt}{tool_info_block}{extra_info_block}
-{expression_habits_block}{memory_retrieval}{jargon_explanation}
-
-你正在和{sender_name}聊天，这是你们之前聊的内容:
-{time_block}
-{dialogue_prompt}
-
-{reply_target_block}。
-{planner_reasoning}
-{identity}
-{chat_prompt}你正在和{sender_name}聊天,现在请你读读之前的聊天记录，然后给出日常且口语化的回复，平淡一些，
-尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容，不要回复的太有条理。
-{reply_style}
-请注意不要输出多余内容(包括前后缀，冒号和引号，括号，表情等)，只输出回复内容。
-{moderation_prompt}不要输出多余内容(包括前后缀，冒号和引号，括号，表情包，at或 @等 )。
\ No newline at end of file
diff --git a/prompts/zh-CN/replyer.prompt b/prompts/zh-CN/replyer.prompt
deleted file mode 100644
index 4da5c062..00000000
--- a/prompts/zh-CN/replyer.prompt
+++ /dev/null
@@ -1,18 +0,0 @@
-{knowledge_prompt}{tool_info_block}{extra_info_block}
-{expression_habits_block}{memory_retrieval}{jargon_explanation}
-
-你正在qq群里聊天，下面是群里正在聊的内容，其中包含聊天记录和聊天中的图片
-其中标注 {bot_name}(你) 的发言是你自己的发言，请注意区分:
-{time_block}
-{dialogue_prompt}
-
-{reply_target_block}。
-{planner_reasoning}
-{identity}
-{chat_prompt}你正在群里聊天,现在请你读读之前的聊天记录，把握当前的话题，然后给出日常且简短的回复。
-最好一次对一个话题进行回复，免得啰嗦或者回复内容太乱。
-{keywords_reaction_prompt}
-请注意把握聊天内容。
-{reply_style}
-请注意不要输出多余内容(包括不必要的前后缀，冒号，括号，at或 @等 )，只输出发言内容就好。
-现在，你说：
\ No newline at end of file
diff --git a/src/chat/replyer/maisaka_generator.py b/src/chat/replyer/maisaka_generator.py
index 7b1a1043..2ccb0681 100644
--- a/src/chat/replyer/maisaka_generator.py
+++ b/src/chat/replyer/maisaka_generator.py
@@ -174,7 +174,7 @@ class MaisakaReplyGenerator:
 
         try:
             system_prompt = load_prompt(
-                "maidairy_replyer",
+                "maisaka_replyer",
                 bot_name=global_config.bot.nickname,
                 time_block=f"当前时间：{current_time}",
                 identity=self._personality_prompt,
@@ -193,7 +193,7 @@ class MaisakaReplyGenerator:
         ]
         if extra_sections:
             user_sections.append("\n\n".join(extra_sections))
-        user_sections.append(f"【你的想法】\n{reply_reason}")
+        user_sections.append(f"【回复信息参考】\n{reply_reason}")
         user_sections.append("现在，你说：")
 
         user_prompt = "\n\n".join(user_sections)
diff --git a/src/maisaka/chat_loop_service.py b/src/maisaka/chat_loop_service.py
index 9525f299..839ab4f4 100644
--- a/src/maisaka/chat_loop_service.py
+++ b/src/maisaka/chat_loop_service.py
@@ -10,6 +10,7 @@ from typing import Any, Dict, List, Optional, Sequence
 import asyncio
 import json
 import random
+import re
 
 from PIL import Image as PILImage
 from pydantic import BaseModel, Field as PydanticField
@@ -27,7 +28,7 @@ from src.config.config import global_config
 from src.core.tooling import ToolRegistry, ToolSpec
 from src.know_u.knowledge import extract_category_ids_from_result
 from src.llm_models.model_client.base_client import BaseClient
-from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
+from src.llm_models.payload_content.message import ImageMessagePart, Message, MessageBuilder, RoleType, TextMessagePart
 from src.llm_models.payload_content.resp_format import RespFormat, RespFormatType
 from src.llm_models.payload_content.tool_option import ToolCall, ToolDefinitionInput, ToolOption, normalize_tool_options
 from src.services.llm_service import LLMServiceClient
@@ -137,7 +138,7 @@ class MaisakaChatLoopService:
 
             try:
                 self._chat_system_prompt = load_prompt(
-                    "maidairy_chat",
+                    "maisaka_chat",
                     file_tools_section=tools_section,
                     bot_name=global_config.bot.nickname,
                     identity=self._personality_prompt,
@@ -695,6 +696,61 @@ class MaisakaChatLoopService:
             padding=(0, 1),
         )
 
+    @staticmethod
+    def _estimate_text_tokens(text: str) -> int:
+        """Heuristically estimate the input token count of one text segment (0 when blank)."""
+        normalized_text = text.strip()
+        if not normalized_text:
+            return 0
+
+        cjk_char_count = sum(1 for char in normalized_text if "\u4e00" <= char <= "\u9fff")  # 1 per char in U+4E00..U+9FFF
+        latin_chunks = re.findall(r"[A-Za-z0-9_]+", normalized_text)  # runs of ASCII letters, digits, underscore
+        latin_token_count = sum(max(1, (len(chunk) + 3) // 4) for chunk in latin_chunks)  # ceil(len / 4) per run
+        punctuation_count = len(re.findall(r"[^\w\s]", normalized_text))  # 1 per non-word, non-space char
+        whitespace_bonus = max(1, normalized_text.count("\n"))  # newline count, but never less than 1
+        return cjk_char_count + latin_token_count + punctuation_count + whitespace_bonus  # NOTE(review): kana/Hangul add 0 - confirm intended
+
+    @classmethod
+    def _estimate_request_tokens(cls, messages: Sequence[Message]) -> int:
+        """Estimate the total input token count of this round's request messages."""
+        total_tokens = 0
+        for message in messages:
+            total_tokens += 4  # flat allowance per message (presumably framing overhead - confirm)
+            total_tokens += cls._estimate_text_tokens(str(message.role.value))
+            if message.tool_call_id:
+                total_tokens += cls._estimate_text_tokens(message.tool_call_id)
+            if message.tool_calls:
+                for tool_call in message.tool_calls:
+                    total_tokens += cls._estimate_text_tokens(getattr(tool_call, "func_name", "") or "")  # missing/None name counts as ""
+                    total_tokens += cls._estimate_text_tokens(
+                        json.dumps(getattr(tool_call, "args", {}) or {}, ensure_ascii=False)  # args measured as JSON text, non-ASCII kept literal
+                    )
+            for part in message.parts:
+                if isinstance(part, TextMessagePart):
+                    total_tokens += cls._estimate_text_tokens(part.text)
+                    continue
+                if isinstance(part, ImageMessagePart):
+                    total_tokens += max(256, len(part.image_base64) // 12)  # image: base64 length / 12, floor of 256
+        return total_tokens
+
+    @staticmethod
+    def _build_prompt_stats_text(
+        *,
+        selected_history_count: int,
+        built_message_count: int,
+        input_token_count: int,
+    ) -> str:
+        """Build the one-line statistics text describing this round's prompt."""
+        if input_token_count >= 10_000:  # from 10k upward, abbreviate to one decimal plus "k"
+            input_token_text = f"{input_token_count / 1000:.1f}k"
+        else:
+            input_token_text = str(input_token_count)
+        return (
+            f"已选上下文消息数={selected_history_count} "
+            f"大模型消息数={built_message_count} "
+            f"估算输入Token={input_token_text}"
+        )
+
     async def chat_loop_step(self, chat_history: List[LLMContextMessage]) -> ChatResponse:
         """执行一轮 Maisaka 规划器请求。
 
@@ -708,6 +764,13 @@ class MaisakaChatLoopService:
         await self.ensure_chat_prompt_loaded()
         selected_history, selection_reason = self._select_llm_context_messages(chat_history)
         built_messages = self._build_request_messages(selected_history)
+        input_token_count = self._estimate_request_tokens(built_messages)
+        prompt_stats_text = self._build_prompt_stats_text(
+            selected_history_count=len(selected_history),
+            built_message_count=len(built_messages),
+            input_token_count=input_token_count,
+        )
+        display_subtitle = f"{selection_reason} | {prompt_stats_text}"
 
         def message_factory(_client: BaseClient) -> List[Message]:
             """返回当前轮次已经构建好的请求消息。
@@ -743,7 +806,7 @@ class MaisakaChatLoopService:
                 Panel(
                     Group(*ordered_panels),
                     title="MaiSaka 大模型请求 - 对话单步",
-                    subtitle=selection_reason,
+                    subtitle=display_subtitle,
                     border_style="cyan",
                     padding=(0, 1),
                 )
@@ -757,6 +820,7 @@ class MaisakaChatLoopService:
             f"工具数={len(all_tools)} "
             f"启用打断={self._interrupt_flag is not None}"
         )
+        logger.info(f"本轮Prompt统计: {prompt_stats_text}")
         generation_result = await self._llm_chat.generate_response_with_messages(
             message_factory=message_factory,
             options=LLMGenerationOptions(
diff --git a/src/maisaka/reasoning_engine.py b/src/maisaka/reasoning_engine.py
index dd806e4b..64d1340a 100644
--- a/src/maisaka/reasoning_engine.py
+++ b/src/maisaka/reasoning_engine.py
@@ -1,5 +1,6 @@
 """Maisaka 推理引擎。"""
 
+from base64 import b64decode
 from datetime import datetime
 from typing import TYPE_CHECKING, Any, Optional, cast
 
@@ -15,7 +16,7 @@ from src.chat.heart_flow.heartFC_utils import CycleDetail
 from src.chat.message_receive.message import SessionMessage
 from src.chat.replyer.replyer_manager import replyer_manager
 from src.chat.utils.utils import process_llm_response
-from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
+from src.common.data_models.message_component_data_model import EmojiComponent, MessageSequence, TextComponent
 from src.common.database.database import get_db_session
 from src.common.database.database_model import PersonInfo
 from src.common.logger import get_logger
@@ -82,7 +83,7 @@ class MaisakaReasoningEngine:
 
                 self._runtime._agent_state = self._runtime._STATE_RUNNING
                 if cached_messages:
-                    self._append_wait_interrupted_message_if_needed()
+                    self._clear_pending_wait_tool_call_id()
                     await self._ingest_messages(cached_messages)
                     anchor_message = cached_messages[-1]
                 else:
@@ -94,7 +95,7 @@ class MaisakaReasoningEngine:
                         self._runtime._internal_turn_queue.task_done()
                         continue
                     logger.info(f"{self._runtime.log_prefix} 等待超时后开始新一轮思考")
-                    self._runtime._chat_history.append(self._build_wait_timeout_message())
+                    self._clear_pending_wait_tool_call_id()
                     self._trim_chat_history()
                 try:
                     for round_index in range(self._runtime._max_internal_rounds):
@@ -179,6 +180,10 @@ class MaisakaReasoningEngine:
             return self._runtime.message_cache[-1]
         return None
 
+    def _clear_pending_wait_tool_call_id(self) -> None:
+        """Reset the runtime's pending wait tool-call id to None."""
+        self._runtime._pending_wait_tool_call_id = None
+
     def _build_wait_timeout_message(self) -> ToolResultMessage:
         """构造 wait 超时后的工具结果消息。"""
         tool_call_id = self._runtime._pending_wait_tool_call_id or "wait_timeout"
@@ -260,20 +265,22 @@ class MaisakaReasoningEngine:
         timestamp_text = message.timestamp.strftime("%H:%M:%S")
         user_name = user_info.user_nickname or user_info.user_id
         group_card = user_info.user_cardname or ""
-        message_id = message.message_id or ""
-        return (
-            f"[时间]{timestamp_text}\n"
-            f"[用户]{user_name}\n"
-            f"[用户群昵称]{group_card}\n"
-            f"[msg_id]{message_id}\n"
-            "[发言内容]"
-        )
+        prefix_parts = [
+            f"[时间]{timestamp_text}\n",
+            f"[用户]{user_name}\n",
+            f"[用户群昵称]{group_card}\n",
+        ]
+        if not message.is_notify and message.message_id:
+            prefix_parts.append(f"[msg_id]{message.message_id}\n")
+        prefix_parts.append("[发言内容]")
+        return "".join(prefix_parts)
 
     def _build_legacy_visible_text(self, message: SessionMessage, source_sequence: MessageSequence) -> str:
         user_info = message.message_info.user_info
         speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
         legacy_sequence = MessageSequence([])
-        legacy_sequence.text(format_speaker_content(speaker_name, "", message.timestamp, message.message_id))
+        visible_message_id = None if message.is_notify else message.message_id
+        legacy_sequence.text(format_speaker_content(speaker_name, "", message.timestamp, visible_message_id))
         for component in clone_message_sequence(source_sequence).components:
             legacy_sequence.components.append(component)
         return build_visible_text_from_sequence(legacy_sequence).strip()
@@ -281,7 +288,8 @@ class MaisakaReasoningEngine:
     def _build_legacy_visible_text_from_text(self, message: SessionMessage, content: str) -> str:
         user_info = message.message_info.user_info
         speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
-        return format_speaker_content(speaker_name, content, message.timestamp, message.message_id).strip()
+        visible_message_id = None if message.is_notify else message.message_id
+        return format_speaker_content(speaker_name, content, message.timestamp, visible_message_id).strip()
 
     def _insert_chat_history_message(self, message: LLMContextMessage) -> int:
         """将消息按处理顺序追加到聊天历史末尾。"""
@@ -1385,3 +1393,154 @@ class MaisakaReasoningEngine:
             tool_call.func_name,
             "发送表情包失败。",
         )
+
+    async def _handle_send_emoji(self, tool_call: ToolCall) -> ToolExecutionResult:
+        """Pick an emoji for the requested emotion (random fallback), send it to the session, and return the tool result."""
+        from src.chat.emoji_system.emoji_manager import emoji_manager
+        from src.common.utils.utils_image import ImageUtils
+        import random
+
+        tool_args = tool_call.args or {}
+        emotion = str(tool_args.get("emotion") or "").strip()
+        structured_result: dict[str, Any] = {
+            "success": False,
+            "message": "",
+            "description": "",
+            "emotion": [],
+            "requested_emotion": emotion,
+        }
+
+        logger.info(f"{self._runtime.log_prefix} ??????????: ??={emotion!r}")  # NOTE(review): non-ASCII text in this method's messages was lost to "?" (encoding damage) — restore from the original source
+
+        if not emoji_manager.emojis:  # nothing available to send
+            structured_result["message"] = "??????????????"
+            return self._build_tool_failure_result(
+                tool_call.func_name,
+                structured_result["message"],
+                structured_content=structured_result,
+            )
+
+        selected_emoji = None
+        if emotion:
+            matching_emojis = [
+                emoji
+                for emoji in emoji_manager.emojis
+                if emotion.lower() in (item.lower() for item in emoji.emotion)  # case-insensitive match on emotion tags
+            ]
+            if matching_emojis:
+                selected_emoji = random.choice(matching_emojis)
+                logger.info(
+                    f"{self._runtime.log_prefix} ?? {len(matching_emojis)} ????? {emotion!r} ?????"
+                    f"????{selected_emoji.description}"
+                )
+
+        if selected_emoji is None:  # no emotion requested, or no emoji matched it: fall back to any emoji
+            selected_emoji = random.choice(emoji_manager.emojis)
+            logger.info(
+                f"{self._runtime.log_prefix} ????????? {emotion!r}?"
+                f"??????{selected_emoji.description}"
+            )
+
+        emoji_description = selected_emoji.description.strip()
+        emoji_emotions = [str(item).strip() for item in selected_emoji.emotion if str(item).strip()]
+        structured_result["description"] = emoji_description
+        structured_result["emotion"] = emoji_emotions
+
+        emoji_manager.update_emoji_usage(selected_emoji)  # NOTE(review): usage is recorded before the send is attempted, so failed sends count too — confirm intended
+
+        try:
+            emoji_base64 = ImageUtils.image_path_to_base64(str(selected_emoji.full_path))
+            if not emoji_base64:
+                raise ValueError("??????? base64 ??")
+        except Exception as exc:
+            logger.error(f"{self._runtime.log_prefix} ??????? base64 ??: {exc}")
+            structured_result["message"] = f"????????{exc}"
+            return self._build_tool_failure_result(
+                tool_call.func_name,
+                structured_result["message"],
+                structured_content=structured_result,
+            )
+
+        try:
+            sent = await send_service.emoji_to_stream(
+                emoji_base64=emoji_base64,
+                stream_id=self._runtime.session_id,
+                storage_message=True,
+                set_reply=False,
+                reply_message=None,
+            )
+        except Exception as exc:
+            logger.exception(f"{self._runtime.log_prefix} ??????????: {exc}")
+            structured_result["message"] = f"???????????{exc}"
+            return self._build_tool_failure_result(
+                tool_call.func_name,
+                structured_result["message"],
+                structured_content=structured_result,
+            )
+
+        if sent:
+            success_message = (
+                f"???????{emoji_description}????{', '.join(emoji_emotions)}?"
+                if emoji_emotions
+                else f"???????{emoji_description}"
+            )
+            logger.info(
+                f"{self._runtime.log_prefix} ???????: "
+                f"??={selected_emoji.description!r} ????={selected_emoji.emotion}"
+            )
+            self._append_sent_emoji_to_chat_history(  # mirror the sent emoji into the runtime chat history
+                emoji_base64=emoji_base64,
+                success_message=success_message,
+            )
+            structured_result["success"] = True
+            structured_result["message"] = success_message
+            return self._build_tool_success_result(
+                tool_call.func_name,
+                success_message,
+                structured_content=structured_result,
+            )
+
+        logger.warning(f"{self._runtime.log_prefix} ???????")  # send call returned a falsy result without raising
+        structured_result["message"] = "????????"
+        return self._build_tool_failure_result(
+            tool_call.func_name,
+            structured_result["message"],
+            structured_content=structured_result,
+        )
+
+    def _append_sent_emoji_to_chat_history(
+        self,
+        *,
+        emoji_base64: str,
+        success_message: str,
+    ) -> None:
+        """Append an emoji the bot just sent to the runtime chat history.
+
+        Args:
+            emoji_base64: Base64-encoded image data of the emoji that was sent.
+            success_message: Result text stored as the emoji component's content.
+        """
+        bot_name = global_config.bot.nickname.strip() or "MaiSaka"
+        reply_timestamp = datetime.now()
+        # Field labels mirror the message prefix built elsewhere in this patch; the
+        # previous "?" runs were an encoding loss of exactly these labels.
+        planner_prefix = (
+            f"[时间]{reply_timestamp.strftime('%H:%M:%S')}\n"
+            f"[用户]{bot_name}\n"
+            "[用户群昵称]\n"
+            "[msg_id]\n"
+            "[发言内容]"
+        )
+        emoji_component = EmojiComponent(
+            binary_hash="",
+            content=success_message,
+            binary_data=b64decode(emoji_base64),
+        )
+        history_message = SessionBackedMessage(
+            raw_message=MessageSequence([TextComponent(planner_prefix), emoji_component]),
+            # NOTE(review): "[表情包]" is inferred from the garbled three-character "[???]" — confirm.
+            visible_text=format_speaker_content(bot_name, "[表情包]", reply_timestamp),
+            timestamp=reply_timestamp,
+            source_kind="guided_reply",
+        )
+        self._runtime._chat_history.append(history_message)

From 503a257d6663d2867d7f96578ea9928454c76b07 Mon Sep 17 00:00:00 2001
From: SengokuCola <1026294844@qq.com>
Date: Wed, 1 Apr 2026 13:06:01 +0800
Subject: [PATCH 2/3] =?UTF-8?q?remove=EF=BC=9A=E6=97=A0=E7=94=A8=E9=85=8D?=
 =?UTF-8?q?=E7=BD=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 AGENTS.md                             |  5 ++---
 docs-src/plugins/api/generator-api.md |  5 +----
 prompts/zh-CN/maisaka_chat.prompt     |  8 ++++----
 src/chat/replyer/group_generator.py   | 15 ++------------
 src/chat/replyer/maisaka_generator.py |  2 --
 src/chat/replyer/private_generator.py | 15 ++------------
 src/config/config.py                  |  4 ----
 src/config/legacy_migration.py        | 12 +++++++++++
 src/config/official_configs.py        | 29 ---------------------------
 src/llm_models/utils_model.py         | 12 -----------
 src/maisaka/builtin_tools.py          |  6 +-----
 src/maisaka/reasoning_engine.py       |  2 +-
 src/services/generator_service.py     |  2 --
 src/webui/routers/config.py           |  3 ---
 14 files changed, 25 insertions(+), 95 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index b3456610..c7e66fa5 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -24,9 +24,7 @@
 3. 对于参数化泛型，应该使用`typing`模块中的类型注解来指定参数化泛型的类型。
     - 例如，使用`List[int]`来表示一个包含整数的列表，使用`Dict[str, Any]`来表示一个键为字符串，值为任意类型的字典。
 ## 变量规范
-1. 变量命名应该具有描述性，能够清晰地表达变量的用途和含义。
-2. 变量命名应该遵循下划线命名法。
-3. 当确定某个变量/实例是某种类型的时候（优先按照类型注解确定，除非你分析出类型注解是错误的），可以不必使用`or`进行fallback。
+1. 当确定某个变量/实例是某种类型的时候（优先按照类型注解确定，除非你分析出类型注解是错误的），可以不必使用`or`进行fallback。
     - 例如，`bot_nickname = (global_config.bot.nickname or "").strip()` 可以改为 `bot_nickname = global_config.bot.nickname.strip()`，前提是我们确定`global_config.bot.nickname`一定是一个字符串。
 ## 类属性使用规范
 1. 应该尽量减少使用getattr和setattr方法，除非是在对一个动态类进行处理或者使用Monkeypatch完成Pytest
@@ -36,6 +34,7 @@
 # 运行/调试/构建/测试/依赖
 优先使用uv
 依赖项以 pyproject.toml 为准
+不要修改dashboard下的内容，因为这部分内容由另一个仓库build
 
 # 语言规范
 
diff --git a/docs-src/plugins/api/generator-api.md b/docs-src/plugins/api/generator-api.md
index afeb6eec..e0ea28cb 100644
--- a/docs-src/plugins/api/generator-api.md
+++ b/docs-src/plugins/api/generator-api.md
@@ -54,7 +54,6 @@ async def generate_reply(
     reply_to: str = "",
     extra_info: str = "",
     available_actions: Optional[Dict[str, ActionInfo]] = None,
-    enable_tool: bool = False,
     enable_splitter: bool = True,
     enable_chinese_typo: bool = True,
     return_prompt: bool = False,
@@ -73,7 +72,6 @@ async def generate_reply(
 - `reply_to`: 回复目标，格式为 `{发送者的person_name:消息内容}`
 - `extra_info`: 附加信息
 - `available_actions`: 可用动作字典，格式为 `{"action_name": ActionInfo}`
-- `enable_tool`: 是否启用工具
 - `enable_splitter`: 是否启用分割器
 - `enable_chinese_typo`: 是否启用中文错别字
 - `return_prompt`: 是否返回提示词
@@ -91,7 +89,6 @@ success, reply_set, prompt = await generator_api.generate_reply(
     action_data=action_data,
     reply_to="麦麦:你好",
     available_actions=action_info,
-    enable_tool=True,
     return_prompt=True
 )
 if success:
@@ -198,4 +195,4 @@ async def generate_response_custom(
 2. **聊天流依赖**：需要有效的聊天流对象才能正常工作
 3. **性能考虑**：回复生成可能需要一些时间，特别是使用LLM时
 4. **回复格式**：返回的回复集合是元组列表，包含类型和内容
-5. **上下文感知**：生成器会考虑聊天上下文和历史消息，除非你用的是自定义提示词。
\ No newline at end of file
+5. **上下文感知**：生成器会考虑聊天上下文和历史消息，除非你用的是自定义提示词。
diff --git a/prompts/zh-CN/maisaka_chat.prompt b/prompts/zh-CN/maisaka_chat.prompt
index 4975cbb2..65945db6 100644
--- a/prompts/zh-CN/maisaka_chat.prompt
+++ b/prompts/zh-CN/maisaka_chat.prompt
@@ -13,16 +13,16 @@
 
 你可以使用这些工具：
 - wait(seconds) - 暂时停止对话，等待(seconds)秒，把话语权交给用户，等待对方新的发言。
-- stop() - 当你判断{bot_name}现在不应该发言，结束对话，不进行任何回复，直到对方有新消息。
+- no_reply() - 当你判断{bot_name}现在不应该发言，结束对话，不进行任何回复，直到对方有新消息。
 - reply()：当你判断{bot_name}现在应该正式对用户发出一条可见回复时调用。调用后系统会基于你当前这轮的想法生成一条真正展示给用户的回复。
 - query_jargon()：当你认为某些词的含义不明确，或用户询问某些词的含义，需要进行查询
 - 其他定义的工具，你可以视情况合适使用
 
 工具使用规则：
-1.如果{bot_name}已经回复，但用户暂时没有新的回复，且没有新信息需要搜集，使用wait或者stop进行等待
+1.如果{bot_name}已经回复，但用户暂时没有新的回复，且没有新信息需要搜集，使用wait或者no_reply进行等待
 2.如果用户有新发言，但是你评估用户还有后续发言尚未发送，可以适当等待让用户说完
-3.在特定情况下也可以连续回复，例如想要追问，或者补充自己先前的发言，可以不使用stop或者wait
-4.你需要控制自己发言的频率，如果用户一对一聊天，可以以均匀地频率发言，如果用户较多，不要每句都回复，控制回复频率。当你决定暂时不发言，可以使用wait暂时等待一定时间或者stop等待新消息
+3.在特定情况下也可以连续回复，例如想要追问，或者补充自己先前的发言，可以不使用no_reply或者wait
+4.你需要控制自己发言的频率，如果用户一对一聊天，可以以均匀地频率发言，如果用户较多，不要每句都回复，控制回复频率。当你决定暂时不发言，可以使用wait暂时等待一定时间或者no_reply等待新消息
 5.不要每条消息都回复，不要直接回复别的用户发送的表情包消息，控制回复频率
 6.如果存在用户的疑问，或者对某些概念的不确定，你可以使用工具来搜集信息或者查询含义，你可以使用多个工具
 
diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py
index 10630ecc..71f1a40c 100644
--- a/src/chat/replyer/group_generator.py
+++ b/src/chat/replyer/group_generator.py
@@ -68,7 +68,6 @@ class DefaultReplyer:
         reply_reason: str = "",
         available_actions: Optional[Dict[str, ActionInfo]] = None,
         chosen_actions: Optional[List[ActionPlannerInfo]] = None,
-        enable_tool: bool = True,
         from_plugin: bool = True,
         stream_id: Optional[str] = None,
         reply_message: Optional[SessionMessage] = None,
@@ -87,7 +86,6 @@ class DefaultReplyer:
             reply_reason: 回复原因
             available_actions: 可用的动作信息字典
             chosen_actions: 已选动作
-            enable_tool: 是否启用工具调用
             from_plugin: 是否来自插件
 
         Returns:
@@ -112,7 +110,6 @@ class DefaultReplyer:
                     extra_info=extra_info,
                     available_actions=available_actions,
                     chosen_actions=chosen_actions,
-                    enable_tool=enable_tool,
                     reply_message=reply_message,
                     reply_reason=reply_reason,
                     reply_time_point=reply_time_point,
@@ -394,26 +391,20 @@ class DefaultReplyer:
 
         return f"{expression_habits_title}\n{expression_habits_block}", selected_ids
 
-    async def build_tool_info(self, chat_history: str, sender: str, target: str, enable_tool: bool = True) -> str:
+    async def build_tool_info(self, chat_history: str, sender: str, target: str) -> str:
         del chat_history
         del sender
         del target
-        del enable_tool
         return ""
         """构建工具信息块
 
         Args:
             chat_history: 聊天历史记录
             reply_to: 回复对象，格式为 "发送者:消息内容"
-            enable_tool: 是否启用工具调用
-
         Returns:
             str: 工具信息字符串
         """
 
-        if not enable_tool:
-            return ""
-
         try:
             # 使用工具执行器获取信息
             tool_results = []
@@ -741,7 +732,6 @@ class DefaultReplyer:
         reply_reason: str = "",
         available_actions: Optional[Dict[str, ActionInfo]] = None,
         chosen_actions: Optional[List[ActionPlannerInfo]] = None,
-        enable_tool: bool = True,
         reply_time_point: float = time.time(),
         think_level: int = 1,
         unknown_words: Optional[List[str]] = None,
@@ -755,7 +745,6 @@ class DefaultReplyer:
             available_actions: 可用动作
             chosen_actions: 已选动作
             enable_timeout: 是否启用超时处理
-            enable_tool: 是否启用工具调用
             reply_message: 回复的原始消息
         Returns:
             str: 构建好的上下文
@@ -840,7 +829,7 @@ class DefaultReplyer:
                 "expression_habits",
             ),
             self._time_and_run_task(
-                self.build_tool_info(chat_talking_prompt_short, sender, target, enable_tool=enable_tool), "tool_info"
+                self.build_tool_info(chat_talking_prompt_short, sender, target), "tool_info"
             ),
             self._time_and_run_task(self.get_prompt_info(chat_talking_prompt_short, sender, target), "prompt_info"),
             self._time_and_run_task(self.build_actions_prompt(available_actions, chosen_actions), "actions_info"),
diff --git a/src/chat/replyer/maisaka_generator.py b/src/chat/replyer/maisaka_generator.py
index 2ccb0681..7c5a5670 100644
--- a/src/chat/replyer/maisaka_generator.py
+++ b/src/chat/replyer/maisaka_generator.py
@@ -288,7 +288,6 @@ class MaisakaReplyGenerator:
         reply_reason: str = "",
         available_actions: Optional[Dict[str, ActionInfo]] = None,
         chosen_actions: Optional[List[object]] = None,
-        enable_tool: bool = True,
         from_plugin: bool = True,
         stream_id: Optional[str] = None,
         reply_message: Optional[SessionMessage] = None,
@@ -303,7 +302,6 @@ class MaisakaReplyGenerator:
         """结合上下文生成 Maisaka 的最终可见回复。"""
         del available_actions
         del chosen_actions
-        del enable_tool
         del extra_info
         del from_plugin
         del log_reply
diff --git a/src/chat/replyer/private_generator.py b/src/chat/replyer/private_generator.py
index bd1c7bbc..ccc45c4b 100644
--- a/src/chat/replyer/private_generator.py
+++ b/src/chat/replyer/private_generator.py
@@ -65,7 +65,6 @@ class PrivateReplyer:
         reply_reason: str = "",
         available_actions: Optional[Dict[str, ActionInfo]] = None,
         chosen_actions: Optional[List[ActionPlannerInfo]] = None,
-        enable_tool: bool = True,
         from_plugin: bool = True,
         think_level: int = 1,
         stream_id: Optional[str] = None,
@@ -84,7 +83,6 @@ class PrivateReplyer:
             reply_reason: 回复原因
             available_actions: 可用的动作信息字典
             chosen_actions: 已选动作
-            enable_tool: 是否启用工具调用
             from_plugin: 是否来自插件
 
         Returns:
@@ -103,7 +101,6 @@ class PrivateReplyer:
                     extra_info=extra_info,
                     available_actions=available_actions,
                     chosen_actions=chosen_actions,
-                    enable_tool=enable_tool,
                     reply_message=reply_message,
                     reply_reason=reply_reason,
                     unknown_words=unknown_words,
@@ -287,26 +284,20 @@ class PrivateReplyer:
 
         return f"{expression_habits_title}\n{expression_habits_block}", selected_ids
 
-    async def build_tool_info(self, chat_history: str, sender: str, target: str, enable_tool: bool = True) -> str:
+    async def build_tool_info(self, chat_history: str, sender: str, target: str) -> str:
         del chat_history
         del sender
         del target
-        del enable_tool
         return ""
         """构建工具信息块
 
         Args:
             chat_history: 聊天历史记录
             reply_to: 回复对象，格式为 "发送者:消息内容"
-            enable_tool: 是否启用工具调用
-
         Returns:
             str: 工具信息字符串
         """
 
-        if not enable_tool:
-            return ""
-
         try:
             # 使用工具执行器获取信息
             tool_results = []
@@ -612,7 +603,6 @@ class PrivateReplyer:
         reply_reason: str = "",
         available_actions: Optional[Dict[str, ActionInfo]] = None,
         chosen_actions: Optional[List[ActionPlannerInfo]] = None,
-        enable_tool: bool = True,
         unknown_words: Optional[List[str]] = None,
     ) -> Tuple[str, List[int]]:
         """
@@ -624,7 +614,6 @@ class PrivateReplyer:
             available_actions: 可用动作
             chosen_actions: 已选动作
             enable_timeout: 是否启用超时处理
-            enable_tool: 是否启用工具调用
             reply_message: 回复的原始消息
         Returns:
             str: 构建好的上下文
@@ -719,7 +708,7 @@ class PrivateReplyer:
             ),
             # self._time_and_run_task(self.build_relation_info(chat_talking_prompt_short, sender), "relation_info"),
             self._time_and_run_task(
-                self.build_tool_info(chat_talking_prompt_short, sender, target, enable_tool=enable_tool), "tool_info"
+                self.build_tool_info(chat_talking_prompt_short, sender, target), "tool_info"
             ),
             self._time_and_run_task(self.get_prompt_info(chat_talking_prompt_short, sender, target), "prompt_info"),
             self._time_and_run_task(self.build_actions_prompt(available_actions, chosen_actions), "actions_info"),
diff --git a/src/config/config.py b/src/config/config.py
index 318c987f..601ae5b1 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -36,7 +36,6 @@ from .official_configs import (
     ResponsePostProcessConfig,
     ResponseSplitterConfig,
     TelemetryConfig,
-    ToolConfig,
     VoiceConfig,
     WebUIConfig,
 )
@@ -90,9 +89,6 @@ class Config(ConfigBase):
     message_receive: MessageReceiveConfig = Field(default_factory=MessageReceiveConfig)
     """消息接收配置类"""
 
-    tool: ToolConfig = Field(default_factory=ToolConfig)
-    """工具配置类"""
-
     voice: VoiceConfig = Field(default_factory=VoiceConfig)
     """语音配置类"""
 
diff --git a/src/config/legacy_migration.py b/src/config/legacy_migration.py
index 7baaa03e..c0a4157e 100644
--- a/src/config/legacy_migration.py
+++ b/src/config/legacy_migration.py
@@ -265,6 +265,18 @@ def try_migrate_legacy_bot_config_dict(data: dict[str, Any]) -> MigrationResult:
             migrated_any = True
             reasons.append("experimental.chat_prompts")
 
+    chat = _as_dict(data.get("chat"))
+    if chat is not None and "think_mode" in chat:
+        chat.pop("think_mode", None)
+        migrated_any = True
+        reasons.append("chat.think_mode_removed")
+
+    tool = _as_dict(data.get("tool"))
+    if tool is not None:
+        data.pop("tool", None)
+        migrated_any = True
+        reasons.append("tool_section_removed")
+
     # ExpressionConfig 中的 manual_reflect_operator_id:
     # 旧版本可能是 ""（字符串），新版本期望 Optional[TargetItem]。
     # 空字符串视为未配置，转换为 None/删除键以避免校验错误。
diff --git a/src/config/official_configs.py b/src/config/official_configs.py
index de44bfb6..3a10d5f8 100644
--- a/src/config/official_configs.py
+++ b/src/config/official_configs.py
@@ -236,20 +236,6 @@ class ChatConfig(ConfigBase):
     )
     """上下文长度"""
 
-    think_mode: Literal["classic", "deep", "dynamic"] = Field(
-        default="dynamic",
-        json_schema_extra={
-            "x-widget": "select",
-            "x-icon": "brain",
-        },
-    )
-    """
-    思考模式配置
-    - classic: 默认think_level为0（轻量回复，不需要思考和回忆）
-    - deep: 默认think_level为1（深度回复，需要进行回忆和思考）
-    - dynamic: think_level由planner动态给出（根据planner返回的think_level决定）
-    """
-
     plan_reply_log_max_per_chat: int = Field(
         default=1024,
         json_schema_extra={
@@ -651,21 +637,6 @@ class ExpressionConfig(ConfigBase):
     """是否在回复前尝试对上下文中的黑话进行解释（关闭可减少一次LLM调用，仅影响回复前的黑话匹配与解释，不影响黑话学习）"""
 
 
-class ToolConfig(ConfigBase):
-    """工具配置类"""
-
-    __ui_parent__ = "emoji"
-
-    enable_tool: bool = Field(
-        default=False,
-        json_schema_extra={
-            "x-widget": "switch",
-            "x-icon": "wrench",
-        },
-    )
-    """是否在聊天中启用工具"""
-
-
 class VoiceConfig(ConfigBase):
     """语音识别配置类"""
 
diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index 775fa663..47a30ed6 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -343,11 +343,6 @@ class LLMOrchestrator:
         )
         response = execution_result.api_response
         model_info = execution_result.model_info
-        if self.request_type.startswith("maisaka_"):
-            logger.info(
-                f"LLMOrchestrator[{self.request_type}] generate_response_with_message_async 执行完成 "
-                f"(model={model_info.name}, time_cost={time.time() - start_time:.2f}s)"
-            )
 
         time_cost = time.time() - start_time
         logger.debug(f"LLM请求总耗时: {time_cost}")
@@ -833,14 +828,7 @@ class LLMOrchestrator:
 
             message_list = []
             if message_factory:
-                if self.request_type.startswith("maisaka_"):
-                    logger.info(f"LLMOrchestrator[{self.request_type}] 正在通过 message_factory 构建消息列表")
                 message_list = message_factory(client)
-                if self.request_type.startswith("maisaka_"):
-                    logger.info(
-                        f"LLMOrchestrator[{self.request_type}] message_factory 返回了 {len(message_list)} 条消息"
-                    )
-
             try:
                 request = self._build_client_request(
                     request_type=request_type,
diff --git a/src/maisaka/builtin_tools.py b/src/maisaka/builtin_tools.py
index 6afeb68d..ccb5f8c7 100644
--- a/src/maisaka/builtin_tools.py
+++ b/src/maisaka/builtin_tools.py
@@ -121,11 +121,7 @@ def create_builtin_tool_specs() -> List[ToolSpec]:
         ),
         _build_tool_spec(
             name="no_reply",
-            brief_description="本轮不发送可见回复，继续下一步思考。",
-        ),
-        _build_tool_spec(
-            name="stop",
-            brief_description="暂停当前内部循环，等待新的外部消息。",
+            brief_description="本轮不进行回复，等待其他用户的新消息。",
         ),
         _build_tool_spec(
             name="send_emoji",
diff --git a/src/maisaka/reasoning_engine.py b/src/maisaka/reasoning_engine.py
index 64d1340a..4c7f86dc 100644
--- a/src/maisaka/reasoning_engine.py
+++ b/src/maisaka/reasoning_engine.py
@@ -1282,7 +1282,7 @@ class MaisakaReasoningEngine:
         self._runtime._chat_history.append(history_message)
         return self._build_tool_success_result(
             tool_call.func_name,
-            "可见回复已生成并发送。",
+            "回复已生成并发送。",
             structured_content={
                 "msg_id": target_message_id,
                 "quote": quote_reply,
diff --git a/src/services/generator_service.py b/src/services/generator_service.py
index 278fc3f1..99c9b6fa 100644
--- a/src/services/generator_service.py
+++ b/src/services/generator_service.py
@@ -103,7 +103,6 @@ async def generate_reply(
     available_actions: Optional[Dict[str, ActionInfo]] = None,
     chosen_actions: Optional[List["ActionPlannerInfo"]] = None,
     unknown_words: Optional[List[str]] = None,
-    enable_tool: bool = False,
     enable_splitter: bool = True,
     enable_chinese_typo: bool = True,
     request_type: str = "generator_api",
@@ -133,7 +132,6 @@ async def generate_reply(
             extra_info=extra_info,
             available_actions=available_actions,
             chosen_actions=chosen_actions,
-            enable_tool=enable_tool,
             reply_message=reply_message,
             reply_reason=reply_reason,
             unknown_words=unknown_words,
diff --git a/src/webui/routers/config.py b/src/webui/routers/config.py
index 8d22dc19..5a60b84c 100644
--- a/src/webui/routers/config.py
+++ b/src/webui/routers/config.py
@@ -36,7 +36,6 @@ from src.config.official_configs import (
     ResponsePostProcessConfig,
     ResponseSplitterConfig,
     TelemetryConfig,
-    ToolConfig,
     VoiceConfig,
 )
 from src.webui.config_schema import ConfigSchemaGenerator
@@ -113,7 +112,6 @@ async def get_config_section_schema(section_name: str):
     - experimental: ExperimentalConfig
     - maim_message: MaimMessageConfig
     - lpmm_knowledge: LPMMKnowledgeConfig
-    - tool: ToolConfig
     - memory: MemoryConfig
     - debug: DebugConfig
     - voice: VoiceConfig
@@ -138,7 +136,6 @@ async def get_config_section_schema(section_name: str):
         "experimental": ExperimentalConfig,
         "maim_message": MaimMessageConfig,
         "lpmm_knowledge": LPMMKnowledgeConfig,
-        "tool": ToolConfig,
         "memory": MemoryConfig,
         "debug": DebugConfig,
         "voice": VoiceConfig,

From d713aa95767e71ed4e6a75e5b141211e12407930 Mon Sep 17 00:00:00 2001
From: SengokuCola <1026294844@qq.com>
Date: Wed, 1 Apr 2026 13:18:17 +0800
Subject: [PATCH 3/3] =?UTF-8?q?feat=EF=BC=9A=E6=98=BE=E7=A4=BA=E5=AE=9E?=
 =?UTF-8?q?=E6=97=B6=E5=8D=A0=E7=94=A8=E4=B8=8A=E4=B8=8B=E6=96=87=EF=BC=8C?=
 =?UTF-8?q?=E7=A7=BB=E9=99=A4=E6=97=A7=E8=AE=B0=E5=BF=86=E7=B3=BB=E7=BB=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../data_models/llm_service_data_models.py    |    6 +
 src/llm_models/utils_model.py                 |   29 +-
 src/maisaka/chat_loop_service.py              |   75 +-
 src/memory_system/chat_history_summarizer.py  | 1066 -----------------
 src/memory_system/memory_retrieval.py         | 1046 ----------------
 src/memory_system/memory_utils.py             |   98 --
 src/memory_system/retrieval_tools/__init__.py |   36 -
 .../retrieval_tools/query_lpmm_knowledge.py   |   75 --
 .../retrieval_tools/query_words.py            |   78 --
 .../retrieval_tools/return_information.py     |   42 -
 .../retrieval_tools/tool_registry.py          |  167 ---
 11 files changed, 55 insertions(+), 2663 deletions(-)
 delete mode 100644 src/memory_system/chat_history_summarizer.py
 delete mode 100644 src/memory_system/memory_retrieval.py
 delete mode 100644 src/memory_system/memory_utils.py
 delete mode 100644 src/memory_system/retrieval_tools/__init__.py
 delete mode 100644 src/memory_system/retrieval_tools/query_lpmm_knowledge.py
 delete mode 100644 src/memory_system/retrieval_tools/query_words.py
 delete mode 100644 src/memory_system/retrieval_tools/return_information.py
 delete mode 100644 src/memory_system/retrieval_tools/tool_registry.py

diff --git a/src/common/data_models/llm_service_data_models.py b/src/common/data_models/llm_service_data_models.py
index 15b530ca..cacd3e10 100644
--- a/src/common/data_models/llm_service_data_models.py
+++ b/src/common/data_models/llm_service_data_models.py
@@ -66,6 +66,9 @@ class LLMResponseResult(BaseDataModel):
     reasoning: str = field(default_factory=str)
     model_name: str = field(default_factory=str)
     tool_calls: List[ToolCall] | None = None
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
 
 
 @dataclass(slots=True)
@@ -120,6 +123,9 @@ class LLMServiceResult(BaseDataModel):
             "response": self.completion.response,
             "reasoning": self.completion.reasoning,
             "model_name": self.completion.model_name,
+            "prompt_tokens": self.completion.prompt_tokens,
+            "completion_tokens": self.completion.completion_tokens,
+            "total_tokens": self.completion.total_tokens,
         }
         if self.completion.tool_calls is not None:
             payload["tool_calls"] = [
diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py
index 47a30ed6..accfe355 100644
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -34,6 +34,7 @@ from src.llm_models.model_client.base_client import (
     ClientRequest,
     EmbeddingRequest,
     ResponseRequest,
+    UsageRecord,
     client_registry,
 )
 from src.llm_models.payload_content.message import Message, MessageBuilder
@@ -137,6 +138,7 @@ class LLMOrchestrator:
         reasoning_content: str,
         model_name: str,
         tool_calls: List[ToolCall] | None,
+        usage: UsageRecord | None = None,
     ) -> LLMResponseResult:
         """构建统一的文本响应结果。
 
@@ -154,6 +156,9 @@ class LLMOrchestrator:
             reasoning=reasoning_content,
             model_name=model_name,
             tool_calls=tool_calls,
+            prompt_tokens=usage.prompt_tokens if usage is not None else 0,
+            completion_tokens=usage.completion_tokens if usage is not None else 0,
+            total_tokens=usage.total_tokens if usage is not None else 0,
         )
 
     async def generate_response_for_image(
@@ -215,7 +220,13 @@ class LLMOrchestrator:
                 endpoint="/chat/completions",
                 time_cost=time_cost,
             )
-        return self._build_generation_result(content, reasoning_content, model_info.name, tool_calls)
+        return self._build_generation_result(
+            content,
+            reasoning_content,
+            model_info.name,
+            tool_calls,
+            response.usage,
+        )
 
     async def generate_response_for_voice(self, voice_base64: str) -> LLMAudioTranscriptionResult:
         """为语音生成转录响应。
@@ -298,7 +309,13 @@ class LLMOrchestrator:
                 endpoint="/chat/completions",
                 time_cost=time.time() - start_time,
             )
-        return self._build_generation_result(content or "", reasoning_content, model_info.name, tool_calls)
+        return self._build_generation_result(
+            content or "",
+            reasoning_content,
+            model_info.name,
+            tool_calls,
+            response.usage,
+        )
 
     async def generate_response_with_message_async(
         self,
@@ -364,7 +381,13 @@ class LLMOrchestrator:
                 endpoint="/chat/completions",
                 time_cost=time_cost,
             )
-        return self._build_generation_result(content or "", reasoning_content, model_info.name, tool_calls)
+        return self._build_generation_result(
+            content or "",
+            reasoning_content,
+            model_info.name,
+            tool_calls,
+            response.usage,
+        )
 
     async def get_embedding(self, embedding_input: str) -> LLMEmbeddingResult:
         """获取嵌入向量。
diff --git a/src/maisaka/chat_loop_service.py b/src/maisaka/chat_loop_service.py
index 839ab4f4..58724a4f 100644
--- a/src/maisaka/chat_loop_service.py
+++ b/src/maisaka/chat_loop_service.py
@@ -10,7 +10,6 @@ from typing import Any, Dict, List, Optional, Sequence
 import asyncio
 import json
 import random
-import re
 
 from PIL import Image as PILImage
 from pydantic import BaseModel, Field as PydanticField
@@ -28,7 +27,7 @@ from src.config.config import global_config
 from src.core.tooling import ToolRegistry, ToolSpec
 from src.know_u.knowledge import extract_category_ids_from_result
 from src.llm_models.model_client.base_client import BaseClient
-from src.llm_models.payload_content.message import ImageMessagePart, Message, MessageBuilder, RoleType, TextMessagePart
+from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
 from src.llm_models.payload_content.resp_format import RespFormat, RespFormatType
 from src.llm_models.payload_content.tool_option import ToolCall, ToolDefinitionInput, ToolOption, normalize_tool_options
 from src.services.llm_service import LLMServiceClient
@@ -697,58 +696,29 @@ class MaisakaChatLoopService:
         )
 
     @staticmethod
-    def _estimate_text_tokens(text: str) -> int:
-        """估算单段文本的输入 token 数。"""
-        normalized_text = text.strip()
-        if not normalized_text:
-            return 0
-
-        cjk_char_count = sum(1 for char in normalized_text if "\u4e00" <= char <= "\u9fff")
-        latin_chunks = re.findall(r"[A-Za-z0-9_]+", normalized_text)
-        latin_token_count = sum(max(1, (len(chunk) + 3) // 4) for chunk in latin_chunks)
-        punctuation_count = len(re.findall(r"[^\w\s]", normalized_text))
-        whitespace_bonus = max(1, normalized_text.count("\n"))
-        return cjk_char_count + latin_token_count + punctuation_count + whitespace_bonus
+    def _format_token_count(token_count: int) -> str:
+        """格式化 token 数量展示文本。"""
+        if token_count >= 10_000:
+            return f"{token_count / 1000:.1f}k"
+        return str(token_count)
 
     @classmethod
-    def _estimate_request_tokens(cls, messages: Sequence[Message]) -> int:
-        """估算本轮请求消息的总输入 token 数。"""
-        total_tokens = 0
-        for message in messages:
-            total_tokens += 4
-            total_tokens += cls._estimate_text_tokens(str(message.role.value))
-            if message.tool_call_id:
-                total_tokens += cls._estimate_text_tokens(message.tool_call_id)
-            if message.tool_calls:
-                for tool_call in message.tool_calls:
-                    total_tokens += cls._estimate_text_tokens(getattr(tool_call, "func_name", "") or "")
-                    total_tokens += cls._estimate_text_tokens(
-                        json.dumps(getattr(tool_call, "args", {}) or {}, ensure_ascii=False)
-                    )
-            for part in message.parts:
-                if isinstance(part, TextMessagePart):
-                    total_tokens += cls._estimate_text_tokens(part.text)
-                    continue
-                if isinstance(part, ImageMessagePart):
-                    total_tokens += max(256, len(part.image_base64) // 12)
-        return total_tokens
-
-    @staticmethod
     def _build_prompt_stats_text(
+        cls,
         *,
         selected_history_count: int,
         built_message_count: int,
-        input_token_count: int,
+        prompt_tokens: int,
+        completion_tokens: int,
+        total_tokens: int,
     ) -> str:
         """构造本轮 prompt 的统计信息文本。"""
-        if input_token_count >= 10_000:
-            input_token_text = f"{input_token_count / 1000:.1f}k"
-        else:
-            input_token_text = str(input_token_count)
         return (
             f"已选上下文消息数={selected_history_count} "
             f"大模型消息数={built_message_count} "
-            f"估算输入Token={input_token_text}"
+            f"实际输入Token={cls._format_token_count(prompt_tokens)} "
+            f"输出Token={cls._format_token_count(completion_tokens)} "
+            f"总Token={cls._format_token_count(total_tokens)}"
         )
 
     async def chat_loop_step(self, chat_history: List[LLMContextMessage]) -> ChatResponse:
@@ -764,13 +734,6 @@ class MaisakaChatLoopService:
         await self.ensure_chat_prompt_loaded()
         selected_history, selection_reason = self._select_llm_context_messages(chat_history)
         built_messages = self._build_request_messages(selected_history)
-        input_token_count = self._estimate_request_tokens(built_messages)
-        prompt_stats_text = self._build_prompt_stats_text(
-            selected_history_count=len(selected_history),
-            built_message_count=len(built_messages),
-            input_token_count=input_token_count,
-        )
-        display_subtitle = f"{selection_reason} | {prompt_stats_text}"
 
         def message_factory(_client: BaseClient) -> List[Message]:
             """返回当前轮次已经构建好的请求消息。
@@ -806,7 +769,7 @@ class MaisakaChatLoopService:
                 Panel(
                     Group(*ordered_panels),
                     title="MaiSaka 大模型请求 - 对话单步",
-                    subtitle=display_subtitle,
+                    subtitle=selection_reason,
                     border_style="cyan",
                     padding=(0, 1),
                 )
@@ -820,7 +783,6 @@ class MaisakaChatLoopService:
             f"工具数={len(all_tools)} "
             f"启用打断={self._interrupt_flag is not None}"
         )
-        logger.info(f"??Prompt??: {prompt_stats_text}")
         generation_result = await self._llm_chat.generate_response_with_messages(
             message_factory=message_factory,
             options=LLMGenerationOptions(
@@ -833,6 +795,15 @@ class MaisakaChatLoopService:
         request_elapsed = perf_counter() - request_started_at
         logger.info(f"规划器请求完成，耗时={request_elapsed:.3f} 秒")
 
+        prompt_stats_text = self._build_prompt_stats_text(
+            selected_history_count=len(selected_history),
+            built_message_count=len(built_messages),
+            prompt_tokens=generation_result.prompt_tokens,
+            completion_tokens=generation_result.completion_tokens,
+            total_tokens=generation_result.total_tokens,
+        )
+        logger.info(f"本轮Prompt统计: {prompt_stats_text}")
+
         tool_call_summaries = [
             {
                 "调用编号": getattr(tool_call, "call_id", getattr(tool_call, "id", None)),
diff --git a/src/memory_system/chat_history_summarizer.py b/src/memory_system/chat_history_summarizer.py
deleted file mode 100644
index 3d18187d..00000000
--- a/src/memory_system/chat_history_summarizer.py
+++ /dev/null
@@ -1,1066 +0,0 @@
-"""
-聊天内容概括器
-用于累积、打包和压缩聊天记录
-"""
-
-import asyncio
-import json
-import time
-import re
-import difflib
-import datetime
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Set
-from dataclasses import dataclass, field
-from json_repair import repair_json
-
-from src.chat.message_receive.message import SessionMessage
-from src.common.logger import get_logger
-from src.config.config import global_config
-from src.common.data_models.llm_service_data_models import LLMGenerationOptions
-from src.services.llm_service import LLMServiceClient
-from src.services import message_service as message_api
-from src.chat.utils.utils import is_bot_self
-from src.person_info.person_info import Person
-from src.chat.message_receive.chat_manager import chat_manager as _chat_manager
-from src.prompt.prompt_manager import prompt_manager
-
-logger = get_logger("chat_history_summarizer")
-
-HIPPO_CACHE_DIR = Path(__file__).resolve().parents[2] / "data" / "hippo_memorizer"
-
-
-@dataclass
-class MessageBatch:
-    """消息批次（用于触发话题检查的原始消息累积）"""
-
-    messages: List[SessionMessage]
-    start_time: float
-    end_time: float
-
-
-@dataclass
-class TopicCacheItem:
-    """
-    话题缓存项
-
-    Attributes:
-        topic: 话题标题（一句话描述时间、人物、事件和主题）
-        messages: 与该话题相关的消息字符串列表（已经通过 build 函数转成可读文本）
-        participants: 涉及到的发言人昵称集合
-        no_update_checks: 连续多少次“检查”没有新增内容
-    """
-
-    topic: str
-    messages: List[str] = field(default_factory=list)
-    participants: Set[str] = field(default_factory=set)
-    no_update_checks: int = 0
-
-
-class ChatHistorySummarizer:
-    """聊天内容概括器"""
-
-    def __init__(self, session_id: str, check_interval: int = 60):
-        """
-        初始化聊天内容概括器
-
-        Args:
-            session_id: 会话ID
-            check_interval: 定期检查间隔（秒），默认60秒
-        """
-        self.session_id = session_id
-        self._chat_display_name = self._get_chat_display_name()
-        self.log_prefix = f"[{self._chat_display_name}]"
-
-        # 记录时间点，用于计算新消息
-        self.last_check_time = time.time()
-
-        # 记录上一次话题检查的时间，用于判断是否需要触发检查
-        self.last_topic_check_time = time.time()
-
-        # 当前累积的消息批次
-        self.current_batch: Optional[MessageBatch] = None
-
-        # 话题缓存：topic_str -> TopicCacheItem
-        # 在内存中维护，并通过本地文件实时持久化
-        self.topic_cache: Dict[str, TopicCacheItem] = {}
-        self._safe_chat_id = self._sanitize_chat_id(self.session_id)
-        self._topic_cache_file = HIPPO_CACHE_DIR / f"{self._safe_chat_id}.json"
-        # 注意：批次加载需要异步查询消息，所以在 start() 中调用
-
-        # LLM请求器，用于压缩聊天内容
-        self.summarizer_llm = LLMServiceClient(
-            task_name="utils", request_type="chat_history_summarizer"
-        )
-
-        # 后台循环相关
-        self.check_interval = check_interval  # 检查间隔（秒）
-        self._periodic_task: Optional[asyncio.Task] = None
-        self._running = False
-
-    def _get_chat_display_name(self) -> str:
-        """获取聊天显示名称"""
-        try:
-            chat_name = _chat_manager.get_session_name(self.session_id)
-            if chat_name:
-                return chat_name
-            # 如果获取失败，使用简化的chat_id显示
-            if len(self.session_id) > 20:
-                return f"{self.session_id[:8]}..."
-            return self.session_id
-        except Exception:
-            # 如果获取失败，使用简化的chat_id显示
-            if len(self.session_id) > 20:
-                return f"{self.session_id[:8]}..."
-            return self.session_id
-
-    def _sanitize_chat_id(self, chat_id: str) -> str:
-        """用于生成可作为文件名的 chat_id"""
-        return re.sub(r"[^a-zA-Z0-9_.-]", "_", chat_id)
-
-    def _load_topic_cache_from_disk(self):
-        """在启动时加载本地话题缓存（同步部分），支持重启后继续"""
-        try:
-            if not self._topic_cache_file.exists():
-                return
-
-            with self._topic_cache_file.open("r", encoding="utf-8") as f:
-                data = json.load(f)
-
-            self.last_topic_check_time = data.get("last_topic_check_time", self.last_topic_check_time)
-            topics_data = data.get("topics", {})
-            loaded_count = 0
-            for topic, payload in topics_data.items():
-                self.topic_cache[topic] = TopicCacheItem(
-                    topic=topic,
-                    messages=payload.get("messages", []),
-                    participants=set(payload.get("participants", [])),
-                    no_update_checks=payload.get("no_update_checks", 0),
-                )
-                loaded_count += 1
-
-            if loaded_count:
-                logger.info(f"{self.log_prefix} 已加载 {loaded_count} 个话题缓存，继续追踪")
-        except Exception as e:
-            logger.error(f"{self.log_prefix} 加载话题缓存失败: {e}")
-
-    async def _load_batch_from_disk(self):
-        """在启动时加载聊天批次，支持重启后继续"""
-        try:
-            if not self._topic_cache_file.exists():
-                return
-
-            with self._topic_cache_file.open("r", encoding="utf-8") as f:
-                data = json.load(f)
-
-            batch_data = data.get("current_batch")
-            if not batch_data:
-                return
-
-            start_time = batch_data.get("start_time")
-            end_time = batch_data.get("end_time")
-            if not start_time or not end_time:
-                return
-
-            # 根据时间范围重新查询消息
-            messages = message_api.get_messages_by_time_in_chat(
-                chat_id=self.session_id,
-                start_time=start_time,
-                end_time=end_time,
-                limit=0,
-                limit_mode="latest",
-                filter_mai=False,
-                filter_command=False,
-            )
-
-            if messages:
-                self.current_batch = MessageBatch(
-                    messages=messages,
-                    start_time=start_time,
-                    end_time=end_time,
-                )
-                logger.info(f"{self.log_prefix} 已恢复聊天批次，包含 {len(messages)} 条消息")
-        except Exception as e:
-            logger.error(f"{self.log_prefix} 加载聊天批次失败: {e}")
-
-    def _persist_topic_cache(self):
-        """实时持久化话题缓存和聊天批次，避免重启后丢失"""
-        try:
-            # 如果既没有话题缓存也没有批次，删除缓存文件
-            if not self.topic_cache and not self.current_batch:
-                if self._topic_cache_file.exists():
-                    self._topic_cache_file.unlink()
-                return
-
-            HIPPO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
-            data = {
-                "chat_id": self.session_id,
-                "last_topic_check_time": self.last_topic_check_time,
-                "topics": {
-                    topic: {
-                        "messages": item.messages,
-                        "participants": list(item.participants),
-                        "no_update_checks": item.no_update_checks,
-                    }
-                    for topic, item in self.topic_cache.items()
-                },
-            }
-
-            # 保存当前批次的时间范围（如果有）
-            if self.current_batch:
-                data["current_batch"] = {
-                    "start_time": self.current_batch.start_time,
-                    "end_time": self.current_batch.end_time,
-                }
-
-            with self._topic_cache_file.open("w", encoding="utf-8") as f:
-                json.dump(data, f, ensure_ascii=False, indent=2)
-        except Exception as e:
-            logger.error(f"{self.log_prefix} 持久化话题缓存失败: {e}")
-
-    async def process(self, current_time: Optional[float] = None):
-        """
-        处理聊天内容概括
-
-        Args:
-            current_time: 当前时间戳，如果为None则使用time.time()
-        """
-        if current_time is None:
-            current_time = time.time()
-
-        try:
-            # 获取从上次检查时间到当前时间的新消息
-            new_messages = message_api.get_messages_by_time_in_chat(
-                chat_id=self.session_id,
-                start_time=self.last_check_time,
-                end_time=current_time,
-                limit=0,
-                limit_mode="latest",
-                filter_mai=False,  # 不过滤bot消息，因为需要检查bot是否发言
-                filter_command=False,
-            )
-
-            if not new_messages:
-                # 没有新消息，检查是否需要进行“话题检查”
-                if self.current_batch and self.current_batch.messages:
-                    await self._check_and_run_topic_check(current_time)
-                self.last_check_time = current_time
-                return
-
-            logger.debug(
-                f"{self.log_prefix} 开始处理聊天概括，时间窗口: {self.last_check_time:.2f} -> {current_time:.2f}"
-            )
-
-            # 有新消息，更新最后检查时间
-            self.last_check_time = current_time
-
-            # 如果有当前批次，添加新消息
-            if self.current_batch:
-                before_count = len(self.current_batch.messages)
-                self.current_batch.messages.extend(new_messages)
-                self.current_batch.end_time = current_time
-                logger.info(
-                    f"{self.log_prefix} 更新聊天检查批次: {before_count} -> {len(self.current_batch.messages)} 条消息"
-                )
-                # 更新批次后持久化
-                self._persist_topic_cache()
-            else:
-                # 创建新批次
-                self.current_batch = MessageBatch(
-                    messages=new_messages,
-                    start_time=new_messages[0].timestamp.timestamp() if new_messages else current_time,
-                    end_time=current_time,
-                )
-                logger.debug(f"{self.log_prefix} 新建聊天检查批次: {len(new_messages)} 条消息")
-                # 创建批次后持久化
-                self._persist_topic_cache()
-
-            # 检查是否需要触发“话题检查”
-            await self._check_and_run_topic_check(current_time)
-
-        except Exception as e:
-            logger.error(f"{self.log_prefix} 处理聊天内容概括时出错: {e}")
-            import traceback
-
-            traceback.print_exc()
-
-    async def _check_and_run_topic_check(self, current_time: float):
-        """
-        检查是否需要进行一次“话题检查”
-
-        触发条件：
-        - 当前批次消息数 >= 100，或者
-        - 距离上一次检查的时间 > 3600 秒（1小时）
-        """
-        if not self.current_batch or not self.current_batch.messages:
-            return
-
-        messages = self.current_batch.messages
-        message_count = len(messages)
-        time_since_last_check = current_time - self.last_topic_check_time
-
-        # 格式化时间差显示
-        if time_since_last_check < 60:
-            time_str = f"{time_since_last_check:.1f}秒"
-        elif time_since_last_check < 3600:
-            time_str = f"{time_since_last_check / 60:.1f}分钟"
-        else:
-            time_str = f"{time_since_last_check / 3600:.1f}小时"
-
-        logger.debug(f"{self.log_prefix} 批次状态检查 | 消息数: {message_count} | 距上次检查: {time_str}")
-
-        # 检查"话题检查"触发条件
-        should_check = False
-
-        # 从配置中获取阈值
-        message_threshold = global_config.memory.chat_history_topic_check_message_threshold
-        time_threshold_hours = global_config.memory.chat_history_topic_check_time_hours
-        min_messages = global_config.memory.chat_history_topic_check_min_messages
-        time_threshold_seconds = time_threshold_hours * 3600
-
-        # 条件1: 消息数量达到阈值，触发一次检查
-        if message_count >= message_threshold:
-            should_check = True
-            logger.info(
-                f"{self.log_prefix} 触发检查条件: 消息数量达到 {message_count} 条（阈值: {message_threshold}条）"
-            )
-
-        # 条件2: 距离上一次检查超过时间阈值且消息数量达到最小阈值，触发一次检查
-        elif time_since_last_check > time_threshold_seconds and message_count >= min_messages:
-            should_check = True
-            logger.info(
-                f"{self.log_prefix} 触发检查条件: 距上次检查 {time_str}（阈值: {time_threshold_hours}小时）且消息数量达到 {message_count} 条（阈值: {min_messages}条）"
-            )
-
-        if should_check:
-            await self._run_topic_check_and_update_cache(messages)
-            # 本批次已经被处理为话题信息，可以清空
-            self.current_batch = None
-            # 更新上一次检查时间，并持久化
-            self.last_topic_check_time = current_time
-            self._persist_topic_cache()
-
-    async def _run_topic_check_and_update_cache(self, messages: List[SessionMessage]):
-        """
-        执行一次“话题检查”：
-        1. 首先确认这段消息里是否有 Bot 发言，没有则直接丢弃本次批次；
-        2. 将消息编号并转成字符串，构造 LLM Prompt；
-        3. 把历史话题标题列表放入 Prompt，要求 LLM：
-           - 识别当前聊天中的话题（1 个或多个）；
-           - 为每个话题选出相关消息编号；
-           - 若话题属于历史话题，则沿用原话题标题；
-        4. LLM 返回 JSON：多个 {topic, message_indices}；
-        5. 更新本地话题缓存，并根据规则触发“话题打包存储”。
-        """
-        if not messages:
-            return
-
-        start_time = messages[0].timestamp.timestamp()
-        end_time = messages[-1].timestamp.timestamp()
-
-        logger.info(
-            f"{self.log_prefix} 开始话题检查 | 消息数: {len(messages)} | 时间范围: {start_time:.2f} - {end_time:.2f}"
-        )
-
-        # 1. 检查当前批次内是否有 bot 发言（只检查当前批次，不往前推）
-        # 原因：我们要记录的是 bot 参与过的对话片段，如果当前批次内 bot 没有发言，
-        # 说明 bot 没有参与这段对话，不应该记录
-        has_bot_message = any(
-            is_bot_self(msg.platform, msg.message_info.user_info.user_id) for msg in messages
-        )
-
-        if not has_bot_message:
-            logger.info(
-                f"{self.log_prefix} 当前批次内无 Bot 发言，丢弃本次检查 | 时间范围: {start_time:.2f} - {end_time:.2f}"
-            )
-            return
-
-        # 2. 构造编号后的消息字符串和参与者信息
-        numbered_lines, index_to_msg_str, index_to_msg_text, index_to_participants = (
-            self._build_numbered_messages_for_llm(messages)
-        )
-
-        # 3. 调用 LLM 识别话题，并得到 topic -> indices（失败时最多重试 3 次）
-        existing_topics = list(self.topic_cache.keys())
-        max_retries = 3
-        attempt = 0
-        success = False
-        topic_to_indices: Dict[str, List[int]] = {}
-
-        while attempt < max_retries:
-            attempt += 1
-            success, topic_to_indices = await self._analyze_topics_with_llm(
-                numbered_lines=numbered_lines,
-                existing_topics=existing_topics,
-            )
-
-            if success and topic_to_indices:
-                if attempt > 1:
-                    logger.info(
-                        f"{self.log_prefix} 话题识别在第 {attempt} 次重试后成功 | 话题数: {len(topic_to_indices)}"
-                    )
-                break
-
-            logger.warning(
-                f"{self.log_prefix} 话题识别失败或无有效话题，第 {attempt} 次尝试失败"
-                + ("" if attempt >= max_retries else "，准备重试")
-            )
-
-        if not success or not topic_to_indices:
-            logger.error(f"{self.log_prefix} 话题识别连续 {max_retries} 次失败或始终无有效话题，本次检查放弃")
-            # 即使识别失败，也认为是一次"检查"，但不更新 no_update_checks（保持原状）
-            return
-
-        # 3.5. 检查新话题是否与历史话题相似（相似度>=90%则使用历史标题）
-        topic_mapping = self._build_topic_mapping(topic_to_indices, similarity_threshold=0.9)
-
-        # 应用话题映射：将相似的新话题标题替换为历史话题标题
-        if topic_mapping:
-            new_topic_to_indices: Dict[str, List[int]] = {}
-            for new_topic, indices in topic_to_indices.items():
-                # 如果这个新话题需要映射到历史话题
-                if new_topic in topic_mapping:
-                    historical_topic = topic_mapping[new_topic]
-                    # 如果历史话题已经存在，合并消息索引
-                    if historical_topic in new_topic_to_indices:
-                        # 合并索引并去重
-                        combined_indices = list(set(new_topic_to_indices[historical_topic] + indices))
-                        new_topic_to_indices[historical_topic] = combined_indices
-                    else:
-                        new_topic_to_indices[historical_topic] = indices
-                else:
-                    # 不需要映射，保持原样
-                    new_topic_to_indices[new_topic] = indices
-            topic_to_indices = new_topic_to_indices
-
-        # 4. 统计哪些话题在本次检查中有新增内容
-        updated_topics: Set[str] = set()
-
-        for topic, indices in topic_to_indices.items():
-            if not indices:
-                continue
-
-            item = self.topic_cache.get(topic)
-            if not item:
-                # 新话题
-                item = TopicCacheItem(topic=topic)
-                self.topic_cache[topic] = item
-
-            # 收集属于该话题的消息文本（不带编号）
-            topic_msg_texts: List[str] = []
-            new_participants: Set[str] = set()
-            for idx in indices:
-                msg_text = index_to_msg_text.get(idx)
-                if not msg_text:
-                    continue
-                topic_msg_texts.append(msg_text)
-                new_participants.update(index_to_participants.get(idx, set()))
-
-            if not topic_msg_texts:
-                continue
-
-            # 将本次检查中属于该话题的所有消息合并为一个字符串（不带编号）
-            merged_text = "\n".join(topic_msg_texts)
-            item.messages.append(merged_text)
-            item.participants.update(new_participants)
-            # 本次检查中该话题有更新，重置计数
-            item.no_update_checks = 0
-            updated_topics.add(topic)
-
-        # 5. 对于本次没有更新的历史话题，no_update_checks + 1
-        for topic, item in list(self.topic_cache.items()):
-            if topic not in updated_topics:
-                item.no_update_checks += 1
-
-        # 6. 检查是否有话题需要打包存储
-        # 从配置中获取阈值
-        no_update_checks_threshold = global_config.memory.chat_history_finalize_no_update_checks
-        message_count_threshold = global_config.memory.chat_history_finalize_message_count
-
-        topics_to_finalize: List[str] = []
-        for topic, item in self.topic_cache.items():
-            if item.no_update_checks >= no_update_checks_threshold:
-                logger.info(
-                    f"{self.log_prefix} 话题[{topic}] 连续 {no_update_checks_threshold} 次检查无新增内容，触发打包存储"
-                )
-                topics_to_finalize.append(topic)
-                continue
-            if len(item.messages) > message_count_threshold:
-                logger.info(f"{self.log_prefix} 话题[{topic}] 消息条数超过 {message_count_threshold}，触发打包存储")
-                topics_to_finalize.append(topic)
-
-        for topic in topics_to_finalize:
-            item = self.topic_cache.get(topic)
-            if not item:
-                continue
-            try:
-                await self._finalize_and_store_topic(
-                    topic=topic,
-                    item=item,
-                    # 这里的时间范围尽量覆盖最近一次检查的区间
-                    start_time=start_time,
-                    end_time=end_time,
-                )
-            finally:
-                # 无论成功与否，都从缓存中删除，避免重复
-                self.topic_cache.pop(topic, None)
-
-    def _find_most_similar_topic(
-        self, new_topic: str, existing_topics: List[str], similarity_threshold: float = 0.9
-    ) -> Optional[tuple[str, float]]:
-        """
-        查找与给定新话题最相似的历史话题
-
-        Args:
-            new_topic: 新话题标题
-            existing_topics: 历史话题标题列表
-            similarity_threshold: 相似度阈值，默认0.9（90%）
-
-        Returns:
-            Optional[tuple[str, float]]: 如果找到相似度>=阈值的历史话题，返回(历史话题标题, 相似度)，
-                                         否则返回None
-        """
-        if not existing_topics:
-            return None
-
-        best_match = None
-        best_similarity = 0.0
-
-        for existing_topic in existing_topics:
-            similarity = difflib.SequenceMatcher(None, new_topic, existing_topic).ratio()
-            if similarity > best_similarity:
-                best_similarity = similarity
-                best_match = existing_topic
-
-        # 如果相似度达到阈值，返回匹配结果
-        if best_match and best_similarity >= similarity_threshold:
-            return (best_match, best_similarity)
-
-        return None
-
-    def _build_topic_mapping(
-        self, topic_to_indices: Dict[str, List[int]], similarity_threshold: float = 0.9
-    ) -> Dict[str, str]:
-        """
-        构建新话题到历史话题的映射（如果相似度>=阈值）
-
-        Args:
-            topic_to_indices: 新话题到消息索引的映射
-            similarity_threshold: 相似度阈值，默认0.9（90%）
-
-        Returns:
-            Dict[str, str]: 新话题 -> 历史话题的映射字典
-        """
-        existing_topics_list = list(self.topic_cache.keys())
-        topic_mapping: Dict[str, str] = {}
-
-        for new_topic in topic_to_indices.keys():
-            # 如果新话题已经在历史话题中，不需要检查
-            if new_topic in existing_topics_list:
-                continue
-
-            # 查找最相似的历史话题
-            result = self._find_most_similar_topic(new_topic, existing_topics_list, similarity_threshold)
-            if result:
-                historical_topic, similarity = result
-                topic_mapping[new_topic] = historical_topic
-                logger.info(
-                    f"{self.log_prefix} 话题相似度检查: '{new_topic}' 与历史话题 '{historical_topic}' 相似度 {similarity:.2%}，使用历史标题"
-                )
-
-        return topic_mapping
-
-    def _build_numbered_messages_for_llm(
-        self, messages: List[SessionMessage]
-    ) -> tuple[List[str], Dict[int, str], Dict[int, str], Dict[int, Set[str]]]:
-        """
-        将消息转为带编号的字符串，供 LLM 选择使用。
-
-        返回:
-            numbered_lines: ["1. xxx", "2. yyy", ...]  # 带编号，用于 LLM 选择
-            index_to_msg_str: idx -> "idx. xxx"  # 带编号，用于 LLM 选择
-            index_to_msg_text: idx -> "xxx"  # 不带编号，用于最终存储
-            index_to_participants: idx -> {nickname1, nickname2, ...}
-        """
-        numbered_lines: List[str] = []
-        index_to_msg_str: Dict[int, str] = {}
-        index_to_msg_text: Dict[int, str] = {}  # 不带编号的消息文本
-        index_to_participants: Dict[int, Set[str]] = {}
-
-        for idx, msg in enumerate(messages, start=1):
-            # 使用 build_readable_messages 生成可读文本
-            try:
-                text = message_api.build_readable_messages(
-                    messages=[msg],
-                    replace_bot_name=True,
-                    timestamp_mode="normal_no_YMD",
-                    read_mark=0.0,
-                    truncate=False,
-                    show_actions=False,
-                ).strip()
-            except Exception:
-                # 回退到简单文本
-                text = getattr(msg, "processed_plain_text", "") or ""
-
-            # 获取发言人昵称
-            participants: Set[str] = set()
-            try:
-                platform = msg.platform
-                user_id = msg.message_info.user_info.user_id
-                if platform and user_id:
-                    person = Person(platform=platform, user_id=user_id)
-                    if person.person_name:
-                        participants.add(person.person_name)
-            except Exception:
-                pass
-
-            # 带编号的字符串（用于 LLM 选择）
-            line = f"{idx}. {text}"
-            numbered_lines.append(line)
-            index_to_msg_str[idx] = line
-            # 不带编号的文本（用于最终存储）
-            index_to_msg_text[idx] = text
-            index_to_participants[idx] = participants
-
-        return numbered_lines, index_to_msg_str, index_to_msg_text, index_to_participants
-
-    async def _analyze_topics_with_llm(
-        self,
-        numbered_lines: List[str],
-        existing_topics: List[str],
-    ) -> tuple[bool, Dict[str, List[int]]]:
-        """
-        使用 LLM 识别本次检查中的话题，并为每个话题选择相关消息编号。
-
-        要求：
-        - 话题用一句话清晰描述正在发生的事件，包括时间、人物、主要事件和主题；
-        - 可以有 1 个或多个话题；
-        - 若某个话题与历史话题列表中的某个话题是同一件事，请直接使用历史话题的字符串；
-        - 输出 JSON，格式：
-          [
-            {
-              "topic": "话题标题字符串",
-              "message_indices": [1, 2, 5]
-            },
-            ...
-          ]
-        """
-        if not numbered_lines:
-            return False, {}
-
-        history_topics_block = "\n".join(f"- {t}" for t in existing_topics) if existing_topics else "（当前无历史话题）"
-        messages_block = "\n".join(numbered_lines)
-
-        prompt_template = prompt_manager.get_prompt("hippo_topic_analysis")
-        prompt_template.add_context("history_topics_block", history_topics_block)
-        prompt_template.add_context("messages_block", messages_block)
-        prompt = await prompt_manager.render_prompt(prompt_template)
-
-        try:
-            generation_result = await self.summarizer_llm.generate_response(
-                prompt=prompt,
-                options=LLMGenerationOptions(temperature=0.3),
-            )
-            response = generation_result.response
-
-            logger.info(f"{self.log_prefix} 话题识别LLM Prompt: {prompt}")
-            logger.info(f"{self.log_prefix} 话题识别LLM Response: {response}")
-
-            # 尝试从响应中提取JSON代码块
-            json_str = None
-            json_pattern = r"```json\s*(.*?)\s*```"
-            matches = re.findall(json_pattern, response, re.DOTALL)
-
-            if matches:
-                # 找到JSON代码块，使用第一个匹配
-                json_str = matches[0].strip()
-            else:
-                # 如果没有找到代码块，尝试查找JSON数组的开始和结束位置
-                # 查找第一个 [ 和最后一个 ]
-                start_idx = response.find("[")
-                end_idx = response.rfind("]")
-                if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
-                    json_str = response[start_idx : end_idx + 1].strip()
-                else:
-                    # 如果还是找不到，尝试直接使用整个响应（移除可能的markdown标记）
-                    json_str = response.strip()
-                    json_str = re.sub(r"^```json\s*", "", json_str, flags=re.MULTILINE)
-                    json_str = re.sub(r"^```\s*", "", json_str, flags=re.MULTILINE)
-                    json_str = json_str.strip()
-
-            # 使用json_repair修复可能的JSON错误
-            if json_str:
-                try:
-                    repaired_json = repair_json(json_str)
-                    result = json.loads(repaired_json) if isinstance(repaired_json, str) else repaired_json
-                except Exception as repair_error:
-                    # 如果repair失败，尝试直接解析
-                    logger.warning(f"{self.log_prefix} JSON修复失败，尝试直接解析: {repair_error}")
-                    result = json.loads(json_str)
-            else:
-                raise ValueError("无法从响应中提取JSON内容")
-
-            if not isinstance(result, list):
-                logger.error(f"{self.log_prefix} 话题识别返回的 JSON 不是列表: {result}")
-                return False, {}
-
-            topic_to_indices: Dict[str, List[int]] = {}
-            for item in result:
-                if not isinstance(item, dict):
-                    continue
-                topic = item.get("topic")
-                indices = item.get("message_indices") or item.get("messages") or []
-                if not topic or not isinstance(topic, str):
-                    continue
-                if isinstance(indices, list):
-                    valid_indices: List[int] = []
-                    for v in indices:
-                        try:
-                            iv = int(v)
-                            if iv > 0:
-                                valid_indices.append(iv)
-                        except (TypeError, ValueError):
-                            continue
-                    if valid_indices:
-                        topic_to_indices[topic] = valid_indices
-
-            return True, topic_to_indices
-
-        except Exception as e:
-            logger.error(f"{self.log_prefix} 话题识别 LLM 调用或解析失败: {e}")
-            logger.error(f"{self.log_prefix} LLM响应: {response if 'response' in locals() else 'N/A'}")
-            return False, {}
-
-    async def _finalize_and_store_topic(
-        self,
-        topic: str,
-        item: TopicCacheItem,
-        start_time: float,
-        end_time: float,
-    ):
-        """
-        对某个话题进行最终打包存储：
-        1. 将 messages(list[str]) 拼接为 original_text；
-        2. 使用 LLM 对 original_text 进行总结，得到 summary 和 keywords，theme 直接使用话题字符串；
-        3. 写入数据库 ChatHistory；
-        4. 完成后，调用方会从缓存中删除该话题。
-        """
-        if not item.messages:
-            logger.info(f"{self.log_prefix} 话题[{topic}] 无消息内容，跳过打包")
-            return
-
-        original_text = "\n".join(item.messages)
-
-        logger.info(
-            f"{self.log_prefix} 开始将聊天记录构建成记忆：[{topic}] | 消息数: {len(item.messages)} | 时间范围: {start_time:.2f} - {end_time:.2f}"
-        )
-
-        # 使用 LLM 进行总结（基于话题名），带重试机制
-        max_retries = 3
-        attempt = 0
-        success = False
-        keywords = []
-        summary = ""
-
-        while attempt < max_retries:
-            attempt += 1
-            success, keywords, summary = await self._compress_with_llm(original_text, topic)
-
-            if success and keywords and summary:
-                # 成功获取到有效的 keywords 和 summary
-                if attempt > 1:
-                    logger.info(f"{self.log_prefix} 话题[{topic}] LLM 概括在第 {attempt} 次重试后成功")
-                break
-
-            if attempt < max_retries:
-                logger.warning(f"{self.log_prefix} 话题[{topic}] LLM 概括失败（第 {attempt} 次尝试），准备重试")
-            else:
-                logger.error(f"{self.log_prefix} 话题[{topic}] LLM 概括连续 {max_retries} 次失败，放弃存储")
-
-        if not success or not keywords or not summary:
-            logger.warning(f"{self.log_prefix} 话题[{topic}] LLM 概括失败，不写入数据库")
-            return
-
-        participants = list(item.participants)
-
-        await self._store_to_database(
-            start_time=start_time,
-            end_time=end_time,
-            original_text=original_text,
-            participants=participants,
-            theme=topic,  # 主题直接使用话题名
-            keywords=keywords,
-            summary=summary,
-        )
-
-        logger.info(
-            f"{self.log_prefix} 话题[{topic}] 成功打包并存储 | 消息数: {len(item.messages)} | 参与者数: {len(participants)}"
-        )
-
-    async def _compress_with_llm(self, original_text: str, topic: str) -> tuple[bool, List[str], str]:
-        """
-        使用LLM压缩聊天内容（用于单个话题的最终总结）
-
-        Args:
-            original_text: 聊天记录原文
-            topic: 话题名称
-
-        Returns:
-            tuple[bool, List[str], str]: (是否成功, 关键词列表, 概括)
-        """
-        prompt_template = prompt_manager.get_prompt("hippo_topic_summary")
-        prompt_template.add_context("topic", topic)
-        prompt_template.add_context("original_text", original_text)
-        prompt = await prompt_manager.render_prompt(prompt_template)
-
-        try:
-            generation_result = await self.summarizer_llm.generate_response(prompt=prompt)
-            response = generation_result.response
-
-            # 解析JSON响应
-            json_str = response.strip()
-            json_str = re.sub(r"^```json\s*", "", json_str, flags=re.MULTILINE)
-            json_str = re.sub(r"^```\s*", "", json_str, flags=re.MULTILINE)
-            json_str = json_str.strip()
-
-            # 查找JSON对象的开始与结束
-            start_idx = json_str.find("{")
-            if start_idx == -1:
-                raise ValueError("未找到JSON对象开始标记")
-
-            end_idx = json_str.rfind("}")
-            if end_idx == -1 or end_idx <= start_idx:
-                logger.warning(f"{self.log_prefix} JSON缺少结束标记，尝试自动修复")
-                extracted_json = json_str[start_idx:]
-            else:
-                extracted_json = json_str[start_idx : end_idx + 1]
-
-            def _parse_with_quote_fix(payload: str) -> Dict[str, Any]:
-                fixed_chars: List[str] = []
-                in_string = False
-                escape_next = False
-                i = 0
-                while i < len(payload):
-                    char = payload[i]
-                    if escape_next:
-                        fixed_chars.append(char)
-                        escape_next = False
-                    elif char == "\\":
-                        fixed_chars.append(char)
-                        escape_next = True
-                    elif char == '"' and not escape_next:
-                        fixed_chars.append(char)
-                        in_string = not in_string
-                    elif in_string and char in {"“", "”"}:
-                        # 在字符串值内部，将中文引号替换为转义的英文引号
-                        fixed_chars.append('\\"')
-                    else:
-                        fixed_chars.append(char)
-                    i += 1
-
-                repaired = "".join(fixed_chars)
-                return json.loads(repaired)
-
-            try:
-                result = json.loads(extracted_json)
-            except json.JSONDecodeError:
-                try:
-                    repaired_json = repair_json(extracted_json)
-                    if isinstance(repaired_json, str):
-                        result = json.loads(repaired_json)
-                    else:
-                        result = repaired_json
-                except Exception as repair_error:
-                    logger.warning(f"{self.log_prefix} repair_json 失败，使用引号修复: {repair_error}")
-                    result = _parse_with_quote_fix(extracted_json)
-
-            keywords = result.get("keywords", [])
-            summary = result.get("summary", "")
-
-            # 检查必需字段是否为空
-            if not keywords or not summary:
-                logger.warning(f"{self.log_prefix} LLM返回的JSON中缺少必需字段，原文\n{response}")
-                # 返回失败，和模型出错一样，让上层进行重试
-                return False, [], ""
-
-            # 确保keywords是列表
-            if isinstance(keywords, str):
-                keywords = [keywords]
-
-            return True, keywords, summary
-
-        except Exception as e:
-            logger.error(f"{self.log_prefix} LLM压缩聊天内容时出错: {e}")
-            logger.error(f"{self.log_prefix} LLM响应: {response if 'response' in locals() else 'N/A'}")
-            # 返回失败标志和默认值
-            return False, [], "压缩失败，无法生成概括"
-
-    async def _store_to_database(
-        self,
-        start_time: float,
-        end_time: float,
-        original_text: str,
-        participants: List[str],
-        theme: str,
-        keywords: List[str],
-        summary: str,
-    ):
-        """存储到数据库"""
-        try:
-            from src.common.database.database_model import ChatHistory
-            from src.services import database_service as database_api
-
-            # 准备数据
-            data = {
-                "session_id": self.session_id,
-                "start_timestamp": datetime.fromtimestamp(start_time),
-                "end_timestamp": datetime.fromtimestamp(end_time),
-                "original_messages": original_text,
-                "participants": json.dumps(participants, ensure_ascii=False),
-                "theme": theme,
-                "keywords": json.dumps(keywords, ensure_ascii=False),
-                "summary": summary,
-                "query_count": 0,
-                "query_forget_count": 0,
-            }
-
-            saved_record = await database_api.db_save(
-                ChatHistory,
-                data=data,
-            )
-
-            if saved_record:
-                logger.debug(f"{self.log_prefix} 成功存储聊天历史记录到数据库")
-            else:
-                logger.warning(f"{self.log_prefix} 存储聊天历史记录到数据库失败")
-
-            # 同时导入到LPMM知识库
-            if global_config.lpmm_knowledge.enable:
-                await self._import_to_lpmm_knowledge(
-                    theme=theme,
-                    summary=summary,
-                    participants=participants,
-                    original_text=original_text,
-                )
-
-        except Exception as e:
-            logger.error(f"{self.log_prefix} 存储到数据库时出错: {e}")
-            import traceback
-
-            traceback.print_exc()
-            raise
-
-    async def _import_to_lpmm_knowledge(
-        self,
-        theme: str,
-        summary: str,
-        participants: List[str],
-        original_text: str,
-    ):
-        """
-        将聊天历史总结导入到LPMM知识库
-
-        Args:
-            theme: 话题主题
-            summary: 概括内容
-            participants: 参与者列表
-            original_text: 原始文本（可能很长，需要截断）
-        """
-        try:
-            from src.chat.knowledge.lpmm_ops import lpmm_ops
-
-            # 构造要导入的文本内容
-            # 格式：主题 + 概括 + 参与者信息 + 原始内容摘要
-            # 注意：使用单换行符连接，确保整个内容作为一段导入，不被LPMM分段
-            content_parts = []
-
-            # 1. 话题主题
-            # if theme:
-            # content_parts.append(f"话题：{theme}")
-
-            # 2. 概括内容
-            if summary:
-                content_parts.append(f"概括：{summary}")
-
-            # 3. 参与者信息
-            if participants:
-                participants_text = "、".join(participants)
-                content_parts.append(f"参与者：{participants_text}")
-
-            # 4. 原始文本摘要（如果原始文本太长，只取前500字）
-            # if original_text:
-            #     # 截断原始文本，避免过长
-            #     max_original_length = 500
-            #     if len(original_text) > max_original_length:
-            #         truncated_text = original_text[:max_original_length] + "..."
-            #         content_parts.append(f"原始内容摘要：{truncated_text}")
-            #     else:
-            #         content_parts.append(f"原始内容：{original_text}")
-
-            # 将所有部分合并为一个完整段落（使用单换行符，避免被LPMM分段）
-            # LPMM使用 \n\n 作为段落分隔符，所以这里使用 \n 确保不会被分段
-            content_to_import = "\n".join(content_parts)
-
-            if not content_to_import.strip():
-                logger.warning(f"{self.log_prefix} 聊天历史总结内容为空，跳过导入知识库")
-                return
-
-            # 调用lpmm_ops导入
-            result = await lpmm_ops.add_content(text=content_to_import, auto_split=False)
-
-            if result["status"] == "success":
-                logger.info(
-                    f"{self.log_prefix} 成功将聊天历史总结导入到LPMM知识库 | 话题: {theme} | 新增段落数: {result.get('count', 0)}"
-                )
-            else:
-                logger.warning(
-                    f"{self.log_prefix} 将聊天历史总结导入到LPMM知识库失败 | 话题: {theme} | 错误: {result.get('message', '未知错误')}"
-                )
-
-        except Exception as e:
-            # 导入失败不应该影响数据库存储，只记录错误
-            logger.error(f"{self.log_prefix} 导入聊天历史总结到LPMM知识库时出错: {e}", exc_info=True)
-
-    async def start(self):
-        """启动后台定期检查循环"""
-        if self._running:
-            logger.warning(f"{self.log_prefix} 后台循环已在运行，无需重复启动")
-            return
-
-        # 加载聊天批次（如果有）
-        await self._load_batch_from_disk()
-
-        self._running = True
-        self._periodic_task = asyncio.create_task(self._periodic_check_loop())
-        logger.info(f"{self.log_prefix} 已启动后台定期检查循环 | 检查间隔: {self.check_interval}秒")
-
-    async def stop(self):
-        """停止后台定期检查循环"""
-        self._running = False
-        if self._periodic_task:
-            self._periodic_task.cancel()
-            try:
-                await self._periodic_task
-            except asyncio.CancelledError:
-                pass
-            self._periodic_task = None
-        logger.info(f"{self.log_prefix} 已停止后台定期检查循环")
-
-    async def _periodic_check_loop(self):
-        """后台定期检查循环"""
-        try:
-            while self._running:
-                # 执行一次检查
-                await self.process()
-
-                # 等待指定间隔后再次检查
-                await asyncio.sleep(self.check_interval)
-        except asyncio.CancelledError:
-            logger.info(f"{self.log_prefix} 后台检查循环被取消")
-            raise
-        except Exception as e:
-            logger.error(f"{self.log_prefix} 后台检查循环出错: {e}")
-            import traceback
-
-            traceback.print_exc()
-            self._running = False
diff --git a/src/memory_system/memory_retrieval.py b/src/memory_system/memory_retrieval.py
deleted file mode 100644
index 5bc6a3a1..00000000
--- a/src/memory_system/memory_retrieval.py
+++ /dev/null
@@ -1,1046 +0,0 @@
-import contextlib
-import time
-import json
-import asyncio
-from datetime import datetime
-from typing import List, Dict, Any, Optional, Tuple, Callable
-from src.common.logger import get_logger
-from src.config.config import global_config
-from src.prompt.prompt_manager import prompt_manager
-from src.services import llm_service as llm_api
-from sqlmodel import select, col
-from src.common.database.database import get_db_session
-from src.common.database.database_model import ThinkingQuestion
-from src.memory_system.retrieval_tools import get_tool_registry, init_all_tools
-from src.llm_models.payload_content.message import MessageBuilder, RoleType, Message
-from src.chat.message_receive.chat_manager import chat_manager as _chat_manager
-from src.learners.jargon_explainer_old import retrieve_concepts_with_jargon
-
-logger = get_logger("memory_retrieval")
-
-THINKING_BACK_NOT_FOUND_RETENTION_SECONDS = 36000  # 未找到答案记录保留时长
-THINKING_BACK_CLEANUP_INTERVAL_SECONDS = 3000  # 清理频率
-_last_not_found_cleanup_ts: float = 0.0
-
-
-def _cleanup_stale_not_found_thinking_back() -> None:
-    """定期清理过期的未找到答案记录"""
-    global _last_not_found_cleanup_ts
-
-    now = time.time()
-    if now - _last_not_found_cleanup_ts < THINKING_BACK_CLEANUP_INTERVAL_SECONDS:
-        return
-
-    threshold_time = now - THINKING_BACK_NOT_FOUND_RETENTION_SECONDS
-    try:
-        with get_db_session() as session:
-            statement = select(ThinkingQuestion).where(
-                col(ThinkingQuestion.found_answer).is_(False)
-                & (ThinkingQuestion.updated_timestamp < datetime.fromtimestamp(threshold_time))
-            )
-            records = session.exec(statement).all()
-            for record in records:
-                session.delete(record)
-        if records:
-            logger.info(f"清理过期的未找到答案thinking_question记录 {len(records)} 条")
-        _last_not_found_cleanup_ts = now
-    except Exception as e:
-        logger.error(f"清理未找到答案的thinking_back记录失败: {e}")
-
-
-def init_memory_retrieval_sys():
-    """初始化记忆检索相关工具"""
-    # 注册所有工具
-    init_all_tools()
-
-
-def _log_conversation_messages(
-    conversation_messages: List[Message],
-    head_prompt: Optional[str] = None,
-    final_status: Optional[str] = None,
-) -> None:
-    """输出对话消息列表的日志
-
-    Args:
-        conversation_messages: 对话消息列表
-        head_prompt: 第一条系统消息（head_prompt）的内容，可选
-        final_status: 最终结果状态描述（例如：找到答案/未找到答案），可选
-    """
-    if not global_config.debug.show_memory_prompt:
-        return
-
-    log_lines: List[str] = []
-
-    # 如果有head_prompt，先添加为第一条消息
-    if head_prompt:
-        msg_info = "========================================\n[消息 1] 角色: System\n-----------------------------"
-        msg_info += f"\n{head_prompt}"
-        log_lines.append(msg_info)
-        start_idx = 2
-    else:
-        start_idx = 1
-
-    if not conversation_messages and not head_prompt:
-        return
-
-    for idx, msg in enumerate(conversation_messages, start_idx):
-        role_name = msg.role.value if hasattr(msg.role, "value") else str(msg.role)
-
-        # 构建单条消息的日志信息
-        # msg_info = f"\n========================================\n[消息 {idx}] 角色: {role_name} 内容类型: {content_type}\n-----------------------------"
-        msg_info = (
-            f"\n========================================\n[消息 {idx}] 角色: {role_name}\n-----------------------------"
-        )
-
-        # if full_content:
-        #     msg_info += f"\n{full_content}"
-        if msg.content:
-            msg_info += f"\n{msg.content}"
-
-        if msg.tool_calls:
-            msg_info += f"\n  工具调用: {len(msg.tool_calls)}个"
-            for tool_call in msg.tool_calls:
-                msg_info += f"\n    - {tool_call.func_name}: {json.dumps(tool_call.args, ensure_ascii=False)}"
-
-        # if msg.tool_call_id:
-        # msg_info += f"\n  工具调用ID: {msg.tool_call_id}"
-
-        log_lines.append(msg_info)
-
-    total_count = len(conversation_messages) + (1 if head_prompt else 0)
-    log_text = f"消息列表 (共{total_count}条):{''.join(log_lines)}"
-    if final_status:
-        log_text += f"\n\n[最终结果] {final_status}"
-    logger.info(log_text)
-
-
-async def _react_agent_solve_question(
-    chat_id: str,
-    max_iterations: int = 5,
-    timeout: float = 30.0,
-    initial_info: str = "",
-    chat_history: str = "",
-) -> Tuple[bool, str, List[Dict[str, Any]], bool]:
-    """使用ReAct架构的Agent来解决问题
-
-    Args:
-        chat_id: 聊天ID
-        max_iterations: 最大迭代次数
-        timeout: 超时时间（秒）
-        initial_info: 初始信息，将作为collected_info的初始值
-        chat_history: 聊天记录，将传递给 ReAct Agent prompt
-
-    Returns:
-        Tuple[bool, str, List[Dict[str, Any]], bool]: (是否找到答案, 答案内容, 思考步骤列表, 是否超时)
-    """
-    start_time = time.time()
-    collected_info = initial_info or ""
-    # 构造日志前缀：[聊天流名称]，用于在日志中标识聊天流
-    try:
-        chat_name = _chat_manager.get_session_name(chat_id) or chat_id
-    except Exception:
-        chat_name = chat_id
-    react_log_prefix = f"[{chat_name}] "
-    thinking_steps = []
-    is_timeout = False
-    conversation_messages: List[Message] = []
-    first_head_prompt: Optional[str] = None  # 保存第一次使用的head_prompt（用于日志显示）
-    last_tool_name: Optional[str] = None  # 记录最后一次使用的工具名称
-
-    # 使用 while 循环，支持额外迭代
-    iteration = 0
-    max_iterations_with_extra = max_iterations
-    while iteration < max_iterations_with_extra:
-        # 检查超时
-        if time.time() - start_time > timeout:
-            logger.warning(f"ReAct Agent超时，已迭代{iteration}次")
-            is_timeout = True
-            break
-
-        # 获取工具注册器
-        tool_registry = get_tool_registry()
-
-        # 获取bot_name
-        bot_name = global_config.bot.nickname
-
-        # 获取当前时间
-        time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-
-        # 计算剩余迭代次数
-        current_iteration = iteration + 1
-        remaining_iterations = max_iterations - current_iteration
-
-        # 提取函数调用中参数的值，支持单引号和双引号
-        def extract_quoted_content(text, func_name, param_name):
-            """从文本中提取函数调用中参数的值，支持单引号和双引号
-
-            Args:
-                text: 要搜索的文本
-                func_name: 函数名，如 'return_information'
-                param_name: 参数名，如 'information'
-
-            Returns:
-                提取的参数值，如果未找到则返回None
-            """
-            if not text:
-                return None
-
-            # 查找函数调用位置（不区分大小写）
-            func_pattern = func_name.lower()
-            text_lower = text.lower()
-            func_pos = text_lower.find(func_pattern)
-            if func_pos == -1:
-                return None
-
-            # 查找参数名和等号
-            param_pattern = f"{param_name}="
-            param_pos = text_lower.find(param_pattern, func_pos)
-            if param_pos == -1:
-                return None
-
-            # 跳过参数名、等号和空白
-            start_pos = param_pos + len(param_pattern)
-            while start_pos < len(text) and text[start_pos] in " \t\n":
-                start_pos += 1
-
-            if start_pos >= len(text):
-                return None
-
-            # 确定引号类型
-            quote_char = text[start_pos]
-            if quote_char not in ['"', "'"]:
-                return None
-
-            # 查找匹配的结束引号（考虑转义）
-            end_pos = start_pos + 1
-            while end_pos < len(text):
-                if text[end_pos] == quote_char:
-                    # 检查是否是转义的引号
-                    if end_pos > start_pos + 1 and text[end_pos - 1] == "\\":
-                        end_pos += 1
-                        continue
-                    # 找到匹配的引号
-                    content = text[start_pos + 1 : end_pos]
-                    # 处理转义字符
-                    content = content.replace('\\"', '"').replace("\\'", "'").replace("\\\\", "\\")
-                    return content
-                end_pos += 1
-
-            return None
-
-        # 正常迭代：使用head_prompt决定调用哪些工具（包含return_information工具）
-        tool_definitions = tool_registry.get_tool_definitions()
-        # tool_names = [tool_def["name"] for tool_def in tool_definitions]
-        # logger.debug(f"ReAct Agent 第 {iteration + 1} 次迭代，问题: {question}|可用工具: {', '.join(tool_names)} (共{len(tool_definitions)}个)")
-
-        # head_prompt应该只构建一次，使用初始的collected_info，后续迭代都复用同一个
-        if first_head_prompt is None:
-            # 第一次构建，使用初始的collected_info（即initial_info）
-            initial_collected_info = initial_info or ""
-            # 使用 LPMM 知识库检索 prompt
-            first_head_prompt_template = prompt_manager.get_prompt("memory_retrieval_react_prompt_head_lpmm")
-            first_head_prompt_template.add_context("bot_name", bot_name)
-            first_head_prompt_template.add_context("time_now", time_now)
-            first_head_prompt_template.add_context("chat_history", chat_history)
-            first_head_prompt_template.add_context("collected_info", initial_collected_info)
-            first_head_prompt_template.add_context("current_iteration", str(current_iteration))
-            first_head_prompt_template.add_context("remaining_iterations", str(remaining_iterations))
-            first_head_prompt_template.add_context("max_iterations", str(max_iterations))
-            first_head_prompt = await prompt_manager.render_prompt(first_head_prompt_template)
-
-        # 后续迭代都复用第一次构建的head_prompt
-        head_prompt = first_head_prompt
-
-        def _build_messages(
-            _client,
-            *,
-            _head_prompt: str = head_prompt,
-            _conversation_messages: List[Message] = conversation_messages,
-        ):
-            messages: List[Message] = []
-
-            system_builder = MessageBuilder()
-            system_builder.set_role(RoleType.System)
-            system_builder.add_text_content(_head_prompt)
-            messages.append(system_builder.build())
-
-            messages.extend(_conversation_messages)
-
-            return messages
-
-        message_factory_fn: Callable[..., List[Message]] = _build_messages  # pyright: ignore[reportGeneralTypeIssues]
-        generation_result = await llm_api.generate(
-            llm_api.LLMServiceRequest(
-                task_name="utils",
-                request_type="memory.react",
-                message_factory=message_factory_fn,  # type: ignore[arg-type]
-                tool_options=tool_definitions,
-            )
-        )
-        success = generation_result.success
-        response = generation_result.completion.response
-        reasoning_content = generation_result.completion.reasoning
-        tool_calls = generation_result.completion.tool_calls
-
-        # logger.info(
-        # f"ReAct Agent 第 {iteration + 1} 次迭代 模型: {model_name} ，调用工具数量: {len(tool_calls) if tool_calls else 0} ，调用工具响应: {response}"
-        # )
-
-        if not success:
-            logger.error(f"ReAct Agent LLM调用失败: {response}")
-            break
-
-        # 注意：这里会检查return_information工具调用，如果检测到return_information工具，会根据information参数决定返回信息或退出查询
-
-        assistant_message: Optional[Message] = None
-        if tool_calls:
-            assistant_builder = MessageBuilder()
-            assistant_builder.set_role(RoleType.Assistant)
-            if response and response.strip():
-                assistant_builder.add_text_content(response)
-            assistant_builder.set_tool_calls(tool_calls)
-            assistant_message = assistant_builder.build()
-        elif response and response.strip():
-            assistant_builder = MessageBuilder()
-            assistant_builder.set_role(RoleType.Assistant)
-            assistant_builder.add_text_content(response)
-            assistant_message = assistant_builder.build()
-
-        # 记录思考步骤
-        step: Dict[str, Any] = {
-            "iteration": iteration + 1,
-            "thought": response,
-            "actions": [],
-            "observations": [],
-        }
-
-        if assistant_message:
-            conversation_messages.append(assistant_message)
-
-        # 记录思考过程到collected_info中
-        if reasoning_content or response:
-            thought_summary = reasoning_content or (response[:200] if response else "")
-            if thought_summary:
-                collected_info += f"\n[思考] {thought_summary}\n"
-
-        # 处理工具调用
-        if not tool_calls:
-            # 如果没有工具调用，检查响应文本中是否包含return_information函数调用格式或JSON格式
-            if response and response.strip():
-                # 首先尝试解析JSON格式的return_information
-                def parse_json_return_information(text: str):
-                    """从文本中解析JSON格式的return_information，返回information字符串，如果未找到则返回None"""
-                    if not text:
-                        return None, None
-
-                    try:
-                        # 尝试提取JSON对象（可能包含在代码块中或直接是JSON）
-                        json_text = text.strip()
-
-                        # 如果包含代码块标记，提取JSON部分
-                        if "```json" in json_text:
-                            start = json_text.find("```json") + 7
-                            end = json_text.find("```", start)
-                            if end != -1:
-                                json_text = json_text[start:end].strip()
-                        elif "```" in json_text:
-                            start = json_text.find("```") + 3
-                            end = json_text.find("```", start)
-                            if end != -1:
-                                json_text = json_text[start:end].strip()
-
-                        # 尝试解析JSON
-                        data = json.loads(json_text)
-
-                        # 检查是否包含return_information字段
-                        if isinstance(data, dict) and "return_information" in data:
-                            information = data.get("information", "")
-                            return information
-                    except (json.JSONDecodeError, ValueError, TypeError):
-                        # 如果JSON解析失败，尝试在文本中查找JSON对象
-                        with contextlib.suppress(json.JSONDecodeError, ValueError, TypeError):
-                            # 查找第一个 { 和最后一个 } 之间的内容（更健壮的JSON提取）
-                            first_brace = text.find("{")
-                            if first_brace != -1:
-                                # 从第一个 { 开始，找到匹配的 }
-                                brace_count = 0
-                                json_end = -1
-                                for i in range(first_brace, len(text)):
-                                    if text[i] == "{":
-                                        brace_count += 1
-                                    elif text[i] == "}":
-                                        brace_count -= 1
-                                        if brace_count == 0:
-                                            json_end = i + 1
-                                            break
-
-                                if json_end != -1:
-                                    json_text = text[first_brace:json_end]
-                                    data = json.loads(json_text)
-                                    if isinstance(data, dict) and "return_information" in data:
-                                        information = data.get("information", "")
-                                        return information
-
-                    return None
-
-                # 尝试从文本中解析return_information函数调用
-                def parse_return_information_from_text(text: str):
-                    """从文本中解析return_information函数调用，返回information字符串，如果未找到则返回None"""
-                    if not text:
-                        return None
-
-                    # 查找return_information函数调用位置（不区分大小写）
-                    func_pattern = "return_information"
-                    text_lower = text.lower()
-                    func_pos = text_lower.find(func_pattern)
-                    if func_pos == -1:
-                        return None
-
-                    # 解析information参数（字符串，使用extract_quoted_content）
-                    information = extract_quoted_content(text, "return_information", "information")
-
-                    # 如果information存在（即使是空字符串），也返回它
-                    return information
-
-                # 首先尝试解析JSON格式
-                parsed_information_json = parse_json_return_information(response)
-                is_json_format = parsed_information_json is not None
-
-                # 如果JSON解析成功，使用JSON结果
-                if is_json_format:
-                    parsed_information = parsed_information_json
-                else:
-                    # 如果JSON解析失败，尝试解析函数调用格式
-                    parsed_information = parse_return_information_from_text(response)
-
-                if parsed_information is not None or is_json_format:
-                    # 检测到return_information格式（可能是JSON格式或函数调用格式）
-                    format_type = "JSON格式" if is_json_format else "函数调用格式"
-                    # 返回信息（即使为空字符串也返回）
-                    step["actions"].append(
-                        {
-                            "action_type": "return_information",
-                            "action_params": {"information": parsed_information or ""},
-                        }
-                    )
-                    parsed_info_text = parsed_information if isinstance(parsed_information, str) else ""
-                    if parsed_info_text.strip():
-                        step["observations"] = [f"检测到return_information{format_type}调用，返回信息"]
-                        thinking_steps.append(step)
-                        logger.info(
-                            f"{react_log_prefix}第 {iteration + 1} 次迭代 通过return_information{format_type}返回信息: {parsed_info_text[:100]}..."
-                        )
-
-                        _log_conversation_messages(
-                            conversation_messages,
-                            head_prompt=first_head_prompt,
-                            final_status=f"返回信息：{parsed_info_text}",
-                        )
-
-                        return True, parsed_info_text, thinking_steps, False
-                    else:
-                        # 信息为空，直接退出查询
-                        step["observations"] = [f"检测到return_information{format_type}调用，信息为空"]
-                        thinking_steps.append(step)
-                        logger.info(
-                            f"{react_log_prefix}第 {iteration + 1} 次迭代 通过return_information{format_type}判断信息为空"
-                        )
-
-                        _log_conversation_messages(
-                            conversation_messages,
-                            head_prompt=first_head_prompt,
-                            final_status="信息为空：通过return_information文本格式判断信息为空",
-                        )
-
-                        return False, "", thinking_steps, False
-
-                # 如果没有检测到return_information格式，记录思考过程，继续下一轮迭代
-                step["observations"] = [f"思考完成，但未调用工具。响应: {response}"]
-                logger.info(f"{react_log_prefix}第 {iteration + 1} 次迭代 思考完成但未调用工具: {response}")
-                collected_info += f"思考: {response}"
-            else:
-                logger.warning(f"{react_log_prefix}第 {iteration + 1} 次迭代 无工具调用且无响应")
-                step["observations"] = ["无响应且无工具调用"]
-            thinking_steps.append(step)
-            iteration += 1  # 在continue之前增加迭代计数，避免跳过iteration += 1
-            continue
-
-        # 处理工具调用
-        # 首先检查是否有return_information工具调用，如果有则立即返回，不再处理其他工具
-        return_information_info = None
-        for tool_call in tool_calls:
-            tool_name = tool_call.func_name
-            tool_args = tool_call.args or {}
-
-            if tool_name == "return_information":
-                return_information_info = tool_args.get("information", "")
-
-                # 返回信息（即使为空也返回）
-                step["actions"].append(
-                    {
-                        "action_type": "return_information",
-                        "action_params": {"information": return_information_info},
-                    }
-                )
-                if return_information_info and return_information_info.strip():
-                    # 有信息，返回
-                    step["observations"] = ["检测到return_information工具调用，返回信息"]
-                    thinking_steps.append(step)
-                    logger.info(
-                        f"{react_log_prefix}第 {iteration + 1} 次迭代 通过return_information工具返回信息: {return_information_info}"
-                    )
-
-                    _log_conversation_messages(
-                        conversation_messages,
-                        head_prompt=first_head_prompt,
-                        final_status=f"返回信息：{return_information_info}",
-                    )
-
-                    return True, return_information_info, thinking_steps, False
-                else:
-                    # 信息为空，直接退出查询
-                    step["observations"] = ["检测到return_information工具调用，信息为空"]
-                    thinking_steps.append(step)
-                    logger.info(f"{react_log_prefix}第 {iteration + 1} 次迭代 通过return_information工具判断信息为空")
-
-                    _log_conversation_messages(
-                        conversation_messages,
-                        head_prompt=first_head_prompt,
-                        final_status="信息为空：通过return_information工具判断信息为空",
-                    )
-
-                    return False, "", thinking_steps, False
-
-        # 如果没有return_information工具调用，继续处理其他工具
-        tool_tasks = []
-        for i, tool_call in enumerate(tool_calls):
-            tool_name = tool_call.func_name
-            tool_args = tool_call.args or {}
-
-            logger.debug(
-                f"{react_log_prefix}第 {iteration + 1} 次迭代 工具调用 {i + 1}/{len(tool_calls)}: {tool_name}({tool_args})"
-            )
-
-            # 跳过return_information工具调用（已经在上面处理过了）
-            if tool_name == "return_information":
-                continue
-
-            # 记录最后一次使用的工具名称（用于判断是否需要额外迭代）
-            last_tool_name = tool_name
-
-            # 普通工具调用
-            tool = tool_registry.get_tool(tool_name)
-            if tool:
-                # 准备工具参数（需要添加chat_id如果工具需要）
-                import inspect
-
-                sig = inspect.signature(tool.execute_func)
-                tool_params = tool_args.copy()
-                if "chat_id" in sig.parameters:
-                    tool_params["chat_id"] = chat_id
-
-                # 创建异步任务
-                async def execute_single_tool(tool_instance, params, tool_name_str, iter_num):
-                    try:
-                        observation = await tool_instance.execute(**params)
-                        param_str = ", ".join([f"{k}={v}" for k, v in params.items() if k != "chat_id"])
-                        return f"查询{tool_name_str}({param_str})的结果：{observation}"
-                    except Exception as e:
-                        error_msg = f"工具执行失败: {str(e)}"
-                        logger.error(f"{react_log_prefix}第 {iter_num + 1} 次迭代 工具 {tool_name_str} {error_msg}")
-                        return f"查询{tool_name_str}失败: {error_msg}"
-
-                tool_tasks.append(execute_single_tool(tool, tool_params, tool_name, iteration))
-                step["actions"].append({"action_type": tool_name, "action_params": tool_args})
-            else:
-                error_msg = f"未知的工具类型: {tool_name}"
-                logger.warning(
-                    f"{react_log_prefix}第 {iteration + 1} 次迭代 工具 {i + 1}/{len(tool_calls)} {error_msg}"
-                )
-                tool_tasks.append(asyncio.create_task(asyncio.sleep(0, result=f"查询{tool_name}失败: {error_msg}")))
-
-        # 并行执行所有工具
-        if tool_tasks:
-            observations = await asyncio.gather(*tool_tasks, return_exceptions=True)
-
-            # 处理执行结果
-            for i, (tool_call_item, observation) in enumerate(zip(tool_calls, observations, strict=False)):
-                if isinstance(observation, Exception):
-                    observation = f"工具执行异常: {str(observation)}"
-                    logger.error(f"{react_log_prefix}第 {iteration + 1} 次迭代 工具 {i + 1} 执行异常: {observation}")
-
-                observation_text = observation if isinstance(observation, str) else str(observation)
-                stripped_observation = observation_text.strip()
-                step["observations"].append(observation_text)
-                collected_info += f"\n{observation_text}\n"
-                if stripped_observation:
-                    # 不再自动检测工具输出中的jargon，改为通过 query_words 工具主动查询
-                    tool_builder = MessageBuilder()
-                    tool_builder.set_role(RoleType.Tool)
-                    tool_builder.add_text_content(observation_text)
-                    tool_builder.add_tool_call(tool_call_item.call_id)
-                    conversation_messages.append(tool_builder.build())
-
-        thinking_steps.append(step)
-
-        # 检查是否需要额外迭代：如果最后一次使用的工具是 search_chat_history 且达到最大迭代次数，额外增加一回合
-        if iteration + 1 >= max_iterations and last_tool_name == "search_chat_history" and not is_timeout:
-            max_iterations_with_extra = max_iterations + 1
-            logger.info(
-                f"{react_log_prefix}达到最大迭代次数（已迭代{iteration + 1}次），最后一次使用工具为 search_chat_history，额外增加一回合尝试"
-            )
-
-        iteration += 1
-
-    # 正常迭代结束后，如果达到最大迭代次数或超时，执行最终评估
-    # 最终评估单独处理，不算在迭代中
-    should_do_final_evaluation = False
-    if is_timeout:
-        should_do_final_evaluation = True
-        logger.warning(f"{react_log_prefix}超时，已迭代{iteration}次，进入最终评估")
-    elif iteration >= max_iterations:
-        should_do_final_evaluation = True
-        logger.info(f"{react_log_prefix}达到最大迭代次数（已迭代{iteration}次），进入最终评估")
-
-    if should_do_final_evaluation:
-        # 获取必要变量用于最终评估
-        tool_registry = get_tool_registry()
-        bot_name = global_config.bot.nickname
-        time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-        current_iteration = iteration + 1
-        remaining_iterations = 0
-
-        # 提取函数调用中参数的值，支持单引号和双引号
-        def extract_quoted_content(text, func_name, param_name):
-            """从文本中提取函数调用中参数的值，支持单引号和双引号
-
-            Args:
-                text: 要搜索的文本
-                func_name: 函数名，如 'return_information'
-                param_name: 参数名，如 'information'
-
-            Returns:
-                提取的参数值，如果未找到则返回None
-            """
-            if not text:
-                return None
-
-            # 查找函数调用位置（不区分大小写）
-            func_pattern = func_name.lower()
-            text_lower = text.lower()
-            func_pos = text_lower.find(func_pattern)
-            if func_pos == -1:
-                return None
-
-            # 查找参数名和等号
-            param_pattern = f"{param_name}="
-            param_pos = text_lower.find(param_pattern, func_pos)
-            if param_pos == -1:
-                return None
-
-            # 跳过参数名、等号和空白
-            start_pos = param_pos + len(param_pattern)
-            while start_pos < len(text) and text[start_pos] in " \t\n":
-                start_pos += 1
-
-            if start_pos >= len(text):
-                return None
-
-            # 确定引号类型
-            quote_char = text[start_pos]
-            if quote_char not in ['"', "'"]:
-                return None
-
-            # 查找匹配的结束引号（考虑转义）
-            end_pos = start_pos + 1
-            while end_pos < len(text):
-                if text[end_pos] == quote_char:
-                    # 检查是否是转义的引号
-                    if end_pos > start_pos + 1 and text[end_pos - 1] == "\\":
-                        end_pos += 1
-                        continue
-                    # 找到匹配的引号
-                    content = text[start_pos + 1 : end_pos]
-                    # 处理转义字符
-                    content = content.replace('\\"', '"').replace("\\'", "'").replace("\\\\", "\\")
-                    return content
-                end_pos += 1
-
-            return None
-
-        # 执行最终评估
-        evaluation_prompt_template = prompt_manager.get_prompt("memory_retrieval_react_final")
-        evaluation_prompt_template.add_context("bot_name", bot_name)
-        evaluation_prompt_template.add_context("time_now", time_now)
-        evaluation_prompt_template.add_context("chat_history", chat_history)
-        evaluation_prompt_template.add_context("collected_info", collected_info or "暂无信息")
-        evaluation_prompt_template.add_context("current_iteration", str(current_iteration))
-        evaluation_prompt_template.add_context("remaining_iterations", str(remaining_iterations))
-        evaluation_prompt_template.add_context("max_iterations", str(max_iterations))
-        evaluation_prompt = await prompt_manager.render_prompt(evaluation_prompt_template)
-
-        evaluation_result = await llm_api.generate(
-            llm_api.LLMServiceRequest(
-                task_name="utils",
-                request_type="memory.react.final",
-                prompt=evaluation_prompt,
-                tool_options=[],
-            )
-        )
-        eval_success = evaluation_result.success
-        eval_response = evaluation_result.completion.response
-
-        if not eval_success:
-            logger.error(f"ReAct Agent 最终评估阶段 LLM调用失败: {eval_response}")
-            _log_conversation_messages(
-                conversation_messages,
-                head_prompt=first_head_prompt,
-                final_status="未找到答案：最终评估阶段LLM调用失败",
-            )
-            return False, "最终评估阶段LLM调用失败", thinking_steps, is_timeout
-
-        if global_config.debug.show_memory_prompt:
-            logger.info(f"{react_log_prefix}最终评估Prompt: {evaluation_prompt}")
-            logger.info(f"{react_log_prefix}最终评估响应: {eval_response}")
-
-        # 从最终评估响应中提取return_information
-        return_information_content = None
-
-        if eval_response:
-            return_information_content = extract_quoted_content(eval_response, "return_information", "information")
-
-        # 如果提取到信息，返回（无论是否超时，都视为成功完成）
-        if return_information_content is not None:
-            eval_step = {
-                "iteration": current_iteration,
-                "thought": f"[最终评估] {eval_response}",
-                "actions": [
-                    {"action_type": "return_information", "action_params": {"information": return_information_content}}
-                ],
-                "observations": ["最终评估阶段检测到return_information"],
-            }
-            thinking_steps.append(eval_step)
-            if return_information_content and return_information_content.strip():
-                logger.info(f"ReAct Agent 最终评估阶段返回信息: {return_information_content}")
-                _log_conversation_messages(
-                    conversation_messages,
-                    head_prompt=first_head_prompt,
-                    final_status=f"返回信息：{return_information_content}",
-                )
-                return True, return_information_content, thinking_steps, False
-            else:
-                logger.info("ReAct Agent 最终评估阶段判断信息为空")
-                _log_conversation_messages(
-                    conversation_messages,
-                    head_prompt=first_head_prompt,
-                    final_status="信息为空：最终评估阶段判断信息为空",
-                )
-                return False, "", thinking_steps, False
-
-        # 如果没有明确判断，视为not_enough_info，返回空字符串（不返回任何信息）
-        eval_step = {
-            "iteration": current_iteration,
-            "thought": f"[最终评估] {eval_response}",
-            "actions": [{"action_type": "return_information", "action_params": {"information": ""}}],
-            "observations": ["已到达最大迭代次数，信息为空"],
-        }
-        thinking_steps.append(eval_step)
-        logger.info("ReAct Agent 已到达最大迭代次数，信息为空")
-
-        _log_conversation_messages(
-            conversation_messages,
-            head_prompt=first_head_prompt,
-            final_status="未找到答案：已到达最大迭代次数，无法找到答案",
-        )
-
-        return False, "", thinking_steps, is_timeout
-
-    # 如果正常迭代过程中提前找到答案返回，不会到达这里
-    # 如果正常迭代结束但没有触发最终评估（理论上不应该发生），直接返回
-    logger.warning("ReAct Agent正常迭代结束，但未触发最终评估")
-    _log_conversation_messages(
-        conversation_messages,
-        head_prompt=first_head_prompt,
-        final_status="未找到答案：正常迭代结束",
-    )
-
-    return False, "", thinking_steps, is_timeout
-
-
-def _get_recent_query_history(chat_id: str, time_window_seconds: float = 600.0) -> str:
-    """获取最近一段时间内的查询历史（用于避免重复查询）
-
-    Args:
-        chat_id: 聊天ID
-        time_window_seconds: 时间窗口（秒），默认10分钟
-
-    Returns:
-        str: 格式化的查询历史字符串
-    """
-    try:
-        _current_time = time.time()
-
-        with get_db_session() as session:
-            statement = (
-                select(ThinkingQuestion)
-                .where(col(ThinkingQuestion.context) == chat_id)
-                .order_by(col(ThinkingQuestion.updated_timestamp).desc())
-                .limit(5)
-            )
-            records = session.exec(statement).all()
-
-        if not records:
-            return ""
-
-        history_lines = ["最近已查询的问题和结果："]
-
-        for record in records:
-            status = "✓ 已找到答案" if record.found_answer else "✗ 未找到答案"
-            answer_preview = ""
-            # 只有找到答案时才显示答案内容
-            if record.found_answer and record.answer:
-                # 截取答案前100字符
-                answer_preview = record.answer[:100]
-                if len(record.answer) > 100:
-                    answer_preview += "..."
-
-            history_lines.extend([f"- 问题：{record.question}", f"  状态：{status}"])
-            if answer_preview:
-                history_lines.append(f"  答案：{answer_preview}")
-            history_lines.append("")  # 空行分隔
-
-        return "\n".join(history_lines)
-
-    except Exception as e:
-        logger.error(f"获取查询历史失败: {e}")
-        return ""
-
-
-def _get_recent_found_answers(chat_id: str, time_window_seconds: float = 600.0) -> List[str]:
-    """获取最近一段时间内已找到答案的查询记录（用于返回给 replyer）
-
-    Args:
-        chat_id: 聊天ID
-        time_window_seconds: 时间窗口（秒），默认10分钟
-
-    Returns:
-        List[str]: 格式化的答案列表，每个元素格式为 "问题：xxx\n答案：xxx"
-    """
-    try:
-        _current_time = time.time()
-
-        # 查询最近时间窗口内已找到答案的记录，按更新时间倒序
-        with get_db_session() as session:
-            statement = (
-                select(ThinkingQuestion)
-                .where(col(ThinkingQuestion.context) == chat_id)
-                .where(col(ThinkingQuestion.found_answer))
-                .where(col(ThinkingQuestion.answer).is_not(None))
-                .where(col(ThinkingQuestion.answer) != "")
-                .order_by(col(ThinkingQuestion.updated_timestamp).desc())
-                .limit(3)
-            )
-            records = session.exec(statement).all()
-
-        if not records:
-            return []
-
-        return [f"问题：{record.question}\n答案：{record.answer}" for record in records if record.answer]
-
-    except Exception as e:
-        logger.error(f"获取最近已找到答案的记录失败: {e}")
-        return []
-
-
-def _store_thinking_back(
-    chat_id: str, question: str, context: str, found_answer: bool, answer: str, thinking_steps: List[Dict[str, Any]]
-) -> None:
-    """存储或更新思考过程到数据库（如果已存在则更新，否则创建）
-
-    Args:
-        chat_id: 聊天ID
-        question: 问题
-        context: 上下文信息
-        found_answer: 是否找到答案
-        answer: 答案内容
-        thinking_steps: 思考步骤列表
-    """
-    try:
-        now = time.time()
-
-        # 先查询是否已存在相同chat_id和问题的记录
-        with get_db_session() as session:
-            statement = (
-                select(ThinkingQuestion)
-                .where(col(ThinkingQuestion.context) == chat_id)
-                .where(col(ThinkingQuestion.question) == question)
-                .order_by(col(ThinkingQuestion.updated_timestamp).desc())
-                .limit(1)
-            )
-            if record := session.exec(statement).first():
-                record.context = context
-                record.found_answer = found_answer
-                record.answer = answer
-                record.thinking_steps = json.dumps(thinking_steps, ensure_ascii=False)
-                record.updated_timestamp = datetime.fromtimestamp(now)
-                session.add(record)
-                logger.info(f"已更新思考过程到数据库，问题: {question[:50]}...")
-                return
-
-            new_record = ThinkingQuestion(
-                question=question,
-                context=chat_id,
-                found_answer=found_answer,
-                answer=answer,
-                thinking_steps=json.dumps(thinking_steps, ensure_ascii=False),
-                created_timestamp=datetime.fromtimestamp(now),
-                updated_timestamp=datetime.fromtimestamp(now),
-            )
-            session.add(new_record)
-    except Exception as e:
-        logger.error(f"存储思考过程失败: {e}")
-
-
-async def _process_memory_retrieval(
-    chat_id: str,
-    context: str,
-    initial_info: str = "",
-    max_iterations: Optional[int] = None,
-    chat_history: str = "",
-) -> Optional[str]:
-    """处理记忆检索
-
-    Args:
-        chat_id: 聊天ID
-        context: 上下文信息
-        initial_info: 初始信息，将传递给ReAct Agent
-        max_iterations: 最大迭代次数
-        chat_history: 聊天记录，将传递给 ReAct Agent
-
-    Returns:
-        Optional[str]: 如果找到答案，返回答案内容，否则返回None
-    """
-    _cleanup_stale_not_found_thinking_back()
-
-    question_initial_info = initial_info or ""
-
-    # 直接使用ReAct Agent进行记忆检索
-    # 如果未指定max_iterations，使用配置的默认值
-    if max_iterations is None:
-        max_iterations = global_config.memory.max_agent_iterations
-
-    found_answer, answer, thinking_steps, is_timeout = await _react_agent_solve_question(
-        chat_id=chat_id,
-        max_iterations=max_iterations,
-        timeout=global_config.memory.agent_timeout_seconds,
-        initial_info=question_initial_info,
-        chat_history=chat_history,
-    )
-
-    # 不再存储到数据库，直接返回答案
-    if is_timeout:
-        logger.info("ReAct Agent超时，不返回结果")
-
-    return answer if found_answer and answer else None
-
-
-async def build_memory_retrieval_prompt(
-    message: str,
-    sender: str,
-    target: str,
-    chat_stream,
-    think_level: int = 1,
-    unknown_words: Optional[List[str]] = None,
-) -> str:
-    """构建记忆检索提示
-    Args:
-        message: 聊天历史记录
-        sender: 发送者名称
-        target: 目标消息内容
-        chat_stream: 聊天流对象
-        think_level: 思考深度等级
-        unknown_words: Planner 提供的未知词语列表，优先使用此列表而不是从聊天记录匹配
-
-    Returns:
-        str: 记忆检索结果字符串
-    """
-    start_time = time.time()
-
-    # 构造日志前缀：[聊天流名称]，用于在日志中标识聊天流（优先群名称/用户昵称）
-    try:
-        group_info = chat_stream.group_info
-        user_info = chat_stream.user_info
-        # 群聊优先使用群名称
-        if group_info is not None and getattr(group_info, "group_name", None):
-            stream_name = group_info.group_name.strip() or str(group_info.group_id)
-        # 私聊使用用户昵称
-        elif user_info is not None and getattr(user_info, "user_nickname", None):
-            stream_name = user_info.user_nickname.strip() or str(user_info.user_id)
-        # 兜底使用 stream_id
-        else:
-            stream_name = chat_stream.stream_id
-    except Exception:
-        stream_name = chat_stream.stream_id
-    log_prefix = f"[{stream_name}] " if stream_name else ""
-
-    logger.info(f"{log_prefix}检测是否需要回忆，元消息：{message[:30]}...，消息长度: {len(message)}")
-    try:
-        chat_id = chat_stream.stream_id
-
-        # 初始阶段：使用 Planner 提供的 unknown_words 进行检索（如果提供）
-        initial_info = ""
-        if unknown_words and len(unknown_words) > 0:
-            # 清理和去重 unknown_words
-            cleaned_concepts = []
-            for word in unknown_words:
-                if isinstance(word, str):
-                    if cleaned := word.strip():
-                        cleaned_concepts.append(cleaned)
-            if cleaned_concepts:
-                # 对匹配到的概念进行jargon检索，作为初始信息
-                concept_info = await retrieve_concepts_with_jargon(cleaned_concepts, chat_id)
-                if concept_info:
-                    initial_info += concept_info
-                    logger.info(
-                        f"{log_prefix}使用 Planner 提供的 unknown_words，共 {len(cleaned_concepts)} 个概念，检索结果: {concept_info[:100]}..."
-                    )
-                else:
-                    logger.debug(f"{log_prefix}unknown_words 检索未找到任何结果")
-
-        # 直接使用 ReAct Agent 进行记忆检索（跳过问题生成步骤）
-        base_max_iterations = global_config.memory.max_agent_iterations
-        # 根据think_level调整迭代次数：think_level=1时不变，think_level=0时减半
-        if think_level == 0:
-            max_iterations = max(1, base_max_iterations // 2)  # 至少为1
-        else:
-            max_iterations = base_max_iterations
-        timeout_seconds = global_config.memory.agent_timeout_seconds
-        logger.debug(
-            f"{log_prefix}直接使用 ReAct Agent 进行记忆检索，think_level={think_level}，设置最大迭代次数: {max_iterations}（基础值: {base_max_iterations}），超时时间: {timeout_seconds}秒"
-        )
-
-        # 直接调用 ReAct Agent 处理记忆检索
-        try:
-            result = await _process_memory_retrieval(
-                chat_id=chat_id,
-                context=message,
-                initial_info=initial_info,
-                max_iterations=max_iterations,
-                chat_history=message,
-            )
-        except Exception as e:
-            logger.error(f"{log_prefix}处理记忆检索时发生异常: {e}")
-            result = None
-
-        end_time = time.time()
-
-        if result:
-            logger.info(f"{log_prefix}记忆检索成功，耗时: {(end_time - start_time):.3f}秒")
-            return f"你回忆起了以下信息：\n{result}\n如果与回复内容相关，可以参考这些回忆的信息。\n"
-        else:
-            logger.debug(f"{log_prefix}记忆检索未找到相关信息")
-            return ""
-
-    except Exception as e:
-        logger.error(f"{log_prefix}记忆检索时发生异常: {str(e)}")
-        return ""
diff --git a/src/memory_system/memory_utils.py b/src/memory_system/memory_utils.py
deleted file mode 100644
index 9886142c..00000000
--- a/src/memory_system/memory_utils.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-记忆系统工具函数
-包含模糊查找、相似度计算等工具函数
-"""
-
-import json
-import re
-from datetime import datetime
-from typing import Tuple
-from typing import List
-from json_repair import repair_json
-
-from src.common.logger import get_logger
-
-
-logger = get_logger("memory_utils")
-
-
-def parse_questions_json(response: str) -> Tuple[List[str], List[str]]:
-    """解析问题JSON，返回概念列表和问题列表
-
-    Args:
-        response: LLM返回的响应
-
-    Returns:
-        Tuple[List[str], List[str]]: (概念列表, 问题列表)
-    """
-    try:
-        # 尝试提取JSON（可能包含在```json代码块中）
-        json_pattern = r"```json\s*(.*?)\s*```"
-        matches = re.findall(json_pattern, response, re.DOTALL)
-
-        if matches:
-            json_str = matches[0]
-        else:
-            # 尝试直接解析整个响应
-            json_str = response.strip()
-
-        # 修复可能的JSON错误
-        repaired_json = repair_json(json_str)
-
-        # 解析JSON
-        parsed = json.loads(repaired_json)
-
-        # 只支持新格式：包含concepts和questions的对象
-        if not isinstance(parsed, dict):
-            logger.warning(f"解析的JSON不是对象格式: {parsed}")
-            return [], []
-
-        concepts_raw = parsed.get("concepts", [])
-        questions_raw = parsed.get("questions", [])
-
-        # 确保是列表
-        if not isinstance(concepts_raw, list):
-            concepts_raw = []
-        if not isinstance(questions_raw, list):
-            questions_raw = []
-
-        # 确保所有元素都是字符串
-        concepts = [c for c in concepts_raw if isinstance(c, str) and c.strip()]
-        questions = [q for q in questions_raw if isinstance(q, str) and q.strip()]
-
-        return concepts, questions
-
-    except Exception as e:
-        logger.error(f"解析问题JSON失败: {e}, 响应内容: {response[:200]}...")
-        return [], []
-
-
-def parse_datetime_to_timestamp(value: str) -> float:
-    """
-    接受多种常见格式并转换为时间戳（秒）
-    支持示例：
-    - 2025-09-29
-    - 2025-09-29 00:00:00
-    - 2025/09/29 00:00
-    - 2025-09-29T00:00:00
-    """
-    value = value.strip()
-    fmts = [
-        "%Y-%m-%d %H:%M:%S",
-        "%Y-%m-%d %H:%M",
-        "%Y/%m/%d %H:%M:%S",
-        "%Y/%m/%d %H:%M",
-        "%Y-%m-%d",
-        "%Y/%m/%d",
-        "%Y-%m-%dT%H:%M:%S",
-        "%Y-%m-%dT%H:%M",
-    ]
-    last_err = None
-    for fmt in fmts:
-        try:
-            dt = datetime.strptime(value, fmt)
-            return dt.timestamp()
-        except Exception as e:
-            last_err = e
-    raise ValueError(f"无法解析时间: {value} ({last_err})")
diff --git a/src/memory_system/retrieval_tools/__init__.py b/src/memory_system/retrieval_tools/__init__.py
deleted file mode 100644
index 9f2673b2..00000000
--- a/src/memory_system/retrieval_tools/__init__.py
+++ /dev/null
@@ -1,36 +0,0 @@
-"""
-记忆检索工具模块
-提供统一的工具注册和管理系统
-"""
-
-from .tool_registry import (
-    MemoryRetrievalTool,
-    MemoryRetrievalToolRegistry,
-    register_memory_retrieval_tool,
-    get_tool_registry,
-)
-
-# 导入所有工具的注册函数
-from .query_lpmm_knowledge import register_tool as register_lpmm_knowledge
-from .query_words import register_tool as register_query_words
-from .return_information import register_tool as register_return_information
-from src.config.config import global_config
-
-
-def init_all_tools():
-    """初始化并注册所有记忆检索工具"""
-    register_query_words()
-    register_return_information()
-
-    # LPMM知识库检索工具
-    if global_config.lpmm_knowledge.lpmm_mode == "agent":
-        register_lpmm_knowledge()
-
-
-__all__ = [
-    "MemoryRetrievalTool",
-    "MemoryRetrievalToolRegistry",
-    "register_memory_retrieval_tool",
-    "get_tool_registry",
-    "init_all_tools",
-]
diff --git a/src/memory_system/retrieval_tools/query_lpmm_knowledge.py b/src/memory_system/retrieval_tools/query_lpmm_knowledge.py
deleted file mode 100644
index eed01af1..00000000
--- a/src/memory_system/retrieval_tools/query_lpmm_knowledge.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""
-通过LPMM知识库查询信息 - 工具实现
-"""
-
-from src.common.logger import get_logger
-from src.config.config import global_config
-from src.chat.knowledge import get_qa_manager
-from .tool_registry import register_memory_retrieval_tool
-
-logger = get_logger("memory_retrieval_tools")
-
-
-async def query_lpmm_knowledge(query: str, limit: int = 5) -> str:
-    """在LPMM知识库中查询相关信息
-
-    Args:
-        query: 查询关键词
-
-    Returns:
-        str: 查询结果
-    """
-    try:
-        content = str(query).strip()
-        if not content:
-            return "查询关键词为空"
-
-        try:
-            limit_value = int(limit)
-        except (TypeError, ValueError):
-            limit_value = 5
-        limit_value = max(1, limit_value)
-
-        if not global_config.lpmm_knowledge.enable:
-            logger.debug("LPMM知识库未启用")
-            return "LPMM知识库未启用"
-
-        qa_manager = get_qa_manager()
-        if qa_manager is None:
-            logger.debug("LPMM知识库未初始化，跳过查询")
-            return "LPMM知识库未初始化"
-
-        knowledge_info = await qa_manager.get_knowledge(content, limit=limit_value)
-        logger.debug(f"LPMM知识库查询结果: {knowledge_info}")
-
-        if knowledge_info:
-            return f"你从LPMM知识库中找到以下信息：\n{knowledge_info}"
-
-        return f"在LPMM知识库中未找到与“{content}”相关的信息"
-
-    except Exception as e:
-        logger.error(f"LPMM知识库查询失败: {e}")
-        return f"LPMM知识库查询失败：{str(e)}"
-
-
-def register_tool():
-    """注册LPMM知识库查询工具"""
-    register_memory_retrieval_tool(
-        name="lpmm_search_knowledge",
-        description="从知识库中搜索相关信息，适用于需要知识支持的场景。使用自然语言问句检索",
-        parameters=[
-            {
-                "name": "query",
-                "type": "string",
-                "description": "需要查询的问题，使用一句疑问句提问，例如：什么是AI？",
-                "required": True,
-            },
-            {
-                "name": "limit",
-                "type": "integer",
-                "description": "希望返回的相关知识条数，默认为5",
-                "required": False,
-            },
-        ],
-        execute_func=query_lpmm_knowledge,
-    )
diff --git a/src/memory_system/retrieval_tools/query_words.py b/src/memory_system/retrieval_tools/query_words.py
deleted file mode 100644
index ee28b934..00000000
--- a/src/memory_system/retrieval_tools/query_words.py
+++ /dev/null
@@ -1,78 +0,0 @@
-"""
-查询黑话/概念含义 - 工具实现
-用于在记忆检索过程中主动查询未知词语或黑话的含义
-"""
-
-from src.common.logger import get_logger
-from src.learners.jargon_explainer_old import retrieve_concepts_with_jargon
-from .tool_registry import register_memory_retrieval_tool
-
-logger = get_logger("memory_retrieval_tools")
-
-
-async def query_words(chat_id: str, words: str) -> str:
-    """查询词语或黑话的含义
-
-    Args:
-        chat_id: 聊天ID
-        words: 要查询的词语，可以是单个词语或多个词语（用逗号、空格等分隔）
-
-    Returns:
-        str: 查询结果，包含词语的含义解释
-    """
-    try:
-        if not words or not words.strip():
-            return "未提供要查询的词语"
-
-        # 解析词语列表（支持逗号、空格等分隔符）
-        words_list = []
-        for separator in [",", "，", " ", "\n", "\t"]:
-            if separator in words:
-                words_list = [w.strip() for w in words.split(separator) if w.strip()]
-                break
-
-        # 如果没有找到分隔符，整个字符串作为一个词语
-        if not words_list:
-            words_list = [words.strip()]
-
-        # 去重
-        unique_words = []
-        seen = set()
-        for word in words_list:
-            if word and word not in seen:
-                unique_words.append(word)
-                seen.add(word)
-
-        if not unique_words:
-            return "未提供有效的词语"
-
-        logger.info(f"查询词语含义: {unique_words}")
-
-        # 调用检索函数
-        result = await retrieve_concepts_with_jargon(unique_words, chat_id)
-
-        if result:
-            return result
-        else:
-            return f"未找到词语 '{', '.join(unique_words)}' 的含义或黑话解释"
-
-    except Exception as e:
-        logger.error(f"查询词语含义失败: {e}")
-        return f"查询失败: {str(e)}"
-
-
-def register_tool():
-    """注册工具"""
-    register_memory_retrieval_tool(
-        name="query_words",
-        description="查询词语或黑话的含义。当遇到不熟悉的词语、缩写、黑话或网络用语时，可以使用此工具查询其含义。支持查询单个或多个词语（用逗号、空格等分隔）。",
-        parameters=[
-            {
-                "name": "words",
-                "type": "string",
-                "description": "要查询的词语，可以是单个词语或多个词语（用逗号、空格等分隔，如：'YYDS' 或 'YYDS,内卷,996'）",
-                "required": True,
-            },
-        ],
-        execute_func=query_words,
-    )
diff --git a/src/memory_system/retrieval_tools/return_information.py b/src/memory_system/retrieval_tools/return_information.py
deleted file mode 100644
index bf368083..00000000
--- a/src/memory_system/retrieval_tools/return_information.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""
-return_information工具 - 用于在记忆检索过程中返回总结信息并结束查询
-"""
-
-from src.common.logger import get_logger
-from .tool_registry import register_memory_retrieval_tool
-
-logger = get_logger("memory_retrieval_tools")
-
-
-async def return_information(information: str) -> str:
-    """返回总结信息并结束查询
-
-    Args:
-        information: 基于已收集信息总结出的相关信息，用于帮助回复。如果收集的信息对当前聊天没有帮助，可以返回空字符串。
-
-    Returns:
-        str: 确认信息
-    """
-    if information and information.strip():
-        logger.info(f"返回总结信息: {information}")
-        return f"已确认返回信息: {information}"
-    else:
-        logger.info("未收集到相关信息，结束查询")
-        return "未收集到相关信息，查询结束"
-
-
-def register_tool():
-    """注册return_information工具"""
-    register_memory_retrieval_tool(
-        name="return_information",
-        description="当你决定结束查询时，调用此工具。基于已收集的信息，总结出一段相关信息用于帮助回复。如果收集的信息对当前聊天有帮助，在information参数中提供总结信息；如果信息无关或没有帮助，可以提供空字符串。",
-        parameters=[
-            {
-                "name": "information",
-                "type": "string",
-                "description": "基于已收集信息总结出的相关信息，用于帮助回复。必须基于已收集的信息，不要编造。如果信息对当前聊天没有帮助，可以返回空字符串。",
-                "required": True,
-            },
-        ],
-        execute_func=return_information,
-    )
diff --git a/src/memory_system/retrieval_tools/tool_registry.py b/src/memory_system/retrieval_tools/tool_registry.py
deleted file mode 100644
index f2dd1f0d..00000000
--- a/src/memory_system/retrieval_tools/tool_registry.py
+++ /dev/null
@@ -1,167 +0,0 @@
-"""工具注册系统。
-
-提供统一的工具注册和管理接口。
-"""
-
-from typing import Any, Awaitable, Callable, Dict, List, Optional
-
-from src.common.logger import get_logger
-from src.llm_models.payload_content.tool_option import ToolParamType, normalize_tool_option
-
-logger = get_logger("memory_retrieval_tools")
-
-
-class MemoryRetrievalTool:
-    """记忆检索工具基类"""
-
-    def __init__(
-        self,
-        name: str,
-        description: str,
-        parameters: List[Dict[str, Any]],
-        execute_func: Callable[..., Awaitable[str]],
-    ) -> None:
-        """初始化工具。
-
-        Args:
-            name: 工具名称。
-            description: 工具描述。
-            parameters: 参数定义列表。
-            execute_func: 执行函数，必须是异步函数。
-        """
-        self.name = name
-        self.description = description
-        self.parameters = parameters
-        self.execute_func = execute_func
-
-    def get_tool_description(self) -> str:
-        """获取工具的文本描述，用于prompt"""
-        param_descriptions = []
-        for param in self.parameters:
-            param_name = param.get("name", "")
-            param_type = param.get("type", "string")
-            param_desc = param.get("description", "")
-            required = param.get("required", True)
-            required_str = "必填" if required else "可选"
-            param_descriptions.append(f"   - {param_name} ({param_type}, {required_str}): {param_desc}")
-
-        params_str = "\n".join(param_descriptions) if param_descriptions else "   无参数"
-        return f"{self.name}({', '.join([p['name'] for p in self.parameters])}): {self.description}\n{params_str}"
-
-    async def execute(self, **kwargs: Any) -> str:
-        """执行工具。"""
-        return await self.execute_func(**kwargs)
-
-    def get_tool_definition(self) -> Dict[str, Any]:
-        """获取规范化的工具定义。
-
-        Returns:
-            Dict[str, Any]: 统一工具定义字典。
-        """
-        legacy_parameters: list[tuple[str, ToolParamType, str, bool, list[str] | None]] = []
-
-        for param in self.parameters:
-            param_name = param.get("name", "")
-            param_type_str = param.get("type", "string").lower()
-            param_desc = param.get("description", "")
-            is_required = param.get("required", False)
-            enum_values = param.get("enum", None)
-
-            # 转换类型字符串到ToolParamType
-            type_mapping = {
-                "string": ToolParamType.STRING,
-                "integer": ToolParamType.INTEGER,
-                "int": ToolParamType.INTEGER,
-                "float": ToolParamType.FLOAT,
-                "boolean": ToolParamType.BOOLEAN,
-                "bool": ToolParamType.BOOLEAN,
-            }
-            param_type = type_mapping.get(param_type_str, ToolParamType.STRING)
-
-            legacy_parameters.append((param_name, param_type, param_desc, is_required, enum_values))
-
-        normalized_option = normalize_tool_option(
-            {
-                "name": self.name,
-                "description": self.description,
-                "parameters": legacy_parameters,
-            }
-        )
-        return {
-            "name": normalized_option.name,
-            "description": normalized_option.description,
-            "parameters_schema": normalized_option.parameters_schema,
-        }
-
-
-class MemoryRetrievalToolRegistry:
-    """工具注册器"""
-
-    def __init__(self) -> None:
-        """初始化工具注册器。"""
-        self.tools: Dict[str, MemoryRetrievalTool] = {}
-
-    def register_tool(self, tool: MemoryRetrievalTool) -> None:
-        """注册工具"""
-        if tool.name in self.tools:
-            logger.debug(f"记忆检索工具 {tool.name} 已存在，跳过重复注册")
-            return
-        self.tools[tool.name] = tool
-        logger.info(f"注册记忆检索工具: {tool.name}")
-
-    def get_tool(self, name: str) -> Optional[MemoryRetrievalTool]:
-        """获取工具"""
-        return self.tools.get(name)
-
-    def get_all_tools(self) -> Dict[str, MemoryRetrievalTool]:
-        """获取所有工具"""
-        return self.tools.copy()
-
-    def get_tools_description(self) -> str:
-        """获取所有工具的描述，用于prompt"""
-        descriptions = []
-        for i, tool in enumerate(self.tools.values(), 1):
-            descriptions.append(f"{i}. {tool.get_tool_description()}")
-        return "\n".join(descriptions)
-
-    def get_action_types_list(self) -> str:
-        """获取所有动作类型的列表，用于prompt（已废弃，保留用于兼容）"""
-        action_types = [tool.name for tool in self.tools.values()]
-        action_types.append("final_answer")
-        action_types.append("no_answer")
-        return " 或 ".join([f'"{at}"' for at in action_types])
-
-    def get_tool_definitions(self) -> List[Dict[str, Any]]:
-        """获取所有工具的定义列表，用于LLM function calling
-
-        Returns:
-            List[Dict[str, Any]]: 工具定义列表，每个元素是一个工具定义字典
-        """
-        return [tool.get_tool_definition() for tool in self.tools.values()]
-
-
-# 全局工具注册器实例
-_tool_registry = MemoryRetrievalToolRegistry()
-
-
-def register_memory_retrieval_tool(
-    name: str,
-    description: str,
-    parameters: List[Dict[str, Any]],
-    execute_func: Callable[..., Awaitable[str]],
-) -> None:
-    """注册记忆检索工具的便捷函数。
-
-    Args:
-        name: 工具名称。
-        description: 工具描述。
-        parameters: 参数定义列表。
-        execute_func: 执行函数。
-    """
-    tool = MemoryRetrievalTool(name, description, parameters, execute_func)
-    _tool_registry.register_tool(tool)
-
-
-def get_tool_registry() -> MemoryRetrievalToolRegistry:
-    """获取工具注册器实例"""
-    return _tool_registry