better:优化分割,优化表达使用,优化Planner选择和联动,优化记忆总结,优化回复Log

This commit is contained in:
SengokuCola
2025-12-18 10:52:58 +08:00
parent 3ea775af92
commit 1e159213cf
9 changed files with 252 additions and 58 deletions

View File

@@ -15,12 +15,15 @@ from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
from src.chat.utils.chat_message_builder import (
build_readable_messages_with_id,
get_raw_msg_before_timestamp_with_chat,
replace_user_references,
)
from src.chat.utils.utils import get_chat_type_and_target_info
from src.chat.planner_actions.action_manager import ActionManager
from src.chat.message_receive.chat_stream import get_chat_manager
from src.plugin_system.base.component_types import ActionInfo, ComponentType, ActionActivationType
from src.plugin_system.core.component_registry import component_registry
from src.plugin_system.apis.message_api import translate_pid_to_description
from src.person_info.person_info import Person
if TYPE_CHECKING:
from src.common.data_models.info_data_model import TargetPersonInfo
@@ -68,7 +71,8 @@ no_reply
{moderation_prompt}
target_message_id为必填表示触发消息的id
请选择所有符合使用要求的action动作用json格式输出用```json包裹如果输出多个json每个json都要单独一行放在同一个```json代码块内:
请选择所有符合使用要求的action每个动作最多选择一次,但是可以选择多个动作;
动作用json格式输出用```json包裹如果输出多个json每个json都要单独一行放在同一个```json代码块内:
**示例**
// 理由文本(简短)
```json
@@ -155,11 +159,41 @@ class ActionPlanner:
logger.warning(f"{self.log_prefix}planner理由引用 {msg_id} 未找到对应消息,保持原样")
return msg_id
msg_text = (message.processed_plain_text or message.display_message or "").strip()
msg_text = (message.processed_plain_text or "").strip()
if not msg_text:
logger.warning(f"{self.log_prefix}planner理由引用 {msg_id} 的消息内容为空,保持原样")
return msg_id
# 替换 [picid:xxx] 为 [图片:描述]
pic_pattern = r"\[picid:([^\]]+)\]"
def replace_pic_id(pic_match: re.Match) -> str:
pic_id = pic_match.group(1)
description = translate_pid_to_description(pic_id)
return f"[图片:{description}]"
msg_text = re.sub(pic_pattern, replace_pic_id, msg_text)
# 替换用户引用格式:回复<aaa:bbb> 和 @<aaa:bbb>
platform = getattr(message, "user_info", None) and message.user_info.platform or getattr(message, "chat_info", None) and message.chat_info.platform or "qq"
msg_text = replace_user_references(msg_text, platform, replace_bot_name=True)
# 替换单独的 <用户名:用户ID> 格式replace_user_references 已处理回复<和@<格式)
# 匹配所有 <aaa:bbb> 格式,由于 replace_user_references 已经替换了回复<和@<格式,
# 这里匹配到的应该都是单独的格式
user_ref_pattern = r"<([^:<>]+):([^:<>]+)>"
def replace_user_ref(user_match: re.Match) -> str:
user_name = user_match.group(1)
user_id = user_match.group(2)
try:
# 检查是否是机器人自己
if user_id == global_config.bot.qq_account:
return f"{global_config.bot.nickname}(你)"
person = Person(platform=platform, user_id=user_id)
return person.person_name or user_name
except Exception:
# 如果解析失败,使用原始昵称
return user_name
msg_text = re.sub(user_ref_pattern, replace_user_ref, msg_text)
preview = msg_text if len(msg_text) <= 100 else f"{msg_text[:97]}..."
logger.info(f"{self.log_prefix}planner理由引用 {msg_id} -> 消息({preview}")
return f"消息({msg_text}"

View File

@@ -98,8 +98,10 @@ class DefaultReplyer:
available_actions = {}
try:
# 3. 构建 Prompt
timing_logs = []
almost_zero_str = ""
with Timer("构建Prompt", {}): # 内部计时器,可选保留
prompt, selected_expressions = await self.build_prompt_reply_context(
prompt, selected_expressions, timing_logs, almost_zero_str = await self.build_prompt_reply_context(
extra_info=extra_info,
available_actions=available_actions,
chosen_actions=chosen_actions,
@@ -136,9 +138,22 @@ class DefaultReplyer:
content, reasoning_content, model_name, tool_call = await self.llm_generate_content(prompt)
# logger.debug(f"replyer生成内容: {content}")
logger.info(f"模型: [{model_name}][思考等级:{think_level}]生成内容: {content}")
if global_config.debug.show_replyer_reasoning and reasoning_content:
logger.info(f"模型: [{model_name}][思考等级:{think_level}]生成推理:\n{reasoning_content}")
# 统一输出所有日志信息使用try-except确保即使某个步骤出错也能输出
try:
# 1. 输出回复准备日志
timing_log_str = f"回复准备: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s" if timing_logs or almost_zero_str else "回复准备: 无计时信息"
logger.info(timing_log_str)
# 2. 输出Prompt日志
if global_config.debug.show_replyer_prompt:
logger.info(f"\n{prompt}\n")
else:
logger.debug(f"\nreplyer_Prompt:{prompt}\n")
# 3. 输出模型生成内容和推理日志
logger.info(f"模型: [{model_name}][思考等级:{think_level}]生成内容: {content}")
if global_config.debug.show_replyer_reasoning and reasoning_content:
logger.info(f"模型: [{model_name}][思考等级:{think_level}]生成推理:\n{reasoning_content}")
except Exception as e:
logger.warning(f"输出日志时出错: {e}")
llm_response.content = content
llm_response.reasoning = reasoning_content
@@ -162,6 +177,21 @@ class DefaultReplyer:
except Exception as llm_e:
# 精简报错信息
logger.error(f"LLM 生成失败: {llm_e}")
# 即使LLM生成失败也尝试输出已收集的日志信息
try:
# 1. 输出回复准备日志
timing_log_str = f"回复准备: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s" if timing_logs or almost_zero_str else "回复准备: 无计时信息"
logger.info(timing_log_str)
# 2. 输出Prompt日志
if global_config.debug.show_replyer_prompt:
logger.info(f"\n{prompt}\n")
else:
logger.debug(f"\nreplyer_Prompt:{prompt}\n")
# 3. 输出模型生成失败信息
logger.info("模型生成失败,无法输出生成内容和推理")
except Exception as log_e:
logger.warning(f"输出日志时出错: {log_e}")
return False, llm_response # LLM 调用失败则无法生成回复
return True, llm_response
@@ -705,7 +735,7 @@ class DefaultReplyer:
enable_tool: bool = True,
reply_time_point: Optional[float] = time.time(),
think_level: int = 1,
) -> Tuple[str, List[int]]:
) -> Tuple[str, List[int], List[str], str]:
"""
构建回复器上下文
@@ -838,7 +868,8 @@ class DefaultReplyer:
continue
timing_logs.append(f"{chinese_name}: {duration:.1f}s")
logger.info(f"回复准备: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s")
# 不再在这里输出日志,而是返回给调用者统一输出
# logger.info(f"回复准备: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s")
expression_habits_block, selected_expressions = results_dict["expression_habits"]
expression_habits_block: str
@@ -915,7 +946,7 @@ class DefaultReplyer:
memory_retrieval=memory_retrieval,
chat_prompt=chat_prompt_block,
planner_reasoning=planner_reasoning,
), selected_expressions
), selected_expressions, timing_logs, almost_zero_str
async def build_prompt_rewrite_context(
self,
@@ -1046,10 +1077,11 @@ class DefaultReplyer:
# 直接使用已初始化的模型实例
# logger.info(f"\n{prompt}\n")
if global_config.debug.show_replyer_prompt:
logger.info(f"\n{prompt}\n")
else:
logger.debug(f"\nreplyer_Prompt:{prompt}\n")
# 不再在这里输出日志,而是返回给调用者统一输出
# if global_config.debug.show_replyer_prompt:
# logger.info(f"\n{prompt}\n")
# else:
# logger.debug(f"\nreplyer_Prompt:{prompt}\n")
content, (reasoning_content, model_name, tool_calls) = await self.express_model.generate_response_async(
prompt

View File

@@ -198,21 +198,21 @@ def split_into_sentences_w_remove_punctuation(text: str) -> list[str]:
List[str]: 分割和合并后的句子列表
"""
# 预处理:处理多余的换行符
# 1. 将连续的换行符替换为单个换行符
# 1. 将连续的换行符替换为单个换行符(保留换行符用于分割)
text = re.sub(r"\n\s*\n+", "\n", text)
# 2. 处理换行符和其他分隔符的组合
text = re.sub(r"\n\s*([,。;\s])", r"\1", text)
text = re.sub(r"([,。;\s])\s*\n", r"\1", text)
# 2. 处理换行符和其他分隔符的组合(保留换行符,删除其他分隔符)
text = re.sub(r"\n\s*([,。;\s])", r"\n\1", text)
text = re.sub(r"([,。;\s])\s*\n", r"\1\n", text)
# 处理两个汉字中间的换行符
text = re.sub(r"([\u4e00-\u9fff])\n([\u4e00-\u9fff])", r"\1。\2", text)
# 处理两个汉字中间的换行符(保留换行符,不替换为句号,让换行符强制分割)
# text = re.sub(r"([\u4e00-\u9fff])\n([\u4e00-\u9fff])", r"\1。\2", text) # 注释掉,保留换行符用于分割
len_text = len(text)
if len_text < 3:
return list(text) if random.random() < 0.01 else [text]
# 定义分隔符
separators = {"", ",", " ", "", ";"}
# 定义分隔符(包含换行符,换行符必须强制分割)
separators = {"", ",", " ", "", ";", "\n"}
segments = []
current_segment = ""
@@ -221,13 +221,27 @@ def split_into_sentences_w_remove_punctuation(text: str) -> list[str]:
while i < len(text):
char = text[i]
if char in separators:
# 检查分割条件:如果空格左右都是英文字母、数字,或数字和英文之间,则不分割(仅对空格应用此规则)
can_split = True
if 0 < i < len(text) - 1:
prev_char = text[i - 1]
next_char = text[i + 1]
# 只对空格应用"不分割数字和数字、数字和英文、英文和数字、英文和英文之间的空格"规则
if char == " ":
# 换行符必须强制分割,不受其他规则影响
if char == "\n":
can_split = True
else:
# 检查分割条件
can_split = True
# 检查分隔符左右是否有冒号(中英文),如果有则不分割
if i > 0:
prev_char = text[i - 1]
if prev_char in {":", ""}:
can_split = False
if i < len(text) - 1:
next_char = text[i + 1]
if next_char in {":", ""}:
can_split = False
# 如果左右没有冒号,再检查空格的特殊情况
if can_split and char == " " and i > 0 and i < len(text) - 1:
prev_char = text[i - 1]
next_char = text[i + 1]
# 不分割数字和数字、数字和英文、英文和数字、英文和英文之间的空格
prev_is_alnum = prev_char.isdigit() or is_english_letter(prev_char)
next_is_alnum = next_char.isdigit() or is_english_letter(next_char)
if prev_is_alnum and next_is_alnum:
@@ -237,8 +251,8 @@ def split_into_sentences_w_remove_punctuation(text: str) -> list[str]:
# 只有当当前段不为空时才添加
if current_segment:
segments.append((current_segment, char))
# 如果当前段为空,但分隔符是空格,则也添加一个空段(保留空格
elif char == " ":
# 如果当前段为空,但分隔符是空格或换行符,则也添加一个空段(保留分隔符
elif char in {" ", "\n"}:
segments.append(("", char))
current_segment = ""
else: