优化对上下文的压缩,新增表达方式快速版本

This commit is contained in:
SengokuCola
2026-04-10 12:23:12 +08:00
parent 65276cf763
commit 8bd1c6ee11
15 changed files with 344 additions and 111 deletions

View File

@@ -3,6 +3,7 @@ You need to focus on the dialogue between {bot_name} (AI) and different users so
[Reference Information] [Reference Information]
{identity} {identity}
{time_block}
[End of Reference Information] [End of Reference Information]
You need to analyze based on the provided reference information, the current scenario, and the output rules. You need to analyze based on the provided reference information, the current scenario, and the output rules.

View File

@@ -3,6 +3,7 @@
【参考情報】 【参考情報】
{identity} {identity}
{time_block}
【参考情報ここまで】 【参考情報ここまで】
提供された参考情報、現在の状況、そして出力ルールに基づいて分析してください。 提供された参考情報、現在の状況、そして出力ルールに基づいて分析してください。

View File

@@ -3,6 +3,7 @@
【参考信息】 【参考信息】
{bot_name}的人设:{identity} {bot_name}的人设:{identity}
{time_block}
【参考信息结束】 【参考信息结束】
请你对当前场景和输出规则来进行分析,你可以参考参考信息中的内容,但不用过分遵守,仅供参考。 请你对当前场景和输出规则来进行分析,你可以参考参考信息中的内容,但不用过分遵守,仅供参考。

View File

@@ -1,8 +1,9 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime from datetime import datetime
import json
from typing import Any, Awaitable, Callable, List, Optional from typing import Any, Awaitable, Callable, List, Optional
import json
from json_repair import repair_json from json_repair import repair_json
from sqlmodel import select from sqlmodel import select
@@ -30,7 +31,7 @@ class MaisakaExpressionSelectionResult:
class MaisakaExpressionSelector: class MaisakaExpressionSelector:
"""负责在 replyer 侧完成表达方式筛选与子代理选择。""" """负责在 replyer 侧完成表达方式筛选与子代理二次选择。"""
def _can_use_expressions(self, session_id: str) -> bool: def _can_use_expressions(self, session_id: str) -> bool:
try: try:
@@ -40,6 +41,13 @@ class MaisakaExpressionSelector:
logger.error(f"检查表达方式使用开关失败: {exc}") logger.error(f"检查表达方式使用开关失败: {exc}")
return False return False
def _can_use_advanced_chosen(self, session_id: str) -> bool:
    """Tell whether sub-agent based secondary selection is enabled for a session.

    Args:
        session_id: Identifier of the chat session being checked.

    Returns:
        The flag reported by ``ExpressionConfigUtils``. If the lookup raises,
        the error is logged and ``False`` is returned.
    """
    enabled = False
    try:
        enabled = ExpressionConfigUtils.get_expression_advanced_chosen_for_chat(session_id)
    except Exception as exc:
        # Best effort: a broken config lookup must not break the reply path.
        logger.error(f"检查表达方式二次选择开关失败: {exc}")
    return enabled
@staticmethod @staticmethod
def _is_global_expression_group_marker(platform: str, item_id: str) -> bool: def _is_global_expression_group_marker(platform: str, item_id: str) -> bool:
return platform == "*" and item_id == "*" return platform == "*" and item_id == "*"
@@ -101,7 +109,7 @@ class MaisakaExpressionSelector:
"id": expression.id, "id": expression.id,
"situation": expression.situation, "situation": expression.situation,
"style": expression.style, "style": expression.style,
"count": expression.count if getattr(expression, "count", None) is not None else 1, "count": expression.count if expression.count is not None else 1,
} }
for expression in expressions for expression in expressions
if expression.id is not None and expression.situation and expression.style if expression.id is not None and expression.situation and expression.style
@@ -185,7 +193,7 @@ class MaisakaExpressionSelector:
"你只负责根据最近聊天上下文,为这一次可见回复挑选最合适的表达方式。\n" "你只负责根据最近聊天上下文,为这一次可见回复挑选最合适的表达方式。\n"
"请只从下面候选中选择 0 到 3 条最适合当前语境的表达方式。\n" "请只从下面候选中选择 0 到 3 条最适合当前语境的表达方式。\n"
"优先考虑自然、贴合上下文、不生硬、不模板化。\n" "优先考虑自然、贴合上下文、不生硬、不模板化。\n"
"如果没有明显合适的,就返回空列表\n" "如果没有明显合适的,就返回空数组\n"
'严格只输出 JSON对象格式为 {"selected_ids":[123,456]}。\n\n' '严格只输出 JSON对象格式为 {"selected_ids":[123,456]}。\n\n'
f"最近上下文:\n{history_block}\n\n" f"最近上下文:\n{history_block}\n\n"
f"目标消息:{target_text or ''}\n" f"目标消息:{target_text or ''}\n"
@@ -222,6 +230,32 @@ class MaisakaExpressionSelector:
break break
return selected_ids return selected_ids
def _build_direct_selection_result(
    self,
    *,
    session_id: str,
    candidates: List[dict[str, Any]],
) -> MaisakaExpressionSelectionResult:
    """Build a selection result that injects the candidates directly.

    This is the fast path used when secondary (sub-agent) selection is
    disabled: every candidate carrying an integer ``id`` is selected as-is.

    Args:
        session_id: Identifier of the chat session, used only for logging.
        candidates: Candidate expression dicts; entries whose ``id`` is not
            an ``int`` are ignored.

    Returns:
        A result holding the rendered expression-habits block and the ids of
        the selected expressions.
    """
    # Filter once so the id list and the expression list always describe the
    # same entries, and no per-candidate list membership scan is needed.
    selected_expressions = [
        candidate
        for candidate in candidates
        if isinstance(candidate.get("id"), int)
    ]
    selected_ids = [candidate["id"] for candidate in selected_expressions]

    self._update_last_active_time(selected_ids)
    logger.info(
        f"表达方式直接注入session_id={session_id} 已选数={len(selected_ids)} "
        f"selected_ids={selected_ids!r} 已选预览={self._format_candidate_preview(selected_expressions)}"
    )
    return MaisakaExpressionSelectionResult(
        expression_habits=self._build_expression_habits_block(selected_expressions),
        selected_expression_ids=selected_ids,
    )
def _update_last_active_time(self, selected_ids: List[int]) -> None: def _update_last_active_time(self, selected_ids: List[int]) -> None:
if not selected_ids: if not selected_ids:
return return
@@ -247,15 +281,22 @@ class MaisakaExpressionSelector:
if not self._can_use_expressions(session_id): if not self._can_use_expressions(session_id):
logger.info(f"表达方式选择已跳过当前会话未启用表达方式session_id={session_id}") logger.info(f"表达方式选择已跳过当前会话未启用表达方式session_id={session_id}")
return MaisakaExpressionSelectionResult() return MaisakaExpressionSelectionResult()
if sub_agent_runner is None:
logger.info(f"表达方式选择已跳过:缺少 sub_agent_runnersession_id={session_id}")
return MaisakaExpressionSelectionResult()
candidates = self._load_expression_candidates(session_id) candidates = self._load_expression_candidates(session_id)
if not candidates: if not candidates:
logger.info(f"表达方式选择已跳过本地候选不足session_id={session_id}") logger.info(f"表达方式选择已跳过本地候选不足session_id={session_id}")
return MaisakaExpressionSelectionResult() return MaisakaExpressionSelectionResult()
if not self._can_use_advanced_chosen(session_id):
return self._build_direct_selection_result(
session_id=session_id,
candidates=candidates,
)
if sub_agent_runner is None:
logger.info(f"表达方式选择已跳过:缺少 sub_agent_runnersession_id={session_id}")
return MaisakaExpressionSelectionResult()
logger.info( logger.info(
f"表达方式选择开始session_id={session_id} 候选数={len(candidates)} " f"表达方式选择开始session_id={session_id} 候选数={len(candidates)} "
f"候选预览={self._format_candidate_preview(candidates)}" f"候选预览={self._format_candidate_preview(candidates)}"
@@ -273,10 +314,9 @@ class MaisakaExpressionSelector:
logger.exception("表达方式选择子代理执行失败") logger.exception("表达方式选择子代理执行失败")
return MaisakaExpressionSelectionResult() return MaisakaExpressionSelectionResult()
# logger.info(f"表达方式子代理原始结果session_id={session_id} response={raw_response!r}")
selected_ids = self._parse_selected_ids(raw_response, candidates) selected_ids = self._parse_selected_ids(raw_response, candidates)
if not selected_ids: if not selected_ids:
logger.info(f"表达方式选择完成但未命中session_id={session_id}") logger.info(f"表达方式选择完成但未命中session_id={session_id}")
return MaisakaExpressionSelectionResult() return MaisakaExpressionSelectionResult()
selected_expressions = [candidate for candidate in candidates if candidate.get("id") in selected_ids] selected_expressions = [candidate for candidate in candidates if candidate.get("id") in selected_ids]

View File

@@ -1,7 +1,7 @@
from typing import Optional from typing import Optional
from src.config.config import global_config
from src.common.logger import get_logger from src.common.logger import get_logger
from src.config.config import global_config
logger = get_logger("common_utils") logger = get_logger("common_utils")
@@ -10,23 +10,14 @@ class TempMethodsExpression:
"""用于临时存放一些方法的类""" """用于临时存放一些方法的类"""
@staticmethod @staticmethod
def get_expression_config_for_chat(chat_stream_id: Optional[str] = None) -> tuple[bool, bool, bool]: def _find_expression_config_item(chat_stream_id: Optional[str] = None):
"""
根据聊天流ID获取表达配置
Args:
chat_stream_id: 聊天流ID格式为哈希值
Returns:
tuple: (是否使用表达, 是否学习表达, 是否启用jargon学习)
"""
if not global_config.expression.learning_list: if not global_config.expression.learning_list:
return True, True, True return None
if chat_stream_id: if chat_stream_id:
for config_item in global_config.expression.learning_list: for config_item in global_config.expression.learning_list:
if not config_item.platform and not config_item.item_id: if not config_item.platform and not config_item.item_id:
continue # 这是全局的 continue
stream_id = TempMethodsExpression._get_stream_id( stream_id = TempMethodsExpression._get_stream_id(
config_item.platform, config_item.platform,
str(config_item.item_id), str(config_item.item_id),
@@ -34,14 +25,44 @@ class TempMethodsExpression:
) )
if stream_id is None: if stream_id is None:
continue continue
if stream_id == chat_stream_id: if stream_id != chat_stream_id:
continue continue
return config_item.use_expression, config_item.enable_learning, config_item.enable_jargon_learning return config_item
for config_item in global_config.expression.learning_list: for config_item in global_config.expression.learning_list:
if not config_item.platform and not config_item.item_id: if not config_item.platform and not config_item.item_id:
return config_item.use_expression, config_item.enable_learning, config_item.enable_jargon_learning return config_item
return True, True, True return None
@staticmethod
def get_expression_advanced_chosen_for_chat(chat_stream_id: Optional[str] = None) -> bool:
    """Return whether secondary expression selection is enabled for a chat stream.

    Args:
        chat_stream_id: Chat stream ID used to look up the configuration entry.

    Returns:
        The ``advanced_chosen`` value of the matching configuration entry, or
        ``False`` when the lookup finds no entry.
    """
    matched_item = TempMethodsExpression._find_expression_config_item(chat_stream_id)
    return False if matched_item is None else matched_item.advanced_chosen
@staticmethod
def get_expression_config_for_chat(chat_stream_id: Optional[str] = None) -> tuple[bool, bool, bool]:
    """Return the expression switches that apply to a chat stream.

    Args:
        chat_stream_id: Chat stream ID used to look up the configuration entry.

    Returns:
        A tuple ``(use_expression, enable_learning, enable_jargon_learning)``.
        All three are ``True`` when the lookup finds no entry.
    """
    matched_item = TempMethodsExpression._find_expression_config_item(chat_stream_id)
    if matched_item is not None:
        use_expression = matched_item.use_expression
        enable_learning = matched_item.enable_learning
        enable_jargon_learning = matched_item.enable_jargon_learning
        return use_expression, enable_learning, enable_jargon_learning
    return True, True, True
@staticmethod @staticmethod
def _get_stream_id( def _get_stream_id(
@@ -50,15 +71,15 @@ class TempMethodsExpression:
is_group: bool = False, is_group: bool = False,
) -> Optional[str]: ) -> Optional[str]:
""" """
根据平台、ID字符串和是否为群聊生成聊天流ID 根据平台、ID 字符串和是否为群聊生成聊天流 ID
Args: Args:
platform: 平台名称 platform: 平台名称
id_str: 用户或群组的原始ID字符串 id_str: 用户或群组的原始 ID 字符串
is_group: 是否为群聊 is_group: 是否为群聊
Returns: Returns:
str: 生成的聊天流ID哈希值 str: 生成的聊天流 ID哈希值
""" """
try: try:
from src.common.utils.utils_session import SessionUtils from src.common.utils.utils_session import SessionUtils
@@ -68,5 +89,5 @@ class TempMethodsExpression:
else: else:
return SessionUtils.calculate_session_id(platform, user_id=str(id_str)) return SessionUtils.calculate_session_id(platform, user_id=str(id_str))
except Exception as e: except Exception as e:
logger.error(f"生成聊天流ID失败: {e}") logger.error(f"生成聊天流 ID 失败: {e}")
return None return None

View File

@@ -10,24 +10,14 @@ logger = get_logger("config_utils")
class ExpressionConfigUtils: class ExpressionConfigUtils:
@staticmethod @staticmethod
def get_expression_config_for_chat(session_id: Optional[str] = None) -> tuple[bool, bool, bool]: def _find_expression_config_item(session_id: Optional[str] = None):
# sourcery skip: use-next
"""
根据聊天会话ID获取表达配置
Args:
session_id: 聊天会话ID格式为哈希值
Returns:
tuple: (是否使用表达, 是否学习表达, 是否启用jargon学习)
"""
if not global_config.expression.learning_list: if not global_config.expression.learning_list:
return True, True, True return None
if session_id: if session_id:
for config_item in global_config.expression.learning_list: for config_item in global_config.expression.learning_list:
if not config_item.platform and not config_item.item_id: if not config_item.platform and not config_item.item_id:
continue # 这是全局的 continue
stream_id = ExpressionConfigUtils._get_stream_id( stream_id = ExpressionConfigUtils._get_stream_id(
config_item.platform, config_item.platform,
str(config_item.item_id), str(config_item.item_id),
@@ -35,28 +25,59 @@ class ExpressionConfigUtils:
) )
if stream_id is None: if stream_id is None:
continue continue
if stream_id == session_id: if stream_id != session_id:
continue continue
return config_item.use_expression, config_item.enable_learning, config_item.enable_jargon_learning return config_item
for config_item in global_config.expression.learning_list: for config_item in global_config.expression.learning_list:
if not config_item.platform and not config_item.item_id: if not config_item.platform and not config_item.item_id:
return config_item.use_expression, config_item.enable_learning, config_item.enable_jargon_learning return config_item
return True, True, True return None
@staticmethod
def get_expression_advanced_chosen_for_chat(session_id: Optional[str] = None) -> bool:
    """Return whether secondary expression selection is enabled for a session.

    Args:
        session_id: Chat session ID used to look up the configuration entry.

    Returns:
        The ``advanced_chosen`` value of the matching configuration entry, or
        ``False`` when the lookup finds no entry.
    """
    matched_item = ExpressionConfigUtils._find_expression_config_item(session_id)
    return False if matched_item is None else matched_item.advanced_chosen
@staticmethod
def get_expression_config_for_chat(session_id: Optional[str] = None) -> tuple[bool, bool, bool]:
    # sourcery skip: use-next
    """Return the expression switches that apply to a chat session.

    Args:
        session_id: Chat session ID used to look up the configuration entry.

    Returns:
        A tuple ``(use_expression, enable_learning, enable_jargon_learning)``.
        All three are ``True`` when the lookup finds no entry.
    """
    matched_item = ExpressionConfigUtils._find_expression_config_item(session_id)
    if matched_item is not None:
        use_expression = matched_item.use_expression
        enable_learning = matched_item.enable_learning
        enable_jargon_learning = matched_item.enable_jargon_learning
        return use_expression, enable_learning, enable_jargon_learning
    return True, True, True
@staticmethod @staticmethod
def _get_stream_id(platform: str, id_str: str, is_group: bool = False) -> Optional[str]: def _get_stream_id(platform: str, id_str: str, is_group: bool = False) -> Optional[str]:
# sourcery skip: remove-unnecessary-cast # sourcery skip: remove-unnecessary-cast
""" """
根据平台、ID字符串和是否为群聊生成聊天流ID 根据平台、ID 字符串和是否为群聊生成聊天流 ID
Args: Args:
platform: 平台名称 platform: 平台名称
id_str: 用户或群组的原始ID字符串 id_str: 用户或群组的原始 ID 字符串
is_group: 是否为群聊 is_group: 是否为群聊
Returns: Returns:
str: 生成的聊天流ID哈希值 str: 生成的聊天流 ID哈希值
""" """
try: try:
from src.common.utils.utils_session import SessionUtils from src.common.utils.utils_session import SessionUtils
@@ -66,7 +87,7 @@ class ExpressionConfigUtils:
else: else:
return SessionUtils.calculate_session_id(platform, user_id=str(id_str)) return SessionUtils.calculate_session_id(platform, user_id=str(id_str))
except Exception as e: except Exception as e:
logger.error(f"生成聊天流ID失败: {e}") logger.error(f"生成聊天流 ID 失败: {e}")
return None return None
@@ -91,7 +112,7 @@ class ChatConfigUtils:
else: else:
rule_session_id = SessionUtils.calculate_session_id(rule.platform, user_id=str(rule.item_id)) rule_session_id = SessionUtils.calculate_session_id(rule.platform, user_id=str(rule.item_id))
if rule_session_id != session_id: if rule_session_id != session_id:
continue # 不匹配的会话ID跳过 continue # 不匹配的会话 ID跳过
parsed_range = ChatConfigUtils.parse_range(rule.time) parsed_range = ChatConfigUtils.parse_range(rule.time)
if not parsed_range: if not parsed_range:
continue # 无法解析的时间范围,跳过 continue # 无法解析的时间范围,跳过
@@ -102,7 +123,7 @@ class ChatConfigUtils:
else: # 跨天的时间范围 else: # 跨天的时间范围
in_range = now_min >= start_min or now_min <= end_min in_range = now_min >= start_min or now_min <= end_min
if in_range: if in_range:
return rule.value or 0.0 # 如果规则生效但没有设置值返回0.0 return rule.value or 0.0 # 如果规则生效但没有设置值,返回 0.0
# 没有匹配到会话相关的规则,继续匹配全局规则 # 没有匹配到会话相关的规则,继续匹配全局规则
for rule in global_config.chat.talk_value_rules: for rule in global_config.chat.talk_value_rules:
@@ -118,7 +139,7 @@ class ChatConfigUtils:
else: # 跨天的时间范围 else: # 跨天的时间范围
in_range = now_min >= start_min or now_min <= end_min in_range = now_min >= start_min or now_min <= end_min
if in_range: if in_range:
return rule.value or 0.0 # 如果规则生效但没有设置值返回0.0 return rule.value or 0.0 # 如果规则生效但没有设置值,返回 0.0
return result # 如果没有任何规则生效,返回默认值 return result # 如果没有任何规则生效,返回默认值
@staticmethod @staticmethod

View File

@@ -54,7 +54,7 @@ CONFIG_DIR: Path = PROJECT_ROOT / "config"
BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute() BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute()
MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute() MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute()
MMC_VERSION: str = "1.0.0" MMC_VERSION: str = "1.0.0"
CONFIG_VERSION: str = "8.5.4" CONFIG_VERSION: str = "8.5.5"
MODEL_CONFIG_VERSION: str = "1.13.1" MODEL_CONFIG_VERSION: str = "1.13.1"
logger = get_logger("config") logger = get_logger("config")

View File

@@ -560,6 +560,15 @@ class LearningItem(ConfigBase):
) )
"""是否启用jargon学习""" """是否启用jargon学习"""
advanced_chosen: bool = Field(
default=False,
json_schema_extra={
"x-widget": "switch",
"x-icon": "sparkles",
},
)
"""是否启用基于子代理的二次表达方式选择"""
class ExpressionGroup(ConfigBase): class ExpressionGroup(ConfigBase):
"""表达互通组配置类,若列表为空代表全局共享""" """表达互通组配置类,若列表为空代表全局共享"""
@@ -589,6 +598,7 @@ class ExpressionConfig(ConfigBase):
use_expression=True, use_expression=True,
enable_learning=True, enable_learning=True,
enable_jargon_learning=True, enable_jargon_learning=True,
advanced_chosen=False,
) )
], ],
json_schema_extra={ json_schema_extra={

View File

@@ -386,7 +386,24 @@ class ToolRegistry:
for provider in self._providers: for provider in self._providers:
provider_specs = await provider.list_tools() provider_specs = await provider.list_tools()
if any(spec.name == invocation.tool_name and spec.enabled for spec in provider_specs): if any(spec.name == invocation.tool_name and spec.enabled for spec in provider_specs):
return await provider.invoke(invocation, context) try:
return await provider.invoke(invocation, context)
except Exception as exc:
logger.exception(
"工具调用异常: tool=%s provider=%s",
invocation.tool_name,
getattr(provider, "provider_name", ""),
)
error_message = str(exc).strip()
if error_message:
error_message = f"工具 {invocation.tool_name} 调用失败:{exc.__class__.__name__}: {error_message}"
else:
error_message = f"工具 {invocation.tool_name} 调用失败:{exc.__class__.__name__}"
return ToolExecutionResult(
tool_name=invocation.tool_name,
success=False,
error_message=error_message,
)
return ToolExecutionResult( return ToolExecutionResult(
tool_name=invocation.tool_name, tool_name=invocation.tool_name,

View File

@@ -267,7 +267,7 @@ class ExpressionLearner:
return normalized_entries return normalized_entries
def get_pending_count(self, message_cache: List["SessionMessage"]) -> int: def get_pending_count(self, message_cache: List["SessionMessage"]) -> int:
"""??????????????""" """获取待处理消息数量"""
return max(0, len(message_cache) - self._last_processed_index) return max(0, len(message_cache) - self._last_processed_index)
async def learn( async def learn(
@@ -275,10 +275,10 @@ class ExpressionLearner:
message_cache: List["SessionMessage"], message_cache: List["SessionMessage"],
jargon_miner: Optional["JargonMiner"] = None, jargon_miner: Optional["JargonMiner"] = None,
) -> bool: ) -> bool:
"""?????????????????????""" """学习表达方式"""
pending_messages = message_cache[self._last_processed_index :] pending_messages = message_cache[self._last_processed_index :]
if not pending_messages: if not pending_messages:
logger.debug("??????????????????") logger.debug("没有待处理消息")
return False return False
if len(pending_messages) < self.min_messages_for_extraction: if len(pending_messages) < self.min_messages_for_extraction:
return False return False
@@ -304,7 +304,7 @@ class ExpressionLearner:
) )
response = generation_result.response response = generation_result.response
except Exception as e: except Exception as e:
logger.error(f"????????????????{e}") logger.error(f"学习表达方式失败: {e}")
return False return False
expressions: List[Tuple[str, str, str]] expressions: List[Tuple[str, str, str]]
@@ -319,14 +319,14 @@ class ExpressionLearner:
continue continue
jargon_entries.append((content, source_id)) jargon_entries.append((content, source_id))
existing_contents.add(content) existing_contents.add(content)
logger.info(f"??????????{content}") logger.info(f"从缓存中找到黑话: {content}")
if len(expressions) > 20: if len(expressions) > 20:
logger.info(f"?????????? 20 ???????????{len(expressions)}") logger.info(f"表达方式数量超过20: {len(expressions)}")
expressions = [] expressions = []
if len(jargon_entries) > 30: if len(jargon_entries) > 30:
logger.info(f"???????? 30 ???????????{len(jargon_entries)}") logger.info(f"黑话数量超过30: {len(jargon_entries)}")
jargon_entries = [] jargon_entries = []
after_extract_result = await self._get_runtime_manager().invoke_hook( after_extract_result = await self._get_runtime_manager().invoke_hook(
@@ -337,7 +337,7 @@ class ExpressionLearner:
jargon_entries=self._serialize_jargon_entries(jargon_entries), jargon_entries=self._serialize_jargon_entries(jargon_entries),
) )
if after_extract_result.aborted: if after_extract_result.aborted:
logger.info(f"{self.session_id} ?????????? Hook ??") logger.info(f"{self.session_id} 表达方式选择 Hook 中止")
self._last_processed_index = len(message_cache) self._last_processed_index = len(message_cache)
return False return False
@@ -353,21 +353,21 @@ class ExpressionLearner:
await self._process_jargon_entries(jargon_entries, pending_messages, jargon_miner) await self._process_jargon_entries(jargon_entries, pending_messages, jargon_miner)
if not expressions: if not expressions:
logger.info("????????????") logger.info("没有可学习的表达方式")
self._last_processed_index = len(message_cache) self._last_processed_index = len(message_cache)
return False return False
logger.info(f"???? expressions: {expressions}") logger.info(f"可学习的表达方式: {expressions}")
logger.info(f"???? jargon_entries: {jargon_entries}") logger.info(f"可学习的黑话: {jargon_entries}")
learnt_expressions = self._filter_expressions(expressions, pending_messages) learnt_expressions = self._filter_expressions(expressions, pending_messages)
if not learnt_expressions: if not learnt_expressions:
logger.info("????????????") logger.info("没有可学习的表达方式通过过滤")
self._last_processed_index = len(message_cache) self._last_processed_index = len(message_cache)
return False return False
learnt_expressions_str = "\n".join(f"{situation}->{style}" for situation, style in learnt_expressions) learnt_expressions_str = "\n".join(f"{situation}->{style}" for situation, style in learnt_expressions)
logger.info(f"? {self.session_id} ????????\n{learnt_expressions_str}") logger.info(f"{self.session_id} 可学习的表达方式: \n{learnt_expressions_str}")
for situation, style in learnt_expressions: for situation, style in learnt_expressions:
before_upsert_result = await self._get_runtime_manager().invoke_hook( before_upsert_result = await self._get_runtime_manager().invoke_hook(
@@ -377,14 +377,14 @@ class ExpressionLearner:
style=style, style=style,
) )
if before_upsert_result.aborted: if before_upsert_result.aborted:
logger.info(f"{self.session_id} ???????? Hook ??: situation={situation!r}") logger.info(f"{self.session_id} 表达方式写入 Hook 中止: situation={situation!r}")
continue continue
upsert_kwargs = before_upsert_result.kwargs upsert_kwargs = before_upsert_result.kwargs
situation = str(upsert_kwargs.get("situation", situation) or "").strip() situation = str(upsert_kwargs.get("situation", situation) or "").strip()
style = str(upsert_kwargs.get("style", style) or "").strip() style = str(upsert_kwargs.get("style", style) or "").strip()
if not situation or not style: if not situation or not style:
logger.info(f"{self.session_id} ???????? Hook ??????") logger.info(f"{self.session_id} 表达方式写入 Hook 中止: situation={situation!r}")
continue continue
await self._upsert_expression_to_db(situation, style) await self._upsert_expression_to_db(situation, style)

View File

@@ -292,8 +292,15 @@ class MaisakaChatLoopService:
"file_tools_section": tools_section, "file_tools_section": tools_section,
"group_chat_attention_block": self._build_group_chat_attention_block(), "group_chat_attention_block": self._build_group_chat_attention_block(),
"identity": self._personality_prompt, "identity": self._personality_prompt,
"time_block": self._build_time_block(),
} }
@staticmethod
def _build_time_block() -> str:
"""构建当前时间提示块。"""
return f"当前时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
def _build_group_chat_attention_block(self) -> str: def _build_group_chat_attention_block(self) -> str:
"""构建当前聊天场景下的额外注意事项块。""" """构建当前聊天场景下的额外注意事项块。"""

View File

@@ -0,0 +1,125 @@
"""Maisaka 历史消息轮次结束后处理。"""
from dataclasses import dataclass
from .context_messages import AssistantMessage, LLMContextMessage, ToolResultMessage
from .history_utils import drop_leading_orphan_tool_results, drop_orphan_tool_results
# Tool names treated as timing-gate / end-of-turn bookkeeping. An assistant
# message whose tool calls all use these names is eligible for early pruning.
TIMING_HISTORY_TOOL_NAMES = {"continue", "finish", "no_reply", "wait"}
# Fraction of the earliest eligible records removed on each post-processing pass.
EARLY_TRIM_RATIO = 0.2
@dataclass(slots=True)
class HistoryPostProcessResult:
    """Result of the end-of-cycle chat history post-processing."""

    # History after all pruning and trimming steps.
    history: list[LLMContextMessage]
    # Total number of messages removed across all steps.
    removed_count: int
    # Number of remaining messages whose ``count_in_context`` is truthy.
    remaining_context_count: int
def process_chat_history_after_cycle(
    chat_history: list[LLMContextMessage],
    *,
    max_context_size: int,
) -> HistoryPostProcessResult:
    """Prune and trim the chat history once a reasoning cycle has finished.

    The function works on a copy of ``chat_history`` and performs, in order:

    1. removal of the earliest timing-tool records,
    2. removal of the earliest plain assistant thoughts,
    3. removal of orphan tool results,
    4. removal of leading messages until at most ``max_context_size``
       messages with a truthy ``count_in_context`` remain,
    5. removal of orphan tool results left at the head of the history.

    Args:
        chat_history: Current history; the caller's list is not modified.
        max_context_size: Maximum number of context-counted messages to keep.

    Returns:
        The processed history, the total number of removed messages and the
        number of context-counted messages that remain.
    """
    processed_history = list(chat_history)

    removed_timing_tool_count = _remove_early_timing_tool_records(processed_history)
    removed_assistant_thought_count = _remove_early_assistant_thoughts(processed_history)
    processed_history, orphan_removed_count = drop_orphan_tool_results(processed_history)

    remaining_context_count = sum(1 for message in processed_history if message.count_in_context)

    # Scan the prefix that has to go and delete it in one slice operation,
    # instead of shifting the whole list with pop(0) for every removed message.
    overflow = remaining_context_count - max_context_size
    removed_overflow_count = 0
    while overflow > 0 and removed_overflow_count < len(processed_history):
        if processed_history[removed_overflow_count].count_in_context:
            overflow -= 1
        removed_overflow_count += 1
    del processed_history[:removed_overflow_count]

    # NOTE(review): indentation was lost in the reviewed source; the leading
    # orphan cleanup is assumed to run once after the trimming loop - confirm.
    processed_history, leading_orphan_removed_count = drop_leading_orphan_tool_results(processed_history)
    removed_overflow_count += leading_orphan_removed_count

    # Recount from scratch because the cleanup above may have removed more messages.
    remaining_context_count = sum(1 for message in processed_history if message.count_in_context)

    removed_count = (
        removed_timing_tool_count
        + removed_assistant_thought_count
        + orphan_removed_count
        + removed_overflow_count
    )
    return HistoryPostProcessResult(
        history=processed_history,
        removed_count=removed_count,
        remaining_context_count=remaining_context_count,
    )
def _remove_early_timing_tool_records(chat_history: list[LLMContextMessage]) -> int:
    """Drop the earliest share of timing-tool call chains from the history.

    The earliest ``EARLY_TRIM_RATIO`` share (rounded down) of timing-tool
    assistant messages is removed, together with every tool result whose
    ``tool_call_id`` belongs to one of them.

    Args:
        chat_history: History list; it is rewritten in place.

    Returns:
        Number of messages removed (assistant messages plus tool results).
    """
    timing_positions = [
        position
        for position, entry in enumerate(chat_history)
        if _is_timing_tool_assistant_message(entry)
    ]
    quota = int(len(timing_positions) * EARLY_TRIM_RATIO)
    if quota <= 0:
        return 0

    doomed_positions = set(timing_positions[:quota])

    # Collect the call ids of the removed assistant messages so their tool
    # results can be removed with them.
    doomed_call_ids = set()
    for position in doomed_positions:
        for tool_call in chat_history[position].tool_calls:
            if tool_call.call_id:
                doomed_call_ids.add(tool_call.call_id)

    def _should_drop(position: int, entry: LLMContextMessage) -> bool:
        if position in doomed_positions:
            return True
        return isinstance(entry, ToolResultMessage) and entry.tool_call_id in doomed_call_ids

    survivors = [
        entry
        for position, entry in enumerate(chat_history)
        if not _should_drop(position, entry)
    ]
    dropped = len(chat_history) - len(survivors)
    chat_history[:] = survivors
    return dropped
def _remove_early_assistant_thoughts(chat_history: list[LLMContextMessage]) -> int:
    """Drop the earliest share of plain assistant thoughts from the history.

    A message is eligible when it is an ``AssistantMessage`` without tool
    calls, its ``source_kind`` is not ``"perception"`` and its content is
    non-blank. The earliest ``EARLY_TRIM_RATIO`` share (rounded down) of the
    eligible messages is removed.

    Args:
        chat_history: History list; it is rewritten in place.

    Returns:
        Number of messages removed.
    """
    candidate_indexes = [
        index
        for index, message in enumerate(chat_history)
        if isinstance(message, AssistantMessage)
        and not message.tool_calls
        and message.source_kind != "perception"
        # Treat missing content like empty content instead of raising on None.
        and bool((message.content or "").strip())
    ]
    remove_count = int(len(candidate_indexes) * EARLY_TRIM_RATIO)
    if remove_count <= 0:
        return 0

    removed_indexes = set(candidate_indexes[:remove_count])
    chat_history[:] = [
        message
        for index, message in enumerate(chat_history)
        if index not in removed_indexes
    ]
    return len(removed_indexes)
def _is_timing_tool_assistant_message(message: LLMContextMessage) -> bool:
    """Tell whether a message is an assistant message that only calls timing tools.

    Returns:
        ``True`` when ``message`` is an ``AssistantMessage`` with at least one
        tool call and every call's ``func_name`` is in
        ``TIMING_HISTORY_TOOL_NAMES``; ``False`` otherwise.
    """
    if isinstance(message, AssistantMessage) and message.tool_calls:
        for tool_call in message.tool_calls:
            if tool_call.func_name not in TIMING_HISTORY_TOOL_NAMES:
                return False
        return True
    return False

View File

@@ -34,7 +34,8 @@ from .context_messages import (
ToolResultMessage, ToolResultMessage,
contains_complex_message, contains_complex_message,
) )
from .history_utils import build_prefixed_message_sequence, build_session_message_visible_text, drop_leading_orphan_tool_results from .history_post_processor import process_chat_history_after_cycle
from .history_utils import build_prefixed_message_sequence, build_session_message_visible_text
from .monitor_events import ( from .monitor_events import (
emit_cycle_start, emit_cycle_start,
emit_message_ingested, emit_message_ingested,
@@ -375,8 +376,6 @@ class MaisakaReasoningEngine:
self._runtime._chat_history.append( self._runtime._chat_history.append(
self._build_wait_completed_message(has_new_messages=False) self._build_wait_completed_message(has_new_messages=False)
) )
self._trim_chat_history()
try: try:
timing_gate_required = True timing_gate_required = True
for round_index in range(self._runtime._max_internal_rounds): for round_index in range(self._runtime._max_internal_rounds):
@@ -472,8 +471,8 @@ class MaisakaReasoningEngine:
) )
reasoning_content = response.content or "" reasoning_content = response.content or ""
if self._should_replace_reasoning(reasoning_content): if self._should_replace_reasoning(reasoning_content):
response.content = "我应该根据我上面思考的内容进行反思,重新思考我下一步的行动,我需要分析当前场景,对话,以及我可以使用的工具,然后先输出想法再使用工具" response.content = "我应该根据我上面思考的内容进行反思,重新思考我下一步的行动,我需要分析当前场景,对话,以及我可以使用的工具,然后直接输出我的想法"
response.raw_message.content = "我应该根据我上面思考的内容进行反思,重新思考我下一步的行动,我需要分析当前场景,对话,以及我可以使用的工具,然后先输出想法再使用工具" response.raw_message.content = "我应该根据我上面思考的内容进行反思,重新思考我下一步的行动,我需要分析当前场景,对话,以及我可以使用的工具,然后直接输出我的想法"
logger.info(f"{self._runtime.log_prefix} 当前思考与上一轮过于相似,已替换为重新思考提示") logger.info(f"{self._runtime.log_prefix} 当前思考与上一轮过于相似,已替换为重新思考提示")
self._last_reasoning_content = reasoning_content self._last_reasoning_content = reasoning_content
@@ -502,10 +501,7 @@ class MaisakaReasoningEngine:
) )
interrupted_at = time.time() interrupted_at = time.time()
interrupted_stage_label = "Planner" interrupted_stage_label = "Planner"
interrupted_text = ( interrupted_text = "Planner 收到新消息,开始重新决策"
"Planner 在流式响应阶段被新消息打断。"
"本轮未完成,因此这里展示的是中断说明而不是完整返回。"
)
interrupted_response = ChatResponse( interrupted_response = ChatResponse(
content=interrupted_text or None, content=interrupted_text or None,
tool_calls=[], tool_calls=[],
@@ -528,9 +524,7 @@ class MaisakaReasoningEngine:
"状态:已被新消息打断", "状态:已被新消息打断",
f"打断位置:{interrupted_stage_label} 请求流式响应阶段", f"打断位置:{interrupted_stage_label} 请求流式响应阶段",
f"打断耗时:{interrupted_at - current_stage_started_at:.3f}", f"打断耗时:{interrupted_at - current_stage_started_at:.3f}",
f"打断原因:{str(exc) or '收到外部中断信号'}",
] ]
interrupted_extra_lines.append("展示内容:以下为 Maisaka 侧记录的中断说明")
response = interrupted_response response = interrupted_response
planner_extra_lines = interrupted_extra_lines planner_extra_lines = interrupted_extra_lines
logger.info( logger.info(
@@ -695,7 +689,6 @@ class MaisakaReasoningEngine:
continue continue
self._insert_chat_history_message(history_message) self._insert_chat_history_message(history_message)
self._trim_chat_history()
# 向监控前端广播新消息注入事件 # 向监控前端广播新消息注入事件
user_info = message.message_info.user_info user_info = message.message_info.user_info
@@ -798,6 +791,7 @@ class MaisakaReasoningEngine:
"""结束并记录一轮 Maisaka 思考循环。""" """结束并记录一轮 Maisaka 思考循环。"""
cycle_detail.end_time = time.time() cycle_detail.end_time = time.time()
self._runtime.history_loop.append(cycle_detail) self._runtime.history_loop.append(cycle_detail)
self._post_process_chat_history_after_cycle()
timer_strings = [ timer_strings = [
f"{name}: {duration:.2f}s" f"{name}: {duration:.2f}s"
@@ -807,26 +801,20 @@ class MaisakaReasoningEngine:
self._runtime._log_cycle_completed(cycle_detail, timer_strings) self._runtime._log_cycle_completed(cycle_detail, timer_strings)
return cycle_detail return cycle_detail
def _trim_chat_history(self) -> None: def _post_process_chat_history_after_cycle(self) -> None:
"""裁剪聊天历史,保证用户消息数量不超过配置限制。""" """裁剪聊天历史,保证用户消息数量不超过配置限制。"""
conversation_message_count = sum(1 for message in self._runtime._chat_history if message.count_in_context) process_result = process_chat_history_after_cycle(
if conversation_message_count <= self._runtime._max_context_size: self._runtime._chat_history,
max_context_size=self._runtime._max_context_size,
)
if process_result.removed_count <= 0:
return return
trimmed_history = list(self._runtime._chat_history) self._runtime._chat_history = process_result.history
removed_count = 0 self._runtime._log_history_trimmed(
process_result.removed_count,
while conversation_message_count > self._runtime._max_context_size and trimmed_history: process_result.remaining_context_count,
removed_message = trimmed_history.pop(0) )
removed_count += 1
if removed_message.count_in_context:
conversation_message_count -= 1
trimmed_history, pruned_orphan_count = drop_leading_orphan_tool_results(trimmed_history)
removed_count += pruned_orphan_count
self._runtime._chat_history = trimmed_history
self._runtime._log_history_trimmed(removed_count, conversation_message_count)
@staticmethod @staticmethod
def _calculate_similarity(text1: str, text2: str) -> float: def _calculate_similarity(text1: str, text2: str) -> float:

View File

@@ -437,6 +437,7 @@ class MaisakaHeartFlowChatting:
selected_history, _ = MaisakaChatLoopService.select_llm_context_messages( selected_history, _ = MaisakaChatLoopService.select_llm_context_messages(
self._chat_history, self._chat_history,
request_kind=request_kind,
max_context_size=context_message_limit, max_context_size=context_message_limit,
) )
sub_agent_history = list(selected_history) sub_agent_history = list(selected_history)
@@ -748,7 +749,7 @@ class MaisakaHeartFlowChatting:
return True return True
async def _trigger_expression_learning(self, messages: list[SessionMessage]) -> None: async def _trigger_expression_learning(self, messages: list[SessionMessage]) -> None:
"""?????????????????""" """触发表达方式学习"""
pending_count = self._expression_learner.get_pending_count(self.message_cache) pending_count = self._expression_learner.get_pending_count(self.message_cache)
if not self._should_trigger_learning( if not self._should_trigger_learning(
enabled=self._enable_expression_learning, enabled=self._enable_expression_learning,
@@ -761,21 +762,21 @@ class MaisakaHeartFlowChatting:
self._last_expression_extraction_time = time.time() self._last_expression_extraction_time = time.time()
logger.info( logger.info(
f"{self.log_prefix} ??????: " f"{self.log_prefix} 触发表达方式学习: "
f"??????={len(messages)} ??????={pending_count} " f"消息数量={len(messages)} 待处理消息数量={pending_count} "
f"?????={len(self.message_cache)} " f"缓存总量={len(self.message_cache)} "
f"??????={self._enable_jargon_learning}" f"是否启用黑话学习={self._enable_jargon_learning}"
) )
try: try:
jargon_miner = self._jargon_miner if self._enable_jargon_learning else None jargon_miner = self._jargon_miner if self._enable_jargon_learning else None
learnt_style = await self._expression_learner.learn(self.message_cache, jargon_miner) learnt_style = await self._expression_learner.learn(self.message_cache, jargon_miner)
if learnt_style: if learnt_style:
logger.info(f"{self.log_prefix} ???????") logger.info(f"{self.log_prefix} 表达方式学习成功")
else: else:
logger.debug(f"{self.log_prefix} ???????????????") logger.debug(f"{self.log_prefix} 表达方式学习失败")
except Exception: except Exception:
logger.exception(f"{self.log_prefix} ??????") logger.exception(f"{self.log_prefix} 表达方式学习异常")
async def _init_mcp(self) -> None: async def _init_mcp(self) -> None:
"""初始化 MCP 工具并注册到统一工具层。""" """初始化 MCP 工具并注册到统一工具层。"""
@@ -787,12 +788,12 @@ class MaisakaHeartFlowChatting:
host_callbacks=self._mcp_host_bridge.build_callbacks(), host_callbacks=self._mcp_host_bridge.build_callbacks(),
) )
if self._mcp_manager is None: if self._mcp_manager is None:
logger.info(f"{self.log_prefix} MCP 管理器不可用") logger.info(f"{self.log_prefix} Maisaka MCP 管理器不可用")
return return
mcp_tool_specs = self._mcp_manager.get_tool_specs() mcp_tool_specs = self._mcp_manager.get_tool_specs()
if not mcp_tool_specs: if not mcp_tool_specs:
logger.info(f"{self.log_prefix} 没有可供 Maisaka 使用的 MCP 工具") logger.info(f"{self.log_prefix} Maisaka 没有可供使用的 MCP 工具")
return return
self._tool_registry.register_provider(MCPToolProvider(self._mcp_manager)) self._tool_registry.register_provider(MCPToolProvider(self._mcp_manager))

View File

@@ -326,7 +326,7 @@ async def register_emoji(emoji_id: int, maibot_session: Optional[str] = Cookie(N
if not emoji: if not emoji:
raise HTTPException(status_code=404, detail=f"未找到 ID 为 {emoji_id} 的表情包") raise HTTPException(status_code=404, detail=f"未找到 ID 为 {emoji_id} 的表情包")
if emoji.is_registered: if emoji.is_registered:
return EmojiUpdateResponse(success=True, message="??????????", data=emoji_to_response(emoji)) return EmojiUpdateResponse(success=True, message="表情包已注册", data=emoji_to_response(emoji))
emoji.is_registered = True emoji.is_registered = True
emoji.is_banned = False emoji.is_banned = False