feat:修复孤儿工具报错,为replyer等tool添加统一的控制台展示接口

This commit is contained in:
SengokuCola
2026-04-07 16:21:42 +08:00
parent c5f514946b
commit 6968879a04
11 changed files with 1803 additions and 439 deletions

View File

@@ -0,0 +1,192 @@
from types import SimpleNamespace
from typing import Any, Callable
import pytest
from src.chat.replyer import maisaka_generator as legacy_replyer_module
from src.chat.replyer import maisaka_generator_multi as multimodal_replyer_module
from src.common.data_models.reply_generation_data_models import (
GenerationMetrics,
LLMCompletionResult,
ReplyGenerationResult,
)
from src.core.tooling import ToolExecutionResult, ToolInvocation
from src.maisaka.builtin_tool.context import BuiltinToolRuntimeContext
from src.maisaka.builtin_tool import reply as reply_tool_module
from src.maisaka.monitor_events import emit_planner_finalized
from src.maisaka.reasoning_engine import MaisakaReasoningEngine
class _FakeLLMResult:
def __init__(self) -> None:
self.response = "测试回复"
self.reasoning = "先理解上下文,再给出自然回复。"
self.model_name = "fake-model"
self.tool_calls = []
self.prompt_tokens = 12
self.completion_tokens = 7
self.total_tokens = 19
class _FakeLegacyLLMServiceClient:
    """Test double patched in place of ``LLMServiceClient`` in the legacy replyer module.

    It accepts any constructor arguments and answers text-prompt requests with
    a ``_FakeLLMResult``.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Constructor arguments are accepted only for signature compatibility.
        del args, kwargs

    async def generate_response(self, prompt: str) -> _FakeLLMResult:
        """Return the canned result after checking that a non-empty prompt was supplied."""
        assert prompt
        canned = _FakeLLMResult()
        return canned
class _FakeMultimodalLLMServiceClient:
    """Test double patched in place of ``LLMServiceClient`` in the multimodal replyer module.

    It accepts any constructor arguments and answers message-based requests
    with a ``_FakeLLMResult``.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Constructor arguments are accepted only for signature compatibility.
        del args, kwargs

    async def generate_response_with_messages(
        self,
        *,
        message_factory: Callable[[object], list[Any]],
    ) -> _FakeLLMResult:
        """Invoke the factory with a placeholder client and return the canned result.

        The factory must yield a truthy (non-empty) message list, otherwise the
        assertion fails.
        """
        built_messages = message_factory(object())
        assert built_messages
        return _FakeLLMResult()
@pytest.mark.asyncio
async def test_legacy_and_multimodal_replyer_monitor_detail_have_same_shape(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that the legacy and multimodal reply generators emit ``monitor_detail`` dicts with matching keys.

    Both generator modules get their ``LLMServiceClient`` and ``load_prompt``
    replaced with local fakes, so the token counts asserted at the end are the
    ones hard-coded in ``_FakeLLMResult``.
    """
    # Legacy (text prompt) module: fake client and a fixed prompt template.
    monkeypatch.setattr(legacy_replyer_module, "LLMServiceClient", _FakeLegacyLLMServiceClient)
    monkeypatch.setattr(legacy_replyer_module, "load_prompt", lambda *args, **kwargs: "legacy prompt")
    # Multimodal (message list) module: fake client and a fixed prompt template.
    monkeypatch.setattr(multimodal_replyer_module, "LLMServiceClient", _FakeMultimodalLLMServiceClient)
    monkeypatch.setattr(multimodal_replyer_module, "load_prompt", lambda *args, **kwargs: "multi prompt")

    # Build both generators before running either of them.
    legacy_generator = legacy_replyer_module.MaisakaReplyGenerator(
        chat_stream=None,
        request_type="test_legacy",
    )
    multimodal_generator = multimodal_replyer_module.MaisakaReplyGenerator(
        chat_stream=None,
        request_type="test_multi",
    )

    legacy_ok, legacy_reply = await legacy_generator.generate_reply_with_context(
        stream_id="session-legacy",
        chat_history=[],
        reply_reason="测试原因",
    )
    multimodal_ok, multimodal_reply = await multimodal_generator.generate_reply_with_context(
        stream_id="session-multi",
        chat_history=[],
        reply_reason="测试原因",
    )

    assert legacy_ok is True
    assert multimodal_ok is True

    legacy_detail = legacy_reply.monitor_detail
    multimodal_detail = multimodal_reply.monitor_detail
    assert legacy_detail is not None
    assert multimodal_detail is not None

    # Same top-level keys and same metric keys on both code paths.
    assert set(legacy_detail.keys()) == set(multimodal_detail.keys())
    legacy_metrics = legacy_detail["metrics"]
    assert set(legacy_metrics.keys()) == set(multimodal_detail["metrics"].keys())

    # Token usage must be carried over from the fake LLM result.
    for metric_name, expected_value in (
        ("prompt_tokens", 12),
        ("completion_tokens", 7),
        ("total_tokens", 19),
    ):
        assert legacy_metrics[metric_name] == expected_value
@pytest.mark.asyncio
async def test_reply_tool_puts_monitor_detail_into_metadata(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that the reply tool forwards the replyer's ``monitor_detail`` in the tool result metadata.

    The replyer lookup and the CLI renderer of the reply tool module are
    patched, and the runtime/engine are minimal ``SimpleNamespace`` stubs.
    """
    expected_detail = {
        "prompt_text": "reply prompt",
        "reasoning_text": "reply reasoning",
        "output_text": "reply output",
        "metrics": {"model_name": "fake-model", "total_tokens": 10},
    }
    canned_reply = ReplyGenerationResult(
        success=True,
        completion=LLMCompletionResult(response_text="测试回复"),
        metrics=GenerationMetrics(overall_ms=11.5),
        monitor_detail=expected_detail,
    )

    class _FakeReplyer:
        """Replyer stub that always succeeds with the canned result."""

        async def generate_reply_with_context(self, **kwargs: Any) -> tuple[bool, ReplyGenerationResult]:
            del kwargs
            return True, canned_reply

    monkeypatch.setattr(reply_tool_module.replyer_manager, "get_replyer", lambda **kwargs: _FakeReplyer())
    # Render CLI output as the raw text.
    monkeypatch.setattr(reply_tool_module, "render_cli_message", lambda text: text)

    # The message the tool is asked to reply to.
    sender = SimpleNamespace(
        user_cardname="测试用户",
        user_nickname="测试用户",
        user_id="user-1",
    )
    target_message = SimpleNamespace(
        message_id="msg-1",
        message_info=SimpleNamespace(user_info=sender),
    )

    # Stub runtime exposing the attributes set up for the reply tool.
    runtime = SimpleNamespace(
        _source_messages_by_id={"msg-1": target_message},
        log_prefix="[test]",
        chat_stream=SimpleNamespace(platform=reply_tool_module.CLI_PLATFORM_NAME),
        session_id="session-1",
        _chat_history=[],
        _clear_force_continue_until_reply=lambda: None,
        run_sub_agent=None,
    )
    engine = SimpleNamespace(_get_runtime_manager=lambda: None)

    tool_ctx = BuiltinToolRuntimeContext(engine=engine, runtime=runtime)
    invocation = ToolInvocation(
        tool_name="reply",
        arguments={"msg_id": "msg-1", "set_quote": True},
    )

    outcome = await reply_tool_module.handle_tool(tool_ctx, invocation)

    assert outcome.success is True
    assert outcome.metadata["monitor_detail"] == expected_detail
@pytest.mark.asyncio
async def test_emit_planner_finalized_broadcasts_new_protocol(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the event name and payload layout broadcast by ``emit_planner_finalized``.

    The module-level ``_broadcast`` coroutine is replaced with a recorder so
    the emitted event and data can be inspected.
    """
    captured: dict[str, Any] = {}

    async def _fake_broadcast(event: str, data: dict[str, Any]) -> None:
        # Remember the most recent broadcast for the assertions below.
        captured.update(event=event, data=data)

    monkeypatch.setattr("src.maisaka.monitor_events._broadcast", _fake_broadcast)

    planner_call = SimpleNamespace(call_id="call-1", func_name="reply", args={"msg_id": "m1"})
    reply_tool_record = {
        "tool_call_id": "call-1",
        "tool_name": "reply",
        "tool_args": {"msg_id": "m1"},
        "success": True,
        "duration_ms": 22.0,
        "summary": "- reply [成功]: 已回复",
        "detail": {"output_text": "测试回复"},
    }

    await emit_planner_finalized(
        session_id="session-1",
        cycle_id=3,
        request_messages=[{"role": "user", "content": "你好"}],
        selected_history_count=5,
        tool_count=2,
        planner_content="先查询再回复",
        planner_tool_calls=[planner_call],
        prompt_tokens=100,
        completion_tokens=30,
        total_tokens=130,
        duration_ms=88.5,
        tools=[reply_tool_record],
        time_records={"planner": 0.1, "tool_calls": 0.2},
        agent_state="stop",
    )

    assert captured["event"] == "planner.finalized"

    payload = captured["data"]
    request_section = payload["request"]
    assert request_section["messages"][0]["content"] == "你好"
    assert request_section["tool_count"] == 2
    assert payload["planner"]["tool_calls"][0]["id"] == "call-1"
    assert payload["tools"][0]["detail"]["output_text"] == "测试回复"
    assert payload["final_state"]["agent_state"] == "stop"
def test_reasoning_engine_build_tool_monitor_result_keeps_non_reply_tool_without_detail() -> None:
    """Check that a tool result without monitor metadata yields ``detail`` set to ``None``.

    The engine instance is created with ``object.__new__`` so that
    ``MaisakaReasoningEngine.__init__`` is not run.
    """
    bare_engine = object.__new__(MaisakaReasoningEngine)

    planner_call = SimpleNamespace(call_id="call-2", func_name="query_memory")
    invocation = ToolInvocation(tool_name="query_memory", arguments={"query": "Alice"})
    execution = ToolExecutionResult(tool_name="query_memory", success=True, content="查询成功")

    monitor_record = bare_engine._build_tool_monitor_result(
        planner_call,
        invocation,
        execution,
        duration_ms=18.6,
    )

    # Identification fields are copied through from the call and the invocation.
    assert monitor_record["tool_call_id"] == "call-2"
    assert monitor_record["tool_name"] == "query_memory"
    assert monitor_record["tool_args"] == {"query": "Alice"}
    assert monitor_record["detail"] is None

View File

@@ -11,6 +11,7 @@ from src.common.data_models.reply_generation_data_models import (
GenerationMetrics, GenerationMetrics,
LLMCompletionResult, LLMCompletionResult,
ReplyGenerationResult, ReplyGenerationResult,
build_reply_monitor_detail,
) )
from src.common.logger import get_logger from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt from src.common.prompt_i18n import load_prompt
@@ -18,10 +19,17 @@ from src.config.config import global_config
from src.core.types import ActionInfo from src.core.types import ActionInfo
from src.services.llm_service import LLMServiceClient from src.services.llm_service import LLMServiceClient
from src.maisaka.context_messages import AssistantMessage, LLMContextMessage, ReferenceMessage, SessionBackedMessage, ToolResultMessage from src.maisaka.context_messages import (
from .maisaka_expression_selector import maisaka_expression_selector AssistantMessage,
LLMContextMessage,
ReferenceMessage,
SessionBackedMessage,
ToolResultMessage,
)
from src.maisaka.message_adapter import parse_speaker_content from src.maisaka.message_adapter import parse_speaker_content
from .maisaka_expression_selector import maisaka_expression_selector
logger = get_logger("replyer") logger = get_logger("replyer")
@@ -50,7 +58,7 @@ class MaisakaReplyGenerator:
self._personality_prompt = self._build_personality_prompt() self._personality_prompt = self._build_personality_prompt()
def _build_personality_prompt(self) -> str: def _build_personality_prompt(self) -> str:
"""构建 replyer 使用的人设描述""" """构建 replyer 使用的人设提示"""
try: try:
bot_name = global_config.bot.nickname bot_name = global_config.bot.nickname
alias_names = global_config.bot.alias_names alias_names = global_config.bot.alias_names
@@ -268,6 +276,11 @@ class MaisakaReplyGenerator:
sub_agent_runner: Optional[Callable[[str], Awaitable[str]]] = None, sub_agent_runner: Optional[Callable[[str], Awaitable[str]]] = None,
) -> Tuple[bool, ReplyGenerationResult]: ) -> Tuple[bool, ReplyGenerationResult]:
"""结合上下文生成 Maisaka 的最终可见回复。""" """结合上下文生成 Maisaka 的最终可见回复。"""
def finalize(success_value: bool) -> Tuple[bool, ReplyGenerationResult]:
result.monitor_detail = build_reply_monitor_detail(result)
return success_value, result
del available_actions del available_actions
del chosen_actions del chosen_actions
del extra_info del extra_info
@@ -278,14 +291,14 @@ class MaisakaReplyGenerator:
del unknown_words del unknown_words
result = ReplyGenerationResult() result = ReplyGenerationResult()
overall_started_at = time.perf_counter()
if chat_history is None: if chat_history is None:
result.error_message = "聊天历史为空" result.error_message = "聊天历史为空"
return False, result return finalize(False)
logger.info( logger.info(
f"Maisaka 回复器开始生成: 会话流标识={stream_id} 回复原因={reply_reason!r} " f"Maisaka 回复器开始生成: 会话流标识={stream_id} 回复原因={reply_reason!r} "
f"历史消息数={len(chat_history)} 目标消息编号=" f"历史消息数={len(chat_history)} 目标消息编号={reply_message.message_id if reply_message else None}"
f"{reply_message.message_id if reply_message else None}"
) )
filtered_history = [ filtered_history = [
@@ -293,14 +306,12 @@ class MaisakaReplyGenerator:
for message in chat_history for message in chat_history
if not isinstance(message, (ReferenceMessage, ToolResultMessage)) if not isinstance(message, (ReferenceMessage, ToolResultMessage))
] ]
logger.debug(f"Maisaka 回复器过滤后历史消息数={len(filtered_history)}") logger.debug(f"Maisaka 回复器过滤后历史消息数={len(filtered_history)}")
# Validate that express_model is properly initialized
if self.express_model is None: if self.express_model is None:
logger.error("Maisaka 回复器的回复模型未初始化") logger.error("Maisaka 回复器的回复模型未初始化")
result.error_message = "回复模型尚未初始化" result.error_message = "回复模型尚未初始化"
return False, result return finalize(False)
try: try:
reply_context = await self._build_reply_context( reply_context = await self._build_reply_context(
@@ -312,9 +323,13 @@ class MaisakaReplyGenerator:
) )
except Exception as exc: except Exception as exc:
import traceback import traceback
logger.error(f"Maisaka 回复器构建回复上下文失败: {exc}\n{traceback.format_exc()}") logger.error(f"Maisaka 回复器构建回复上下文失败: {exc}\n{traceback.format_exc()}")
result.error_message = f"构建回复上下文失败: {exc}" result.error_message = f"构建回复上下文失败: {exc}"
return False, result result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
merged_expression_habits = expression_habits.strip() or reply_context.expression_habits merged_expression_habits = expression_habits.strip() or reply_context.expression_habits
result.selected_expression_ids = ( result.selected_expression_ids = (
@@ -328,6 +343,7 @@ class MaisakaReplyGenerator:
f"已选表达编号={result.selected_expression_ids!r}" f"已选表达编号={result.selected_expression_ids!r}"
) )
prompt_started_at = time.perf_counter()
try: try:
prompt = self._build_prompt( prompt = self._build_prompt(
chat_history=filtered_history, chat_history=filtered_history,
@@ -337,26 +353,36 @@ class MaisakaReplyGenerator:
) )
except Exception as exc: except Exception as exc:
import traceback import traceback
logger.error(f"Maisaka 回复器构建提示词失败: {exc}\n{traceback.format_exc()}") logger.error(f"Maisaka 回复器构建提示词失败: {exc}\n{traceback.format_exc()}")
result.error_message = f"构建提示词失败: {exc}" result.error_message = f"构建提示词失败: {exc}"
return False, result result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
prompt_ms = round((time.perf_counter() - prompt_started_at) * 1000, 2)
result.completion.request_prompt = prompt result.completion.request_prompt = prompt
show_replyer_prompt = bool(getattr(global_config.debug, "show_replyer_prompt", False))
show_replyer_reasoning = bool(getattr(global_config.debug, "show_replyer_reasoning", False))
if global_config.debug.show_replyer_prompt: if show_replyer_prompt:
logger.info(f"\nMaisaka 回复器提示词\n{prompt}\n") logger.info(f"\nMaisaka 回复器提示词:\n{prompt}\n")
started_at = time.perf_counter() llm_started_at = time.perf_counter()
try: try:
generation_result = await self.express_model.generate_response(prompt) generation_result = await self.express_model.generate_response(prompt)
except Exception as exc: except Exception as exc:
logger.exception("Maisaka 回复器调用失败") logger.exception("Maisaka 回复器调用失败")
result.error_message = str(exc) result.error_message = str(exc)
result.metrics = GenerationMetrics( result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - started_at) * 1000, 2), prompt_ms=prompt_ms,
llm_ms=round((time.perf_counter() - llm_started_at) * 1000, 2),
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
) )
return False, result return finalize(False)
llm_ms = round((time.perf_counter() - llm_started_at) * 1000, 2)
response_text = (generation_result.response or "").strip() response_text = (generation_result.response or "").strip()
result.success = bool(response_text) result.success = bool(response_text)
result.completion = LLMCompletionResult( result.completion = LLMCompletionResult(
@@ -365,18 +391,27 @@ class MaisakaReplyGenerator:
reasoning_text=generation_result.reasoning or "", reasoning_text=generation_result.reasoning or "",
model_name=generation_result.model_name or "", model_name=generation_result.model_name or "",
tool_calls=generation_result.tool_calls or [], tool_calls=generation_result.tool_calls or [],
prompt_tokens=generation_result.prompt_tokens,
completion_tokens=generation_result.completion_tokens,
total_tokens=generation_result.total_tokens,
) )
result.metrics = GenerationMetrics( result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - started_at) * 1000, 2), prompt_ms=prompt_ms,
llm_ms=llm_ms,
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
stage_logs=[
f"prompt: {prompt_ms} ms",
f"llm: {llm_ms} ms",
],
) )
if global_config.debug.show_replyer_reasoning and result.completion.reasoning_text: if show_replyer_reasoning and result.completion.reasoning_text:
logger.info(f"Maisaka 回复器思考内容\n{result.completion.reasoning_text}") logger.info(f"Maisaka 回复器思考内容:\n{result.completion.reasoning_text}")
if not result.success: if not result.success:
result.error_message = "回复器返回了空内容" result.error_message = "回复器返回了空内容"
logger.warning("Maisaka 回复器返回了空内容") logger.warning("Maisaka 回复器返回了空内容")
return False, result return finalize(False)
logger.info( logger.info(
f"Maisaka 回复器生成成功: 回复文本={response_text!r} " f"Maisaka 回复器生成成功: 回复文本={response_text!r} "
@@ -384,4 +419,4 @@ class MaisakaReplyGenerator:
f"已选表达编号={result.selected_expression_ids!r}" f"已选表达编号={result.selected_expression_ids!r}"
) )
result.text_fragments = [response_text] result.text_fragments = [response_text]
return True, result return finalize(True)

View File

@@ -16,13 +16,19 @@ from src.common.data_models.reply_generation_data_models import (
GenerationMetrics, GenerationMetrics,
LLMCompletionResult, LLMCompletionResult,
ReplyGenerationResult, ReplyGenerationResult,
build_reply_monitor_detail,
) )
from src.common.logger import get_logger from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt from src.common.prompt_i18n import load_prompt
from src.config.config import global_config from src.config.config import global_config
from src.core.types import ActionInfo from src.core.types import ActionInfo
from src.llm_models.payload_content.message import ImageMessagePart, Message, MessageBuilder, RoleType, TextMessagePart from src.llm_models.payload_content.message import (
from src.maisaka.monitor_events import emit_replier_request, emit_replier_response ImageMessagePart,
Message,
MessageBuilder,
RoleType,
TextMessagePart,
)
from src.services.llm_service import LLMServiceClient from src.services.llm_service import LLMServiceClient
from src.maisaka.context_messages import ( from src.maisaka.context_messages import (
@@ -32,10 +38,11 @@ from src.maisaka.context_messages import (
SessionBackedMessage, SessionBackedMessage,
ToolResultMessage, ToolResultMessage,
) )
from .maisaka_expression_selector import maisaka_expression_selector
from src.maisaka.message_adapter import clone_message_sequence, parse_speaker_content from src.maisaka.message_adapter import clone_message_sequence, parse_speaker_content
from src.maisaka.prompt_cli_renderer import PromptCLIVisualizer from src.maisaka.prompt_cli_renderer import PromptCLIVisualizer
from .maisaka_expression_selector import maisaka_expression_selector
logger = get_logger("replyer") logger = get_logger("replyer")
@@ -177,7 +184,7 @@ class MaisakaReplyGenerator:
return f"{system_prompt}\n\n" + "\n\n".join(sections) return f"{system_prompt}\n\n" + "\n\n".join(sections)
def _build_reply_instruction(self) -> str: def _build_reply_instruction(self) -> str:
return "请自然地回复。请注意不要输出多余内容(包括不必要的前后缀冒号括号表情包at或 @等 ),只输出发言内容就好" return "请自然地回复。不要输出多余说明、括号、at 或额外标记,只输出实际要发送的内容"
def _build_multimodal_user_message( def _build_multimodal_user_message(
self, self,
@@ -342,6 +349,11 @@ class MaisakaReplyGenerator:
selected_expression_ids: Optional[List[int]] = None, selected_expression_ids: Optional[List[int]] = None,
sub_agent_runner: Optional[Callable[[str], Awaitable[str]]] = None, sub_agent_runner: Optional[Callable[[str], Awaitable[str]]] = None,
) -> Tuple[bool, ReplyGenerationResult]: ) -> Tuple[bool, ReplyGenerationResult]:
def finalize(success_value: bool) -> Tuple[bool, ReplyGenerationResult]:
result.monitor_detail = build_reply_monitor_detail(result)
return success_value, result
del available_actions del available_actions
del chosen_actions del chosen_actions
del extra_info del extra_info
@@ -352,9 +364,10 @@ class MaisakaReplyGenerator:
del unknown_words del unknown_words
result = ReplyGenerationResult() result = ReplyGenerationResult()
overall_started_at = time.perf_counter()
if chat_history is None: if chat_history is None:
result.error_message = "聊天历史为空" result.error_message = "聊天历史为空"
return False, result return finalize(False)
logger.info( logger.info(
f"Maisaka 回复器开始生成: 流={stream_id} 原因={reply_reason!r} " f"Maisaka 回复器开始生成: 流={stream_id} 原因={reply_reason!r} "
@@ -370,7 +383,7 @@ class MaisakaReplyGenerator:
if self.express_model is None: if self.express_model is None:
logger.error("回复模型未初始化") logger.error("回复模型未初始化")
result.error_message = "回复模型尚未初始化" result.error_message = "回复模型尚未初始化"
return False, result return finalize(False)
try: try:
reply_context = await self._build_reply_context( reply_context = await self._build_reply_context(
@@ -382,9 +395,13 @@ class MaisakaReplyGenerator:
) )
except Exception as exc: except Exception as exc:
import traceback import traceback
logger.error(f"构建回复上下文失败: {exc}\n{traceback.format_exc()}") logger.error(f"构建回复上下文失败: {exc}\n{traceback.format_exc()}")
result.error_message = f"构建回复上下文失败: {exc}" result.error_message = f"构建回复上下文失败: {exc}"
return False, result result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
merged_expression_habits = expression_habits.strip() or reply_context.expression_habits merged_expression_habits = expression_habits.strip() or reply_context.expression_habits
result.selected_expression_ids = ( result.selected_expression_ids = (
@@ -397,6 +414,7 @@ class MaisakaReplyGenerator:
f"回复上下文完成: 流={stream_id} 已选表达={result.selected_expression_ids!r}" f"回复上下文完成: 流={stream_id} 已选表达={result.selected_expression_ids!r}"
) )
prompt_started_at = time.perf_counter()
try: try:
request_messages = self._build_request_messages( request_messages = self._build_request_messages(
chat_history=filtered_history, chat_history=filtered_history,
@@ -406,11 +424,18 @@ class MaisakaReplyGenerator:
) )
except Exception as exc: except Exception as exc:
import traceback import traceback
logger.error(f"构建提示词失败: {exc}\n{traceback.format_exc()}") logger.error(f"构建提示词失败: {exc}\n{traceback.format_exc()}")
result.error_message = f"构建提示词失败: {exc}" result.error_message = f"构建提示词失败: {exc}"
return False, result result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
)
return finalize(False)
prompt_ms = round((time.perf_counter() - prompt_started_at) * 1000, 2)
prompt_preview = self._build_request_prompt_preview(request_messages) prompt_preview = self._build_request_prompt_preview(request_messages)
show_replyer_prompt = bool(getattr(global_config.debug, "show_replyer_prompt", False))
show_replyer_reasoning = bool(getattr(global_config.debug, "show_replyer_reasoning", False))
def message_factory(_client: object) -> List[Message]: def message_factory(_client: object) -> List[Message]:
return request_messages return request_messages
@@ -418,7 +443,7 @@ class MaisakaReplyGenerator:
result.completion.request_prompt = prompt_preview result.completion.request_prompt = prompt_preview
preview_chat_id = self._resolve_session_id(stream_id) preview_chat_id = self._resolve_session_id(stream_id)
replyer_prompt_section: RenderableType | None = None replyer_prompt_section: RenderableType | None = None
if global_config.debug.show_replyer_prompt: if show_replyer_prompt:
replyer_prompt_section = PromptCLIVisualizer.build_text_section( replyer_prompt_section = PromptCLIVisualizer.build_text_section(
prompt_preview, prompt_preview,
category="replyer", category="replyer",
@@ -428,15 +453,7 @@ class MaisakaReplyGenerator:
folded=global_config.debug.fold_maisaka_thinking, folded=global_config.debug.fold_maisaka_thinking,
) )
started_at = time.perf_counter() llm_started_at = time.perf_counter()
# 向监控前端广播回复器请求事件
await emit_replier_request(
session_id=preview_chat_id,
messages=request_messages,
model_name=getattr(self.express_model, "model_name", ""),
)
try: try:
generation_result = await self.express_model.generate_response_with_messages( generation_result = await self.express_model.generate_response_with_messages(
message_factory=message_factory message_factory=message_factory
@@ -445,10 +462,13 @@ class MaisakaReplyGenerator:
logger.exception("Maisaka 回复器调用失败") logger.exception("Maisaka 回复器调用失败")
result.error_message = str(exc) result.error_message = str(exc)
result.metrics = GenerationMetrics( result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - started_at) * 1000, 2), prompt_ms=prompt_ms,
llm_ms=round((time.perf_counter() - llm_started_at) * 1000, 2),
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
) )
return False, result return finalize(False)
llm_ms = round((time.perf_counter() - llm_started_at) * 1000, 2)
response_text = (generation_result.response or "").strip() response_text = (generation_result.response or "").strip()
result.success = bool(response_text) result.success = bool(response_text)
result.completion = LLMCompletionResult( result.completion = LLMCompletionResult(
@@ -457,36 +477,33 @@ class MaisakaReplyGenerator:
reasoning_text=generation_result.reasoning or "", reasoning_text=generation_result.reasoning or "",
model_name=generation_result.model_name or "", model_name=generation_result.model_name or "",
tool_calls=generation_result.tool_calls or [], tool_calls=generation_result.tool_calls or [],
)
result.metrics = GenerationMetrics(
overall_ms=round((time.perf_counter() - started_at) * 1000, 2),
)
# 向监控前端广播回复器响应事件
await emit_replier_response(
session_id=preview_chat_id,
content=response_text,
reasoning=generation_result.reasoning or "",
model_name=generation_result.model_name or "",
prompt_tokens=generation_result.prompt_tokens, prompt_tokens=generation_result.prompt_tokens,
completion_tokens=generation_result.completion_tokens, completion_tokens=generation_result.completion_tokens,
total_tokens=generation_result.total_tokens, total_tokens=generation_result.total_tokens,
duration_ms=result.metrics.overall_ms or 0.0, )
success=result.success, result.metrics = GenerationMetrics(
prompt_ms=prompt_ms,
llm_ms=llm_ms,
overall_ms=round((time.perf_counter() - overall_started_at) * 1000, 2),
stage_logs=[
f"prompt: {prompt_ms} ms",
f"llm: {llm_ms} ms",
],
) )
if global_config.debug.show_replyer_reasoning and result.completion.reasoning_text: if show_replyer_reasoning and result.completion.reasoning_text:
logger.info(f"Maisaka 回复器思考内容\n{result.completion.reasoning_text}") logger.info(f"Maisaka 回复器思考内容:\n{result.completion.reasoning_text}")
if not result.success: if not result.success:
result.error_message = "回复器返回了空内容" result.error_message = "回复器返回了空内容"
logger.warning("Maisaka 回复器返回了空内容") logger.warning("Maisaka 回复器返回了空内容")
return False, result return finalize(False)
logger.info( logger.info(
f"Maisaka 回复器生成成功: 文本={response_text!r} 总耗时ms={result.metrics.overall_ms} 已选表达={result.selected_expression_ids!r}" f"Maisaka 回复器生成成功: 文本={response_text!r} "
f"总耗时ms={result.metrics.overall_ms} 已选表达={result.selected_expression_ids!r}"
) )
if global_config.debug.show_replyer_prompt or global_config.debug.show_replyer_reasoning: if show_replyer_prompt or show_replyer_reasoning:
summary_lines = [ summary_lines = [
f"流ID: {preview_chat_id or 'unknown'}", f"流ID: {preview_chat_id or 'unknown'}",
f"耗时: {result.metrics.overall_ms} ms", f"耗时: {result.metrics.overall_ms} ms",
@@ -497,7 +514,7 @@ class MaisakaReplyGenerator:
renderables: List[RenderableType] = [Text("\n".join(summary_lines))] renderables: List[RenderableType] = [Text("\n".join(summary_lines))]
if replyer_prompt_section is not None: if replyer_prompt_section is not None:
renderables.append(replyer_prompt_section) renderables.append(replyer_prompt_section)
if global_config.debug.show_replyer_reasoning and result.completion.reasoning_text: if show_replyer_reasoning and result.completion.reasoning_text:
renderables.append( renderables.append(
Panel( Panel(
Text(result.completion.reasoning_text), Text(result.completion.reasoning_text),
@@ -523,4 +540,4 @@ class MaisakaReplyGenerator:
) )
) )
result.text_fragments = [response_text] result.text_fragments = [response_text]
return True, result return finalize(True)

View File

@@ -1,6 +1,6 @@
"""回复生成结果相关数据模型。 """回复生成结果相关数据模型。
该模块用于描述新版回复链中的三个层次: 该模块用于描述新版回复链中的三个层次:
1. LLM 原始完成结果。 1. LLM 原始完成结果。
2. 生成过程中的耗时与调试信息。 2. 生成过程中的耗时与调试信息。
@@ -23,13 +23,6 @@ class LLMCompletionResult(BaseDataModel):
该模型只描述模型调用本身的输入与输出,不承载回复切分、 该模型只描述模型调用本身的输入与输出,不承载回复切分、
消息序列拼装或表达方式选择等后处理结果。 消息序列拼装或表达方式选择等后处理结果。
Attributes:
request_prompt: 实际发送给模型的 Prompt 文本。
response_text: 模型返回的主文本内容。
reasoning_text: 模型返回的推理内容。
model_name: 本次请求实际使用的模型名称。
tool_calls: 模型返回的工具调用列表。
""" """
request_prompt: str = field( request_prompt: str = field(
@@ -52,19 +45,23 @@ class LLMCompletionResult(BaseDataModel):
default_factory=list, default_factory=list,
metadata={"description": "模型返回的工具调用列表。"}, metadata={"description": "模型返回的工具调用列表。"},
) )
prompt_tokens: int = field(
default=0,
metadata={"description": "本次请求的输入 Token 数。"},
)
completion_tokens: int = field(
default=0,
metadata={"description": "本次请求的输出 Token 数。"},
)
total_tokens: int = field(
default=0,
metadata={"description": "本次请求的总 Token 数。"},
)
@dataclass @dataclass
class GenerationMetrics(BaseDataModel): class GenerationMetrics(BaseDataModel):
"""一次生成流程的耗时与调试指标。 """一次生成流程的耗时与调试指标。"""
Attributes:
prompt_ms: Prompt 构建耗时,单位为毫秒。
llm_ms: LLM 调用耗时,单位为毫秒。
overall_ms: 整个生成流程总耗时,单位为毫秒。
stage_logs: 各阶段的简短耗时日志列表。
extra: 额外指标字典,用于承载不适合单独升格为字段的监控信息。
"""
prompt_ms: Optional[float] = field( prompt_ms: Optional[float] = field(
default=None, default=None,
@@ -90,20 +87,7 @@ class GenerationMetrics(BaseDataModel):
@dataclass @dataclass
class ReplyGenerationResult(BaseDataModel): class ReplyGenerationResult(BaseDataModel):
"""回复链的最终结构化结果。 """回复链的最终结构化结果。"""
该模型用于承接回复器和生成服务合并后的最终产物,供 HFC、
BrainChat、发送服务和日志系统继续消费。
Attributes:
success: 本次回复生成是否成功。
completion: LLM 原始完成结果。
metrics: 本次生成的耗时与调试指标。
selected_expression_ids: 本次选中的表达方式 ID 列表。
text_fragments: 对模型输出进行切分、规范化后的文本片段列表。
message_sequence: 最终可直接发送的消息序列。
error_message: 失败时的错误描述;成功时为空。
"""
success: bool = field( success: bool = field(
default=False, default=False,
@@ -133,10 +117,70 @@ class ReplyGenerationResult(BaseDataModel):
default_factory=str, default_factory=str,
metadata={"description": "失败时的错误描述;成功时通常为空字符串。"}, metadata={"description": "失败时的错误描述;成功时通常为空字符串。"},
) )
monitor_detail: Optional[Dict[str, Any]] = field(
default=None,
metadata={"description": "供监控层直接消费的通用 tool 展示详情。"},
)
def _stripped_text(value: Any) -> str:
    """Return ``value`` without surrounding whitespace, or ``""`` when it is not a string."""
    return value.strip() if isinstance(value, str) else ""


def _is_positive_number(value: Any) -> bool:
    """Tell whether ``value`` is an int/float greater than zero (``None`` and other types are not)."""
    return isinstance(value, (int, float)) and value > 0


def build_reply_monitor_detail(result: ReplyGenerationResult) -> Dict[str, Any]:
    """Build the unified monitor-detail dict for a reply generation result.

    Only populated pieces are included, so an empty result yields ``{}``.
    Possible keys, in insertion order:

    * ``prompt_text`` / ``reasoning_text`` / ``output_text`` - stripped,
      non-empty texts from ``result.completion``.
    * ``metrics`` - model name, positive token counts and the timing values
      of ``result.metrics`` that are not ``None``.
    * ``extra_sections`` - ``{"title", "content"}`` entries for the selected
      expression ids, the stage logs and the error message.

    The builder tolerates ``None`` (or otherwise non-string / non-numeric)
    text and token fields instead of raising, so a value passed through
    unvalidated from an LLM result cannot break the caller.

    Args:
        result: The reply generation result to summarise.

    Returns:
        A new dict holding the monitor details described above.
    """
    completion = result.completion
    timings = result.metrics
    detail: Dict[str, Any] = {}

    # Text sections: keep only the ones that have visible content.
    for key, raw_text in (
        ("prompt_text", completion.request_prompt),
        ("reasoning_text", completion.reasoning_text),
        ("output_text", completion.response_text),
    ):
        cleaned = _stripped_text(raw_text)
        if cleaned:
            detail[key] = cleaned

    metrics: Dict[str, Any] = {}
    model_name = _stripped_text(completion.model_name)
    if model_name:
        metrics["model_name"] = model_name
    # Token counts are reported only when they are real positive numbers.
    for key, count in (
        ("prompt_tokens", completion.prompt_tokens),
        ("completion_tokens", completion.completion_tokens),
        ("total_tokens", completion.total_tokens),
    ):
        if _is_positive_number(count):
            metrics[key] = count
    # Timings are reported whenever they were measured, even if zero.
    for key, elapsed in (
        ("prompt_ms", timings.prompt_ms),
        ("llm_ms", timings.llm_ms),
        ("overall_ms", timings.overall_ms),
    ):
        if elapsed is not None:
            metrics[key] = elapsed
    if metrics:
        detail["metrics"] = metrics

    extra_sections: List[Dict[str, str]] = []
    if result.selected_expression_ids:
        extra_sections.append({
            "title": "已选表达方式",
            "content": ", ".join(str(item) for item in result.selected_expression_ids),
        })
    if timings.stage_logs:
        extra_sections.append({
            "title": "阶段日志",
            "content": "\n".join(timings.stage_logs),
        })
    error_text = _stripped_text(result.error_message)
    if error_text:
        extra_sections.append({
            "title": "错误信息",
            "content": error_text,
        })
    if extra_sections:
        detail["extra_sections"] = extra_sections

    return detail
__all__ = [ __all__ = [
"GenerationMetrics", "GenerationMetrics",
"LLMCompletionResult", "LLMCompletionResult",
"ReplyGenerationResult", "ReplyGenerationResult",
"build_reply_monitor_detail",
] ]

View File

@@ -57,6 +57,15 @@ def get_tool_spec() -> ToolSpec:
) )
def _build_monitor_metadata(reply_result: object) -> dict[str, object]:
"""从 reply 结果中提取统一监控详情。"""
monitor_detail = getattr(reply_result, "monitor_detail", None)
if isinstance(monitor_detail, dict):
return {"monitor_detail": monitor_detail}
return {}
async def handle_tool( async def handle_tool(
tool_ctx: BuiltinToolRuntimeContext, tool_ctx: BuiltinToolRuntimeContext,
invocation: ToolInvocation, invocation: ToolInvocation,
@@ -71,7 +80,7 @@ async def handle_tool(
if not target_message_id: if not target_message_id:
return tool_ctx.build_failure_result( return tool_ctx.build_failure_result(
invocation.tool_name, invocation.tool_name,
"回复工具需要提供有效的 `msg_id` 参数。", "reply 工具需要提供有效的 `msg_id` 参数。",
) )
target_message = tool_ctx.runtime._source_messages_by_id.get(target_message_id) target_message = tool_ctx.runtime._source_messages_by_id.get(target_message_id)
@@ -129,6 +138,7 @@ async def handle_tool(
"生成可见回复时发生异常。", "生成可见回复时发生异常。",
) )
reply_metadata = _build_monitor_metadata(reply_result)
reply_text = reply_result.completion.response_text.strip() if success else "" reply_text = reply_result.completion.response_text.strip() if success else ""
if not reply_text: if not reply_text:
logger.warning( logger.warning(
@@ -138,6 +148,7 @@ async def handle_tool(
return tool_ctx.build_failure_result( return tool_ctx.build_failure_result(
invocation.tool_name, invocation.tool_name,
"生成可见回复失败。", "生成可见回复失败。",
metadata=reply_metadata,
) )
reply_segments = tool_ctx.post_process_reply_text(reply_text) reply_segments = tool_ctx.post_process_reply_text(reply_text)
@@ -170,6 +181,7 @@ async def handle_tool(
return tool_ctx.build_failure_result( return tool_ctx.build_failure_result(
invocation.tool_name, invocation.tool_name,
"发送可见回复时发生异常。", "发送可见回复时发生异常。",
metadata=reply_metadata,
) )
if not sent: if not sent:
@@ -181,6 +193,7 @@ async def handle_tool(
"set_quote": set_quote, "set_quote": set_quote,
"reply_segments": reply_segments, "reply_segments": reply_segments,
}, },
metadata=reply_metadata,
) )
target_user_info = target_message.message_info.user_info target_user_info = target_message.message_info.user_info
@@ -202,4 +215,5 @@ async def handle_tool(
"reply_segments": reply_segments, "reply_segments": reply_segments,
"target_user_name": target_user_name, "target_user_name": target_user_name,
}, },
metadata=reply_metadata,
) )

View File

@@ -2,7 +2,6 @@
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from time import perf_counter
from typing import Any, List, Optional, Sequence from typing import Any, List, Optional, Sequence
import asyncio import asyncio
@@ -11,7 +10,6 @@ import random
from pydantic import BaseModel, Field as PydanticField from pydantic import BaseModel, Field as PydanticField
from rich.console import RenderableType from rich.console import RenderableType
from rich.panel import Panel
from src.common.data_models.llm_service_data_models import LLMGenerationOptions from src.common.data_models.llm_service_data_models import LLMGenerationOptions
from src.common.logger import get_logger from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt from src.common.prompt_i18n import load_prompt
@@ -35,7 +33,7 @@ from src.services.llm_service import LLMServiceClient
from .builtin_tool import get_builtin_tools from .builtin_tool import get_builtin_tools
from .context_messages import AssistantMessage, LLMContextMessage from .context_messages import AssistantMessage, LLMContextMessage
from .history_utils import drop_leading_orphan_tool_results from .history_utils import drop_orphan_tool_results
from .prompt_cli_renderer import PromptCLIVisualizer from .prompt_cli_renderer import PromptCLIVisualizer
@@ -45,8 +43,10 @@ class ChatResponse:
content: Optional[str] content: Optional[str]
tool_calls: List[ToolCall] tool_calls: List[ToolCall]
request_messages: List[Message]
raw_message: AssistantMessage raw_message: AssistantMessage
selected_history_count: int selected_history_count: int
tool_count: int
prompt_tokens: int prompt_tokens: int
built_message_count: int built_message_count: int
completion_tokens: int completion_tokens: int
@@ -742,7 +742,6 @@ class MaisakaChatLoopService:
folded=global_config.debug.fold_maisaka_thinking, folded=global_config.debug.fold_maisaka_thinking,
) )
request_started_at = perf_counter()
logger.info( logger.info(
"规划器请求开始: " "规划器请求开始: "
f"已选上下文消息数={len(selected_history)} " f"已选上下文消息数={len(selected_history)} "
@@ -808,8 +807,10 @@ class MaisakaChatLoopService:
return ChatResponse( return ChatResponse(
content=final_response or None, content=final_response or None,
tool_calls=final_tool_calls, tool_calls=final_tool_calls,
request_messages=list(built_messages),
raw_message=raw_message, raw_message=raw_message,
selected_history_count=len(selected_history), selected_history_count=len(selected_history),
tool_count=len(all_tools),
prompt_tokens=prompt_tokens, prompt_tokens=prompt_tokens,
built_message_count=len(built_messages), built_message_count=len(built_messages),
completion_tokens=completion_tokens, completion_tokens=completion_tokens,
@@ -846,7 +847,7 @@ class MaisakaChatLoopService:
selected_indices.reverse() selected_indices.reverse()
selected_history = [chat_history[index] for index in selected_indices] selected_history = [chat_history[index] for index in selected_indices]
selected_history, hidden_assistant_count = MaisakaChatLoopService._hide_early_assistant_messages(selected_history) selected_history, hidden_assistant_count = MaisakaChatLoopService._hide_early_assistant_messages(selected_history)
selected_history, _ = drop_leading_orphan_tool_results(selected_history) selected_history, _ = drop_orphan_tool_results(selected_history)
selection_reason = ( selection_reason = (
f"上下文裁剪:最近 {effective_context_size} 条 user/assistant 消息," f"上下文裁剪:最近 {effective_context_size} 条 user/assistant 消息,"
f"实际发送 {len(selected_history)}" f"实际发送 {len(selected_history)}"
@@ -890,7 +891,7 @@ class MaisakaChatLoopService:
selected_indices.reverse() selected_indices.reverse()
selected_history = [chat_history[index] for index in selected_indices] selected_history = [chat_history[index] for index in selected_indices]
selected_history, hidden_assistant_count = MaisakaChatLoopService._hide_early_assistant_messages(selected_history) selected_history, hidden_assistant_count = MaisakaChatLoopService._hide_early_assistant_messages(selected_history)
selected_history, _ = drop_leading_orphan_tool_results(selected_history) selected_history, _ = drop_orphan_tool_results(selected_history)
return ( return (
selected_history, selected_history,
( (
@@ -935,10 +936,10 @@ class MaisakaChatLoopService:
return filtered_history, hidden_assistant_count return filtered_history, hidden_assistant_count
@staticmethod @staticmethod
def _drop_leading_orphan_tool_results( def _drop_orphan_tool_results(
selected_history: List[LLMContextMessage], selected_history: List[LLMContextMessage],
) -> List[LLMContextMessage]: ) -> List[LLMContextMessage]:
"""移除窗口前缀中缺少对应 tool_call 的工具结果消息。""" """移除窗口中缺少对应 tool_call 的工具结果消息。"""
normalized_history, _ = drop_leading_orphan_tool_results(selected_history) normalized_history, _ = drop_orphan_tool_results(selected_history)
return normalized_history return normalized_history

View File

@@ -78,3 +78,30 @@ def drop_leading_orphan_tool_results(
if first_valid_index == 0: if first_valid_index == 0:
return chat_history, 0 return chat_history, 0
return chat_history[first_valid_index:], first_valid_index return chat_history[first_valid_index:], first_valid_index
def drop_orphan_tool_results(
    chat_history: list[LLMContextMessage],
) -> tuple[list[LLMContextMessage], int]:
    """Remove tool-result messages that lack a preceding matching tool_call.

    The history is scanned once, in order. Call ids are collected from each
    ``AssistantMessage`` as it is encountered, so a ``ToolResultMessage`` is
    kept only if an assistant message *earlier in the window* issued a
    tool_call with the same id. A result whose id is first seen on a later
    assistant message is still an orphan from the LLM API's point of view
    (a tool message must answer a preceding assistant ``tool_calls`` entry)
    and is dropped as well.

    Args:
        chat_history: Context messages in chronological order.

    Returns:
        A ``(filtered_history, removed_count)`` tuple. An empty input is
        returned as-is with a count of 0; otherwise a new list is returned.
    """
    if not chat_history:
        return chat_history, 0

    seen_tool_call_ids = set()
    filtered_history: list[LLMContextMessage] = []
    removed_count = 0
    for message in chat_history:
        if isinstance(message, AssistantMessage):
            # Register this assistant turn's call ids before any later results are checked.
            seen_tool_call_ids.update(
                tool_call.call_id
                for tool_call in message.tool_calls
                if tool_call.call_id
            )
        if isinstance(message, ToolResultMessage) and message.tool_call_id not in seen_tool_call_ids:
            removed_count += 1
            continue
        filtered_history.append(message)
    return filtered_history, removed_count

View File

@@ -1,74 +1,50 @@
"""MaiSaka 实时监控事件广播模块。 """MaiSaka 实时监控事件广播模块。
通过统一 WebSocket 将 MaiSaka 推理引擎各阶段状态实时推送给前端监控界面 通过统一 WebSocket 将 MaiSaka 推理引擎各阶段状态实时推送给前端监控界面
无需落盘 HTML/TXT 中间文件即可在 WebUI 中渲染完整的聊天流推理过程。
""" """
from typing import Any, Dict, List, Optional from datetime import datetime
import time import time
from typing import Any, Dict, List, Optional
from src.common.logger import get_logger from src.common.logger import get_logger
logger = get_logger("maisaka_monitor") logger = get_logger("maisaka_monitor")
# WebSocket 广播使用的业务域与主题
MONITOR_DOMAIN = "maisaka_monitor" MONITOR_DOMAIN = "maisaka_monitor"
MONITOR_TOPIC = "main" MONITOR_TOPIC = "main"
def _serialize_message(message: Any) -> Dict[str, Any]: def _normalize_payload_value(value: Any) -> Any:
"""单条 LLM 消息序列化为可通过 WebSocket 传输的字典。 """事件载荷中的任意值规范化为可序列化结构。"""
对二进制数据(如图片)仅保留元信息,不传输原始字节以减小带宽占用。 if value is None or isinstance(value, (str, int, float, bool)):
return value
Args: if isinstance(value, datetime):
message: 原始消息对象,可以是 dict 或带 role/content 属性的消息实例。 return value.isoformat()
if isinstance(value, dict):
Returns: normalized_dict: Dict[str, Any] = {}
Dict[str, Any]: 序列化后的消息字典。 for key, item in value.items():
""" normalized_dict[str(key)] = _normalize_payload_value(item)
if isinstance(message, dict): return normalized_dict
serialized: Dict[str, Any] = { if isinstance(value, (list, tuple, set)):
"role": str(message.get("role", "unknown")), return [_normalize_payload_value(item) for item in value]
"content": message.get("content"), if hasattr(value, "model_dump"):
} try:
if message.get("tool_call_id"): return _normalize_payload_value(value.model_dump())
serialized["tool_call_id"] = message["tool_call_id"] except Exception:
if message.get("tool_calls"): return str(value)
serialized["tool_calls"] = _serialize_tool_calls_from_dicts(message["tool_calls"]) if hasattr(value, "__dict__"):
return serialized try:
return _normalize_payload_value(dict(value.__dict__))
raw_role = getattr(message, "role", "unknown") except Exception:
role_str = raw_role.value if hasattr(raw_role, "value") else str(raw_role) # type: ignore[union-attr] return str(value)
return str(value)
serialized = {
"role": role_str,
"content": _extract_text_content(getattr(message, "content", None)),
}
tool_call_id = getattr(message, "tool_call_id", None)
if tool_call_id:
serialized["tool_call_id"] = str(tool_call_id)
tool_calls = getattr(message, "tool_calls", None)
if tool_calls:
serialized["tool_calls"] = _serialize_tool_calls_from_objects(tool_calls)
return serialized
def _extract_text_content(content: Any) -> Optional[str]: def _extract_text_content(content: Any) -> Optional[str]:
"""从消息内容中提取纯文本表示。 """从消息内容中提取纯文本表示。"""
支持字符串、列表(多模态内容块)等格式,对图片仅保留占位信息。
Args:
content: 消息的原始 content 字段。
Returns:
Optional[str]: 提取后的文本内容。
"""
if content is None: if content is None:
return None return None
if isinstance(content, str): if isinstance(content, str):
@@ -91,23 +67,17 @@ def _extract_text_content(content: Any) -> Optional[str]:
def _serialize_tool_calls_from_objects(tool_calls: List[Any]) -> List[Dict[str, Any]]: def _serialize_tool_calls_from_objects(tool_calls: List[Any]) -> List[Dict[str, Any]]:
"""将工具调用对象列表序列化为字典列表。 """将工具调用对象列表序列化为字典列表。"""
Args:
tool_calls: 工具调用对象列表ToolCall 或类似结构)。
Returns:
List[Dict[str, Any]]: 序列化后的工具调用列表。
"""
result: List[Dict[str, Any]] = [] result: List[Dict[str, Any]] = []
for tc in tool_calls: for tool_call in tool_calls:
serialized: Dict[str, Any] = { serialized: Dict[str, Any] = {
"id": getattr(tc, "id", None) or getattr(tc, "tool_call_id", ""), "id": getattr(tool_call, "id", None) or getattr(tool_call, "call_id", ""),
"name": getattr(tc, "func_name", None) or getattr(tc, "name", "unknown"), "name": getattr(tool_call, "func_name", None) or getattr(tool_call, "name", "unknown"),
} }
args = getattr(tc, "args", None) or getattr(tc, "arguments", None) args = getattr(tool_call, "args", None) or getattr(tool_call, "arguments", None)
if isinstance(args, dict): if isinstance(args, dict):
serialized["arguments"] = args serialized["arguments"] = _normalize_payload_value(args)
elif isinstance(args, str): elif isinstance(args, str):
serialized["arguments_raw"] = args serialized["arguments_raw"] = args
result.append(serialized) result.append(serialized)
@@ -115,73 +85,101 @@ def _serialize_tool_calls_from_objects(tool_calls: List[Any]) -> List[Dict[str,
def _serialize_tool_calls_from_dicts(tool_calls: List[Any]) -> List[Dict[str, Any]]: def _serialize_tool_calls_from_dicts(tool_calls: List[Any]) -> List[Dict[str, Any]]:
"""将工具调用字典列表标准化为可传输格式。 """将工具调用字典列表标准化为可传输格式。"""
Args:
tool_calls: 工具调用字典列表。
Returns:
List[Dict[str, Any]]: 标准化后的工具调用列表。
"""
result: List[Dict[str, Any]] = [] result: List[Dict[str, Any]] = []
for tc in tool_calls: for tool_call in tool_calls:
if isinstance(tc, dict): if isinstance(tool_call, dict):
result.append({ result.append({
"id": tc.get("id", ""), "id": str(tool_call.get("id", "")),
"name": tc.get("name", tc.get("func_name", "unknown")), "name": str(tool_call.get("name", tool_call.get("func_name", "unknown"))),
"arguments": tc.get("arguments", tc.get("args", {})), "arguments": _normalize_payload_value(tool_call.get("arguments", tool_call.get("args", {}))),
}) })
else: continue
result.append({ result.append({
"id": getattr(tc, "id", ""), "id": str(getattr(tool_call, "id", getattr(tool_call, "call_id", ""))),
"name": getattr(tc, "func_name", "unknown"), "name": str(getattr(tool_call, "func_name", getattr(tool_call, "name", "unknown"))),
"arguments": getattr(tc, "args", {}), "arguments": _normalize_payload_value(getattr(tool_call, "args", getattr(tool_call, "arguments", {}))),
}) })
return result return result
def _serialize_message(message: Any) -> Dict[str, Any]:
    """Serialize one message into a dict that can be sent over WebSocket.

    Accepts either a plain ``dict`` or an object exposing ``role`` /
    ``content`` (and optionally ``tool_call_id`` / ``tool_calls``)
    attributes. The result always has ``role`` and ``content``;
    ``tool_call_id`` and ``tool_calls`` are added only when truthy.
    """
    if isinstance(message, dict):
        payload: Dict[str, Any] = {
            "role": str(message.get("role", "unknown")),
            "content": _extract_text_content(message.get("content")),
        }
        dict_call_id = message.get("tool_call_id")
        if dict_call_id:
            payload["tool_call_id"] = str(dict_call_id)
        dict_tool_calls = message.get("tool_calls")
        if dict_tool_calls:
            payload["tool_calls"] = _serialize_tool_calls_from_dicts(dict_tool_calls)
        return payload

    # Object form: the role may be an enum-like value carrying ``.value``.
    role = getattr(message, "role", "unknown")
    if hasattr(role, "value"):
        role_text = role.value
    else:
        role_text = str(role)

    payload = {
        "role": role_text,
        "content": _extract_text_content(getattr(message, "content", None)),
    }

    object_call_id = getattr(message, "tool_call_id", None)
    if object_call_id:
        payload["tool_call_id"] = str(object_call_id)

    object_tool_calls = getattr(message, "tool_calls", None)
    if object_tool_calls:
        payload["tool_calls"] = _serialize_tool_calls_from_objects(object_tool_calls)
    return payload
def _serialize_messages(messages: List[Any]) -> List[Dict[str, Any]]: def _serialize_messages(messages: List[Any]) -> List[Dict[str, Any]]:
"""批量序列化消息列表。 """批量序列化消息列表。"""
Args: return [_serialize_message(message) for message in messages]
messages: 原始消息列表。
Returns:
List[Dict[str, Any]]: 序列化后的消息字典列表。 def _serialize_tool_results(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
""" """标准化最终 planner 卡中的工具结果列表。"""
return [_serialize_message(msg) for msg in messages]
serialized_tools: List[Dict[str, Any]] = []
for tool in tools:
serialized_tool = {
"tool_call_id": str(tool.get("tool_call_id", "")),
"tool_name": str(tool.get("tool_name", "")),
"tool_args": _normalize_payload_value(tool.get("tool_args", {})),
"success": bool(tool.get("success", False)),
"duration_ms": float(tool.get("duration_ms", 0.0) or 0.0),
"summary": str(tool.get("summary", "")),
}
detail = tool.get("detail")
if detail is not None:
serialized_tool["detail"] = _normalize_payload_value(detail)
serialized_tools.append(serialized_tool)
return serialized_tools
async def _broadcast(event: str, data: Dict[str, Any]) -> None: async def _broadcast(event: str, data: Dict[str, Any]) -> None:
"""通过统一 WebSocket 管理器向所有订阅了 maisaka_monitor 主题的连接广播事件。 """通过统一 WebSocket 管理器向监控主题广播事件。"""
延迟导入 websocket_manager 以避免循环依赖。
Args:
event: 事件名称。
data: 事件数据。
"""
try: try:
from src.webui.routers.websocket.manager import websocket_manager from src.webui.routers.websocket.manager import websocket_manager
subscription_key = f"{MONITOR_DOMAIN}:{MONITOR_TOPIC}" subscription_key = f"{MONITOR_DOMAIN}:{MONITOR_TOPIC}"
total_connections = len(websocket_manager.connections) total_connections = len(websocket_manager.connections)
subscriber_count = sum( subscriber_count = sum(
1 for conn in websocket_manager.connections.values() 1
if subscription_key in conn.subscriptions for connection in websocket_manager.connections.values()
if subscription_key in connection.subscriptions
) )
# 诊断:打印 manager 对象 id 和连接状态
logger.info( logger.info(
f"[诊断] _broadcast: manager_id={id(websocket_manager)} " f"[诊断] _broadcast: manager_id={id(websocket_manager)} "
f"总连接={total_connections} 订阅者={subscriber_count} event={event}" f"总连接={total_connections} 订阅者={subscriber_count} event={event}"
) )
if subscriber_count == 0 and total_connections > 0:
for cid, conn in websocket_manager.connections.items():
logger.info(
f"[诊断] 连接={cid[:8]}… 订阅={conn.subscriptions}"
)
await websocket_manager.broadcast_to_topic( await websocket_manager.broadcast_to_topic(
domain=MONITOR_DOMAIN, domain=MONITOR_DOMAIN,
topic=MONITOR_TOPIC, topic=MONITOR_TOPIC,
@@ -193,12 +191,8 @@ async def _broadcast(event: str, data: Dict[str, Any]) -> None:
async def emit_session_start(session_id: str, session_name: str) -> None: async def emit_session_start(session_id: str, session_name: str) -> None:
"""广播会话开始事件。 """广播会话开始事件。"""
Args:
session_id: 聊天流 ID。
session_name: 聊天流显示名称。
"""
await _broadcast("session.start", { await _broadcast("session.start", {
"session_id": session_id, "session_id": session_id,
"session_name": session_name, "session_name": session_name,
@@ -213,17 +207,8 @@ async def emit_message_ingested(
message_id: str, message_id: str,
timestamp: float, timestamp: float,
) -> None: ) -> None:
"""广播新消息注入事件。 """广播新消息注入事件。"""
当新的用户消息被纳入 MaiSaka 推理上下文时触发。
Args:
session_id: 聊天流 ID。
speaker_name: 发言者名称。
content: 消息文本内容。
message_id: 消息 ID。
timestamp: 消息时间戳。
"""
await _broadcast("message.ingested", { await _broadcast("message.ingested", {
"session_id": session_id, "session_id": session_id,
"speaker_name": speaker_name, "speaker_name": speaker_name,
@@ -240,15 +225,8 @@ async def emit_cycle_start(
max_rounds: int, max_rounds: int,
history_count: int, history_count: int,
) -> None: ) -> None:
"""广播推理循环开始事件。 """广播推理循环开始事件。"""
Args:
session_id: 聊天流 ID。
cycle_id: 循环编号。
round_index: 当前回合索引(从 0 开始)。
max_rounds: 最大回合数。
history_count: 当前上下文消息数。
"""
await _broadcast("cycle.start", { await _broadcast("cycle.start", {
"session_id": session_id, "session_id": session_id,
"cycle_id": cycle_id, "cycle_id": cycle_id,
@@ -270,19 +248,8 @@ async def emit_timing_gate_result(
selected_history_count: int, selected_history_count: int,
duration_ms: float, duration_ms: float,
) -> None: ) -> None:
"""广播 Timing Gate 子代理结果事件。 """广播 Timing Gate 结果事件。"""
Args:
session_id: 聊天流 ID。
cycle_id: 循环编号。
action: 控制决策continue/wait/no_reply
content: Timing Gate 返回的文本内容。
tool_calls: 工具调用列表。
messages: 发送给 Timing Gate 的消息列表。
prompt_tokens: 输入 Token 数。
selected_history_count: 已选上下文消息数。
duration_ms: 执行耗时(毫秒)。
"""
await _broadcast("timing_gate.result", { await _broadcast("timing_gate.result", {
"session_id": session_id, "session_id": session_id,
"cycle_id": cycle_id, "cycle_id": cycle_id,
@@ -297,177 +264,45 @@ async def emit_timing_gate_result(
}) })
async def emit_planner_request( async def emit_planner_finalized(
*,
session_id: str, session_id: str,
cycle_id: int, cycle_id: int,
messages: List[Any], request_messages: List[Any],
tool_count: int,
selected_history_count: int, selected_history_count: int,
) -> None: tool_count: int,
"""广播规划器请求开始事件。 planner_content: Optional[str],
planner_tool_calls: List[Any],
携带完整的消息列表,前端可以增量渲染新增消息。
Args:
session_id: 聊天流 ID。
cycle_id: 循环编号。
messages: 发送给规划器的完整消息列表。
tool_count: 可用工具数量。
selected_history_count: 已选上下文消息数。
"""
await _broadcast("planner.request", {
"session_id": session_id,
"cycle_id": cycle_id,
"messages": _serialize_messages(messages),
"tool_count": tool_count,
"selected_history_count": selected_history_count,
"timestamp": time.time(),
})
async def emit_planner_response(
session_id: str,
cycle_id: int,
content: Optional[str],
tool_calls: List[Any],
prompt_tokens: int, prompt_tokens: int,
completion_tokens: int, completion_tokens: int,
total_tokens: int, total_tokens: int,
duration_ms: float, duration_ms: float,
) -> None: tools: List[Dict[str, Any]],
"""广播规划器响应事件。
Args:
session_id: 聊天流 ID。
cycle_id: 循环编号。
content: 规划器返回的思考文本。
tool_calls: 规划器返回的工具调用列表。
prompt_tokens: 输入 Token 数。
completion_tokens: 输出 Token 数。
total_tokens: 总 Token 数。
duration_ms: 执行耗时(毫秒)。
"""
await _broadcast("planner.response", {
"session_id": session_id,
"cycle_id": cycle_id,
"content": content,
"tool_calls": _serialize_tool_calls_from_objects(tool_calls),
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": total_tokens,
"duration_ms": duration_ms,
"timestamp": time.time(),
})
async def emit_tool_execution(
session_id: str,
cycle_id: int,
tool_name: str,
tool_args: Dict[str, Any],
result_summary: str,
success: bool,
duration_ms: float,
) -> None:
"""广播工具执行结果事件。
Args:
session_id: 聊天流 ID。
cycle_id: 循环编号。
tool_name: 工具名称。
tool_args: 工具参数。
result_summary: 执行结果摘要。
success: 是否成功。
duration_ms: 执行耗时(毫秒)。
"""
await _broadcast("tool.execution", {
"session_id": session_id,
"cycle_id": cycle_id,
"tool_name": tool_name,
"tool_args": tool_args,
"result_summary": result_summary,
"success": success,
"duration_ms": duration_ms,
"timestamp": time.time(),
})
async def emit_cycle_end(
session_id: str,
cycle_id: int,
time_records: Dict[str, float], time_records: Dict[str, float],
agent_state: str, agent_state: str,
) -> None: ) -> None:
"""广播推理循环结束事件。 """广播一轮 planner 结束后的最终聚合事件。"""
Args: await _broadcast("planner.finalized", {
session_id: 聊天流 ID。
cycle_id: 循环编号。
time_records: 各阶段耗时记录。
agent_state: 循环结束后的代理状态。
"""
await _broadcast("cycle.end", {
"session_id": session_id, "session_id": session_id,
"cycle_id": cycle_id, "cycle_id": cycle_id,
"time_records": time_records,
"agent_state": agent_state,
"timestamp": time.time(), "timestamp": time.time(),
}) "request": {
"messages": _serialize_messages(request_messages),
"selected_history_count": selected_history_count,
async def emit_replier_request( "tool_count": tool_count,
session_id: str, },
messages: List[Any], "planner": {
model_name: str = "", "content": planner_content,
) -> None: "tool_calls": _serialize_tool_calls_from_objects(planner_tool_calls),
"""广播回复器请求开始事件。
Args:
session_id: 聊天流 ID。
messages: 发送给回复器的消息列表。
model_name: 使用的模型名称。
"""
await _broadcast("replier.request", {
"session_id": session_id,
"messages": _serialize_messages(messages),
"model_name": model_name,
"timestamp": time.time(),
})
async def emit_replier_response(
session_id: str,
content: Optional[str],
reasoning: str,
model_name: str,
prompt_tokens: int,
completion_tokens: int,
total_tokens: int,
duration_ms: float,
success: bool,
) -> None:
"""广播回复器响应事件。
Args:
session_id: 聊天流 ID。
content: 回复器生成的文本。
reasoning: 回复器的思考过程文本。
model_name: 使用的模型名称。
prompt_tokens: 输入 Token 数。
completion_tokens: 输出 Token 数。
total_tokens: 总 Token 数。
duration_ms: 执行耗时(毫秒)。
success: 是否生成成功。
"""
await _broadcast("replier.response", {
"session_id": session_id,
"content": content,
"reasoning": reasoning,
"model_name": model_name,
"prompt_tokens": prompt_tokens, "prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens, "completion_tokens": completion_tokens,
"total_tokens": total_tokens, "total_tokens": total_tokens,
"duration_ms": duration_ms, "duration_ms": duration_ms,
"success": success, },
"timestamp": time.time(), "tools": _serialize_tool_results(tools),
"final_state": {
"time_records": _normalize_payload_value(time_records),
"agent_state": agent_state,
},
}) })

View File

@@ -36,12 +36,10 @@ from .context_messages import (
) )
from .history_utils import build_prefixed_message_sequence, build_session_message_visible_text, drop_leading_orphan_tool_results from .history_utils import build_prefixed_message_sequence, build_session_message_visible_text, drop_leading_orphan_tool_results
from .monitor_events import ( from .monitor_events import (
emit_cycle_end,
emit_cycle_start, emit_cycle_start,
emit_message_ingested, emit_message_ingested,
emit_planner_response, emit_planner_finalized,
emit_timing_gate_result, emit_timing_gate_result,
emit_tool_execution,
) )
from .planner_message_utils import build_planner_user_prefix_from_session_message from .planner_message_utils import build_planner_user_prefix_from_session_message
@@ -279,6 +277,7 @@ class MaisakaReasoningEngine:
ChatResponse( ChatResponse(
content=reason, content=reason,
tool_calls=[], tool_calls=[],
request_messages=[],
raw_message=AssistantMessage( raw_message=AssistantMessage(
content="", content="",
timestamp=datetime.now(), timestamp=datetime.now(),
@@ -288,6 +287,7 @@ class MaisakaReasoningEngine:
sum(1 for message in self._runtime._chat_history if message.count_in_context), sum(1 for message in self._runtime._chat_history if message.count_in_context),
self._runtime._max_context_size, self._runtime._max_context_size,
), ),
tool_count=0,
prompt_tokens=0, prompt_tokens=0,
built_message_count=0, built_message_count=0,
completion_tokens=0, completion_tokens=0,
@@ -346,6 +346,9 @@ class MaisakaReasoningEngine:
history_count=len(self._runtime._chat_history), history_count=len(self._runtime._chat_history),
) )
planner_started_at = 0.0 planner_started_at = 0.0
planner_duration_ms = 0.0
response: Optional[ChatResponse] = None
tool_monitor_results: list[dict[str, Any]] = []
try: try:
visual_refresh_started_at = time.time() visual_refresh_started_at = time.time()
refreshed_message_count = await self._refresh_chat_history_visual_placeholders() refreshed_message_count = await self._refresh_chat_history_visual_placeholders()
@@ -403,17 +406,6 @@ class MaisakaReasoningEngine:
f"回合={round_index + 1} " f"回合={round_index + 1} "
f"耗时={cycle_detail.time_records['planner']:.3f}" f"耗时={cycle_detail.time_records['planner']:.3f}"
) )
await emit_planner_response(
session_id=self._runtime.session_id,
cycle_id=cycle_detail.cycle_id,
content=response.content,
tool_calls=response.tool_calls,
prompt_tokens=response.prompt_tokens,
completion_tokens=response.completion_tokens,
total_tokens=response.total_tokens,
duration_ms=planner_duration_ms,
)
reasoning_content = response.content or "" reasoning_content = response.content or ""
if self._should_replace_reasoning(reasoning_content): if self._should_replace_reasoning(reasoning_content):
response.content = "我应该根据我上面思考的内容进行反思,重新思考我下一步的行动,我需要分析当前场景,对话,以及我可以使用的工具,然后先输出想法再使用工具" response.content = "我应该根据我上面思考的内容进行反思,重新思考我下一步的行动,我需要分析当前场景,对话,以及我可以使用的工具,然后先输出想法再使用工具"
@@ -423,10 +415,11 @@ class MaisakaReasoningEngine:
self._last_reasoning_content = reasoning_content self._last_reasoning_content = reasoning_content
self._runtime._chat_history.append(response.raw_message) self._runtime._chat_history.append(response.raw_message)
tool_result_summaries: list[str] = [] tool_result_summaries: list[str] = []
tool_monitor_results = []
if response.tool_calls: if response.tool_calls:
tool_started_at = time.time() tool_started_at = time.time()
should_pause, tool_result_summaries = await self._handle_tool_calls( should_pause, tool_result_summaries, tool_monitor_results = await self._handle_tool_calls(
response.tool_calls, response.tool_calls,
response.content or "", response.content or "",
anchor_message, anchor_message,
@@ -463,11 +456,22 @@ class MaisakaReasoningEngine:
) )
break break
finally: finally:
self._end_cycle(cycle_detail) completed_cycle = self._end_cycle(cycle_detail)
await emit_cycle_end( if response is not None:
await emit_planner_finalized(
session_id=self._runtime.session_id, session_id=self._runtime.session_id,
cycle_id=cycle_detail.cycle_id, cycle_id=cycle_detail.cycle_id,
time_records=dict(cycle_detail.time_records), request_messages=response.request_messages,
selected_history_count=response.selected_history_count,
tool_count=response.tool_count,
planner_content=response.content,
planner_tool_calls=response.tool_calls,
prompt_tokens=response.prompt_tokens,
completion_tokens=response.completion_tokens,
total_tokens=response.total_tokens,
duration_ms=planner_duration_ms,
tools=tool_monitor_results,
time_records=dict(completed_cycle.time_records),
agent_state=self._runtime._agent_state, agent_state=self._runtime._agent_state,
) )
finally: finally:
@@ -683,7 +687,7 @@ class MaisakaReasoningEngine:
def _drop_leading_orphan_tool_results( def _drop_leading_orphan_tool_results(
chat_history: list[LLMContextMessage], chat_history: list[LLMContextMessage],
) -> tuple[list[LLMContextMessage], int]: ) -> tuple[list[LLMContextMessage], int]:
"""清理历史前缀中缺少对应 assistant tool_call 的工具结果消息。""" """清理历史窗口中缺少对应 assistant tool_call 的工具结果消息。"""
return drop_leading_orphan_tool_results(chat_history) return drop_leading_orphan_tool_results(chat_history)
@@ -1039,12 +1043,38 @@ class MaisakaReasoningEngine:
normalized_content = self._truncate_tool_record_text(history_content, max_length=200) normalized_content = self._truncate_tool_record_text(history_content, max_length=200)
return f"- {tool_call.func_name} {summary_prefix}: {normalized_content}" return f"- {tool_call.func_name} {summary_prefix}: {normalized_content}"
def _build_tool_monitor_result(
    self,
    tool_call: ToolCall,
    invocation: ToolInvocation,
    result: ToolExecutionResult,
    duration_ms: float,
) -> dict[str, Any]:
    """Build the monitoring record for one tool inside ``planner.finalized``.

    Args:
        tool_call: The tool call; supplies ``call_id`` and ``func_name``.
        invocation: The invocation; its ``arguments`` are used when they are a dict.
        result: The execution result; supplies ``success`` and ``metadata``.
        duration_ms: Execution time in milliseconds, rounded to two decimals.

    Returns:
        A dict with the keys ``tool_call_id``, ``tool_name``, ``tool_args``,
        ``success``, ``duration_ms``, ``summary`` and ``detail``. ``detail``
        is ``None`` when the result metadata has no ``monitor_detail``.
    """
    raw_detail = result.metadata.get("monitor_detail")
    if raw_detail is None:
        detail = None
    else:
        detail = self._normalize_tool_record_value(raw_detail)

    # Non-dict arguments are reported as an empty mapping.
    arguments = invocation.arguments if isinstance(invocation.arguments, dict) else {}

    record: dict[str, Any] = {}
    record["tool_call_id"] = tool_call.call_id
    record["tool_name"] = tool_call.func_name
    record["tool_args"] = self._normalize_tool_record_value(arguments)
    record["success"] = result.success
    record["duration_ms"] = round(duration_ms, 2)
    record["summary"] = self._build_tool_result_summary(tool_call, result)
    record["detail"] = detail
    return record
async def _handle_tool_calls( async def _handle_tool_calls(
self, self,
tool_calls: list[ToolCall], tool_calls: list[ToolCall],
latest_thought: str, latest_thought: str,
anchor_message: SessionMessage, anchor_message: SessionMessage,
) -> tuple[bool, list[str]]: ) -> tuple[bool, list[str], list[dict[str, Any]]]:
"""执行一批统一工具调用。 """执行一批统一工具调用。
Args: Args:
@@ -1057,6 +1087,7 @@ class MaisakaReasoningEngine:
""" """
tool_result_summaries: list[str] = [] tool_result_summaries: list[str] = []
tool_monitor_results: list[dict[str, Any]] = []
if self._runtime._tool_registry is None: if self._runtime._tool_registry is None:
for tool_call in tool_calls: for tool_call in tool_calls:
@@ -1069,7 +1100,10 @@ class MaisakaReasoningEngine:
await self._store_tool_execution_record(invocation, result, None) await self._store_tool_execution_record(invocation, result, None)
self._append_tool_execution_result(tool_call, result) self._append_tool_execution_result(tool_call, result)
tool_result_summaries.append(self._build_tool_result_summary(tool_call, result)) tool_result_summaries.append(self._build_tool_result_summary(tool_call, result))
return False, tool_result_summaries tool_monitor_results.append(
self._build_tool_monitor_result(tool_call, invocation, result, duration_ms=0.0)
)
return False, tool_result_summaries, tool_monitor_results
execution_context = self._build_tool_execution_context(latest_thought, anchor_message) execution_context = self._build_tool_execution_context(latest_thought, anchor_message)
tool_spec_map = { tool_spec_map = {
@@ -1088,24 +1122,17 @@ class MaisakaReasoningEngine:
) )
self._append_tool_execution_result(tool_call, result) self._append_tool_execution_result(tool_call, result)
tool_result_summaries.append(self._build_tool_result_summary(tool_call, result)) tool_result_summaries.append(self._build_tool_result_summary(tool_call, result))
tool_monitor_results.append(
self._build_tool_monitor_result(tool_call, invocation, result, tool_duration_ms)
)
# 向监控前端广播工具执行结果 # 向监控前端广播工具执行结果
cycle_id = self._runtime._current_cycle_detail.cycle_id if self._runtime._current_cycle_detail else 0
await emit_tool_execution(
session_id=self._runtime.session_id,
cycle_id=cycle_id,
tool_name=tool_call.func_name,
tool_args=invocation.arguments if isinstance(invocation.arguments, dict) else {},
result_summary=result.content[:500] if result.content else (result.error_message or "")[:500],
success=result.success,
duration_ms=tool_duration_ms,
)
if not result.success and tool_call.func_name == "reply": if not result.success and tool_call.func_name == "reply":
logger.warning(f"{self._runtime.log_prefix} 回复工具未生成可见消息,将继续下一轮循环") logger.warning(f"{self._runtime.log_prefix} 回复工具未生成可见消息,将继续下一轮循环")
if bool(result.metadata.get("pause_execution", False)): if bool(result.metadata.get("pause_execution", False)):
return True, tool_result_summaries return True, tool_result_summaries, tool_monitor_results
return False, tool_result_summaries return False, tool_result_summaries, tool_monitor_results

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,49 @@
from datetime import datetime
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
from src.llm_models.payload_content.tool_option import ToolCall
from src.maisaka.chat_loop_service import MaisakaChatLoopService
from src.maisaka.context_messages import AssistantMessage, SessionBackedMessage, ToolResultMessage
def _build_user_message(text: str) -> SessionBackedMessage:
    """Build a ``SessionBackedMessage`` whose raw content and visible text are both ``text``."""
    text_sequence = MessageSequence([TextComponent(text)])
    return SessionBackedMessage(
        raw_message=text_sequence,
        visible_text=text,
        timestamp=datetime.now(),
    )
def test_select_llm_context_messages_drops_orphan_tool_results_anywhere() -> None:
    """An unmatched tool result that is not at the head of the history is dropped.

    The history holds one assistant message issuing ``call_1``, its matching
    tool result, and an extra tool result for ``orphan_call`` placed after
    the first user message. Only the extra result must be filtered out.
    """
    wait_call = ToolCall(call_id="call_1", func_name="wait", args={"seconds": 30})
    assistant_with_call = AssistantMessage(
        content="",
        timestamp=datetime.now(),
        tool_calls=[wait_call],
    )
    unmatched_result = ToolResultMessage(
        content="当前对话循环已暂停,等待新消息到来。",
        timestamp=datetime.now(),
        tool_call_id="orphan_call",
    )
    matched_result = ToolResultMessage(
        content="等待 30 秒。",
        timestamp=datetime.now(),
        tool_call_id="call_1",
        tool_name="wait",
    )

    history = [_build_user_message("第一条消息")]
    history.append(unmatched_result)
    history.append(assistant_with_call)
    history.append(matched_result)
    history.append(_build_user_message("第二条消息"))

    kept_messages, _selection_reason = MaisakaChatLoopService.select_llm_context_messages(
        history,
        max_context_size=8,
    )

    assert unmatched_result not in kept_messages
    assert assistant_with_call in kept_messages
    assert matched_result in kept_messages