fix: 修复gemini tool问题,简化表情包识别,修复非多模态plan图片识别
This commit is contained in:
138
src/maisaka/chat_history_visual_refresher.py
Normal file
138
src/maisaka/chat_history_visual_refresher.py
Normal file
@@ -0,0 +1,138 @@
|
||||
"""Maisaka 聊天历史视觉占位刷新器。"""
|
||||
|
||||
from typing import Awaitable, Callable, Optional
|
||||
|
||||
from sqlmodel import select
|
||||
|
||||
from src.chat.message_receive.message import SessionMessage
|
||||
from src.common.data_models.message_component_data_model import EmojiComponent, ForwardNodeComponent, ImageComponent
|
||||
from src.common.database.database import get_db_session
|
||||
from src.common.database.database_model import Images, ImageType
|
||||
from src.common.logger import get_logger
|
||||
|
||||
from .context_messages import LLMContextMessage, SessionBackedMessage
|
||||
|
||||
logger = get_logger("maisaka_chat_history_visual_refresher")
|
||||
|
||||
BuildHistoryMessage = Callable[[SessionMessage, str], Awaitable[Optional[LLMContextMessage]]]
|
||||
BuildVisibleText = Callable[[SessionMessage], str]
|
||||
|
||||
|
||||
async def refresh_chat_history_visual_placeholders(
    *,
    chat_history: list[LLMContextMessage],
    build_history_message: BuildHistoryMessage,
    build_visible_text: BuildVisibleText,
) -> int:
    """Refresh visual placeholders in history before the next planning round.

    Walks ``chat_history`` in place. For every session-backed entry whose
    underlying message still carries pending image/emoji placeholders, pull
    already-finished recognition results from the cache, cheaply re-process
    the message, and swap in a freshly rebuilt context message.

    Returns:
        The number of history entries that were replaced.
    """
    replaced = 0
    for position, entry in enumerate(chat_history):
        if not isinstance(entry, SessionBackedMessage):
            continue

        source = entry.original_message
        if source is None:
            continue

        components_changed = _refresh_pending_visual_components(source.raw_message.components)
        if components_changed:
            # Re-run only lightweight processing; heavy media analysis and
            # voice transcription stay disabled to keep this refresh cheap.
            await source.process(
                enable_heavy_media_analysis=False,
                enable_voice_transcription=False,
            )

        # Even without component updates the rendered text may have drifted
        # from the cached copy; either difference warrants a rebuild.
        latest_visible_text = build_visible_text(source)
        if not components_changed and latest_visible_text == entry.visible_text:
            continue

        rebuilt = await build_history_message(source, entry.source_kind)
        if rebuilt is None:
            continue

        chat_history[position] = rebuilt
        replaced += 1

    return replaced
|
||||
|
||||
|
||||
def _refresh_pending_visual_components(components: list[object]) -> bool:
|
||||
"""用缓存中的描述更新尚未补全文本的图片与表情组件。"""
|
||||
|
||||
refreshed = False
|
||||
for component in components:
|
||||
if isinstance(component, ImageComponent):
|
||||
if _should_refresh_image_component(component):
|
||||
image_description = _lookup_cached_image_description(component.binary_hash)
|
||||
if image_description:
|
||||
component.content = f"[图片:{image_description}]"
|
||||
refreshed = True
|
||||
continue
|
||||
|
||||
if isinstance(component, EmojiComponent):
|
||||
if _should_refresh_emoji_component(component):
|
||||
emoji_description = _lookup_cached_emoji_description(component.binary_hash)
|
||||
if emoji_description:
|
||||
component.content = f"[表情包: {emoji_description}]"
|
||||
refreshed = True
|
||||
continue
|
||||
|
||||
if not isinstance(component, ForwardNodeComponent):
|
||||
continue
|
||||
|
||||
for forward_component in component.forward_components:
|
||||
if _refresh_pending_visual_components(forward_component.content):
|
||||
refreshed = True
|
||||
|
||||
return refreshed
|
||||
|
||||
|
||||
def _should_refresh_image_component(component: ImageComponent) -> bool:
    """Report whether the image component still shows an unfilled placeholder."""
    text = component.content
    if text and text != "[图片]":
        return False
    return True
|
||||
|
||||
|
||||
def _should_refresh_emoji_component(component: EmojiComponent) -> bool:
    """Report whether the emoji component still shows an unfilled placeholder."""
    text = component.content
    if text and text != "[表情包]":
        return False
    return True
|
||||
|
||||
|
||||
def _lookup_cached_image_description(image_hash: str) -> str:
|
||||
"""从数据库读取已完成的图片描述,不触发新的识图请求。"""
|
||||
|
||||
if not image_hash:
|
||||
return ""
|
||||
|
||||
try:
|
||||
with get_db_session() as session:
|
||||
statement = select(Images).filter_by(image_hash=image_hash, image_type=ImageType.IMAGE).limit(1)
|
||||
if image_record := session.exec(statement).first():
|
||||
if image_record.no_file_flag:
|
||||
return ""
|
||||
if image_record.vlm_processed and image_record.description:
|
||||
return str(image_record.description).strip()
|
||||
except Exception as exc:
|
||||
logger.warning(f"读取图片缓存描述失败,image_hash={image_hash}: {exc}")
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def _lookup_cached_emoji_description(emoji_hash: str) -> str:
|
||||
"""从数据库读取已完成的表情描述,不触发新的识别请求。"""
|
||||
|
||||
if not emoji_hash:
|
||||
return ""
|
||||
|
||||
try:
|
||||
with get_db_session() as session:
|
||||
statement = select(Images).filter_by(image_hash=emoji_hash, image_type=ImageType.EMOJI).limit(1)
|
||||
if image_record := session.exec(statement).first():
|
||||
if image_record.no_file_flag or not image_record.description:
|
||||
return ""
|
||||
return str(image_record.description).strip()
|
||||
except Exception as exc:
|
||||
logger.warning(f"读取表情缓存描述失败,emoji_hash={emoji_hash}: {exc}")
|
||||
|
||||
return ""
|
||||
@@ -196,6 +196,7 @@ def _build_message_from_sequence(
|
||||
fallback_text: str,
|
||||
*,
|
||||
tool_call_id: Optional[str] = None,
|
||||
tool_name: Optional[str] = None,
|
||||
tool_calls: Optional[list[ToolCall]] = None,
|
||||
) -> Optional[Message]:
|
||||
"""根据消息片段构造统一 LLM 消息。"""
|
||||
@@ -204,6 +205,8 @@ def _build_message_from_sequence(
|
||||
builder.set_tool_calls(tool_calls)
|
||||
if role == RoleType.Tool and tool_call_id:
|
||||
builder.add_tool_call(tool_call_id)
|
||||
if role == RoleType.Tool and tool_name:
|
||||
builder.set_tool_name(tool_name)
|
||||
|
||||
has_content = False
|
||||
for component in message_sequence.components:
|
||||
@@ -481,4 +484,5 @@ class ToolResultMessage(LLMContextMessage):
|
||||
message_sequence,
|
||||
self.content,
|
||||
tool_call_id=self.tool_call_id,
|
||||
tool_name=self.tool_name,
|
||||
)
|
||||
|
||||
@@ -66,11 +66,11 @@ def build_visible_text_from_sequence(message_sequence: MessageSequence) -> str:
|
||||
continue
|
||||
|
||||
if isinstance(component, EmojiComponent):
|
||||
parts.append("[表情包]")
|
||||
parts.append(component.content or "[表情包]")
|
||||
continue
|
||||
|
||||
if isinstance(component, ImageComponent):
|
||||
parts.append("[图片]")
|
||||
parts.append(component.content or "[图片]")
|
||||
continue
|
||||
|
||||
if isinstance(component, ReplyComponent):
|
||||
|
||||
@@ -24,6 +24,7 @@ from src.services import database_service as database_api
|
||||
from .builtin_tool import get_action_tool_specs
|
||||
from .builtin_tool import build_builtin_tool_handlers as build_split_builtin_tool_handlers
|
||||
from .builtin_tool import get_timing_tools
|
||||
from .chat_history_visual_refresher import refresh_chat_history_visual_placeholders
|
||||
from .builtin_tool.context import BuiltinToolRuntimeContext
|
||||
from .context_messages import (
|
||||
AssistantMessage,
|
||||
@@ -103,16 +104,22 @@ class MaisakaReasoningEngine:
|
||||
"""运行一轮可被新消息打断的主 planner 请求。"""
|
||||
|
||||
interrupt_flag = asyncio.Event()
|
||||
self._runtime._planner_interrupt_flag = interrupt_flag
|
||||
interrupted = False
|
||||
self._runtime._bind_planner_interrupt_flag(interrupt_flag)
|
||||
self._runtime._chat_loop_service.set_interrupt_flag(interrupt_flag)
|
||||
try:
|
||||
return await self._runtime._chat_loop_service.chat_loop_step(
|
||||
self._runtime._chat_history,
|
||||
tool_definitions=tool_definitions,
|
||||
)
|
||||
except ReqAbortException:
|
||||
interrupted = True
|
||||
raise
|
||||
finally:
|
||||
if self._runtime._planner_interrupt_flag is interrupt_flag:
|
||||
self._runtime._planner_interrupt_flag = None
|
||||
self._runtime._unbind_planner_interrupt_flag(
|
||||
interrupt_flag,
|
||||
interrupted=interrupted,
|
||||
)
|
||||
self._runtime._chat_loop_service.set_interrupt_flag(None)
|
||||
|
||||
async def _run_interruptible_sub_agent(
|
||||
@@ -125,7 +132,8 @@ class MaisakaReasoningEngine:
|
||||
"""运行一轮可被新消息打断的临时子代理请求。"""
|
||||
|
||||
interrupt_flag = asyncio.Event()
|
||||
self._runtime._planner_interrupt_flag = interrupt_flag
|
||||
interrupted = False
|
||||
self._runtime._bind_planner_interrupt_flag(interrupt_flag)
|
||||
try:
|
||||
return await self._runtime.run_sub_agent(
|
||||
context_message_limit=context_message_limit,
|
||||
@@ -136,9 +144,14 @@ class MaisakaReasoningEngine:
|
||||
temperature=0.1,
|
||||
tool_definitions=tool_definitions,
|
||||
)
|
||||
except ReqAbortException:
|
||||
interrupted = True
|
||||
raise
|
||||
finally:
|
||||
if self._runtime._planner_interrupt_flag is interrupt_flag:
|
||||
self._runtime._planner_interrupt_flag = None
|
||||
self._runtime._unbind_planner_interrupt_flag(
|
||||
interrupt_flag,
|
||||
interrupted=interrupted,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _build_timing_gate_fallback_prompt() -> str:
|
||||
@@ -313,6 +326,14 @@ class MaisakaReasoningEngine:
|
||||
)
|
||||
planner_started_at = 0.0
|
||||
try:
|
||||
visual_refresh_started_at = time.time()
|
||||
refreshed_message_count = await self._refresh_chat_history_visual_placeholders()
|
||||
cycle_detail.time_records["visual_refresh"] = time.time() - visual_refresh_started_at
|
||||
if refreshed_message_count > 0:
|
||||
logger.info(
|
||||
f"{self._runtime.log_prefix} 本轮思考前已刷新 {refreshed_message_count} 条视觉占位历史消息"
|
||||
)
|
||||
|
||||
timing_started_at = time.time()
|
||||
timing_action, timing_response, timing_tool_results = await self._run_timing_gate(anchor_message)
|
||||
timing_duration_ms = (time.time() - timing_started_at) * 1000
|
||||
@@ -526,7 +547,12 @@ class MaisakaReasoningEngine:
|
||||
timestamp=message.timestamp.timestamp(),
|
||||
)
|
||||
|
||||
async def _build_history_message(self, message: SessionMessage) -> Optional[LLMContextMessage]:
|
||||
async def _build_history_message(
|
||||
self,
|
||||
message: SessionMessage,
|
||||
*,
|
||||
source_kind: str = "user",
|
||||
) -> Optional[LLMContextMessage]:
|
||||
"""根据真实消息构造对应的上下文消息。"""
|
||||
|
||||
source_sequence = message.raw_message
|
||||
@@ -537,7 +563,7 @@ class MaisakaReasoningEngine:
|
||||
message,
|
||||
planner_prefix=planner_prefix,
|
||||
visible_text=visible_text,
|
||||
source_kind="user",
|
||||
source_kind=source_kind,
|
||||
)
|
||||
|
||||
user_sequence = await self._build_message_sequence(message, planner_prefix=planner_prefix)
|
||||
@@ -548,7 +574,7 @@ class MaisakaReasoningEngine:
|
||||
message,
|
||||
raw_message=user_sequence,
|
||||
visible_text=visible_text,
|
||||
source_kind="user",
|
||||
source_kind=source_kind,
|
||||
)
|
||||
|
||||
async def _build_message_sequence(
|
||||
@@ -601,6 +627,18 @@ class MaisakaReasoningEngine:
|
||||
if isinstance(result, Exception):
|
||||
logger.warning(f"{self._runtime.log_prefix} 回填图片或表情二进制数据失败,Maisaka 将退化为文本占位: {result}")
|
||||
|
||||
async def _refresh_chat_history_visual_placeholders(self) -> int:
|
||||
"""在进入新一轮规划前,尝试用已完成的识图结果刷新历史占位。"""
|
||||
|
||||
return await refresh_chat_history_visual_placeholders(
|
||||
chat_history=self._runtime._chat_history,
|
||||
build_history_message=lambda message, source_kind: self._build_history_message(
|
||||
message,
|
||||
source_kind=source_kind,
|
||||
),
|
||||
build_visible_text=lambda message: self._build_legacy_visible_text(message, message.raw_message),
|
||||
)
|
||||
|
||||
def _build_legacy_visible_text(self, message: SessionMessage, source_sequence: MessageSequence) -> str:
|
||||
user_info = message.message_info.user_info
|
||||
speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
|
||||
|
||||
@@ -84,6 +84,12 @@ class MaisakaHeartFlowChatting:
|
||||
self._wait_until: Optional[float] = None
|
||||
self._pending_wait_tool_call_id: Optional[str] = None
|
||||
self._planner_interrupt_flag: Optional[asyncio.Event] = None
|
||||
self._planner_interrupt_requested = False
|
||||
self._planner_interrupt_consecutive_count = 0
|
||||
self._planner_interrupt_max_consecutive_count = max(
|
||||
0,
|
||||
int(global_config.maisaka.planner_interrupt_max_consecutive_count),
|
||||
)
|
||||
|
||||
expr_use, jargon_learn, expr_learn = ExpressionConfigUtils.get_expression_config_for_chat(session_id)
|
||||
self._enable_expression_use = expr_use
|
||||
@@ -167,14 +173,51 @@ class MaisakaHeartFlowChatting:
|
||||
if self._agent_state == self._STATE_RUNNING:
|
||||
self._message_debounce_required = True
|
||||
if self._agent_state == self._STATE_RUNNING and self._planner_interrupt_flag is not None:
|
||||
logger.info(
|
||||
f"{self.log_prefix} 收到新消息,发起规划器打断; "
|
||||
f"消息编号={message.message_id} 缓存条数={len(self.message_cache)} "
|
||||
f"时间戳={time.time():.3f}"
|
||||
)
|
||||
self._planner_interrupt_flag.set()
|
||||
if self._planner_interrupt_requested:
|
||||
logger.info(
|
||||
f"{self.log_prefix} 收到新消息,但当前请求已发起过一次规划器打断,"
|
||||
f"本次不重复打断; 消息编号={message.message_id} "
|
||||
f"连续打断次数={self._planner_interrupt_consecutive_count}/"
|
||||
f"{self._planner_interrupt_max_consecutive_count}"
|
||||
)
|
||||
elif self._planner_interrupt_consecutive_count >= self._planner_interrupt_max_consecutive_count:
|
||||
logger.info(
|
||||
f"{self.log_prefix} 收到新消息,但已达到规划器连续打断上限,"
|
||||
f"将等待当前请求自然完成; 消息编号={message.message_id} "
|
||||
f"连续打断次数={self._planner_interrupt_consecutive_count}/"
|
||||
f"{self._planner_interrupt_max_consecutive_count}"
|
||||
)
|
||||
else:
|
||||
self._planner_interrupt_requested = True
|
||||
self._planner_interrupt_consecutive_count += 1
|
||||
logger.info(
|
||||
f"{self.log_prefix} 收到新消息,发起规划器打断; "
|
||||
f"消息编号={message.message_id} 缓存条数={len(self.message_cache)} "
|
||||
f"时间戳={time.time():.3f} "
|
||||
f"连续打断次数={self._planner_interrupt_consecutive_count}/"
|
||||
f"{self._planner_interrupt_max_consecutive_count}"
|
||||
)
|
||||
self._planner_interrupt_flag.set()
|
||||
self._new_message_event.set()
|
||||
|
||||
def _bind_planner_interrupt_flag(self, interrupt_flag: asyncio.Event) -> None:
|
||||
"""绑定当前可打断请求使用的中断标记。"""
|
||||
self._planner_interrupt_flag = interrupt_flag
|
||||
self._planner_interrupt_requested = False
|
||||
|
||||
def _unbind_planner_interrupt_flag(
|
||||
self,
|
||||
interrupt_flag: asyncio.Event,
|
||||
*,
|
||||
interrupted: bool,
|
||||
) -> None:
|
||||
"""解绑当前可打断请求的中断标记,并维护连续打断计数。"""
|
||||
if self._planner_interrupt_flag is interrupt_flag:
|
||||
self._planner_interrupt_flag = None
|
||||
self._planner_interrupt_requested = False
|
||||
if not interrupted:
|
||||
self._planner_interrupt_consecutive_count = 0
|
||||
|
||||
def _ensure_background_tasks_running(self) -> None:
|
||||
"""确保后台任务仍在运行,若崩溃则自动拉起。"""
|
||||
if not self._running:
|
||||
@@ -513,7 +556,6 @@ class MaisakaHeartFlowChatting:
|
||||
if not global_config.debug.show_maisaka_thinking:
|
||||
return
|
||||
|
||||
session_name = chat_manager.get_session_name(self.session_id) or self.session_id
|
||||
body_lines = [
|
||||
f"上下文占用:{selected_history_count}/{self._max_context_size} 条",
|
||||
f"本次请求token消耗:{self._format_token_count(prompt_tokens)}",
|
||||
|
||||
Reference in New Issue
Block a user