feat:重新启用表达学习黑话学习,黑话使用和表达使用
This commit is contained in:
@@ -1,17 +1,24 @@
|
||||
"""Maisaka 推理引擎。"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
from sqlmodel import select
|
||||
|
||||
from src.chat.heart_flow.heartFC_utils import CycleDetail
|
||||
from src.chat.message_receive.message import SessionMessage
|
||||
from src.chat.replyer.replyer_manager import replyer_manager
|
||||
from src.common.database.database import get_db_session
|
||||
from src.common.database.database_model import Jargon
|
||||
from src.common.data_models.mai_message_data_model import UserInfo
|
||||
from src.common.data_models.message_component_data_model import MessageSequence
|
||||
from src.common.data_models.message_component_data_model import MessageSequence, TextComponent
|
||||
from src.common.logger import get_logger
|
||||
from src.config.config import global_config
|
||||
from src.learners.jargon_explainer import search_jargon
|
||||
from src.llm_models.payload_content.tool_option import ToolCall
|
||||
from src.services import send_service
|
||||
|
||||
@@ -20,8 +27,10 @@ from .message_adapter import (
|
||||
build_visible_text_from_sequence,
|
||||
clone_message_sequence,
|
||||
format_speaker_content,
|
||||
get_message_text,
|
||||
get_message_role,
|
||||
)
|
||||
from .reply_context_builder import MaisakaReplyContextBuilder
|
||||
from .tool_handlers import (
|
||||
handle_mcp_tool,
|
||||
handle_unknown_tool,
|
||||
@@ -38,6 +47,7 @@ class MaisakaReasoningEngine:
|
||||
|
||||
def __init__(self, runtime: "MaisakaHeartFlowChatting") -> None:
|
||||
self._runtime = runtime
|
||||
self._reply_context_builder = MaisakaReplyContextBuilder(runtime.session_id)
|
||||
|
||||
async def run_loop(self) -> None:
|
||||
"""独立消费消息批次,并执行对应的内部思考轮次。"""
|
||||
@@ -99,8 +109,7 @@ class MaisakaReasoningEngine:
|
||||
"""处理传入消息列表,将其转换为历史消息并加入聊天历史缓存。"""
|
||||
for message in messages:
|
||||
# 构建用户消息序列
|
||||
user_sequence = await self._build_message_sequence(message)
|
||||
visible_text = build_visible_text_from_sequence(user_sequence).strip()
|
||||
user_sequence, visible_text = await self._build_message_sequence(message)
|
||||
if not user_sequence.components:
|
||||
continue
|
||||
|
||||
@@ -116,14 +125,15 @@ class MaisakaReasoningEngine:
|
||||
raw_message=user_sequence,
|
||||
display_text=visible_text,
|
||||
)
|
||||
self._runtime._chat_history.append(history_message)
|
||||
insert_index = self._insert_chat_history_message(history_message)
|
||||
reference_message = await self._build_jargon_reference_message(message)
|
||||
if reference_message is not None:
|
||||
self._runtime._chat_history.insert(insert_index + 1, reference_message)
|
||||
self._trim_chat_history()
|
||||
|
||||
async def _build_message_sequence(self, message: SessionMessage) -> MessageSequence:
|
||||
async def _build_message_sequence(self, message: SessionMessage) -> tuple[MessageSequence, str]:
|
||||
message_sequence = MessageSequence([])
|
||||
user_info = message.message_info.user_info
|
||||
speaker_name = user_info.user_cardname or user_info.user_nickname or user_info.user_id
|
||||
message_sequence.text(format_speaker_content(speaker_name, "", message.timestamp, message.message_id))
|
||||
planner_prefix = self._build_planner_user_prefix(message)
|
||||
|
||||
appended_component = False
|
||||
if global_config.maisaka.direct_image_input:
|
||||
@@ -131,18 +141,153 @@ class MaisakaReasoningEngine:
|
||||
else:
|
||||
source_sequence = message.raw_message
|
||||
|
||||
for component in clone_message_sequence(source_sequence).components:
|
||||
planner_components = clone_message_sequence(source_sequence).components
|
||||
if planner_components and isinstance(planner_components[0], TextComponent):
|
||||
planner_components[0].text = planner_prefix + planner_components[0].text
|
||||
else:
|
||||
planner_components.insert(0, TextComponent(planner_prefix))
|
||||
|
||||
for component in planner_components:
|
||||
message_sequence.components.append(component)
|
||||
appended_component = True
|
||||
|
||||
legacy_visible_text = self._build_legacy_visible_text(message, source_sequence)
|
||||
if not appended_component:
|
||||
if not message.processed_plain_text:
|
||||
await message.process()
|
||||
content = (message.processed_plain_text or "").strip()
|
||||
if content:
|
||||
message_sequence.text(content)
|
||||
message_sequence.text(planner_prefix + content)
|
||||
legacy_visible_text = self._build_legacy_visible_text_from_text(message, content)
|
||||
|
||||
return message_sequence
|
||||
return message_sequence, legacy_visible_text
|
||||
|
||||
@staticmethod
|
||||
def _build_planner_user_prefix(message: SessionMessage) -> str:
|
||||
user_info = message.message_info.user_info
|
||||
timestamp_text = message.timestamp.strftime("%H:%M:%S")
|
||||
user_name = user_info.user_nickname or user_info.user_id
|
||||
group_card = user_info.user_cardname or ""
|
||||
message_id = message.message_id or ""
|
||||
return (
|
||||
f"[时间]{timestamp_text}\n"
|
||||
f"[用户]{user_name}\n"
|
||||
f"[用户群昵称]{group_card}\n"
|
||||
f"[msg_id]{message_id}\n"
|
||||
"[发言内容]"
|
||||
)
|
||||
|
||||
def _build_legacy_visible_text(self, message: SessionMessage, source_sequence: MessageSequence) -> str:
    """Rebuild the legacy display text: speaker header plus cloned components.

    This reproduces the pre-planner visible text so history rendering stays
    unchanged even though the planner sequence now carries an extra prefix.
    """
    info = message.message_info.user_info
    display_name = info.user_cardname or info.user_nickname or info.user_id
    sequence = MessageSequence([])
    sequence.text(format_speaker_content(display_name, "", message.timestamp, message.message_id))
    sequence.components.extend(clone_message_sequence(source_sequence).components)
    return build_visible_text_from_sequence(sequence).strip()
|
||||
|
||||
def _build_legacy_visible_text_from_text(self, message: SessionMessage, content: str) -> str:
    """Format plain-text content exactly like the legacy visible text did."""
    info = message.message_info.user_info
    display_name = info.user_cardname or info.user_nickname or info.user_id
    formatted = format_speaker_content(display_name, content, message.timestamp, message.message_id)
    return formatted.strip()
|
||||
|
||||
def _insert_chat_history_message(self, message: SessionMessage) -> int:
    """Insert *message* into chat history in timestamp order.

    System messages are skipped while searching for the insertion point so
    they stay pinned at the front. Returns the index the message landed at,
    which callers use to place follow-up reference messages right after it.
    """
    history = self._runtime._chat_history
    if not history:
        history.append(message)
        return 0

    # Default: append at the end unless an existing, newer message is found.
    insert_at = len(history)
    for index, existing in enumerate(history):
        if get_message_role(existing) == "system":
            continue
        if existing.timestamp > message.timestamp:
            insert_at = index
            break

    history.insert(insert_at, message)
    return insert_at
|
||||
|
||||
async def _build_jargon_reference_message(self, message: SessionMessage) -> Optional[SessionMessage]:
    """Build an extra reference message when the text matches jargon entries.

    Returns None when the message has no usable text or no jargon entry was
    hit; otherwise returns a synthetic "user_reference" message nudging the
    planner to call query_jargon.
    """
    text = (get_message_text(message) or "").strip()
    if not text:
        # Fall back to the processed plain text, computing it on demand.
        if not message.processed_plain_text:
            await message.process()
        text = (message.processed_plain_text or "").strip()
    if not text:
        return None

    hits = self._find_jargon_words_in_text(text)
    if not hits:
        return None

    reference_text = (
        "[参考信息]\n"
        f"{','.join(hits)}可能是jargon,可以使用query_jargon来查看其含义"
    )
    return build_message(
        role="user",
        content="",
        source="user_reference",
        timestamp=message.timestamp,
        platform=message.platform,
        session_id=self._runtime.session_id,
        group_info=self._runtime._build_group_info(message),
        user_info=self._runtime._build_runtime_user_info(),
        raw_message=MessageSequence([TextComponent(reference_text)]),
        display_text=reference_text,
    )
|
||||
|
||||
def _find_jargon_words_in_text(self, content: str) -> list[str]:
    """Collect up to 8 distinct jargon entries that occur in *content*.

    Candidates come from the 200 most frequent confirmed jargon rows; each
    must be visible to this session and actually match the message body.
    """
    lowered = content.lower()
    found: list[str] = []
    already: set[str] = set()

    with get_db_session(auto_commit=False) as session:
        query = (
            select(Jargon)
            .where(Jargon.is_jargon.is_(True))  # type: ignore[attr-defined]
            .order_by(Jargon.count.desc())
            .limit(200)
        )
        for row in session.exec(query).all():
            word = str(row.content or "").strip()
            if not word or word in already:
                continue
            if not self._is_visible_jargon(row):
                continue
            if not self._jargon_matches_text(word, lowered, content):
                continue

            already.add(word)
            found.append(word)
            # Cap the reference hint at 8 words to keep it readable.
            if len(found) >= 8:
                break

    return found
|
||||
|
||||
def _is_visible_jargon(self, jargon: Jargon) -> bool:
    """Return whether the current session may see *jargon*.

    Globally shared entries (or a global-visibility config override) are
    always visible; otherwise the entry's per-session map must contain the
    current session id.
    """
    if global_config.expression.all_global_jargon or bool(jargon.is_global):
        return True

    raw_map = jargon.session_id_dict or "{}"
    try:
        session_map = json.loads(raw_map)
    except (TypeError, json.JSONDecodeError):
        # A corrupt session map hides the entry rather than crashing matching.
        logger.warning(f"Failed to parse jargon.session_id_dict: jargon_id={jargon.id}")
        return False
    return self._runtime.session_id in session_map
|
||||
|
||||
@staticmethod
|
||||
def _jargon_matches_text(jargon_content: str, lowered_content: str, original_content: str) -> bool:
|
||||
"""判断词条是否命中消息正文。"""
|
||||
if re.search(r"[\u4e00-\u9fff]", jargon_content):
|
||||
return jargon_content in original_content
|
||||
|
||||
pattern = rf"\b{re.escape(jargon_content.lower())}\b"
|
||||
return re.search(pattern, lowered_content) is not None
|
||||
|
||||
def _start_cycle(self) -> CycleDetail:
|
||||
"""开始一轮 Maisaka 思考循环。"""
|
||||
@@ -166,21 +311,24 @@ class MaisakaReasoningEngine:
|
||||
|
||||
def _trim_chat_history(self) -> None:
    """Trim chat history so user/assistant messages stay within the limit.

    Only "user" and "assistant" messages count toward the configured
    ``_max_context_size``. System messages are never removed, matching
    ``_insert_chat_history_message`` which keeps them pinned in place.
    """
    counted_roles = {"user", "assistant"}
    conversation_message_count = sum(
        1 for message in self._runtime._chat_history if get_message_role(message) in counted_roles
    )
    if conversation_message_count <= self._runtime._max_context_size:
        return

    trimmed_history = list(self._runtime._chat_history)
    removed_count = 0

    # Fix 1: use ">" (not ">=") so trimming stops exactly at the limit
    # instead of overshooting by one — the early return above already
    # treats count == limit as acceptable.
    # Fix 2: skip system messages instead of blindly popping index 0,
    # which could silently drop the pinned system prompt.
    while conversation_message_count > self._runtime._max_context_size:
        removable_index = next(
            (i for i, msg in enumerate(trimmed_history) if get_message_role(msg) != "system"),
            None,
        )
        if removable_index is None:
            break  # only system messages remain; nothing else to trim
        removed_message = trimmed_history.pop(removable_index)
        removed_count += 1
        if get_message_role(removed_message) in counted_roles:
            conversation_message_count -= 1

    self._runtime._chat_history = trimmed_history
    self._runtime._log_history_trimmed(removed_count, conversation_message_count)
|
||||
|
||||
async def _handle_tool_calls(
|
||||
self,
|
||||
@@ -204,6 +352,10 @@ class MaisakaReasoningEngine:
|
||||
)
|
||||
continue
|
||||
|
||||
if tool_call.func_name == "query_jargon":
|
||||
await self._handle_query_jargon(tool_call)
|
||||
continue
|
||||
|
||||
if tool_call.func_name == "wait":
|
||||
seconds = (tool_call.args or {}).get("seconds", 30)
|
||||
try:
|
||||
@@ -238,6 +390,68 @@ class MaisakaReasoningEngine:
|
||||
|
||||
return False
|
||||
|
||||
async def _handle_query_jargon(self, tool_call: ToolCall) -> None:
    """Execute a query_jargon tool call and append its result to chat history.

    Validates the "words" argument, deduplicates it, looks each word up
    (exact first, fuzzy as fallback), and records a JSON tool message.
    """
    tool_args = tool_call.args or {}
    raw_words = tool_args.get("words")

    if not isinstance(raw_words, list):
        self._runtime._chat_history.append(
            self._build_tool_message(tool_call, "query_jargon requires a words array.")
        )
        return

    # Normalize: keep non-empty strings, preserve order, drop duplicates.
    seen: set[str] = set()
    words: list[str] = []
    for entry in raw_words:
        if not isinstance(entry, str):
            continue
        candidate = entry.strip()
        if candidate and candidate not in seen:
            seen.add(candidate)
            words.append(candidate)

    if not words:
        self._runtime._chat_history.append(
            self._build_tool_message(tool_call, "query_jargon requires at least one non-empty word.")
        )
        return

    logger.info(f"{self._runtime.log_prefix} query_jargon triggered: words={words!r}")

    results: list[dict[str, object]] = []
    for word in words:
        # Prefer exact matches; only fall back to fuzzy search when none hit.
        entries = search_jargon(
            keyword=word,
            chat_id=self._runtime.session_id,
            limit=5,
            case_sensitive=False,
            fuzzy=False,
        )
        if not entries:
            entries = search_jargon(
                keyword=word,
                chat_id=self._runtime.session_id,
                limit=5,
                case_sensitive=False,
                fuzzy=True,
            )

        results.append(
            {
                "word": word,
                "found": bool(entries),
                "matches": entries,
            }
        )

    logger.info(f"{self._runtime.log_prefix} query_jargon finished: results={results!r}")
    self._runtime._chat_history.append(
        self._build_tool_message(
            tool_call,
            json.dumps({"results": results}, ensure_ascii=False),
        )
    )
|
||||
|
||||
async def _handle_reply(
|
||||
self,
|
||||
tool_call: ToolCall,
|
||||
@@ -246,6 +460,9 @@ class MaisakaReasoningEngine:
|
||||
) -> bool:
|
||||
tool_args = tool_call.args or {}
|
||||
target_message_id = str(tool_args.get("msg_id") or "").strip()
|
||||
quote_reply = bool(tool_args.get("quote", True))
|
||||
raw_unknown_words = tool_args.get("unknown_words")
|
||||
unknown_words = raw_unknown_words if isinstance(raw_unknown_words, list) else None
|
||||
if not target_message_id:
|
||||
self._runtime._chat_history.append(
|
||||
self._build_tool_message(tool_call, "reply requires a valid msg_id argument.")
|
||||
@@ -261,7 +478,7 @@ class MaisakaReasoningEngine:
|
||||
|
||||
logger.info(
|
||||
f"{self._runtime.log_prefix} reply tool triggered: "
|
||||
f"target_msg_id={target_message_id} latest_thought={latest_thought!r}"
|
||||
f"target_msg_id={target_message_id} quote={quote_reply} latest_thought={latest_thought!r}"
|
||||
)
|
||||
logger.info(f"{self._runtime.log_prefix} acquiring Maisaka reply generator")
|
||||
try:
|
||||
@@ -288,6 +505,34 @@ class MaisakaReasoningEngine:
|
||||
return False
|
||||
|
||||
logger.info(f"{self._runtime.log_prefix} acquired Maisaka reply generator successfully")
|
||||
logger.info(
|
||||
f"{self._runtime.log_prefix} building reply context: "
|
||||
f"target_msg_id={target_message_id} unknown_words={unknown_words!r}"
|
||||
)
|
||||
|
||||
try:
|
||||
reply_context = await self._reply_context_builder.build(
|
||||
chat_history=self._runtime._chat_history,
|
||||
reply_message=target_message,
|
||||
reply_reason=latest_thought,
|
||||
unknown_words=unknown_words,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
f"{self._runtime.log_prefix} reply context builder crashed: "
|
||||
f"target_msg_id={target_message_id}"
|
||||
)
|
||||
self._runtime._chat_history.append(
|
||||
self._build_tool_message(tool_call, "Reply context preparation crashed.")
|
||||
)
|
||||
return False
|
||||
|
||||
logger.info(
|
||||
f"{self._runtime.log_prefix} reply context built: "
|
||||
f"target_msg_id={target_message_id} "
|
||||
f"selected_expression_ids={reply_context.selected_expression_ids!r} "
|
||||
f"has_jargon_explanation={bool(reply_context.jargon_explanation.strip())}"
|
||||
)
|
||||
|
||||
try:
|
||||
success, reply_result = await replyer.generate_reply_with_context(
|
||||
@@ -295,7 +540,11 @@ class MaisakaReasoningEngine:
|
||||
stream_id=self._runtime.session_id,
|
||||
reply_message=target_message,
|
||||
chat_history=self._runtime._chat_history,
|
||||
unknown_words=unknown_words,
|
||||
log_reply=False,
|
||||
expression_habits=reply_context.expression_habits,
|
||||
jargon_explanation=reply_context.jargon_explanation,
|
||||
selected_expression_ids=reply_context.selected_expression_ids,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(f"{self._runtime.log_prefix} reply generator crashed: target_msg_id={target_message_id}")
|
||||
@@ -322,14 +571,15 @@ class MaisakaReasoningEngine:
|
||||
|
||||
logger.info(
|
||||
f"{self._runtime.log_prefix} sending guided reply: "
|
||||
f"target_msg_id={target_message_id} reply_text={reply_text!r}"
|
||||
f"target_msg_id={target_message_id} quote={quote_reply} reply_text={reply_text!r}"
|
||||
)
|
||||
try:
|
||||
sent = await send_service.text_to_stream(
|
||||
text=reply_text,
|
||||
stream_id=self._runtime.session_id,
|
||||
set_reply=True,
|
||||
reply_message=target_message,
|
||||
set_reply=quote_reply,
|
||||
reply_message=target_message if quote_reply else None,
|
||||
selected_expressions=reply_result.selected_expression_ids or None,
|
||||
typing=False,
|
||||
)
|
||||
except Exception:
|
||||
|
||||
Reference in New Issue
Block a user