remove:移除tool_use模型,修复Jargon提取问题,修改统计为tool统计

This commit is contained in:
SengokuCola
2026-03-29 16:26:34 +08:00
parent 868438e3c1
commit 82bbf0fd52
25 changed files with 906 additions and 311 deletions

View File

@@ -1,13 +1,14 @@
"""Maisaka 推理引擎。"""
import difflib
import json
import asyncio
import re
import time
from datetime import datetime
from typing import TYPE_CHECKING, Optional
import asyncio
import difflib
import json
import re
import time
from sqlmodel import select
from src.chat.heart_flow.heartFC_utils import CycleDetail
@@ -21,13 +22,14 @@ from src.common.logger import get_logger
from src.config.config import global_config
from src.learners.jargon_explainer import search_jargon
from src.llm_models.payload_content.tool_option import ToolCall
from src.services import send_service
from src.services import database_service as database_api, send_service
from .message_adapter import (
build_message,
build_visible_text_from_sequence,
clone_message_sequence,
format_speaker_content,
get_message_source,
get_message_text,
get_message_role,
)
@@ -69,6 +71,8 @@ class MaisakaReasoningEngine:
cycle_detail = self._start_cycle()
self._runtime._log_cycle_started(cycle_detail, round_index)
try:
# 每次LLM生成前动态添加参考消息到最新位置
self._append_jargon_reference_message()
planner_started_at = time.time()
response = await self._runtime._chat_loop_service.chat_loop_step(self._runtime._chat_history)
cycle_detail.time_records["planner"] = time.time() - planner_started_at
@@ -134,10 +138,7 @@ class MaisakaReasoningEngine:
raw_message=user_sequence,
display_text=visible_text,
)
insert_index = self._insert_chat_history_message(history_message)
reference_message = await self._build_jargon_reference_message(message)
if reference_message is not None:
self._runtime._chat_history.insert(insert_index + 1, reference_message)
self._insert_chat_history_message(history_message)
self._trim_chat_history()
async def _build_message_sequence(self, message: SessionMessage) -> tuple[MessageSequence, str]:
@@ -217,65 +218,84 @@ class MaisakaReasoningEngine:
self._runtime._chat_history.insert(insert_at, message)
return insert_at
def _append_jargon_reference_message(self) -> None:
    """Append a jargon hint message to the end of the chat history.

    Called before each LLM generation step. When the text of the user
    messages currently in the history contains known jargon terms, a
    ``user_reference`` message listing those terms is appended so the model
    can look them up with ``query_jargon``. Nothing is appended when there
    is no user text, no term matches, or no chat stream is available.
    """
    history = self._runtime._chat_history

    # This method runs once per reasoning round, so a hint appended in an
    # earlier round would otherwise stay in the history next to the new one.
    # Remove the stale hints first (iterating backwards keeps indices valid).
    # NOTE(review): assumes "user_reference" is only used for these jargon
    # hints -- confirm no other producer relies on that source value.
    for index in range(len(history) - 1, -1, -1):
        if get_message_source(history[index]) == "user_reference":
            del history[index]

    # Other code in this class checks chat_stream for None before using it;
    # without a stream there is no platform to attach the hint to.
    chat_stream = self._runtime.chat_stream
    if chat_stream is None:
        return

    content = self._build_user_history_corpus()
    if not content:
        return

    matched_words = self._find_jargon_words_in_text(content)
    if not matched_words:
        return

    reference_text = (
        "[参考信息]\n"
        f"{','.join(matched_words)}可能是jargon可以使用query_jargon来查看其含义"
    )
    reference_sequence = MessageSequence([TextComponent(reference_text)])
    # The hint is not tied to one incoming message, so it is stamped with
    # the current time.
    reference_message = build_message(
        role="user",
        content="",
        source="user_reference",
        timestamp=datetime.now(),
        platform=chat_stream.platform,
        session_id=self._runtime.session_id,
        group_info=self._runtime._build_group_info(),
        user_info=self._runtime._build_runtime_user_info(),
        raw_message=reference_sequence,
        display_text=reference_text,
    )
    history.append(reference_message)
def _build_user_history_corpus(self) -> str:
    """Join the text of all genuine user messages in the chat history.

    Only messages whose role is ``"user"`` and whose source is ``"user"``
    contribute. Each text is stripped, empty texts are dropped, and the
    remaining texts are joined with newlines. The result is the corpus used
    for jargon matching.
    """
    stripped_texts = (
        (get_message_text(entry) or "").strip()
        for entry in self._runtime._chat_history
        # The source is only inspected for messages that already have the user role.
        if get_message_role(entry) == "user" and get_message_source(entry) == "user"
    )
    return "\n".join(text for text in stripped_texts if text)
def _find_jargon_words_in_text(self, content: str) -> list[str]:
    """Return up to eight jargon terms that occur in ``content``.

    Every row flagged ``is_jargon`` is loaded in descending ``count`` order.
    A term is kept when it is non-empty, not a case-insensitive duplicate of
    an earlier kept term, visible to the current session, and found in the
    text. Kept terms are ranked by first match position, then longer term
    first, then higher count first.

    Args:
        content: Text to scan for jargon terms.

    Returns:
        The matched jargon strings in ranked order, at most eight.
    """
    if not content:
        # Nothing can match empty text, so skip the database query.
        return []

    lowered_content = content.lower()
    matched_entries: list[tuple[int, int, int, str]] = []
    seen_words: set[str] = set()

    with get_db_session(auto_commit=False) as session:
        query = (
            select(Jargon)
            .where(Jargon.is_jargon.is_(True))
            .order_by(Jargon.count.desc())  # type: ignore[attr-defined]
        )
        jargons = session.exec(query).all()

        # Row attributes are read while the session is still open.
        for jargon in jargons:
            jargon_content = str(jargon.content or "").strip()
            if not jargon_content:
                continue
            normalized_content = jargon_content.lower()
            if normalized_content in seen_words:
                continue
            if not self._is_visible_jargon(jargon):
                continue
            match_position = self._get_jargon_match_position(jargon_content, lowered_content, content)
            if match_position is None:
                continue

            seen_words.add(normalized_content)
            # Negated length and count make the ascending tuple sort prefer
            # longer terms and higher counts when positions tie.
            matched_entries.append(
                (match_position, -len(jargon_content), -int(jargon.count or 0), jargon_content)
            )

    matched_entries.sort()
    return [matched_content for _, _, _, matched_content in matched_entries[:8]]
def _is_visible_jargon(self, jargon: Jargon) -> bool:
"""判断当前会话是否可见该 jargon。"""
@@ -290,13 +310,17 @@ class MaisakaReasoningEngine:
return self._runtime.session_id in session_id_dict
@staticmethod
def _jargon_matches_text(jargon_content: str, lowered_content: str, original_content: str) -> bool:
"""判断词条是否命中消息正文"""
def _get_jargon_match_position(jargon_content: str, lowered_content: str, original_content: str) -> Optional[int]:
"""返回 jargon 在文本中的首次命中位置,未命中时返回 `None`"""
if re.search(r"[\u4e00-\u9fff]", jargon_content):
return jargon_content in original_content
match_index = original_content.lower().find(jargon_content.lower())
return match_index if match_index >= 0 else None
pattern = rf"\b{re.escape(jargon_content.lower())}\b"
return re.search(pattern, lowered_content) is not None
match = re.search(pattern, lowered_content)
if match is None:
return None
return match.start()
def _start_cycle(self) -> CycleDetail:
"""开始一轮 Maisaka 思考循环。"""
@@ -559,7 +583,6 @@ class MaisakaReasoningEngine:
chat_history=self._runtime._chat_history,
reply_message=target_message,
reply_reason=latest_thought,
unknown_words=unknown_words,
)
except Exception:
logger.exception(
@@ -587,7 +610,6 @@ class MaisakaReasoningEngine:
unknown_words=unknown_words,
log_reply=False,
expression_habits=reply_context.expression_habits,
jargon_explanation=reply_context.jargon_explanation,
selected_expression_ids=reply_context.selected_expression_ids,
)
except Exception:
@@ -645,6 +667,25 @@ class MaisakaReasoningEngine:
if not sent:
return False
target_user_info = target_message.message_info.user_info
target_user_name = (
target_user_info.user_cardname
or target_user_info.user_nickname
or target_user_info.user_id
)
if self._runtime.chat_stream is not None:
await database_api.store_tool_info(
chat_stream=self._runtime.chat_stream,
display_prompt=f"你对{target_user_name}进行了回复:{reply_text}",
tool_data={
"msg_id": target_message_id,
"quote": quote_reply,
"reply_text": reply_text,
},
tool_name="reply",
tool_reasoning=latest_thought,
)
bot_name = global_config.bot.nickname.strip() or "MaiSaka"
self._runtime._chat_history.append(
build_message(

View File

@@ -13,9 +13,8 @@ from src.common.database.database import get_db_session
from src.common.database.database_model import Expression, Jargon
from src.common.logger import get_logger
from src.config.config import global_config
from src.learners.jargon_explainer import search_jargon
from .message_adapter import get_message_text, parse_speaker_content
from .message_adapter import get_message_role, get_message_source, get_message_text, parse_speaker_content
logger = get_logger("maisaka_reply_context")
@@ -40,6 +39,7 @@ class _ExpressionRecord:
class _JargonRecord:
jargon_id: Optional[int]
content: str
count: int
meaning: str
session_id_dict: str
is_global: bool
@@ -56,7 +56,6 @@ class MaisakaReplyContextBuilder:
chat_history: List[SessionMessage],
reply_message: Optional[SessionMessage],
reply_reason: str,
unknown_words: Optional[List[str]] = None,
) -> ReplyContextBuildResult:
"""构建 reply 前置上下文。"""
expression_habits, selected_expression_ids = self._build_expression_habits(
@@ -67,7 +66,6 @@ class MaisakaReplyContextBuilder:
jargon_explanation = self._build_jargon_explanation(
chat_history=chat_history,
reply_message=reply_message,
unknown_words=unknown_words,
)
return ReplyContextBuildResult(
expression_habits=expression_habits,
@@ -129,56 +127,13 @@ class MaisakaReplyContextBuilder:
self,
chat_history: List[SessionMessage],
reply_message: Optional[SessionMessage],
unknown_words: Optional[List[str]],
) -> str:
"""查询并格式化黑话解释。"""
if not global_config.expression.enable_jargon_explanation:
return ""
if global_config.expression.jargon_mode == "planner":
return self._build_planner_jargon_explanation(unknown_words or [])
return self._build_context_jargon_explanation(chat_history, reply_message)
def _build_planner_jargon_explanation(self, unknown_words: List[str]) -> str:
    """Build the jargon explanation text from planner-supplied unknown words.

    Words are stripped and de-duplicated case-insensitively, keeping the
    first spelling seen. Each word is looked up with an exact search, and
    with a fuzzy search only when the exact one returns nothing. Hits
    without both a content and a meaning are ignored, and identical
    content/meaning pairs are listed once.

    Args:
        unknown_words: Words the planner flagged as unknown.

    Returns:
        A block headed by ``【黑话解释】`` with at most eight ``- term: meaning``
        lines, or an empty string when nothing usable was found.
    """
    # dict keeps insertion order, so the first spelling of each word wins.
    unique_words: dict[str, str] = {}
    for candidate in unknown_words:
        cleaned = str(candidate or "").strip()
        if cleaned:
            unique_words.setdefault(cleaned.lower(), cleaned)
    if not unique_words:
        return ""

    explained: dict[str, str] = {}
    for term in unique_words.values():
        # Fall back to the fuzzy search only when the exact one finds nothing.
        hits = search_jargon(term, chat_id=self._session_id, limit=3, fuzzy=False) or search_jargon(
            term, chat_id=self._session_id, limit=3, fuzzy=True
        )
        for hit in hits:
            content = str(hit.get("content") or "").strip()
            meaning = str(hit.get("meaning") or "").strip()
            if content and meaning:
                explained.setdefault(f"{content}\n{meaning}", f"- {content}: {meaning}")

    if not explained:
        return ""
    return "【黑话解释】\n" + "\n".join(list(explained.values())[:8])
def _build_context_jargon_explanation(
self,
chat_history: List[SessionMessage],
@@ -190,22 +145,25 @@ class MaisakaReplyContextBuilder:
return ""
jargon_records = self._load_jargon_records()
lines: List[str] = []
matched_records: List[tuple[int, int, int, _JargonRecord]] = []
seen_contents: set[str] = set()
for jargon in jargon_records:
if not jargon.content or not jargon.meaning:
continue
if jargon.content in seen_contents:
normalized_content = jargon.content.lower()
if normalized_content in seen_contents:
continue
if not self._is_visible_jargon(jargon):
continue
if not self._is_jargon_in_corpus(jargon.content, corpus):
match_position = self._get_jargon_match_position(jargon.content, corpus)
if match_position is None:
continue
seen_contents.add(jargon.content)
lines.append(f"- {jargon.content}: {jargon.meaning}")
if len(lines) >= 8:
break
seen_contents.add(normalized_content)
matched_records.append((match_position, -len(jargon.content), -jargon.count, jargon))
matched_records.sort()
lines = [f"- {jargon.content}: {jargon.meaning}" for _, _, _, jargon in matched_records[:8]]
if not lines:
return ""
@@ -219,13 +177,14 @@ class MaisakaReplyContextBuilder:
def _load_jargon_records(self) -> List[_JargonRecord]:
"""在 session 内提取黑话的静态数据,避免 detached ORM 对象。"""
with get_db_session(auto_commit=False) as session:
query = select(Jargon).where(Jargon.meaning != "") # type: ignore[attr-defined]
query = query.order_by(Jargon.count.desc()).limit(200) # type: ignore[attr-defined]
query = select(Jargon).where(Jargon.is_jargon.is_(True), Jargon.meaning != "") # type: ignore[attr-defined]
query = query.order_by(Jargon.count.desc()) # type: ignore[attr-defined]
jargons = session.exec(query).all()
return [
_JargonRecord(
jargon_id=jargon.id,
content=(jargon.content or "").strip(),
count=int(jargon.count or 0),
meaning=(jargon.meaning or "").strip(),
session_id_dict=jargon.session_id_dict or "{}",
is_global=bool(jargon.is_global),
@@ -238,20 +197,26 @@ class MaisakaReplyContextBuilder:
chat_history: List[SessionMessage],
reply_message: Optional[SessionMessage],
) -> str:
"""最近上下文拼成待匹配文本。"""
"""当前聊天记录内所有用户消息拼成待匹配文本。"""
parts: List[str] = []
for message in chat_history[-20:]:
for message in chat_history:
if get_message_role(message) != "user":
continue
if get_message_source(message) != "user":
continue
text = get_message_text(message).strip()
if not text:
continue
_, body = parse_speaker_content(text)
parts.append(body.strip() or text)
if reply_message is not None:
if reply_message is not None and get_message_source(reply_message) == "user":
reply_text = get_message_text(reply_message).strip()
if reply_text:
_, body = parse_speaker_content(reply_text)
parts.append(body.strip() or reply_text)
normalized_reply_text = body.strip() or reply_text
if normalized_reply_text not in parts:
parts.append(normalized_reply_text)
return "\n".join(parts)
@@ -268,10 +233,16 @@ class MaisakaReplyContextBuilder:
return self._session_id in session_id_dict
@staticmethod
def _is_jargon_in_corpus(content: str, corpus: str) -> bool:
"""判断黑话词条是否出现在上下文中"""
def _get_jargon_match_position(content: str, corpus: str) -> Optional[int]:
"""返回 jargon 在上下文中的首次命中位置,未命中时返回 `None`"""
if re.search(r"[\u4e00-\u9fff]", content):
return re.search(re.escape(content), corpus, flags=re.IGNORECASE) is not None
match = re.search(re.escape(content), corpus, flags=re.IGNORECASE)
if match is None:
return None
return match.start()
pattern = rf"\b{re.escape(content)}\b"
return re.search(pattern, corpus, flags=re.IGNORECASE) is not None
match = re.search(pattern, corpus, flags=re.IGNORECASE)
if match is None:
return None
return match.start()