diff --git a/src/config/official_configs.py b/src/config/official_configs.py index bf90c876..1f58810d 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -1582,30 +1582,6 @@ class DatabaseConfig(ConfigBase): """ -class MaiSakaConfig(ConfigBase): - """MaiSaka 对话系统配置类""" - - __ui_label__ = "MaiSaka" - __ui_icon__ = "message-circle" - cli_user_name: str = Field( - default="用户", - json_schema_extra={ - "x-widget": "input", - "x-icon": "user", - }, - ) - """MaiSaka 使用的用户名称""" - - show_image_path: bool = Field( - default=True, - json_schema_extra={ - "x-widget": "switch", - "x-icon": "image", - }, - ) - """是否显示图片本地路径""" - - class MCPAuthorizationConfig(ConfigBase): """MCP HTTP 认证配置。""" diff --git a/src/learners/expression_auto_check_task.py b/src/learners/expression_auto_check_task.py index 54c4ee68..a860f2a3 100644 --- a/src/learners/expression_auto_check_task.py +++ b/src/learners/expression_auto_check_task.py @@ -8,107 +8,24 @@ 4. 未通过评估的:rejected=1, checked=1 """ +from typing import List + import asyncio import random -from typing import List from sqlmodel import select -from src.common.data_models.llm_service_data_models import LLMGenerationOptions from src.common.database.database import get_db_session from src.common.database.database_model import Expression from src.common.logger import get_logger from src.config.config import global_config from src.learners.expression_review_store import get_review_state, set_review_state -from src.learners.expression_utils import parse_evaluation_response +from src.learners.expression_utils import check_expression_suitability from src.manager.async_task_manager import AsyncTask -from src.services.llm_service import LLMServiceClient logger = get_logger("expressor") -def create_evaluation_prompt(situation: str, style: str) -> str: - """ - 创建评估提示词。 - - Args: - situation: 情景 - style: 风格 - - Returns: - 评估提示词 - """ - base_criteria = [ - "表达方式或言语风格是否与使用条件或使用情景匹配", - "允许部分语法错误或口语化或缺省出现", - "表达方式不能太过特指,需要具有泛用性", - "一般不涉及具体的人名或名称", - ] - - custom_criteria = global_config.expression.expression_auto_check_custom_criteria - - all_criteria = base_criteria.copy() - if custom_criteria: - all_criteria.extend(custom_criteria) - - criteria_list = "\n".join([f"{i + 1}. {criterion}" for i, criterion in enumerate(all_criteria)]) - - prompt = f"""请评估以下表达方式或语言风格以及使用条件或使用情景是否合适: -使用条件或使用情景:{situation} -表达方式或言语风格:{style} - -请从以下方面进行评估: -{criteria_list} - -请以 JSON 格式输出评估结果: -{{ - "suitable": true/false, - "reason": "评估理由(如果不合适,请说明原因)" -}} -如果合适,suitable 设为 true;如果不合适,suitable 设为 false,并在 reason 中说明原因。 -请严格按照 JSON 格式输出,不要包含其他内容。""" - - return prompt - - -judge_llm = LLMServiceClient(task_name="utils", request_type="expression_check") - - -async def single_expression_check(situation: str, style: str) -> tuple[bool, str, str | None]: - """ - 执行单次 LLM 评估。 - - Args: - situation: 情景 - style: 风格 - - Returns: - (suitable, reason, error) 元组,如果出错则 suitable 为 False,error 包含错误信息 - """ - try: - prompt = create_evaluation_prompt(situation, style) - logger.debug(f"正在评估表达方式: situation={situation}, style={style}") - - generation_result = await judge_llm.generate_response( - prompt=prompt, - options=LLMGenerationOptions(temperature=0.6, max_tokens=1024), - ) - response = generation_result.response - logger.debug(f"LLM响应: {response}") - - evaluation = parse_evaluation_response(response) - - suitable = bool(evaluation.get("suitable", False)) - reason = str(evaluation.get("reason", "未提供理由")) - - logger.debug(f"评估结果: {'通过' if suitable else '不通过'}") - return suitable, reason, None - - except Exception as e: - logger.error(f"评估表达方式 (situation={situation}, style={style}) 时出错: {e}") - return False, f"评估过程出错: {str(e)}", str(e) - - class ExpressionAutoCheckTask(AsyncTask): """表达方式自动检查定时任务。""" @@ -164,7 +81,7 @@ class ExpressionAutoCheckTask(AsyncTask): Returns: True 表示通过,False 表示不通过 """ - suitable, reason, error = await single_expression_check( + suitable, reason, error = await check_expression_suitability( expression.situation, expression.style, ) diff --git a/src/learners/expression_learner.py b/src/learners/expression_learner.py index a7696b5f..b2733fa4 100644 --- a/src/learners/expression_learner.py +++ b/src/learners/expression_learner.py @@ -31,11 +31,9 @@ if TYPE_CHECKING: logger = get_logger("expressor") express_learn_model = LLMServiceClient( - task_name="utils", request_type="expression.learner" + task_name="replyer", request_type="expression.learner" ) -summary_model = LLMServiceClient(task_name="utils", request_type="expression.summary") -check_model = LLMServiceClient(task_name="utils", request_type="expression.check") - +summary_model = LLMServiceClient(task_name="replyer", request_type="expression.summary") def register_expression_hook_specs(registry: HookSpecRegistry) -> List[HookSpec]: """注册表达方式系统内置 Hook 规格。 diff --git a/src/learners/expression_utils.py b/src/learners/expression_utils.py index 6f68480c..52f15d6e 100644 --- a/src/learners/expression_utils.py +++ b/src/learners/expression_utils.py @@ -12,7 +12,7 @@ from src.services.llm_service import LLMServiceClient logger = get_logger("expression_utils") -judge_llm = LLMServiceClient(task_name="utils", request_type="expression_check") +judge_llm = LLMServiceClient(task_name="replyer", request_type="expression_check") def _normalize_repair_json_result(repaired_result: Any) -> str: diff --git a/src/learners/jargon_explainer_old.py b/src/learners/jargon_explainer_old.py deleted file mode 100644 index 330da8cb..00000000 --- a/src/learners/jargon_explainer_old.py +++ /dev/null @@ -1,348 +0,0 @@ -import re -import time -from typing import List, Dict, Optional, Any - -from src.common.logger import get_logger -from src.common.database.database_model import Jargon -from src.common.data_models.llm_service_data_models import LLMGenerationOptions -from src.services.llm_service import LLMServiceClient -from src.config.config import global_config -from src.prompt.prompt_manager import prompt_manager -from src.learners.jargon_explainer import search_jargon -from src.learners.learner_utils_old import ( - is_bot_message, - contains_bot_self_name, - parse_chat_id_list, - chat_id_list_contains, -) - -logger = get_logger("jargon") - - -class JargonExplainer: - """黑话解释器,用于在回复前识别和解释上下文中的黑话""" - - def __init__(self, chat_id: str) -> None: - self.chat_id = chat_id - self.llm = LLMServiceClient( - task_name="utils", - request_type="jargon.explain", - ) - - def match_jargon_from_messages(self, messages: List[Any]) -> List[Dict[str, str]]: - """ - 通过直接匹配数据库中的jargon字符串来提取黑话 - - Args: - messages: 消息列表 - - Returns: - List[Dict[str, str]]: 提取到的黑话列表,每个元素包含content - """ - start_time = time.time() - - if not messages: - return [] - - # 收集所有消息的文本内容 - message_texts: List[str] = [] - for msg in messages: - # 跳过机器人自己的消息 - if is_bot_message(msg): - continue - - msg_text = ( - getattr(msg, "display_message", None) or getattr(msg, "processed_plain_text", None) or "" - ).strip() - if msg_text: - message_texts.append(msg_text) - - if not message_texts: - return [] - - # 合并所有消息文本 - combined_text = " ".join(message_texts) - - # 查询所有有meaning的jargon记录 - query = Jargon.select().where((Jargon.meaning.is_null(False)) & (Jargon.meaning != "")) - - # 根据all_global配置决定查询逻辑 - if global_config.expression.all_global_jargon: - # 开启all_global:只查询is_global=True的记录 - query = query.where(Jargon.is_global) - else: - # 关闭all_global:查询is_global=True或chat_id列表包含当前chat_id的记录 - # 这里先查询所有,然后在Python层面过滤 - pass - - # 按count降序排序,优先匹配出现频率高的 - query = query.order_by(Jargon.count.desc()) - - # 执行查询并匹配 - matched_jargon: Dict[str, Dict[str, str]] = {} - query_time = time.time() - - for jargon in query: - content = jargon.content or "" - if not content or not content.strip(): - continue - - # 跳过包含机器人昵称的词条 - if contains_bot_self_name(content): - continue - - # 检查chat_id(如果all_global=False) - if not global_config.expression.all_global_jargon: - if jargon.is_global: - # 全局黑话,包含 - pass - else: - # 检查chat_id列表是否包含当前chat_id - chat_id_list = parse_chat_id_list(jargon.chat_id) - if not chat_id_list_contains(chat_id_list, self.chat_id): - continue - - # 在文本中查找匹配(大小写不敏感) - pattern = re.escape(content) - # 使用单词边界或中文字符边界来匹配,避免部分匹配 - # 对于中文,使用Unicode字符类;对于英文,使用单词边界 - if re.search(r"[\u4e00-\u9fff]", content): - # 包含中文,使用更宽松的匹配 - search_pattern = pattern - else: - # 纯英文/数字,使用单词边界 - search_pattern = r"\b" + pattern + r"\b" - - if re.search(search_pattern, combined_text, re.IGNORECASE): - # 找到匹配,记录(去重) - if content not in matched_jargon: - matched_jargon[content] = {"content": content} - - match_time = time.time() - total_time = match_time - start_time - query_duration = query_time - start_time - match_duration = match_time - query_time - - logger.debug( - f"黑话匹配完成: 查询耗时 {query_duration:.3f}s, 匹配耗时 {match_duration:.3f}s, " - f"总耗时 {total_time:.3f}s, 匹配到 {len(matched_jargon)} 个黑话" - ) - - return list(matched_jargon.values()) - - async def explain_jargon(self, messages: List[Any], chat_context: str) -> Optional[str]: - """ - 解释上下文中的黑话 - - Args: - messages: 消息列表 - chat_context: 聊天上下文的文本表示 - - Returns: - Optional[str]: 黑话解释的概括文本,如果没有黑话则返回None - """ - if not messages: - return None - - # 直接匹配方式:从数据库中查询jargon并在消息中匹配 - jargon_entries = self.match_jargon_from_messages(messages) - - if not jargon_entries: - return None - - # 去重(按content) - unique_jargon: Dict[str, Dict[str, str]] = {} - for entry in jargon_entries: - content = entry["content"] - if content not in unique_jargon: - unique_jargon[content] = entry - - jargon_list = list(unique_jargon.values()) - logger.info(f"从上下文中提取到 {len(jargon_list)} 个黑话: {[j['content'] for j in jargon_list]}") - - # 查询每个黑话的含义 - jargon_explanations: List[str] = [] - for entry in jargon_list: - content = entry["content"] - - # 根据是否开启全局黑话,决定查询方式 - if global_config.expression.all_global_jargon: - # 开启全局黑话:查询所有is_global=True的记录 - results = search_jargon( - keyword=content, - chat_id=None, # 不指定chat_id,查询全局黑话 - limit=1, - case_sensitive=False, - fuzzy=False, # 精确匹配 - ) - else: - # 关闭全局黑话:优先查询当前聊天或全局的黑话 - results = search_jargon( - keyword=content, - chat_id=self.chat_id, - limit=1, - case_sensitive=False, - fuzzy=False, # 精确匹配 - ) - - if results and len(results) > 0: - meaning = results[0].get("meaning", "").strip() - if meaning: - jargon_explanations.append(f"- {content}: {meaning}") - else: - logger.info(f"黑话 {content} 没有找到含义") - else: - logger.info(f"黑话 {content} 未在数据库中找到") - - if not jargon_explanations: - logger.info("没有找到任何黑话的含义,跳过解释") - return None - - # 拼接所有黑话解释 - explanations_text = "\n".join(jargon_explanations) - - # 使用LLM概括黑话解释 - prompt_of_summarize = prompt_manager.get_prompt("jargon_explainer_summarize") - prompt_of_summarize.add_context("chat_context", lambda _: chat_context) - prompt_of_summarize.add_context("jargon_explanations", lambda _: explanations_text) - summarize_prompt = await prompt_manager.render_prompt(prompt_of_summarize) - - summary_result = await self.llm.generate_response( - summarize_prompt, options=LLMGenerationOptions(temperature=0.3) - ) - summary = summary_result.response - if not summary: - # 如果LLM概括失败,直接返回原始解释 - return f"上下文中的黑话解释:\n{explanations_text}" - - summary = summary.strip() - if not summary: - return f"上下文中的黑话解释:\n{explanations_text}" - - return summary - - -async def explain_jargon_in_context(chat_id: str, messages: List[Any], chat_context: str) -> Optional[str]: - """ - 解释上下文中的黑话(便捷函数) - - Args: - chat_id: 聊天ID - messages: 消息列表 - chat_context: 聊天上下文的文本表示 - - Returns: - Optional[str]: 黑话解释的概括文本,如果没有黑话则返回None - """ - explainer = JargonExplainer(chat_id) - return await explainer.explain_jargon(messages, chat_context) - - -def match_jargon_from_text(chat_text: str, chat_id: str) -> List[str]: - """直接在聊天文本中匹配已知的jargon,返回出现过的黑话列表 - - Args: - chat_text: 要匹配的聊天文本 - chat_id: 聊天ID - - Returns: - List[str]: 匹配到的黑话列表 - """ - if not chat_text or not chat_text.strip(): - return [] - - query = Jargon.select().where((Jargon.meaning.is_null(False)) & (Jargon.meaning != "")) - if global_config.expression.all_global_jargon: - query = query.where(Jargon.is_global) - - query = query.order_by(Jargon.count.desc()) - - matched: Dict[str, None] = {} - - for jargon in query: - content = (jargon.content or "").strip() - if not content: - continue - - if not global_config.expression.all_global_jargon and not jargon.is_global: - chat_id_list = parse_chat_id_list(jargon.chat_id) - if not chat_id_list_contains(chat_id_list, chat_id): - continue - - pattern = re.escape(content) - if re.search(r"[\u4e00-\u9fff]", content): - search_pattern = pattern - else: - search_pattern = r"\b" + pattern + r"\b" - - if re.search(search_pattern, chat_text, re.IGNORECASE): - matched[content] = None - - logger.info(f"匹配到 {len(matched)} 个黑话") - - return list(matched.keys()) - - -async def retrieve_concepts_with_jargon(concepts: List[str], chat_id: str) -> str: - """对概念列表进行jargon检索 - - Args: - concepts: 概念列表 - chat_id: 聊天ID - - Returns: - str: 检索结果字符串 - """ - if not concepts: - return "" - - results = [] - exact_matches = [] # 收集所有精确匹配的概念 - for concept in concepts: - concept = concept.strip() - if not concept: - continue - - # 先尝试精确匹配 - jargon_results = search_jargon(keyword=concept, chat_id=chat_id, limit=10, case_sensitive=False, fuzzy=False) - - is_fuzzy_match = False - - # 如果精确匹配未找到,尝试模糊搜索 - if not jargon_results: - jargon_results = search_jargon(keyword=concept, chat_id=chat_id, limit=10, case_sensitive=False, fuzzy=True) - is_fuzzy_match = True - - if jargon_results: - # 找到结果 - if is_fuzzy_match: - # 模糊匹配 - output_parts = [f"未精确匹配到'{concept}'"] - for result in jargon_results: - found_content = result.get("content", "").strip() - meaning = result.get("meaning", "").strip() - if found_content and meaning: - output_parts.append(f"找到 '{found_content}' 的含义为:{meaning}") - results.append("\n".join(output_parts)) # 换行分隔每个jargon解释 - logger.info(f"在jargon库中找到匹配(模糊搜索): {concept},找到{len(jargon_results)}条结果") - else: - # 精确匹配 - output_parts = [] - for result in jargon_results: - meaning = result.get("meaning", "").strip() - if meaning: - output_parts.append(f"'{concept}' 为黑话或者网络简写,含义为:{meaning}") - # 换行分隔每个jargon解释 - results.append("\n".join(output_parts) if len(output_parts) > 1 else output_parts[0]) - exact_matches.append(concept) # 收集精确匹配的概念,稍后统一打印 - else: - # 未找到,不返回占位信息,只记录日志 - logger.info(f"在jargon库中未找到匹配: {concept}") - - # 合并所有精确匹配的日志 - if exact_matches: - logger.info(f"找到黑话: {', '.join(exact_matches)},共找到{len(exact_matches)}条结果") - - if results: - return "你了解以下词语可能的含义:\n" + "\n".join(results) + "\n" - return "" diff --git a/src/learners/jargon_miner.py b/src/learners/jargon_miner.py index d59e0046..277f7e5f 100644 --- a/src/learners/jargon_miner.py +++ b/src/learners/jargon_miner.py @@ -23,8 +23,8 @@ from .expression_utils import is_single_char_jargon logger = get_logger("jargon") -llm_extract = LLMServiceClient(task_name="utils", request_type="jargon.extract") -llm_inference = LLMServiceClient(task_name="utils", request_type="jargon.inference") +llm_extract = LLMServiceClient(task_name="replyer", request_type="jargon.extract") +llm_inference = LLMServiceClient(task_name="replyer", request_type="jargon.inference") class JargonEntry(TypedDict): diff --git a/src/maisaka/builtin_tool/reply.py b/src/maisaka/builtin_tool/reply.py index a2d3ddbc..f03d9d2c 100644 --- a/src/maisaka/builtin_tool/reply.py +++ b/src/maisaka/builtin_tool/reply.py @@ -1,6 +1,6 @@ """reply 内置工具。""" -from typing import Optional +from typing import Any, Optional import traceback @@ -75,6 +75,25 @@ def _build_monitor_metadata(reply_result: ReplyGenerationResult) -> dict[str, ob return {} +def _build_send_result( + *, + index: int, + segment: str, + set_quote: bool, + success: bool, + message_id: str = "", +) -> dict[str, Any]: + """构建分段回复的轻量发送结果。""" + + return { + "index": index, + "segment": segment, + "set_quote": set_quote, + "success": success, + "message_id": message_id, + } + + async def handle_tool( tool_ctx: BuiltinToolRuntimeContext, invocation: ToolInvocation, @@ -165,19 +184,29 @@ async def handle_tool( reply_segments = tool_ctx.post_process_reply_text(reply_text) combined_reply_text = "".join(reply_segments) sent_message_ids: list[str] = [] + send_results: list[dict[str, Any]] = [] try: sent = False if tool_ctx.runtime.chat_stream.platform == CLI_PLATFORM_NAME: - for segment in reply_segments: + for index, segment in enumerate(reply_segments): render_cli_message(segment) + send_results.append( + _build_send_result( + index=index, + segment=segment, + set_quote=effective_set_quote if index == 0 else False, + success=True, + ) + ) sent = True else: for index, segment in enumerate(reply_segments): + segment_set_quote = effective_set_quote if index == 0 else False sent_message = await send_service.text_to_stream_with_message( text=segment, stream_id=tool_ctx.runtime.session_id, - set_reply=effective_set_quote if index == 0 else False, - reply_message=target_message if effective_set_quote and index == 0 else None, + set_reply=segment_set_quote, + reply_message=target_message if segment_set_quote else None, selected_expressions=reply_result.selected_expression_ids or None, typing=index > 0, sync_to_maisaka_history=True, @@ -185,10 +214,27 @@ async def handle_tool( ) sent = sent_message is not None if not sent: + send_results.append( + _build_send_result( + index=index, + segment=segment, + set_quote=segment_set_quote, + success=False, + ) + ) break sent_message_id = str(getattr(sent_message, "message_id", "") or "").strip() if sent_message_id: sent_message_ids.append(sent_message_id) + send_results.append( + _build_send_result( + index=index, + segment=segment, + set_quote=segment_set_quote, + success=True, + message_id=sent_message_id, + ) + ) except Exception: logger.exception( f"{tool_ctx.runtime.log_prefix} 发送文字消息时发生异常,目标消息编号={target_message_id}" @@ -208,6 +254,7 @@ async def handle_tool( "set_quote": set_quote, "effective_set_quote": effective_set_quote, "reply_segments": reply_segments, + "send_results": send_results, }, metadata=reply_metadata, ) @@ -219,17 +266,27 @@ async def handle_tool( tool_ctx.append_guided_reply_to_chat_history(combined_reply_text) tool_ctx.runtime._record_reply_sent() reply_metadata["sent_message_ids"] = sent_message_ids - await tool_ctx.runtime.track_reply_effect( - tool_call_id=invocation.call_id, - target_message=target_message, - set_quote=effective_set_quote, - reply_text=combined_reply_text, - reply_segments=reply_segments, - planner_reasoning=latest_thought, - reference_info=reference_info, - reply_metadata=reply_metadata, - replyer_context_messages=replyer_chat_history, - ) + reply_metadata["send_results"] = send_results + track_reply_effect = getattr(tool_ctx.runtime, "track_reply_effect", None) + if track_reply_effect is not None: + await track_reply_effect( + tool_call_id=invocation.call_id, + target_message=target_message, + set_quote=effective_set_quote, + reply_text=combined_reply_text, + reply_segments=reply_segments, + planner_reasoning=latest_thought, + reference_info=reference_info, + tool_context={ + "tool_name": invocation.tool_name, + "call_id": invocation.call_id, + "arguments": dict(invocation.arguments or {}), + "reasoning": latest_thought, + }, + send_results=send_results, + reply_metadata=reply_metadata, + replyer_context_messages=replyer_chat_history, + ) return tool_ctx.build_success_result( invocation.tool_name, "回复已生成并发送。", @@ -239,6 +296,7 @@ async def handle_tool( "effective_set_quote": effective_set_quote, "reply_text": combined_reply_text, "reply_segments": reply_segments, + "send_results": send_results, "target_user_name": target_user_name, }, metadata=reply_metadata, diff --git a/src/maisaka/reply_effect/models.py b/src/maisaka/reply_effect/models.py index d7a32f84..628c8c44 100644 --- a/src/maisaka/reply_effect/models.py +++ b/src/maisaka/reply_effect/models.py @@ -50,6 +50,8 @@ class ReplySnapshot: reply_segments: List[str] planner_reasoning: str reference_info: str + tool_context: Dict[str, Any] = field(default_factory=dict) + send_results: List[Dict[str, Any]] = field(default_factory=list) reply_metadata: Dict[str, Any] = field(default_factory=dict) diff --git a/src/maisaka/reply_effect/tracker.py b/src/maisaka/reply_effect/tracker.py index c2e7c8ef..425b534a 100644 --- a/src/maisaka/reply_effect/tracker.py +++ b/src/maisaka/reply_effect/tracker.py @@ -67,6 +67,8 @@ class ReplyEffectTracker: reply_segments: List[str], planner_reasoning: str, reference_info: str, + tool_context: Dict[str, Any] | None = None, + send_results: List[Dict[str, Any]] | None = None, reply_metadata: Dict[str, Any] | None = None, context_snapshot: List[Dict[str, Any]] | None = None, ) -> ReplyEffectRecord: @@ -88,6 +90,8 @@ class ReplyEffectTracker: reply_segments=list(reply_segments), planner_reasoning=planner_reasoning, reference_info=reference_info, + tool_context=dict(tool_context or {}), + send_results=list(send_results or []), reply_metadata=dict(reply_metadata or {}), ), target_user=UserSnapshot( diff --git a/src/maisaka/runtime.py b/src/maisaka/runtime.py index d3e6b41b..6ae6cb66 100644 --- a/src/maisaka/runtime.py +++ b/src/maisaka/runtime.py @@ -296,6 +296,8 @@ class MaisakaHeartFlowChatting: reply_segments: list[str], planner_reasoning: str, reference_info: str, + tool_context: Optional[dict[str, Any]] = None, + send_results: Optional[list[dict[str, Any]]] = None, reply_metadata: Optional[dict[str, Any]] = None, replyer_context_messages: Optional[Sequence[LLMContextMessage]] = None, ) -> None: @@ -322,6 +324,8 @@ class MaisakaHeartFlowChatting: reply_segments=reply_segments, planner_reasoning=planner_reasoning, reference_info=reference_info, + tool_context=tool_context, + send_results=send_results, reply_metadata=enriched_reply_metadata, context_snapshot=context_snapshot, )