diff --git a/mai_knowledge/knowledge.json b/mai_knowledge/knowledge.json index 3a65cd12..b32e6625 100644 --- a/mai_knowledge/knowledge.json +++ b/mai_knowledge/knowledge.json @@ -1,5 +1,69 @@ { - "1": [], + "1": [ + { + "id": "know_1_1774770946.623486", + "content": "备战中考", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:55:46.623486" + }, + { + "id": "know_1_1774771765.051286", + "content": "性别为女性", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:09:25.051286" + }, + { + "id": "know_1_1774771851.333504", + "content": "用户是I人(内向型人格)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:10:51.333504" + }, + { + "id": "know_1_1774771894.517183", + "content": "用户名为小千,被他人称为“宝宝”,结合语境推测为女性", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:11:34.517183" + }, + { + "id": "know_1_1774771923.859455", + "content": "小千是I人(内向型人格)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:12:03.859455" + }, + { + "id": "know_1_1774771993.479732", + "content": "小千是女性", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:13:13.479732" + }, + { + "id": "know_1_1774772079.496335", + "content": "用户名为小千,被他人称为“宝宝”,推测为女性或处于亲密社交语境中(注:性别非明确陈述,但基于昵称高频使用及语境,高置信度归纳为女性或女性化称呼偏好,若严格遵循“明确表达”则此项存疑。鉴于指令要求“高置信度可归纳”,且群内互动模式符合典型女性向昵称习惯,此处提取为倾向性事实)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:14:39.496335" + } + ], "2": [ { "id": "know_2_1774768612.298128", @@ -18,6 +82,78 @@ "source": "maisaka_learning" }, "created_at": "2026-03-29T15:17:25.029561" + }, + { + "id": "know_2_1774771068.355999", + "content": "喜欢用夸张、幽默或古风修辞表达观点", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:57:48.355999" + }, + { + "id": "know_2_1774771397.764996", + "content": "性格幽默,喜欢使用夸张比喻和古风表达", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:03:17.764996" + }, + { + "id": "know_2_1774771471.03367", + "content": "幽默风趣,喜欢使用夸张比喻和玩梗", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:04:31.033670" + }, + { + "id": "know_2_1774771765.052285", + "content": "性格不孤僻,社交圈较广", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:09:25.052285" + }, + { + "id": "know_2_1774771851.33601", + "content": "用户表现出社恐倾向,喜欢回避社交互动", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:10:51.336010" + }, + { + "id": "know_2_1774771894.520185", + "content": "性格偏向内向(I人),有社恐倾向,喜欢回避社交压力", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:11:34.520185" + }, + { + "id": "know_2_1774771958.585244", + "content": "小千是内向型人格(I人)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:12:38.585244" + }, + { + "id": "know_2_1774771993.481732", + "content": "小千性格内向(I人)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:13:13.481732" } ], "3": [], @@ -41,6 +177,213 @@ "source": "maisaka_learning" }, "created_at": "2026-03-29T15:15:17.122405" + }, + { + "id": "know_6_1774769406.247087", + "content": "喜欢动漫风格插画", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:30:06.247087" + }, + { + "id": "know_6_1774770487.207364", + "content": "关注显卡硬件参数(如显存、型号)及深度学习/炼丹应用", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:48:07.207364" + }, + { + "id": "know_6_1774770487.209372", + "content": "对游戏光影效果感兴趣", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:48:07.209372" + }, + { + "id": "know_6_1774770603.063873", + "content": "喜欢玩《我的世界》和VRChat", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:50:03.063873" + }, + { + "id": "know_6_1774770654.654349", + "content": "关注显卡硬件参数(如4090、48G显存、5090)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:50:54.654349" + }, + { + "id": "know_6_1774770654.655356", + "content": "使用VRChat进行社交娱乐", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:50:54.655356" + }, + { + "id": "know_6_1774770734.287947", + "content": "关注显卡硬件(如4090、3050)及AI炼丹技术", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:52:14.287947" + }, + { + "id": "know_6_1774770734.289944", + "content": "玩《我的世界》并配置光影效果", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:52:14.289944" + }, + { + "id": "know_6_1774770734.291944", + "content": "计划游玩VRChat", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:52:14.291944" + }, + { + "id": "know_6_1774771033.111011", + "content": "喜欢玩VRChat", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:57:13.111011" + }, + { + "id": "know_6_1774771068.358999", + "content": "关注VRChat等虚拟现实游戏及硬件性能话题", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:57:48.358999" + }, + { + "id": "know_6_1774771233.980219", + "content": "使用VRChat(VRC)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:00:33.980219" + }, + { + "id": "know_6_1774771397.766996", + "content": "对VRChat(VRC)及虚拟形象社交感兴趣", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:03:17.766996" + }, + { + "id": "know_6_1774771471.03567", + "content": "对VRChat等虚拟社交游戏感兴趣", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:04:31.035670" + }, + { + "id": "know_6_1774771894.521183", + "content": "熟悉二次元文化、动漫角色及互联网流行梗(Meme)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:11:34.521183" + }, + { + "id": "know_6_1774771923.861534", + "content": "小千玩CS:GO游戏", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:12:03.861534" + }, + { + "id": "know_6_1774771958.587243", + "content": "回声者_Echoderd喜欢玩CS:GO游戏", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:12:38.587243" + }, + { + "id": "know_6_1774771993.483732", + "content": "小千喜欢二次元文化及动漫游戏圈梗", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:13:13.483732" + }, + { + "id": "know_6_1774772079.499335", + "content": "熟悉并喜爱二次元文化、动漫角色及互联网梗图(如阴间美学、病娇系、黑长直萌妹等风格)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:14:39.499335" + }, + { + "id": "know_6_1774772112.716455", + "content": "小千关注CS:GO游戏及中考备考话题", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:15:12.716455" + }, + { + "id": "know_6_1774772154.873237", + "content": "用户玩CS:GO游戏", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:15:54.873237" + }, + { + "id": "know_6_1774772186.438797", + "content": "玩CS:GO游戏", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:16:26.438797" + }, + { + "id": "know_6_1774772730.867535", + "content": "熟悉《我的青春恋爱物语果然有问题》及二次元表情包文化", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:25:30.867535" } ], "7": [ @@ -61,9 +404,127 @@ "source": "maisaka_learning" }, "created_at": "2026-03-29T15:16:13.741823" + }, + { + "id": "know_7_1774770603.062873", + "content": "备战中考", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:50:03.062873" + }, + { + "id": "know_7_1774771471.036668", + "content": "正在备战中考的学生", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:04:31.036668" + }, + { + "id": "know_7_1774771923.862535", + "content": "小千正在备战中考", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:12:03.862535" + }, + { + "id": "know_7_1774771958.588749", + "content": "回声者_Echoderd正在备战中考", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:12:38.588749" + }, + { + "id": "know_7_1774772112.714455", + "content": "小千使用AI模型进行对话", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:15:12.714455" + }, + { + "id": "know_7_1774772154.870238", + "content": "用户正在备战中考", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:15:54.870238" + } + ], + "8": [ + { + "id": "know_8_1774770946.624486", + "content": "日常逛游戏地图", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:55:46.624486" + }, + { + "id": "know_8_1774771397.769034", + "content": "备考中考期间仍保持日常游戏娱乐习惯", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:03:17.769034" + }, + { + "id": "know_8_1774771851.338018", + "content": "用户有备考中考的学习任务", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:10:51.338018" + }, + { + "id": "know_8_1774771894.523189", + "content": "备考中(备战中考)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:11:34.523189" + }, + { + "id": "know_8_1774771993.484733", + "content": "小千有打CS:GO的游戏习惯", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:13:13.484733" + }, + { + "id": "know_8_1774772079.501334", + "content": "有在高压环境下(如中考前)进行游戏娱乐(CS:GO)的习惯,自称或认同“摆烂”的生活态度", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:14:39.501334" + }, + { + "id": "know_8_1774772154.875743", + "content": "用户在备考期间有打游戏摸鱼的习惯", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:15:54.875743" } ], - "8": [], "9": [], "10": [ { @@ -119,8 +580,82 @@ "source": "maisaka_learning" }, "created_at": "2026-03-29T15:17:25.028561" + }, + { + "id": "know_10_1774769406.249584", + "content": "沟通中常使用文言文或半文言表达", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:30:06.249584" + }, + { + "id": "know_10_1774769406.251097", + "content": "习惯用反问句和夸张语气进行互动", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:30:06.251097" + }, + { + "id": "know_10_1774770487.211056", + "content": "沟通风格幽默,常使用网络梗和夸张表达", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:48:07.211056" + }, + { + "id": "know_10_1774771471.038677", + "content": "沟通风格轻松随意,善于接话和调侃", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:04:31.038677" + }, + { + "id": "know_10_1774771765.053285", + "content": "沟通风格活泼,喜欢使用语气词和表情符号撒娇", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:09:25.053285" + }, + { + "id": "know_10_1774772079.503333", + "content": "沟通风格幽默调侃,擅长用反话(如“烦到了”)和夸张修辞(如“耳朵起茧子”、“要报警了”)表达情绪", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T16:14:39.503333" } ], - "11": [], - "12": [] + "11": [ + { + "id": "know_11_1774771068.360999", + "content": "乐于接受并学习新的技术技巧(如加速器用法)", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:57:48.360999" + } + ], + "12": [ + { + "id": "know_12_1774770654.657355", + "content": "面对网络延迟问题倾向于寻找加速器解决方案", + "metadata": { + "session_id": "628336b082552269377e9d0648e26c60", + "source": "maisaka_learning" + }, + "created_at": "2026-03-29T15:50:54.657355" + } + ] } \ No newline at end of file diff --git a/src/chat/heart_flow/heartFC_chat.py b/src/chat/heart_flow/heartFC_chat.py index 74d94773..2696a420 100644 --- a/src/chat/heart_flow/heartFC_chat.py +++ b/src/chat/heart_flow/heartFC_chat.py @@ -198,7 +198,6 @@ class HeartFChatting: """判定和生成回复""" asyncio.create_task(self._trigger_expression_learning(self.message_cache)) # TODO: 完成反思器之后的逻辑 - start_time = time.time() current_cycle_detail = self._start_cycle() logger.info(f"{self.log_prefix} 开始第{self._cycle_counter}次思考") @@ -207,10 +206,7 @@ class HeartFChatting: # TODO: 动作执行逻辑 cycle_detail = self._end_cycle(current_cycle_detail) - if wait_time := global_config.chat.planner_smooth - (time.time() - start_time) > 0: - await asyncio.sleep(wait_time) - else: - await asyncio.sleep(0.1) # 最小等待时间,避免过快循环 + await asyncio.sleep(0.1) # 最小等待时间,避免过快循环 return True def _handle_loop_completion(self, task: asyncio.Task): diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py index 7b24bd51..10630ecc 100644 --- a/src/chat/replyer/group_generator.py +++ b/src/chat/replyer/group_generator.py @@ -577,29 +577,6 @@ class DefaultReplyer: duration = end_time - start_time return name, result, duration - async def _build_disabled_jargon_explanation(self) -> str: - """当关闭黑话解释时使用的占位协程,避免额外的LLM调用""" - return "" - - async def _build_unknown_words_jargon(self, unknown_words: Optional[List[str]], chat_id: str) -> str: - """针对 Planner 提供的未知词语列表执行黑话检索""" - if not unknown_words: - return "" - # 清洗未知词语列表,只保留非空字符串 - concepts: List[str] = [] - for item in unknown_words: - if isinstance(item, str): - s = item.strip() - if s: - concepts.append(s) - if not concepts: - return "" - try: - return await retrieve_concepts_with_jargon(concepts, chat_id) - except Exception as e: - logger.error(f"未知词语黑话检索失败: {e}") - return "" - async def _build_jargon_explanation( self, chat_id: str, @@ -609,19 +586,14 @@ class DefaultReplyer: ) -> str: """ 统一的黑话解释构建函数: - - 根据 enable_jargon_explanation / jargon_mode 决定具体策略 + - 根据 enable_jargon_explanation 决定是否启用 """ + del unknown_words enable_jargon_explanation = getattr(global_config.expression, "enable_jargon_explanation", True) if not enable_jargon_explanation: return "" - jargon_mode = getattr(global_config.expression, "jargon_mode", "context") - - # planner 模式:仅使用 Planner 的 unknown_words - if jargon_mode == "planner": - return await self._build_unknown_words_jargon(unknown_words, chat_id) - - # 默认 / context 模式:使用上下文自动匹配黑话 + # 使用上下文自动匹配黑话 try: return await explain_jargon_in_context(chat_id, messages_short, chat_talking_prompt_short) or "" except Exception as e: @@ -1209,7 +1181,7 @@ class DefaultReplyer: prompt = await prompt_manager.render_prompt(template_prompt) generation_result = await llm_api.generate( llm_api.LLMServiceRequest( - task_name="tool_use", + task_name="utils", request_type="replyer.lpmm_knowledge", prompt=prompt, tool_options=[search_knowledge_tool.get_tool_definition()], diff --git a/src/chat/replyer/maisaka_generator.py b/src/chat/replyer/maisaka_generator.py index 946014ff..1aa4199a 100644 --- a/src/chat/replyer/maisaka_generator.py +++ b/src/chat/replyer/maisaka_generator.py @@ -20,8 +20,8 @@ from src.services.llm_service import LLMServiceClient from src.maisaka.message_adapter import ( get_message_kind, get_message_role, + get_message_source, get_message_text, - is_perception_message, parse_speaker_content, ) @@ -121,6 +121,9 @@ class MaisakaReplyGenerator: role = get_message_role(message) timestamp = self._format_message_time(message) + if get_message_source(message) == "user_reference": + continue + if role == "user": guided_reply = self._extract_guided_bot_reply(message) if guided_reply: @@ -148,7 +151,6 @@ class MaisakaReplyGenerator: chat_history: List[SessionMessage], reply_reason: str, expression_habits: str = "", - jargon_explanation: str = "", ) -> str: """构建 Maisaka replyer 提示词。""" current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -167,8 +169,6 @@ class MaisakaReplyGenerator: extra_sections: List[str] = [] if expression_habits.strip(): extra_sections.append(expression_habits.strip()) - if jargon_explanation.strip(): - extra_sections.append(jargon_explanation.strip()) user_sections = [ f"当前时间:{current_time}", @@ -198,7 +198,6 @@ class MaisakaReplyGenerator: log_reply: bool = True, chat_history: Optional[List[SessionMessage]] = None, expression_habits: str = "", - jargon_explanation: str = "", selected_expression_ids: Optional[List[int]] = None, ) -> Tuple[bool, ReplyGenerationResult]: """结合上下文生成 Maisaka 的最终可见回复。""" @@ -223,20 +222,20 @@ class MaisakaReplyGenerator: f"Maisaka replyer start: stream_id={stream_id} reply_reason={reply_reason!r} " f"history_size={len(chat_history)} target_message_id=" f"{reply_message.message_id if reply_message else None} " - f"expression_count={len(result.selected_expression_ids)} " - f"jargon_enabled={bool(jargon_explanation.strip())}" + f"expression_count={len(result.selected_expression_ids)}" ) filtered_history = [ message for message in chat_history - if get_message_role(message) != "system" and get_message_kind(message) != "perception" + if get_message_role(message) != "system" + and get_message_kind(message) != "perception" + and get_message_source(message) != "user_reference" ] prompt = self._build_prompt( chat_history=filtered_history, reply_reason=reply_reason or "", expression_habits=expression_habits, - jargon_explanation=jargon_explanation, ) result.completion.request_prompt = prompt diff --git a/src/chat/utils/statistic.py b/src/chat/utils/statistic.py index 51e5e643..25add4bf 100644 --- a/src/chat/utils/statistic.py +++ b/src/chat/utils/statistic.py @@ -12,7 +12,7 @@ from sqlmodel import col, select from src.common.logger import get_logger from src.common.database.database import get_db_session -from src.common.database.database_model import OnlineTime, ModelUsage, Messages, ActionRecord +from src.common.database.database_model import Messages, ModelUsage, OnlineTime, ToolRecord from src.manager.async_task_manager import AsyncTask from src.manager.local_store_manager import local_storage from src.config.config import global_config @@ -648,7 +648,7 @@ class StatisticOutputTask(AsyncTask): def _collect_message_count_for_period( self, collect_period: list[tuple[str, datetime]], - ) -> StatPeriodMapping: + ) -> dict[str, dict[str, object]]: """ 收集指定时间段的消息统计数据 @@ -659,8 +659,13 @@ class StatisticOutputTask(AsyncTask): collect_period.sort(key=lambda x: x[1], reverse=True) - stats: StatPeriodMapping = { - period_key: StatisticOutputTask._build_stat_period_data() for period_key, _ in collect_period + stats: dict[str, dict[str, object]] = { + period_key: { + TOTAL_MSG_CNT: 0, + MSG_CNT_BY_CHAT: defaultdict(int), + TOTAL_REPLY_CNT: 0, + } + for period_key, _ in collect_period } query_start_timestamp = collect_period[-1][1] @@ -710,24 +715,24 @@ class StatisticOutputTask(AsyncTask): StatisticOutputTask._add_defaultdict_int(stats[period_key], MSG_CNT_BY_CHAT, chat_id, 1) break - # 使用 ActionRecords 中的 reply 动作次数作为回复数基准 + # 使用 ToolRecord 中的 reply 工具次数作为回复数基准 try: - action_query_start_timestamp = collect_period[-1][1] + tool_query_start_timestamp = collect_period[-1][1] with get_db_session(auto_commit=False) as session: - statement = select(ActionRecord).where(col(ActionRecord.timestamp) >= action_query_start_timestamp) - actions = session.exec(statement).all() - for action in actions: - if action.action_name != "reply": + statement = select(ToolRecord).where(col(ToolRecord.timestamp) >= tool_query_start_timestamp) + tool_records = session.exec(statement).all() + for tool_record in tool_records: + if tool_record.tool_name != "reply": continue - action_time_ts = action.timestamp.timestamp() + action_time_ts = tool_record.timestamp.timestamp() for idx, (_, period_start_dt) in enumerate(collect_period): if action_time_ts >= period_start_dt.timestamp(): for period_key, _ in collect_period[idx:]: StatisticOutputTask._add_int_stat(stats[period_key], TOTAL_REPLY_CNT, 1) break except Exception as e: - logger.warning(f"统计 reply 动作次数失败,将回复数视为 0,错误信息:{e}") + logger.warning(f"统计 reply 工具次数失败,将回复数视为 0,错误信息:{e}") return stats diff --git a/src/common/data_models/message_component_data_model.py b/src/common/data_models/message_component_data_model.py index 995e54ce..d766cfcf 100644 --- a/src/common/data_models/message_component_data_model.py +++ b/src/common/data_models/message_component_data_model.py @@ -348,17 +348,11 @@ class MessageSequence: if isinstance(item, TextComponent): return {"type": "text", "data": item.text} elif isinstance(item, ImageComponent): - if not item.content: - raise RuntimeError("ImageComponent content 未初始化") - return {"type": "image", "data": item.content, "hash": item.binary_hash} + return {"type": "image", "data": self._ensure_binary_component_content(item, "[图片]"), "hash": item.binary_hash} elif isinstance(item, EmojiComponent): - if not item.content: - raise RuntimeError("EmojiComponent content 未初始化") - return {"type": "emoji", "data": item.content, "hash": item.binary_hash} + return {"type": "emoji", "data": self._ensure_binary_component_content(item, "[表情包]"), "hash": item.binary_hash} elif isinstance(item, VoiceComponent): - if not item.content: - raise RuntimeError("VoiceComponent content 未初始化") - return {"type": "voice", "data": item.content, "hash": item.binary_hash} + return {"type": "voice", "data": self._ensure_binary_component_content(item, "[语音消息]"), "hash": item.binary_hash} elif isinstance(item, AtComponent): return { "type": "at", @@ -388,6 +382,14 @@ class MessageSequence: logger.warning(f"Unofficial component type: {type(item)}, defaulting to DictComponent") return {"type": "dict", "data": item.data} + @staticmethod + def _ensure_binary_component_content(item: ByteComponent, fallback_text: str) -> str: + """确保二进制组件在序列化时带有稳定的文本占位。""" + if item.content: + return item.content + item.content = fallback_text + return item.content + @classmethod def _dict_2_item(cls, item: Dict[str, Any]) -> StandardMessageComponents: """内部方法:将单个消息组件的字典格式转换回组件对象""" diff --git a/src/common/data_models/tool_record_data_model.py b/src/common/data_models/tool_record_data_model.py new file mode 100644 index 00000000..90b594d5 --- /dev/null +++ b/src/common/data_models/tool_record_data_model.py @@ -0,0 +1,59 @@ +from datetime import datetime +from typing import Dict, Optional + +import json + +from src.common.database.database_model import ToolRecord + +from . import BaseDatabaseDataModel + + +class MaiToolRecord(BaseDatabaseDataModel[ToolRecord]): + """工具调用记录数据模型。""" + + def __init__( + self, + tool_id: str, + timestamp: datetime, + session_id: str, + tool_name: str, + tool_reasoning: Optional[str] = None, + tool_data: Optional[Dict] = None, + tool_builtin_prompt: Optional[str] = None, + tool_display_prompt: Optional[str] = None, + ): + self.tool_id = tool_id + self.timestamp = timestamp + self.session_id = session_id + self.tool_name = tool_name + self.tool_reasoning = tool_reasoning + self.tool_data = tool_data or {} + self.tool_builtin_prompt = tool_builtin_prompt + self.tool_display_prompt = tool_display_prompt + + @classmethod + def from_db_instance(cls, db_record: ToolRecord): + """从数据库实例创建数据模型对象。""" + return cls( + tool_id=db_record.tool_id, + timestamp=db_record.timestamp, + session_id=db_record.session_id, + tool_name=db_record.tool_name, + tool_reasoning=db_record.tool_reasoning, + tool_data=json.loads(db_record.tool_data) if db_record.tool_data else None, + tool_builtin_prompt=db_record.tool_builtin_prompt, + tool_display_prompt=db_record.tool_display_prompt, + ) + + def to_db_instance(self): + """将数据模型对象转换为数据库实例。""" + return ToolRecord( + tool_id=self.tool_id, + timestamp=self.timestamp, + session_id=self.session_id, + tool_name=self.tool_name, + tool_reasoning=self.tool_reasoning, + tool_data=json.dumps(self.tool_data) if self.tool_data else None, + tool_builtin_prompt=self.tool_builtin_prompt, + tool_display_prompt=self.tool_display_prompt, + ) diff --git a/src/common/database/database.py b/src/common/database/database.py index e88be9ec..293b47d2 100644 --- a/src/common/database/database.py +++ b/src/common/database/database.py @@ -3,7 +3,7 @@ from contextlib import contextmanager from pathlib import Path from typing import Generator, TYPE_CHECKING -from sqlalchemy import event +from sqlalchemy import event, text from sqlalchemy.engine import Engine from sqlalchemy.orm import sessionmaker from sqlmodel import SQLModel, Session, create_engine @@ -57,6 +57,41 @@ SessionLocal = sessionmaker( _db_initialized = False +def _migrate_action_records_to_tool_records() -> None: + """将旧的 ``action_records`` 历史数据迁移到 ``tool_records``。""" + migration_sql = text( + """ + INSERT INTO tool_records ( + tool_id, + timestamp, + session_id, + tool_name, + tool_reasoning, + tool_data, + tool_builtin_prompt, + tool_display_prompt + ) + SELECT + action_id, + timestamp, + session_id, + action_name, + action_reasoning, + action_data, + action_builtin_prompt, + action_display_prompt + FROM action_records + WHERE NOT EXISTS ( + SELECT 1 + FROM tool_records + WHERE tool_records.tool_id = action_records.action_id + ) + """ + ) + with engine.begin() as connection: + connection.execute(migration_sql) + + def initialize_database() -> None: global _db_initialized if _db_initialized: @@ -65,6 +100,7 @@ def initialize_database() -> None: import src.common.database.database_model # noqa: F401 SQLModel.metadata.create_all(engine) + _migrate_action_records_to_tool_records() _db_initialized = True diff --git a/src/common/database/database_model.py b/src/common/database/database_model.py index 5b274c43..33932909 100644 --- a/src/common/database/database_model.py +++ b/src/common/database/database_model.py @@ -134,6 +134,27 @@ class ActionRecord(SQLModel, table=True): action_display_prompt: Optional[str] = Field(default=None) # 最终输入到Prompt的内容 +class ToolRecord(SQLModel, table=True): + """存储工具调用记录""" + + __tablename__ = "tool_records" # type: ignore + + id: Optional[int] = Field(default=None, primary_key=True) # 自增主键 + + # 元信息 + tool_id: str = Field(index=True, max_length=255) # 工具调用ID + timestamp: datetime = Field(default_factory=datetime.now, sa_column=Column(DateTime, index=True)) # 记录时间戳 + session_id: str = Field(index=True, max_length=255) # 对应的 ChatSession session_id + + # 调用信息 + tool_name: str = Field(index=True, max_length=255) # 工具名称 + tool_reasoning: Optional[str] = Field(default=None) # 工具调用推理过程 + tool_data: Optional[str] = Field(default=None) # 工具数据,JSON格式存储 + + tool_builtin_prompt: Optional[str] = Field(default=None) # 内置工具提示 + tool_display_prompt: Optional[str] = Field(default=None) # 最终输入到 Prompt 的内容 + + class CommandRecord(SQLModel, table=True): """记录命令执行情况""" diff --git a/src/common/utils/utils_action.py b/src/common/utils/utils_action.py index c1fe7c28..382957c8 100644 --- a/src/common/utils/utils_action.py +++ b/src/common/utils/utils_action.py @@ -3,12 +3,12 @@ from typing import TYPE_CHECKING, List from src.common.utils.math_utils import translate_timestamp_to_human_readable, TimestampMode if TYPE_CHECKING: - from src.common.data_models.action_record_data_model import MaiActionRecord + from src.common.data_models.tool_record_data_model import MaiToolRecord class ActionUtils: @staticmethod - def build_readable_action_records(action_records: List["MaiActionRecord"], timestamp_mode: str | TimestampMode): + def build_readable_action_records(action_records: List["MaiToolRecord"], timestamp_mode: str | TimestampMode): """ 将动作列表转换为可读的文本格式。 @@ -27,6 +27,6 @@ class ActionUtils: output_lines = [] for record in action_records: timestamp_str = translate_timestamp_to_human_readable(record.timestamp.timestamp(), mode=timestamp_mode) - line = f"在{timestamp_str},你使用了{record.action_name},具体内容是:{record.action_display_prompt}" + line = f"在{timestamp_str},你使用了{record.tool_name},具体内容是:{record.tool_display_prompt}" output_lines.append(line) return "\n".join(output_lines) diff --git a/src/common/utils/utils_message.py b/src/common/utils/utils_message.py index 6b3b5f4e..e1db1d29 100644 --- a/src/common/utils/utils_message.py +++ b/src/common/utils/utils_message.py @@ -579,26 +579,26 @@ class MessageUtils: List[Tuple[float, str]]: 按时间排序的动作文本列表,每个元素为 (timestamp, action_text) """ from src.common.database.database import get_db_session - from src.common.database.database_model import ActionRecord + from src.common.database.database_model import ToolRecord # 获取这个时间范围内的动作记录,并匹配session_id try: with get_db_session() as session: actions_in_range = session.exec( - select(ActionRecord) - .where(col(ActionRecord.timestamp) >= datetime.fromtimestamp(min_time)) - .where(col(ActionRecord.timestamp) <= datetime.fromtimestamp(max_time)) - .where(col(ActionRecord.session_id) == session_id) - .order_by(col(ActionRecord.timestamp)) + select(ToolRecord) + .where(col(ToolRecord.timestamp) >= datetime.fromtimestamp(min_time)) + .where(col(ToolRecord.timestamp) <= datetime.fromtimestamp(max_time)) + .where(col(ToolRecord.session_id) == session_id) + .order_by(col(ToolRecord.timestamp)) ).all() # 获取最新消息之后的第一个动作记录 with get_db_session() as session: action_after_latest = session.exec( - select(ActionRecord) - .where(col(ActionRecord.timestamp) > datetime.fromtimestamp(max_time)) - .where(col(ActionRecord.session_id) == session_id) - .order_by(col(ActionRecord.timestamp)) + select(ToolRecord) + .where(col(ToolRecord.timestamp) > datetime.fromtimestamp(max_time)) + .where(col(ToolRecord.session_id) == session_id) + .order_by(col(ToolRecord.timestamp)) .limit(1) ).all() except Exception as e: @@ -611,7 +611,7 @@ class MessageUtils: # 构建动作文本列表 action_messages: List[Tuple[float, str]] = [] for action in actions: - if action_display_prompt := action.action_display_prompt or "": + if action_display_prompt := action.tool_display_prompt or "": action_time = action.timestamp.timestamp() action_messages.append((action_time, action_display_prompt)) diff --git a/src/config/config.py b/src/config/config.py index 6cc73331..44730ab9 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -56,8 +56,8 @@ CONFIG_DIR: Path = PROJECT_ROOT / "config" BOT_CONFIG_PATH: Path = (CONFIG_DIR / "bot_config.toml").resolve().absolute() MODEL_CONFIG_PATH: Path = (CONFIG_DIR / "model_config.toml").resolve().absolute() MMC_VERSION: str = "1.0.0" -CONFIG_VERSION: str = "8.1.10" -MODEL_CONFIG_VERSION: str = "1.12.0" +CONFIG_VERSION: str = "8.1.11" +MODEL_CONFIG_VERSION: str = "1.13.1" logger = get_logger("config") diff --git a/src/config/model_configs.py b/src/config/model_configs.py index 3f0feb54..a501be66 100644 --- a/src/config/model_configs.py +++ b/src/config/model_configs.py @@ -402,6 +402,15 @@ class ModelTaskConfig(ConfigBase): }, ) """首要回复模型配置, 还用于表达器和表达方式学习""" + + planner: TaskConfig = Field( + default_factory=TaskConfig, + json_schema_extra={ + "x-widget": "custom", + "x-icon": "map", + }, + ) + """规划模型配置""" vlm: TaskConfig = Field( default_factory=TaskConfig, @@ -421,24 +430,6 @@ class ModelTaskConfig(ConfigBase): ) """语音识别模型配置""" - tool_use: TaskConfig = Field( - default_factory=TaskConfig, - json_schema_extra={ - "x-widget": "custom", - "x-icon": "tools", - }, - ) - """工具使用模型配置, 需要使用支持工具调用的模型""" - - planner: TaskConfig = Field( - default_factory=TaskConfig, - json_schema_extra={ - "x-widget": "custom", - "x-icon": "map", - }, - ) - """规划模型配置""" - embedding: TaskConfig = Field( default_factory=TaskConfig, json_schema_extra={ @@ -446,22 +437,4 @@ class ModelTaskConfig(ConfigBase): "x-icon": "database", }, ) - """嵌入模型配置""" - - lpmm_entity_extract: TaskConfig = Field( - default_factory=TaskConfig, - json_schema_extra={ - "x-widget": "custom", - "x-icon": "filter", - }, - ) - """LPMM实体提取模型配置""" - - lpmm_rdf_build: TaskConfig = Field( - default_factory=TaskConfig, - json_schema_extra={ - "x-widget": "custom", - "x-icon": "network", - }, - ) - """LPMM RDF构建模型配置""" + """嵌入模型配置""" \ No newline at end of file diff --git a/src/config/official_configs.py b/src/config/official_configs.py index ea20f57d..e4e96bf5 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -234,17 +234,6 @@ class ChatConfig(ConfigBase): ) """上下文长度""" - planner_smooth: float = Field( - default=3, - ge=0, - json_schema_extra={ - "x-widget": "slider", - "x-icon": "gauge", - "step": 0.5, - }, - ) - """规划器平滑,增大数值会减小planner负荷,略微降低反应速度,推荐1-5,0为关闭,必须大于等于0""" - think_mode: Literal["classic", "deep", "dynamic"] = Field( default="dynamic", json_schema_extra={ @@ -659,21 +648,6 @@ class ExpressionConfig(ConfigBase): ) """是否在回复前尝试对上下文中的黑话进行解释(关闭可减少一次LLM调用,仅影响回复前的黑话匹配与解释,不影响黑话学习)""" - jargon_mode: Literal["context", "planner"] = Field( - default="planner", - json_schema_extra={ - "x-widget": "select", - "x-icon": "settings", - }, - ) - """ - 黑话解释来源模式 - - 可选: - - "context":使用上下文自动匹配黑话 - - "planner":仅使用Planner在reply动作中给出的unknown_words列表 - """ - class ToolConfig(ConfigBase): """工具配置类""" @@ -1544,7 +1518,7 @@ class MaiSakaConfig(ConfigBase): "x-icon": "brain", }, ) - """鏄惁鍦?CLI 涓樉绀哄唴蹇冩€濊€冨拰瀹屾暣 Prompt""" + """是否显示MaiSaka思考过程""" user_name: str = Field( default="用户", @@ -1553,7 +1527,7 @@ class MaiSakaConfig(ConfigBase): "x-icon": "user", }, ) - """MaiSaka 涓敤鎴风殑鏄剧ず鍚嶇О""" + """MaiSaka 使用的用户名称""" direct_image_input: bool = Field( default=True, @@ -1562,7 +1536,7 @@ class MaiSakaConfig(ConfigBase): "x-icon": "image", }, ) - """是否将图片直接作为多模态消息传入 Maisaka 主循环,而不是仅使用转译文本""" + """是否直接输入图片""" merge_user_messages: bool = Field( default=True, @@ -1571,7 +1545,7 @@ class MaiSakaConfig(ConfigBase): "x-icon": "merge", }, ) - """Whether Maisaka should merge newly received user utterances into a single user message per round""" + """是否将新接收的用户发言合并为单个用户消息""" max_internal_rounds: int = Field( default=6, @@ -1581,7 +1555,7 @@ class MaiSakaConfig(ConfigBase): "x-icon": "repeat", }, ) - """Maximum number of internal planning rounds per inbound message.""" + """每个入站消息的最大内部规划轮数""" terminal_image_preview: bool = Field( default=False, @@ -1590,7 +1564,7 @@ class MaiSakaConfig(ConfigBase): "x-icon": "image", }, ) - """Whether Maisaka should render a low-resolution terminal preview for images in prompt display""" + """是否渲染低分辨率终端预览图片""" terminal_image_preview_width: int = Field( default=24, @@ -1600,16 +1574,8 @@ class MaiSakaConfig(ConfigBase): "x-icon": "columns", }, ) - """Character width for Maisaka terminal image previews""" + """Maisaka终端图片预览的字符宽度""" - take_over_hfc: bool = Field( - default=False, - json_schema_extra={ - "x-widget": "switch", - "x-icon": "git-branch", - }, - ) - """Enable Maisaka takeover for the Heart Flow Chat planner and reply pipeline""" class PluginRuntimeConfig(ConfigBase): """插件运行时配置类""" diff --git a/src/learners/expression_auto_check_task.py b/src/learners/expression_auto_check_task.py index 311d69e8..44141118 100644 --- a/src/learners/expression_auto_check_task.py +++ b/src/learners/expression_auto_check_task.py @@ -76,7 +76,7 @@ def create_evaluation_prompt(situation: str, style: str) -> str: return prompt -judge_llm = LLMServiceClient(task_name="tool_use", request_type="expression_check") +judge_llm = LLMServiceClient(task_name="utils", request_type="expression_check") async def single_expression_check(situation: str, style: str) -> tuple[bool, str, str | None]: diff --git a/src/learners/expression_learner.py b/src/learners/expression_learner.py index 34d2cb8b..579fb5ea 100644 --- a/src/learners/expression_learner.py +++ b/src/learners/expression_learner.py @@ -30,8 +30,8 @@ logger = get_logger("expressor") express_learn_model = LLMServiceClient( task_name="utils", request_type="expression.learner" ) -summary_model = LLMServiceClient(task_name="tool_use", request_type="expression.summary") -check_model = LLMServiceClient(task_name="tool_use", request_type="expression.check") +summary_model = LLMServiceClient(task_name="utils", request_type="expression.summary") +check_model = LLMServiceClient(task_name="utils", request_type="expression.check") class ExpressionLearner: diff --git a/src/learners/expression_selector.py b/src/learners/expression_selector.py index 7fc714ea..30e2f154 100644 --- a/src/learners/expression_selector.py +++ b/src/learners/expression_selector.py @@ -19,7 +19,7 @@ logger = get_logger("expression_selector") class ExpressionSelector: def __init__(self): self.llm_model = LLMServiceClient( - task_name="tool_use", request_type="expression.selector" + task_name="utils", request_type="expression.selector" ) def can_use_expression_for_chat(self, chat_id: str) -> bool: diff --git a/src/learners/expression_utils.py b/src/learners/expression_utils.py index 573ce364..23c41c39 100644 --- a/src/learners/expression_utils.py +++ b/src/learners/expression_utils.py @@ -12,7 +12,7 @@ from src.common.logger import get_logger logger = get_logger("expression_utils") -judge_llm = LLMServiceClient(task_name="tool_use", request_type="expression_check") +judge_llm = LLMServiceClient(task_name="utils", request_type="expression_check") def _normalize_repair_json_result(repaired_result: Any) -> str: diff --git a/src/learners/jargon_explainer_old.py b/src/learners/jargon_explainer_old.py index fded9019..876b4539 100644 --- a/src/learners/jargon_explainer_old.py +++ b/src/learners/jargon_explainer_old.py @@ -25,7 +25,7 @@ class JargonExplainer: def __init__(self, chat_id: str) -> None: self.chat_id = chat_id self.llm = LLMServiceClient( - task_name="tool_use", + task_name="utils", request_type="jargon.explain", ) diff --git a/src/maisaka/reasoning_engine.py b/src/maisaka/reasoning_engine.py index b329170f..08409e74 100644 --- a/src/maisaka/reasoning_engine.py +++ b/src/maisaka/reasoning_engine.py @@ -1,13 +1,14 @@ """Maisaka 推理引擎。""" -import difflib -import json -import asyncio -import re -import time from datetime import datetime from typing import TYPE_CHECKING, Optional +import asyncio +import difflib +import json +import re +import time + from sqlmodel import select from src.chat.heart_flow.heartFC_utils import CycleDetail @@ -21,13 +22,14 @@ from src.common.logger import get_logger from src.config.config import global_config from src.learners.jargon_explainer import search_jargon from src.llm_models.payload_content.tool_option import ToolCall -from src.services import send_service +from src.services import database_service as database_api, send_service from .message_adapter import ( build_message, build_visible_text_from_sequence, clone_message_sequence, format_speaker_content, + get_message_source, get_message_text, get_message_role, ) @@ -69,6 +71,8 @@ class MaisakaReasoningEngine: cycle_detail = self._start_cycle() self._runtime._log_cycle_started(cycle_detail, round_index) try: + # 每次LLM生成前,动态添加参考消息到最新位置 + self._append_jargon_reference_message() planner_started_at = time.time() response = await self._runtime._chat_loop_service.chat_loop_step(self._runtime._chat_history) cycle_detail.time_records["planner"] = time.time() - planner_started_at @@ -134,10 +138,7 @@ class MaisakaReasoningEngine: raw_message=user_sequence, display_text=visible_text, ) - insert_index = self._insert_chat_history_message(history_message) - reference_message = await self._build_jargon_reference_message(message) - if reference_message is not None: - self._runtime._chat_history.insert(insert_index + 1, reference_message) + self._insert_chat_history_message(history_message) self._trim_chat_history() async def _build_message_sequence(self, message: SessionMessage) -> tuple[MessageSequence, str]: @@ -217,65 +218,84 @@ class MaisakaReasoningEngine: self._runtime._chat_history.insert(insert_at, message) return insert_at - async def _build_jargon_reference_message(self, message: SessionMessage) -> Optional[SessionMessage]: - """如果命中了黑话词条,则构建一条额外的参考信息消息。""" - content = (get_message_text(message) or "").strip() + def _append_jargon_reference_message(self) -> None: + """每次LLM生成前,如果命中了黑话词条,则添加一条参考信息消息到聊天历史末尾。""" + content = self._build_user_history_corpus() if not content: - if not message.processed_plain_text: - await message.process() - content = (message.processed_plain_text or "").strip() - if not content: - return None + return matched_words = self._find_jargon_words_in_text(content) if not matched_words: - return None + return reference_text = ( "[参考信息]\n" f"{','.join(matched_words)}可能是jargon,可以使用query_jargon来查看其含义" ) reference_sequence = MessageSequence([TextComponent(reference_text)]) - return build_message( + + # 使用当前时间作为时间戳 + reference_message = build_message( role="user", content="", source="user_reference", - timestamp=message.timestamp, - platform=message.platform, + timestamp=datetime.now(), + platform=self._runtime.chat_stream.platform, session_id=self._runtime.session_id, - group_info=self._runtime._build_group_info(message), + group_info=self._runtime._build_group_info(), user_info=self._runtime._build_runtime_user_info(), raw_message=reference_sequence, display_text=reference_text, ) + self._runtime._chat_history.append(reference_message) + + def _build_user_history_corpus(self) -> str: + """拼接当前聊天记录内所有用户消息的正文,用于统一匹配黑话。""" + parts: list[str] = [] + for history_message in self._runtime._chat_history: + if get_message_role(history_message) != "user": + continue + if get_message_source(history_message) != "user": + continue + text = (get_message_text(history_message) or "").strip() + if not text: + continue + parts.append(text) + + return "\n".join(parts) def _find_jargon_words_in_text(self, content: str) -> list[str]: """匹配正文中出现的 jargon 词条。""" lowered_content = content.lower() - matches: list[str] = [] + matched_entries: list[tuple[int, int, int, str]] = [] seen_words: set[str] = set() with get_db_session(auto_commit=False) as session: - query = select(Jargon).where(Jargon.is_jargon.is_(True)).order_by(Jargon.count.desc()).limit(200) # type: ignore[attr-defined] + query = ( + select(Jargon) + .where(Jargon.is_jargon.is_(True)) + .order_by(Jargon.count.desc()) # type: ignore[attr-defined] + ) jargons = session.exec(query).all() for jargon in jargons: jargon_content = str(jargon.content or "").strip() if not jargon_content: continue - if jargon_content in seen_words: + normalized_content = jargon_content.lower() + if normalized_content in seen_words: continue if not self._is_visible_jargon(jargon): continue - if not self._jargon_matches_text(jargon_content, lowered_content, content): + match_position = self._get_jargon_match_position(jargon_content, lowered_content, content) + if match_position is None: continue - seen_words.add(jargon_content) - matches.append(jargon_content) - if len(matches) >= 8: - break + seen_words.add(normalized_content) + matched_entries.append((match_position, -len(jargon_content), -int(jargon.count or 0), jargon_content)) - return matches + matched_entries.sort() + return [matched_content for _, _, _, matched_content in matched_entries[:8]] def _is_visible_jargon(self, jargon: Jargon) -> bool: """判断当前会话是否可见该 jargon。""" @@ -290,13 +310,17 @@ class MaisakaReasoningEngine: return self._runtime.session_id in session_id_dict @staticmethod - def _jargon_matches_text(jargon_content: str, lowered_content: str, original_content: str) -> bool: - """判断词条是否命中消息正文。""" + def _get_jargon_match_position(jargon_content: str, lowered_content: str, original_content: str) -> Optional[int]: + """返回 jargon 在文本中的首次命中位置,未命中时返回 `None`。""" if re.search(r"[\u4e00-\u9fff]", jargon_content): - return jargon_content in original_content + match_index = original_content.lower().find(jargon_content.lower()) + return match_index if match_index >= 0 else None pattern = rf"\b{re.escape(jargon_content.lower())}\b" - return re.search(pattern, lowered_content) is not None + match = re.search(pattern, lowered_content) + if match is None: + return None + return match.start() def _start_cycle(self) -> CycleDetail: """开始一轮 Maisaka 思考循环。""" @@ -559,7 +583,6 @@ class MaisakaReasoningEngine: chat_history=self._runtime._chat_history, reply_message=target_message, reply_reason=latest_thought, - unknown_words=unknown_words, ) except Exception: logger.exception( @@ -587,7 +610,6 @@ class MaisakaReasoningEngine: unknown_words=unknown_words, log_reply=False, expression_habits=reply_context.expression_habits, - jargon_explanation=reply_context.jargon_explanation, selected_expression_ids=reply_context.selected_expression_ids, ) except Exception: @@ -645,6 +667,25 @@ class MaisakaReasoningEngine: if not sent: return False + target_user_info = target_message.message_info.user_info + target_user_name = ( + target_user_info.user_cardname + or target_user_info.user_nickname + or target_user_info.user_id + ) + if self._runtime.chat_stream is not None: + await database_api.store_tool_info( + chat_stream=self._runtime.chat_stream, + display_prompt=f"你对{target_user_name}进行了回复:{reply_text}", + tool_data={ + "msg_id": target_message_id, + "quote": quote_reply, + "reply_text": reply_text, + }, + tool_name="reply", + tool_reasoning=latest_thought, + ) + bot_name = global_config.bot.nickname.strip() or "MaiSaka" self._runtime._chat_history.append( build_message( diff --git a/src/maisaka/reply_context_builder.py b/src/maisaka/reply_context_builder.py index f619216a..c9b2487f 100644 --- a/src/maisaka/reply_context_builder.py +++ b/src/maisaka/reply_context_builder.py @@ -13,9 +13,8 @@ from src.common.database.database import get_db_session from src.common.database.database_model import Expression, Jargon from src.common.logger import get_logger from src.config.config import global_config -from src.learners.jargon_explainer import search_jargon -from .message_adapter import get_message_text, parse_speaker_content +from .message_adapter import get_message_role, get_message_source, get_message_text, parse_speaker_content logger = get_logger("maisaka_reply_context") @@ -40,6 +39,7 @@ class _ExpressionRecord: class _JargonRecord: jargon_id: Optional[int] content: str + count: int meaning: str session_id_dict: str is_global: bool @@ -56,7 +56,6 @@ class MaisakaReplyContextBuilder: chat_history: List[SessionMessage], reply_message: Optional[SessionMessage], reply_reason: str, - unknown_words: Optional[List[str]] = None, ) -> ReplyContextBuildResult: """构建 reply 前置上下文。""" expression_habits, selected_expression_ids = self._build_expression_habits( @@ -67,7 +66,6 @@ class MaisakaReplyContextBuilder: jargon_explanation = self._build_jargon_explanation( chat_history=chat_history, reply_message=reply_message, - unknown_words=unknown_words, ) return ReplyContextBuildResult( expression_habits=expression_habits, @@ -129,56 +127,13 @@ class MaisakaReplyContextBuilder: self, chat_history: List[SessionMessage], reply_message: Optional[SessionMessage], - unknown_words: Optional[List[str]], ) -> str: """查询并格式化黑话解释。""" if not global_config.expression.enable_jargon_explanation: return "" - if global_config.expression.jargon_mode == "planner": - return self._build_planner_jargon_explanation(unknown_words or []) - return self._build_context_jargon_explanation(chat_history, reply_message) - def _build_planner_jargon_explanation(self, unknown_words: List[str]) -> str: - """基于 planner 传入的 unknown_words 构建黑话解释。""" - normalized_words: List[str] = [] - seen_words: set[str] = set() - for raw_word in unknown_words: - word = str(raw_word or "").strip() - if not word: - continue - lowered = word.lower() - if lowered in seen_words: - continue - seen_words.add(lowered) - normalized_words.append(word) - - if not normalized_words: - return "" - - lines: List[str] = [] - seen_entries: set[str] = set() - for word in normalized_words: - matches = search_jargon(word, chat_id=self._session_id, limit=3, fuzzy=False) - if not matches: - matches = search_jargon(word, chat_id=self._session_id, limit=3, fuzzy=True) - for match in matches: - content = str(match.get("content") or "").strip() - meaning = str(match.get("meaning") or "").strip() - if not content or not meaning: - continue - entry_key = f"{content}\n{meaning}" - if entry_key in seen_entries: - continue - seen_entries.add(entry_key) - lines.append(f"- {content}: {meaning}") - - if not lines: - return "" - - return "【黑话解释】\n" + "\n".join(lines[:8]) - def _build_context_jargon_explanation( self, chat_history: List[SessionMessage], @@ -190,22 +145,25 @@ class MaisakaReplyContextBuilder: return "" jargon_records = self._load_jargon_records() - lines: List[str] = [] + matched_records: List[tuple[int, int, int, _JargonRecord]] = [] seen_contents: set[str] = set() for jargon in jargon_records: if not jargon.content or not jargon.meaning: continue - if jargon.content in seen_contents: + normalized_content = jargon.content.lower() + if normalized_content in seen_contents: continue if not self._is_visible_jargon(jargon): continue - if not self._is_jargon_in_corpus(jargon.content, corpus): + match_position = self._get_jargon_match_position(jargon.content, corpus) + if match_position is None: continue - seen_contents.add(jargon.content) - lines.append(f"- {jargon.content}: {jargon.meaning}") - if len(lines) >= 8: - break + seen_contents.add(normalized_content) + matched_records.append((match_position, -len(jargon.content), -jargon.count, jargon)) + + matched_records.sort() + lines = [f"- {jargon.content}: {jargon.meaning}" for _, _, _, jargon in matched_records[:8]] if not lines: return "" @@ -219,13 +177,14 @@ class MaisakaReplyContextBuilder: def _load_jargon_records(self) -> List[_JargonRecord]: """在 session 内提取黑话的静态数据,避免 detached ORM 对象。""" with get_db_session(auto_commit=False) as session: - query = select(Jargon).where(Jargon.meaning != "") # type: ignore[attr-defined] - query = query.order_by(Jargon.count.desc()).limit(200) # type: ignore[attr-defined] + query = select(Jargon).where(Jargon.is_jargon.is_(True), Jargon.meaning != "") # type: ignore[attr-defined] + query = query.order_by(Jargon.count.desc()) # type: ignore[attr-defined] jargons = session.exec(query).all() return [ _JargonRecord( jargon_id=jargon.id, content=(jargon.content or "").strip(), + count=int(jargon.count or 0), meaning=(jargon.meaning or "").strip(), session_id_dict=jargon.session_id_dict or "{}", is_global=bool(jargon.is_global), @@ -238,20 +197,26 @@ class MaisakaReplyContextBuilder: chat_history: List[SessionMessage], reply_message: Optional[SessionMessage], ) -> str: - """将最近上下文拼成待匹配文本。""" + """将当前聊天记录内所有用户消息拼成待匹配文本。""" parts: List[str] = [] - for message in chat_history[-20:]: + for message in chat_history: + if get_message_role(message) != "user": + continue + if get_message_source(message) != "user": + continue text = get_message_text(message).strip() if not text: continue _, body = parse_speaker_content(text) parts.append(body.strip() or text) - if reply_message is not None: + if reply_message is not None and get_message_source(reply_message) == "user": reply_text = get_message_text(reply_message).strip() if reply_text: _, body = parse_speaker_content(reply_text) - parts.append(body.strip() or reply_text) + normalized_reply_text = body.strip() or reply_text + if normalized_reply_text not in parts: + parts.append(normalized_reply_text) return "\n".join(parts) @@ -268,10 +233,16 @@ class MaisakaReplyContextBuilder: return self._session_id in session_id_dict @staticmethod - def _is_jargon_in_corpus(content: str, corpus: str) -> bool: - """判断黑话词条是否出现在上下文中。""" + def _get_jargon_match_position(content: str, corpus: str) -> Optional[int]: + """返回 jargon 在上下文中的首次命中位置,未命中时返回 `None`。""" if re.search(r"[\u4e00-\u9fff]", content): - return re.search(re.escape(content), corpus, flags=re.IGNORECASE) is not None + match = re.search(re.escape(content), corpus, flags=re.IGNORECASE) + if match is None: + return None + return match.start() pattern = rf"\b{re.escape(content)}\b" - return re.search(pattern, corpus, flags=re.IGNORECASE) is not None + match = re.search(pattern, corpus, flags=re.IGNORECASE) + if match is None: + return None + return match.start() diff --git a/src/memory_system/memory_retrieval.py b/src/memory_system/memory_retrieval.py index 41851408..5bc6a3a1 100644 --- a/src/memory_system/memory_retrieval.py +++ b/src/memory_system/memory_retrieval.py @@ -271,7 +271,7 @@ async def _react_agent_solve_question( message_factory_fn: Callable[..., List[Message]] = _build_messages # pyright: ignore[reportGeneralTypeIssues] generation_result = await llm_api.generate( llm_api.LLMServiceRequest( - task_name="tool_use", + task_name="utils", request_type="memory.react", message_factory=message_factory_fn, # type: ignore[arg-type] tool_options=tool_definitions, @@ -681,7 +681,7 @@ async def _react_agent_solve_question( evaluation_result = await llm_api.generate( llm_api.LLMServiceRequest( - task_name="tool_use", + task_name="utils", request_type="memory.react.final", prompt=evaluation_prompt, tool_options=[], diff --git a/src/person_info/person_info.py b/src/person_info/person_info.py index cf8143c6..c603f4b7 100644 --- a/src/person_info/person_info.py +++ b/src/person_info/person_info.py @@ -24,7 +24,7 @@ from src.services.llm_service import LLMServiceClient logger = get_logger("person_info") relation_selection_model = LLMServiceClient( - task_name="tool_use", request_type="relation_selection" + task_name="utils", request_type="relation_selection" ) diff --git a/src/services/database_service.py b/src/services/database_service.py index 5b8b716f..7871981d 100644 --- a/src/services/database_service.py +++ b/src/services/database_service.py @@ -11,7 +11,7 @@ from sqlmodel import SQLModel from src.chat.message_receive.chat_manager import BotChatSession from src.common.database.database import get_db_session -from src.common.database.database_model import ActionRecord +from src.common.database.database_model import ToolRecord from src.common.logger import get_logger logger = get_logger("database_service") @@ -157,6 +157,39 @@ async def db_count(model_class: type[SQLModel], filters: Optional[dict[str, Any] return 0 +async def store_tool_info( + chat_stream: BotChatSession, + builtin_prompt: Optional[str] = None, + display_prompt: str = "", + tool_id: str = "", + tool_data: Optional[dict[str, Any]] = None, + tool_name: str = "", + tool_reasoning: str = "", +) -> Optional[dict[str, Any]]: + try: + record_data = { + "tool_id": tool_id or str(int(time.time() * 1000000)), + "timestamp": datetime.now(), + "session_id": chat_stream.session_id, + "tool_name": tool_name, + "tool_data": json.dumps(tool_data or {}, ensure_ascii=False), + "tool_reasoning": tool_reasoning, + "tool_builtin_prompt": builtin_prompt, + "tool_display_prompt": display_prompt, + } + + saved_record = await db_save(ToolRecord, data=record_data, key_field="tool_id", key_value=record_data["tool_id"]) + if saved_record: + logger.debug(f"[DatabaseService] 成功存储工具信息: {tool_name} (ID: {record_data['tool_id']})") + else: + logger.error(f"[DatabaseService] 存储工具信息失败: {tool_name}") + return saved_record + except Exception as e: + logger.error(f"[DatabaseService] 存储工具信息时发生错误: {e}") + traceback.print_exc() + return None + + async def store_action_info( chat_stream: BotChatSession, builtin_prompt: Optional[str] = None, @@ -166,27 +199,13 @@ async def store_action_info( action_name: str = "", action_reasoning: str = "", ) -> Optional[dict[str, Any]]: - try: - record_data = { - "action_id": thinking_id or str(int(time.time() * 1000000)), - "timestamp": datetime.now(), - "session_id": chat_stream.session_id, - "action_name": action_name, - "action_data": json.dumps(action_data or {}, ensure_ascii=False), - "action_reasoning": action_reasoning, - "action_builtin_prompt": builtin_prompt, - "action_display_prompt": display_prompt, - } - - saved_record = await db_save( - ActionRecord, data=record_data, key_field="action_id", key_value=record_data["action_id"] - ) - if saved_record: - logger.debug(f"[DatabaseService] 成功存储动作信息: {action_name} (ID: {record_data['action_id']})") - else: - logger.error(f"[DatabaseService] 存储动作信息失败: {action_name}") - return saved_record - except Exception as e: - logger.error(f"[DatabaseService] 存储动作信息时发生错误: {e}") - traceback.print_exc() - return None + """兼容旧接口,内部转发到 ``store_tool_info``。""" + return await store_tool_info( + chat_stream=chat_stream, + builtin_prompt=builtin_prompt, + display_prompt=display_prompt, + tool_id=thinking_id, + tool_data=action_data, + tool_name=action_name, + tool_reasoning=action_reasoning, + ) diff --git a/src/services/message_service.py b/src/services/message_service.py index d918b177..5291bf04 100644 --- a/src/services/message_service.py +++ b/src/services/message_service.py @@ -7,9 +7,9 @@ from typing import List, Optional, Tuple from sqlmodel import col, select from src.chat.message_receive.message import SessionMessage -from src.common.data_models.action_record_data_model import MaiActionRecord +from src.common.data_models.tool_record_data_model import MaiToolRecord from src.common.database.database import get_db_session -from src.common.database.database_model import ActionRecord, Images, ImageType +from src.common.database.database_model import Images, ImageType, ToolRecord from src.common.message_repository import count_messages, find_messages from src.common.utils.math_utils import translate_timestamp_to_human_readable from src.common.utils.utils_action import ActionUtils @@ -238,18 +238,18 @@ def get_actions_by_timestamp_with_chat( timestamp_start: float, timestamp_end: float, limit: Optional[int] = None, -) -> List[MaiActionRecord]: +) -> List[MaiToolRecord]: with get_db_session() as session: statement = ( - select(ActionRecord) - .where(col(ActionRecord.session_id) == chat_id) - .where(col(ActionRecord.timestamp) >= datetime.fromtimestamp(timestamp_start)) - .where(col(ActionRecord.timestamp) <= datetime.fromtimestamp(timestamp_end)) - .order_by(col(ActionRecord.timestamp)) + select(ToolRecord) + .where(col(ToolRecord.session_id) == chat_id) + .where(col(ToolRecord.timestamp) >= datetime.fromtimestamp(timestamp_start)) + .where(col(ToolRecord.timestamp) <= datetime.fromtimestamp(timestamp_end)) + .order_by(col(ToolRecord.timestamp)) ) if limit is not None: statement = statement.limit(limit) - return [MaiActionRecord.from_db_instance(item) for item in session.exec(statement).all()] + return [MaiToolRecord.from_db_instance(item) for item in session.exec(statement).all()] def replace_user_references(text: str, platform: str, replace_bot_name: bool = False) -> str: