From 8cb0ad3d42fc94a60940fcb27ad627c05197e6ac Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sun, 29 Mar 2026 15:06:50 +0800 Subject: [PATCH] =?UTF-8?q?ref=EF=BC=9A=E5=88=86=E7=A6=BBknow=E6=A8=A1?= =?UTF-8?q?=E5=9D=97=E5=92=8Ccli=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cli/__init__.py | 3 + src/{maisaka => cli}/console.py | 2 +- src/{maisaka => cli}/input_reader.py | 6 +- src/{maisaka/cli.py => cli/maisaka_cli.py} | 49 ++++- src/know_u/__init__.py | 3 + src/know_u/knowledge.py | 239 +++++++++++++++++++++ src/know_u/knowledge_store.py | 197 +++++++++++++++++ src/maisaka/chat_loop_service.py | 4 +- src/maisaka/knowledge.py | 58 ----- src/maisaka/knowledge_store.py | 190 ---------------- src/maisaka/runtime.py | 66 +++++- src/maisaka/tool_handlers.py | 4 +- src/mcp_module/config.py | 2 +- src/mcp_module/connection.py | 2 +- src/mcp_module/manager.py | 2 +- 15 files changed, 556 insertions(+), 271 deletions(-) create mode 100644 src/cli/__init__.py rename src/{maisaka => cli}/console.py (88%) rename src/{maisaka => cli}/input_reader.py (93%) rename src/{maisaka/cli.py => cli/maisaka_cli.py} (87%) create mode 100644 src/know_u/__init__.py create mode 100644 src/know_u/knowledge.py create mode 100644 src/know_u/knowledge_store.py delete mode 100644 src/maisaka/knowledge.py delete mode 100644 src/maisaka/knowledge_store.py diff --git a/src/cli/__init__.py b/src/cli/__init__.py new file mode 100644 index 00000000..28eb1a3b --- /dev/null +++ b/src/cli/__init__.py @@ -0,0 +1,3 @@ +""" +CLI startup and interaction package. +""" diff --git a/src/maisaka/console.py b/src/cli/console.py similarity index 88% rename from src/maisaka/console.py rename to src/cli/console.py index 5f44eda7..3dbfde36 100644 --- a/src/maisaka/console.py +++ b/src/cli/console.py @@ -1,4 +1,4 @@ -"""MaiSaka 终端输出组件。""" +"""MaiSaka terminal console helpers.""" from rich.console import Console from rich.theme import Theme diff --git a/src/maisaka/input_reader.py b/src/cli/input_reader.py similarity index 93% rename from src/maisaka/input_reader.py rename to src/cli/input_reader.py index eff2525c..f1ac6b44 100644 --- a/src/maisaka/input_reader.py +++ b/src/cli/input_reader.py @@ -1,12 +1,12 @@ """ -MaiSaka - 异步输入读取器 -将阻塞的标准输入读取放到后台线程中,供 asyncio 循环安全消费。 +MaiSaka asynchronous stdin reader for CLI interaction. """ +from typing import Optional + import asyncio import sys import threading -from typing import Optional class InputReader: diff --git a/src/maisaka/cli.py b/src/cli/maisaka_cli.py similarity index 87% rename from src/maisaka/cli.py rename to src/cli/maisaka_cli.py index 5f2f14cd..ad4d5c9a 100644 --- a/src/maisaka/cli.py +++ b/src/cli/maisaka_cli.py @@ -8,24 +8,23 @@ from typing import Optional import asyncio import os +import time from rich import box from rich.markdown import Markdown from rich.panel import Panel from rich.text import Text +from src.know_u.knowledge import KnowledgeLearner, retrieve_relevant_knowledge +from src.know_u.knowledge_store import get_knowledge_store from src.chat.message_receive.message import SessionMessage from src.chat.replyer.maisaka_generator import MaisakaReplyGenerator from src.config.config import config_manager, global_config from src.mcp_module import MCPManager -from .chat_loop_service import MaisakaChatLoopService -from .console import console -from .input_reader import InputReader -from .knowledge import retrieve_relevant_knowledge -from .knowledge_store import get_knowledge_store -from .message_adapter import build_message, format_speaker_content, remove_last_perception -from .tool_handlers import ( +from src.maisaka.chat_loop_service import MaisakaChatLoopService +from src.maisaka.message_adapter import build_message, format_speaker_content, remove_last_perception +from src.maisaka.tool_handlers import ( ToolHandlerContext, handle_mcp_tool, handle_stop, @@ -33,6 +32,9 @@ from .tool_handlers import ( handle_wait, ) +from .console import console +from .input_reader import InputReader + class BufferCLI: """Maisaka 命令行交互入口。""" @@ -43,6 +45,10 @@ class BufferCLI: self._reader = InputReader() self._chat_history: Optional[list[SessionMessage]] = None self._knowledge_store = get_knowledge_store() + self._knowledge_learner = KnowledgeLearner("maisaka_cli") + self._knowledge_min_messages_for_extraction = 10 + self._knowledge_min_extraction_interval = 30 + self._last_knowledge_extraction_time = 0.0 knowledge_stats = self._knowledge_store.get_stats() if knowledge_stats["total_items"] > 0: @@ -112,6 +118,7 @@ class BufferCLI: self._chat_start_time = now self._last_assistant_response_time = None self._chat_history = self._chat_loop_service.build_chat_context(user_text) + self._trigger_knowledge_learning([self._chat_history[-1]]) else: self._chat_history.append( build_message( @@ -123,9 +130,37 @@ class BufferCLI: ), ) ) + self._trigger_knowledge_learning([self._chat_history[-1]]) await self._run_llm_loop(self._chat_history) + def _trigger_knowledge_learning(self, messages: list[SessionMessage]) -> None: + """在 CLI 会话中按批次触发 knowledge 学习。""" + if not global_config.maisaka.enable_knowledge_module: + return + + self._knowledge_learner.add_messages(messages) + + elapsed = time.monotonic() - self._last_knowledge_extraction_time + if elapsed < self._knowledge_min_extraction_interval: + return + + cache_size = self._knowledge_learner.get_cache_size() + if cache_size < self._knowledge_min_messages_for_extraction: + return + + self._last_knowledge_extraction_time = time.monotonic() + asyncio.create_task(self._run_knowledge_learning()) + + async def _run_knowledge_learning(self) -> None: + """后台执行 knowledge 学习,避免阻塞主对话。""" + try: + added_count = await self._knowledge_learner.learn() + if added_count > 0 and global_config.maisaka.show_thinking: + console.print(f"[muted]Knowledge learning added {added_count} item(s).[/muted]") + except Exception as exc: + console.print(f"[warning]Knowledge learning failed: {exc}[/warning]") + async def _run_llm_loop(self, chat_history: list[SessionMessage]) -> None: """ Main inner loop for the Maisaka planner. diff --git a/src/know_u/__init__.py b/src/know_u/__init__.py new file mode 100644 index 00000000..9945120b --- /dev/null +++ b/src/know_u/__init__.py @@ -0,0 +1,3 @@ +""" +Knowledge utilities package for Maisaka. +""" diff --git a/src/know_u/knowledge.py b/src/know_u/knowledge.py new file mode 100644 index 00000000..e815e96b --- /dev/null +++ b/src/know_u/knowledge.py @@ -0,0 +1,239 @@ +""" +Maisaka knowledge retrieval and learning helpers. +""" + +from typing import Any, Dict, List + +import asyncio +import json + +from src.chat.message_receive.message import SessionMessage +from src.chat.utils.utils import is_bot_self +from src.common.data_models.llm_service_data_models import LLMGenerationOptions +from src.common.logger import get_logger +from src.services.llm_service import LLMServiceClient + +from src.know_u.knowledge_store import KNOWLEDGE_CATEGORIES, get_knowledge_store +from src.maisaka.message_adapter import get_message_role, get_message_text, parse_speaker_content + +logger = get_logger("maisaka_knowledge") + +NO_RESULT_KEYWORDS = [ + "无", + "没有", + "不适用", + "无需", + "无相关", +] + + +def extract_category_ids_from_result(result: str) -> List[str]: + """Extract valid category ids from an LLM result string.""" + if not result: + return [] + + normalized = result.strip() + if not normalized: + return [] + + lowered = normalized.lower() + if any(keyword in lowered for keyword in ["none", "no relevant", "no_need", "no need"]): + return [] + if any(keyword in normalized for keyword in NO_RESULT_KEYWORDS): + return [] + + category_ids: List[str] = [] + for part in normalized.replace(",", " ").replace(",", " ").replace("\n", " ").split(): + candidate = part.strip() + if candidate in KNOWLEDGE_CATEGORIES and candidate not in category_ids: + category_ids.append(candidate) + + return category_ids + + +async def retrieve_relevant_knowledge( + knowledge_analyzer: Any, + chat_history: List[SessionMessage], +) -> str: + """Retrieve formatted knowledge snippets relevant to the current chat history.""" + store = get_knowledge_store() + categories_summary = store.get_categories_summary() + + try: + category_ids = await knowledge_analyzer.analyze_knowledge_need(chat_history, categories_summary) + if not category_ids: + return "" + return store.get_formatted_knowledge(category_ids) + except Exception: + logger.exception("Failed to retrieve relevant knowledge") + return "" + + +class KnowledgeLearner: + """ + 从最近对话中提取用户画像类知识并写入知识库。 + """ + + def __init__(self, session_id: str) -> None: + self._session_id = session_id + self._store = get_knowledge_store() + self._llm = LLMServiceClient(task_name="utils", request_type="maisaka.knowledge.learn") + self._learning_lock = asyncio.Lock() + self._messages_cache: List[SessionMessage] = [] + + def add_messages(self, messages: List[SessionMessage]) -> None: + """缓存待学习的消息。""" + self._messages_cache.extend(messages) + + def get_cache_size(self) -> int: + """获取缓存消息数量。""" + return len(self._messages_cache) + + async def learn(self) -> int: + """ + 从缓存消息中提取知识并落库。 + + Returns: + 新增入库的知识条数 + """ + if not self._messages_cache: + return 0 + + async with self._learning_lock: + chat_excerpt = self._build_chat_excerpt() + if not chat_excerpt: + return 0 + + prompt = self._build_learning_prompt(chat_excerpt) + try: + result = await self._llm.generate_response( + prompt=prompt, + options=LLMGenerationOptions( + temperature=0.1, + max_tokens=512, + ), + ) + except Exception: + logger.exception("Knowledge learning model call failed") + return 0 + + knowledge_items = self._parse_learning_result(result.response or "") + if not knowledge_items: + logger.debug("Knowledge learning finished without extracted entries") + return 0 + + added_count = 0 + for item in knowledge_items: + category_id = str(item.get("category_id", "")).strip() + content = str(item.get("content", "")).strip() + if not category_id or not content: + continue + + if self._store.add_knowledge( + category_id=category_id, + content=content, + metadata={ + "session_id": self._session_id, + "source": "maisaka_learning", + }, + ): + added_count += 1 + + if added_count > 0: + logger.info( + f"Maisaka knowledge learning finished: session_id={self._session_id} added={added_count}" + ) + else: + logger.debug( + f"Maisaka knowledge learning finished without new entries: session_id={self._session_id}" + ) + + return added_count + + def _build_chat_excerpt(self) -> str: + """ + 构建适合画像提取的对话片段,只保留用户可见文本。 + """ + lines: List[str] = [] + for message in self._messages_cache[-30:]: + if get_message_role(message) == "assistant": + continue + if get_message_role(message) == "tool": + continue + if is_bot_self(message.platform, message.message_info.user_info.user_id): + continue + + raw_text = get_message_text(message).strip() + if not raw_text: + continue + + speaker_name, body = parse_speaker_content(raw_text) + visible_text = (body or raw_text).strip() + if not visible_text: + continue + + speaker = speaker_name or message.message_info.user_info.user_nickname or "用户" + lines.append(f"{speaker}: {visible_text}") + + return "\n".join(lines) + + def _build_learning_prompt(self, chat_excerpt: str) -> str: + """构建知识提取提示词。""" + categories_text = "\n".join( + f"{category_id}. {category_name}" for category_id, category_name in KNOWLEDGE_CATEGORIES.items() + ) + return ( + "你是一个用户画像知识提取器,需要从聊天记录里提取稳定、可复用的用户事实。\n" + "只提取用户明确表达或高置信度可归纳的信息,不要猜测,不要提取一次性情绪,不要重复表述。\n" + "如果没有可提取内容,返回空数组 []。\n" + "输出必须是 JSON 数组,每项格式为 " + '{"category_id":"分类编号","content":"简洁中文陈述"}。\n' + "分类如下:\n" + f"{categories_text}\n\n" + "聊天记录:\n" + f"{chat_excerpt}" + ) + + def _parse_learning_result(self, result: str) -> List[Dict[str, str]]: + """解析模型返回的知识条目。""" + normalized = result.strip() + if not normalized: + return [] + + if "```" in normalized: + normalized = normalized.replace("```json", "").replace("```JSON", "").replace("```", "").strip() + + try: + parsed = json.loads(normalized) + except json.JSONDecodeError: + logger.warning("Knowledge learning result is not valid JSON") + return [] + + if not isinstance(parsed, list): + return [] + + normalized_items: List[Dict[str, str]] = [] + seen_pairs: set[tuple[str, str]] = set() + for item in parsed: + if not isinstance(item, dict): + continue + + category_id = str(item.get("category_id", "")).strip() + content = " ".join(str(item.get("content", "")).strip().split()) + if category_id not in KNOWLEDGE_CATEGORIES: + continue + if not content: + continue + + pair = (category_id, content) + if pair in seen_pairs: + continue + seen_pairs.add(pair) + normalized_items.append( + { + "category_id": category_id, + "content": content, + } + ) + + return normalized_items diff --git a/src/know_u/knowledge_store.py b/src/know_u/knowledge_store.py new file mode 100644 index 00000000..a4b2016c --- /dev/null +++ b/src/know_u/knowledge_store.py @@ -0,0 +1,197 @@ +""" +MaiSaka knowledge store. +""" + +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional + +import json + +# 数据目录位于项目根目录下的 mai_knowledge +PROJECT_ROOT = Path(__file__).resolve().parents[2] +KNOWLEDGE_DATA_DIR = PROJECT_ROOT / "mai_knowledge" +KNOWLEDGE_FILE = KNOWLEDGE_DATA_DIR / "knowledge.json" + + +KNOWLEDGE_CATEGORIES = { + "1": "性别", + "2": "性格", + "3": "饮食口味", + "4": "交友喜好", + "5": "情绪/理性倾向", + "6": "兴趣爱好", + "7": "职业/专业", + "8": "生活习惯", + "9": "价值观", + "10": "沟通风格", + "11": "学习方式", + "12": "压力应对方式", +} + + +class KnowledgeStore: + """ + 简单的 Maisaka 知识存储。 + + 特性: + - 持久化到 JSON 文件 + - 按分类存储用户画像类知识 + - 支持基础去重 + """ + + def __init__(self) -> None: + """初始化知识存储。""" + self._knowledge: Dict[str, List[Dict[str, Any]]] = { + category_id: [] for category_id in KNOWLEDGE_CATEGORIES + } + self._ensure_data_dir() + self._load() + + def _ensure_data_dir(self) -> None: + """确保数据目录存在。""" + KNOWLEDGE_DATA_DIR.mkdir(parents=True, exist_ok=True) + + def _load(self) -> None: + """从文件加载知识数据。""" + if not KNOWLEDGE_FILE.exists(): + self._knowledge = {category_id: [] for category_id in KNOWLEDGE_CATEGORIES} + return + + try: + with open(KNOWLEDGE_FILE, "r", encoding="utf-8") as file: + loaded = json.load(file) + + normalized_knowledge: Dict[str, List[Dict[str, Any]]] = { + category_id: [] for category_id in KNOWLEDGE_CATEGORIES + } + for category_id in KNOWLEDGE_CATEGORIES: + category_items = loaded.get(category_id, []) + if isinstance(category_items, list): + normalized_knowledge[category_id] = [ + item for item in category_items if isinstance(item, dict) + ] + self._knowledge = normalized_knowledge + except Exception: + self._knowledge = {category_id: [] for category_id in KNOWLEDGE_CATEGORIES} + + def _save(self) -> None: + """保存知识数据到文件。""" + with open(KNOWLEDGE_FILE, "w", encoding="utf-8") as file: + json.dump(self._knowledge, file, ensure_ascii=False, indent=2) + + @staticmethod + def _normalize_content(content: str) -> str: + """标准化知识内容,便于去重。""" + return " ".join(str(content).strip().split()) + + def add_knowledge( + self, + category_id: str, + content: str, + metadata: Optional[Dict[str, Any]] = None, + ) -> bool: + """ + 添加一条知识信息。 + + Args: + category_id: 分类编号 + content: 知识内容 + metadata: 附加元数据 + + Returns: + 是否新增成功;若命中去重则返回 False + """ + if category_id not in KNOWLEDGE_CATEGORIES: + return False + + normalized_content = self._normalize_content(content) + if not normalized_content: + return False + + existing_items = self._knowledge.get(category_id, []) + for item in existing_items: + existing_content = self._normalize_content(str(item.get("content", ""))) + if existing_content == normalized_content: + return False + + knowledge_item = { + "id": f"know_{category_id}_{datetime.now().timestamp()}", + "content": normalized_content, + "metadata": metadata or {}, + "created_at": datetime.now().isoformat(), + } + self._knowledge[category_id].append(knowledge_item) + self._save() + return True + + def get_category_knowledge(self, category_id: str) -> List[Dict[str, Any]]: + """获取某个分类下的所有知识。""" + return self._knowledge.get(category_id, []) + + def get_all_knowledge(self) -> Dict[str, List[Dict[str, Any]]]: + """获取全部知识。""" + return self._knowledge + + def get_category_name(self, category_id: str) -> str: + """获取分类名称。""" + return KNOWLEDGE_CATEGORIES.get(category_id, "未知分类") + + def get_categories_summary(self) -> str: + """获取分类摘要,供模型判断是否需要检索。""" + lines: List[str] = [] + for category_id, category_name in KNOWLEDGE_CATEGORIES.items(): + count = len(self._knowledge.get(category_id, [])) + count_text = f"{count}条" if count > 0 else "无数据" + lines.append(f"{category_id}. {category_name} ({count_text})") + return "\n".join(lines) + + def get_formatted_knowledge(self, category_ids: List[str], limit_per_category: int = 5) -> str: + """ + 获取指定分类的格式化知识内容。 + + Args: + category_ids: 分类编号列表 + limit_per_category: 每个分类最多返回多少条 + + Returns: + 格式化后的知识内容 + """ + parts: List[str] = [] + for category_id in category_ids: + items = self.get_category_knowledge(category_id) + if not items: + continue + + category_name = self.get_category_name(category_id) + parts.append(f"【{category_name}】") + + recent_items = items[-limit_per_category:] + for item in recent_items: + content = str(item.get("content", "")).strip() + if content: + parts.append(f"- {content}") + + return "\n".join(parts) + + def get_stats(self) -> Dict[str, Any]: + """获取知识数据统计。""" + total_items = sum(len(items) for items in self._knowledge.values()) + return { + "total_categories": len(KNOWLEDGE_CATEGORIES), + "total_items": total_items, + "data_file": str(KNOWLEDGE_FILE), + "data_exists": KNOWLEDGE_FILE.exists(), + "data_size_kb": KNOWLEDGE_FILE.stat().st_size / 1024 if KNOWLEDGE_FILE.exists() else 0, + } + + +_knowledge_store_instance: Optional[KnowledgeStore] = None + + +def get_knowledge_store() -> KnowledgeStore: + """获取知识存储单例。""" + global _knowledge_store_instance + if _knowledge_store_instance is None: + _knowledge_store_instance = KnowledgeStore() + return _knowledge_store_instance diff --git a/src/maisaka/chat_loop_service.py b/src/maisaka/chat_loop_service.py index edaa61ae..e350e196 100644 --- a/src/maisaka/chat_loop_service.py +++ b/src/maisaka/chat_loop_service.py @@ -15,18 +15,18 @@ from rich.pretty import Pretty from rich.text import Text from src.chat.message_receive.message import SessionMessage +from src.cli.console import console from src.common.data_models.llm_service_data_models import LLMGenerationOptions from src.common.logger import get_logger from src.common.prompt_i18n import load_prompt from src.config.config import global_config +from src.know_u.knowledge import extract_category_ids_from_result from src.llm_models.model_client.base_client import BaseClient from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType from src.llm_models.payload_content.tool_option import ToolCall, ToolDefinitionInput, ToolOption, normalize_tool_options from src.services.llm_service import LLMServiceClient from .builtin_tools import get_builtin_tools -from .console import console -from .knowledge import extract_category_ids_from_result from .message_adapter import ( build_message, format_speaker_content, diff --git a/src/maisaka/knowledge.py b/src/maisaka/knowledge.py deleted file mode 100644 index bb8d340b..00000000 --- a/src/maisaka/knowledge.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -MaiSaka knowledge retrieval helpers. -""" - -from typing import List - -from src.chat.message_receive.message import SessionMessage - -from .knowledge_store import KNOWLEDGE_CATEGORIES, get_knowledge_store - -NO_RESULT_KEYWORDS = [ - "\u65e0", - "\u6ca1\u6709", - "\u4e0d\u9002\u7528", - "\u65e0\u9700", - "\u65e0\u76f8\u5173", -] - - -def extract_category_ids_from_result(result: str) -> List[str]: - """Extract valid category ids from an LLM result string.""" - if not result: - return [] - - normalized = result.strip() - if not normalized: - return [] - - lowered = normalized.lower() - if any(keyword in lowered for keyword in ["none", "no relevant", "no_need", "no need"]): - return [] - if any(keyword in normalized for keyword in NO_RESULT_KEYWORDS): - return [] - - category_ids: List[str] = [] - for part in normalized.replace(",", " ").replace("\uff0c", " ").replace("\n", " ").split(): - candidate = part.strip() - if candidate in KNOWLEDGE_CATEGORIES and candidate not in category_ids: - category_ids.append(candidate) - - return category_ids - - -async def retrieve_relevant_knowledge( - knowledge_analyzer, - chat_history: List[SessionMessage], -) -> str: - """Retrieve formatted knowledge snippets relevant to the current chat history.""" - store = get_knowledge_store() - categories_summary = store.get_categories_summary() - - try: - category_ids = await knowledge_analyzer.analyze_knowledge_need(chat_history, categories_summary) - if not category_ids: - return "" - return store.get_formatted_knowledge(category_ids) - except Exception: - return "" diff --git a/src/maisaka/knowledge_store.py b/src/maisaka/knowledge_store.py deleted file mode 100644 index f91573d2..00000000 --- a/src/maisaka/knowledge_store.py +++ /dev/null @@ -1,190 +0,0 @@ -""" -MaiSaka - 了解列表持久化存储 -存储用户个人特征信息,支持层级结构和本地持久化。 -""" - -import json -import os -from pathlib import Path -from typing import Dict, List, Optional, Any -from datetime import datetime - -# 数据目录 - 项目根目录下的 mai_knowledge -PROJECT_ROOT = Path(os.path.dirname(os.path.abspath(__file__))) -KNOWLEDGE_DATA_DIR = PROJECT_ROOT / "mai_knowledge" -KNOWLEDGE_FILE = KNOWLEDGE_DATA_DIR / "knowledge.json" - - -# 个人特征分类列表(预定义) -KNOWLEDGE_CATEGORIES = { - "1": "性别", - "2": "性格", - "3": "饮食口味", - "4": "交友喜好", - "5": "情绪/理性倾向", - "6": "兴趣爱好", - "7": "职业/专业", - "8": "生活习惯", - "9": "价值观", - "10": "沟通风格", - "11": "学习方式", - "12": "压力应对方式", -} - - -class KnowledgeStore: - """ - 了解列表存储。 - - 特性: - - 持久化到 JSON 文件 - - 层级结构存储(按分类) - - 支持增量更新 - - 启动时自动加载 - """ - - def __init__(self): - """初始化了解存储""" - self._knowledge: Dict[str, List[Dict[str, Any]]] = {category_id: [] for category_id in KNOWLEDGE_CATEGORIES} - self._ensure_data_dir() - self._load() - - def _ensure_data_dir(self): - """确保数据目录存在""" - KNOWLEDGE_DATA_DIR.mkdir(parents=True, exist_ok=True) - - def _load(self): - """从文件加载了解数据""" - if not KNOWLEDGE_FILE.exists(): - self._knowledge = {category_id: [] for category_id in KNOWLEDGE_CATEGORIES} - return - - try: - with open(KNOWLEDGE_FILE, "r", encoding="utf-8") as f: - loaded = json.load(f) - # 确保所有分类都存在 - for category_id in KNOWLEDGE_CATEGORIES: - if category_id not in loaded: - loaded[category_id] = [] - self._knowledge = loaded - except Exception as e: - print(f"[warning]加载了解数据失败: {e}[/warning]") - self._knowledge = {category_id: [] for category_id in KNOWLEDGE_CATEGORIES} - - def _save(self): - """保存了解数据到文件""" - try: - with open(KNOWLEDGE_FILE, "w", encoding="utf-8") as f: - json.dump(self._knowledge, f, ensure_ascii=False, indent=2) - except Exception as e: - print(f"[warning]保存了解数据失败: {e}[/warning]") - - def add_knowledge( - self, - category_id: str, - content: str, - metadata: Optional[Dict[str, Any]] = None, - ) -> bool: - """ - 添加一条了解信息。 - - Args: - category_id: 分类编号 - content: 了解内容 - metadata: 元数据 - - Returns: - 是否添加成功 - """ - if category_id not in KNOWLEDGE_CATEGORIES: - return False - - try: - knowledge_item = { - "id": f"know_{category_id}_{datetime.now().timestamp()}", - "content": content, - "metadata": metadata or {}, - "created_at": datetime.now().isoformat(), - } - self._knowledge[category_id].append(knowledge_item) - self._save() - return True - except Exception: - return False - - def get_category_knowledge(self, category_id: str) -> List[Dict[str, Any]]: - """ - 获取某个分类的所有了解信息。 - - Args: - category_id: 分类编号 - - Returns: - 该分类的所有了解信息 - """ - return self._knowledge.get(category_id, []) - - def get_all_knowledge(self) -> Dict[str, List[Dict[str, Any]]]: - """获取所有了解信息""" - return self._knowledge - - def get_category_name(self, category_id: str) -> str: - """获取分类名称""" - return KNOWLEDGE_CATEGORIES.get(category_id, "未知分类") - - def get_categories_summary(self) -> str: - """获取所有分类的摘要(用于 LLM 展示)""" - lines = [] - for category_id, category_name in KNOWLEDGE_CATEGORIES.items(): - count = len(self._knowledge.get(category_id, [])) - if count > 0: - lines.append(f"{category_id}. {category_name} ({count}条)") - else: - lines.append(f"{category_id}. {category_name} (无数据)") - return "\n".join(lines) - - def get_formatted_knowledge(self, category_ids: List[str]) -> str: - """ - 获取指定分类的了解内容,格式化为文本。 - - Args: - category_ids: 分类编号列表 - - Returns: - 格式化后的了解内容文本 - """ - parts = [] - for category_id in category_ids: - category_name = self.get_category_name(category_id) - items = self.get_category_knowledge(category_id) - - if items: - parts.append(f"【{category_name}】") - for item in items: - content = item.get("content", "") - parts.append(f" - {content}") - - return "\n".join(parts) if parts else "暂无相关了解信息" - - def get_stats(self) -> Dict[str, Any]: - """获取了解数据统计信息""" - total_items = sum(len(items) for items in self._knowledge.values()) - return { - "total_categories": len(KNOWLEDGE_CATEGORIES), - "total_items": total_items, - "data_file": str(KNOWLEDGE_FILE), - "data_exists": KNOWLEDGE_FILE.exists(), - "data_size_kb": KNOWLEDGE_FILE.stat().st_size / 1024 if KNOWLEDGE_FILE.exists() else 0, - } - - -# 全局单例 -_knowledge_store_instance: Optional[KnowledgeStore] = None - - -def get_knowledge_store() -> KnowledgeStore: - """获取了解存储实例(单例模式)""" - global _knowledge_store_instance - if _knowledge_store_instance is None: - _knowledge_store_instance = KnowledgeStore() - return _knowledge_store_instance diff --git a/src/maisaka/runtime.py b/src/maisaka/runtime.py index d5be0c54..c8db017e 100644 --- a/src/maisaka/runtime.py +++ b/src/maisaka/runtime.py @@ -13,9 +13,10 @@ from src.common.data_models.mai_message_data_model import GroupInfo, UserInfo from src.common.logger import get_logger from src.common.utils.utils_config import ExpressionConfigUtils from src.config.config import global_config -from src.mcp_module import MCPManager +from src.know_u.knowledge import KnowledgeLearner from src.learners.expression_learner import ExpressionLearner from src.learners.jargon_miner import JargonMiner +from src.mcp_module import MCPManager from .chat_loop_service import MaisakaChatLoopService from .reasoning_engine import MaisakaReasoningEngine @@ -66,9 +67,11 @@ class MaisakaHeartFlowChatting: self._enable_jargon_learning = jargon_learn self._min_messages_for_extraction = 10 self._min_extraction_interval = 30 - self._last_extraction_time = 0.0 + self._last_expression_extraction_time = 0.0 + self._last_knowledge_extraction_time = 0.0 self._expression_learner = ExpressionLearner(session_id) self._jargon_miner = JargonMiner(session_id, session_name=session_name) + self._knowledge_learner = KnowledgeLearner(session_id) self._reasoning_engine = MaisakaReasoningEngine(self) @@ -157,7 +160,7 @@ class MaisakaHeartFlowChatting: if not cached_messages: break await self._internal_turn_queue.put(cached_messages) - asyncio.create_task(self._trigger_expression_learning(cached_messages)) + asyncio.create_task(self._trigger_batch_learning(cached_messages)) except asyncio.CancelledError: logger.info(f"{self.log_prefix} Maisaka runtime loop cancelled") @@ -223,6 +226,18 @@ class MaisakaHeartFlowChatting: self._agent_state = self._STATE_STOP self._wait_until = None + async def _trigger_batch_learning(self, messages: list[SessionMessage]) -> None: + """按同一批消息触发表达方式、黑话和 knowledge 学习。""" + expression_result, knowledge_result = await asyncio.gather( + self._trigger_expression_learning(messages), + self._trigger_knowledge_learning(messages), + return_exceptions=True, + ) + if isinstance(expression_result, Exception): + logger.error(f"{self.log_prefix} expression learning task crashed: {expression_result}") + if isinstance(knowledge_result, Exception): + logger.error(f"{self.log_prefix} knowledge learning task crashed: {knowledge_result}") + async def _trigger_expression_learning(self, messages: list[SessionMessage]) -> None: """Trigger expression learning from the newly collected batch.""" self._expression_learner.add_messages(messages) @@ -231,7 +246,7 @@ class MaisakaHeartFlowChatting: logger.debug(f"{self.log_prefix} expression learning disabled, skip this batch") return - elapsed = time.time() - self._last_extraction_time + elapsed = time.time() - self._last_expression_extraction_time if elapsed < self._min_extraction_interval: logger.debug( f"{self.log_prefix} expression learning interval not reached: " @@ -248,7 +263,7 @@ class MaisakaHeartFlowChatting: ) return - self._last_extraction_time = time.time() + self._last_expression_extraction_time = time.time() logger.info( f"{self.log_prefix} starting expression learning: " f"new_batch={len(messages)} learner_cache={cache_size} " @@ -266,6 +281,47 @@ class MaisakaHeartFlowChatting: except Exception: logger.exception(f"{self.log_prefix} expression learning failed") + async def _trigger_knowledge_learning(self, messages: list[SessionMessage]) -> None: + """Trigger knowledge learning from the newly collected batch.""" + self._knowledge_learner.add_messages(messages) + + if not global_config.maisaka.enable_knowledge_module: + logger.debug(f"{self.log_prefix} knowledge learning disabled, skip this batch") + return + + elapsed = time.time() - self._last_knowledge_extraction_time + if elapsed < self._min_extraction_interval: + logger.debug( + f"{self.log_prefix} knowledge learning interval not reached: " + f"elapsed={elapsed:.2f}s threshold={self._min_extraction_interval}s" + ) + return + + cache_size = self._knowledge_learner.get_cache_size() + if cache_size < self._min_messages_for_extraction: + logger.debug( + f"{self.log_prefix} knowledge learning skipped due to cache size: " + f"learner_cache={cache_size} threshold={self._min_messages_for_extraction} " + f"message_cache_total={len(self.message_cache)}" + ) + return + + self._last_knowledge_extraction_time = time.time() + logger.info( + f"{self.log_prefix} starting knowledge learning: " + f"new_batch={len(messages)} learner_cache={cache_size} " + f"message_cache_total={len(self.message_cache)}" + ) + + try: + added_count = await self._knowledge_learner.learn() + if added_count > 0: + logger.info(f"{self.log_prefix} knowledge learning finished: added={added_count}") + else: + logger.debug(f"{self.log_prefix} knowledge learning finished without usable result") + except Exception: + logger.exception(f"{self.log_prefix} knowledge learning failed") + async def _init_mcp(self) -> None: """Initialize MCP tools and inject them into the planner.""" config_path = Path(__file__).resolve().parents[2] / "config" / "mcp_config.json" diff --git a/src/maisaka/tool_handlers.py b/src/maisaka/tool_handlers.py index dc326c09..f975517c 100644 --- a/src/maisaka/tool_handlers.py +++ b/src/maisaka/tool_handlers.py @@ -12,10 +12,10 @@ import os from rich.panel import Panel from src.chat.message_receive.message import SessionMessage +from src.cli.console import console +from src.cli.input_reader import InputReader from src.llm_models.payload_content.tool_option import ToolCall -from .console import console -from .input_reader import InputReader from .message_adapter import build_message if TYPE_CHECKING: diff --git a/src/mcp_module/config.py b/src/mcp_module/config.py index f4bfa763..7443d3c2 100644 --- a/src/mcp_module/config.py +++ b/src/mcp_module/config.py @@ -27,7 +27,7 @@ from typing import Optional import json import os -from src.maisaka.console import console +from src.cli.console import console DEFAULT_MCP_CONFIG_PATH = Path(__file__).resolve().parents[2] / "config" / "mcp_config.json" diff --git a/src/mcp_module/connection.py b/src/mcp_module/connection.py index 57401950..92e0f8e7 100644 --- a/src/mcp_module/connection.py +++ b/src/mcp_module/connection.py @@ -6,7 +6,7 @@ MaiSaka - 单个 MCP 服务器连接管理 from contextlib import AsyncExitStack from typing import Any, Optional -from src.maisaka.console import console +from src.cli.console import console from .config import MCPServerConfig diff --git a/src/mcp_module/manager.py b/src/mcp_module/manager.py index 8be0a4d7..95511e10 100644 --- a/src/mcp_module/manager.py +++ b/src/mcp_module/manager.py @@ -5,7 +5,7 @@ MaiSaka - MCP 管理器 from typing import Optional -from src.maisaka.console import console +from src.cli.console import console from .config import DEFAULT_MCP_CONFIG_PATH, MCPServerConfig, load_mcp_config from .connection import MCPConnection, MCP_AVAILABLE