ref:分离know模块和cli模块

This commit is contained in:
SengokuCola
2026-03-29 15:06:50 +08:00
parent f32edfa732
commit 8cb0ad3d42
15 changed files with 556 additions and 271 deletions

3
src/cli/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
"""
CLI startup and interaction package.
"""

View File

@@ -1,4 +1,4 @@
"""MaiSaka 终端输出组件。""" """MaiSaka terminal console helpers."""
from rich.console import Console from rich.console import Console
from rich.theme import Theme from rich.theme import Theme

View File

@@ -1,12 +1,12 @@
""" """
MaiSaka - 异步输入读取器 MaiSaka asynchronous stdin reader for CLI interaction.
将阻塞的标准输入读取放到后台线程中 asyncio 循环安全消费
""" """
from typing import Optional
import asyncio import asyncio
import sys import sys
import threading import threading
from typing import Optional
class InputReader: class InputReader:

View File

@@ -8,24 +8,23 @@ from typing import Optional
import asyncio import asyncio
import os import os
import time
from rich import box from rich import box
from rich.markdown import Markdown from rich.markdown import Markdown
from rich.panel import Panel from rich.panel import Panel
from rich.text import Text from rich.text import Text
from src.know_u.knowledge import KnowledgeLearner, retrieve_relevant_knowledge
from src.know_u.knowledge_store import get_knowledge_store
from src.chat.message_receive.message import SessionMessage from src.chat.message_receive.message import SessionMessage
from src.chat.replyer.maisaka_generator import MaisakaReplyGenerator from src.chat.replyer.maisaka_generator import MaisakaReplyGenerator
from src.config.config import config_manager, global_config from src.config.config import config_manager, global_config
from src.mcp_module import MCPManager from src.mcp_module import MCPManager
from .chat_loop_service import MaisakaChatLoopService from src.maisaka.chat_loop_service import MaisakaChatLoopService
from .console import console from src.maisaka.message_adapter import build_message, format_speaker_content, remove_last_perception
from .input_reader import InputReader from src.maisaka.tool_handlers import (
from .knowledge import retrieve_relevant_knowledge
from .knowledge_store import get_knowledge_store
from .message_adapter import build_message, format_speaker_content, remove_last_perception
from .tool_handlers import (
ToolHandlerContext, ToolHandlerContext,
handle_mcp_tool, handle_mcp_tool,
handle_stop, handle_stop,
@@ -33,6 +32,9 @@ from .tool_handlers import (
handle_wait, handle_wait,
) )
from .console import console
from .input_reader import InputReader
class BufferCLI: class BufferCLI:
"""Maisaka 命令行交互入口。""" """Maisaka 命令行交互入口。"""
@@ -43,6 +45,10 @@ class BufferCLI:
self._reader = InputReader() self._reader = InputReader()
self._chat_history: Optional[list[SessionMessage]] = None self._chat_history: Optional[list[SessionMessage]] = None
self._knowledge_store = get_knowledge_store() self._knowledge_store = get_knowledge_store()
self._knowledge_learner = KnowledgeLearner("maisaka_cli")
self._knowledge_min_messages_for_extraction = 10
self._knowledge_min_extraction_interval = 30
self._last_knowledge_extraction_time = 0.0
knowledge_stats = self._knowledge_store.get_stats() knowledge_stats = self._knowledge_store.get_stats()
if knowledge_stats["total_items"] > 0: if knowledge_stats["total_items"] > 0:
@@ -112,6 +118,7 @@ class BufferCLI:
self._chat_start_time = now self._chat_start_time = now
self._last_assistant_response_time = None self._last_assistant_response_time = None
self._chat_history = self._chat_loop_service.build_chat_context(user_text) self._chat_history = self._chat_loop_service.build_chat_context(user_text)
self._trigger_knowledge_learning([self._chat_history[-1]])
else: else:
self._chat_history.append( self._chat_history.append(
build_message( build_message(
@@ -123,9 +130,37 @@ class BufferCLI:
), ),
) )
) )
self._trigger_knowledge_learning([self._chat_history[-1]])
await self._run_llm_loop(self._chat_history) await self._run_llm_loop(self._chat_history)
def _trigger_knowledge_learning(self, messages: list[SessionMessage]) -> None:
    """Cache new CLI messages and start background knowledge learning when due.

    Nothing is cached while ``global_config.maisaka.enable_knowledge_module``
    is off. Learning starts only when both conditions hold: at least
    ``_knowledge_min_extraction_interval`` has elapsed since the previous
    extraction, and the learner cache holds at least
    ``_knowledge_min_messages_for_extraction`` messages.

    Args:
        messages: Newly received session messages to hand to the learner.
    """
    if not global_config.maisaka.enable_knowledge_module:
        return

    self._knowledge_learner.add_messages(messages)

    elapsed = time.monotonic() - self._last_knowledge_extraction_time
    if elapsed < self._knowledge_min_extraction_interval:
        return

    cache_size = self._knowledge_learner.get_cache_size()
    if cache_size < self._knowledge_min_messages_for_extraction:
        return

    # Stamp the time before scheduling so a burst of messages cannot start
    # several overlapping learning runs.
    self._last_knowledge_extraction_time = time.monotonic()

    task = asyncio.create_task(self._run_knowledge_learning())
    # The event loop only keeps weak references to tasks, so a fire-and-forget
    # task may be garbage-collected before it finishes. Hold a strong reference
    # until the task is done. The set is created lazily so this method does not
    # depend on any extra initialisation elsewhere.
    pending_tasks = getattr(self, "_knowledge_learning_tasks", None)
    if pending_tasks is None:
        pending_tasks = set()
        self._knowledge_learning_tasks = pending_tasks
    pending_tasks.add(task)
    task.add_done_callback(pending_tasks.discard)
async def _run_knowledge_learning(self) -> None:
    """Run one knowledge-learning pass in the background.

    Any failure is reported on the console instead of being raised, so the
    main conversation is never interrupted by the learner.
    """
    try:
        newly_added = await self._knowledge_learner.learn()
        if newly_added > 0:
            # Only report progress when the user asked to see internal activity.
            if global_config.maisaka.show_thinking:
                console.print(f"[muted]Knowledge learning added {newly_added} item(s).[/muted]")
    except Exception as exc:
        console.print(f"[warning]Knowledge learning failed: {exc}[/warning]")
async def _run_llm_loop(self, chat_history: list[SessionMessage]) -> None: async def _run_llm_loop(self, chat_history: list[SessionMessage]) -> None:
""" """
Main inner loop for the Maisaka planner. Main inner loop for the Maisaka planner.

3
src/know_u/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
"""
Knowledge utilities package for Maisaka.
"""

239
src/know_u/knowledge.py Normal file
View File

@@ -0,0 +1,239 @@
"""
Maisaka knowledge retrieval and learning helpers.
"""
from typing import Any, Dict, List
import asyncio
import json
from src.chat.message_receive.message import SessionMessage
from src.chat.utils.utils import is_bot_self
from src.common.data_models.llm_service_data_models import LLMGenerationOptions
from src.common.logger import get_logger
from src.services.llm_service import LLMServiceClient
from src.know_u.knowledge_store import KNOWLEDGE_CATEGORIES, get_knowledge_store
from src.maisaka.message_adapter import get_message_role, get_message_text, parse_speaker_content
logger = get_logger("maisaka_knowledge")
# Chinese phrases that mean "nothing relevant" in an LLM answer.
# The first entry must be the single character U+65E0; an empty string here
# would match every input (the empty string is a substring of any string) and
# make the extractor always return an empty list.
NO_RESULT_KEYWORDS = [
    "\u65e0",
    "没有",
    "不适用",
    "无需",
    "无相关",
]


def extract_category_ids_from_result(result: str) -> List[str]:
    """Extract valid category ids from an LLM result string.

    Args:
        result: Raw model output, expected to list category ids separated by
            ASCII commas, full-width commas, newlines or other whitespace.

    Returns:
        The ids that are keys of ``KNOWLEDGE_CATEGORIES``, in first-seen order
        and without duplicates. An empty list is returned when the input is
        empty or contains any "no result" phrase (English or Chinese).
    """
    if not result:
        return []

    normalized = result.strip()
    if not normalized:
        return []

    lowered = normalized.lower()
    if any(keyword in lowered for keyword in ("none", "no relevant", "no_need", "no need")):
        return []

    # Skip empty keywords defensively: "" is contained in every string.
    if any(keyword and keyword in normalized for keyword in NO_RESULT_KEYWORDS):
        return []

    # Treat ASCII comma, full-width comma (U+FF0C) and newline as separators.
    # The separator must never be "": str.replace("", " ") inserts a space
    # between every character and would split an id such as "10" into "1", "0".
    tokens = normalized.replace(",", " ").replace("\uff0c", " ").replace("\n", " ").split()

    category_ids: List[str] = []
    for token in tokens:
        if token in KNOWLEDGE_CATEGORIES and token not in category_ids:
            category_ids.append(token)
    return category_ids
async def retrieve_relevant_knowledge(
    knowledge_analyzer: Any,
    chat_history: List[SessionMessage],
) -> str:
    """Return formatted knowledge snippets relevant to the current chat history.

    The analyzer is asked which categories are needed, given a summary of what
    the store holds. Any failure is logged and yields an empty string.

    Args:
        knowledge_analyzer: Object exposing an awaitable
            ``analyze_knowledge_need(chat_history, categories_summary)``.
        chat_history: Messages of the current conversation.

    Returns:
        Formatted knowledge text, or ``""`` when no category was selected or
        an error occurred.
    """
    knowledge_store = get_knowledge_store()
    summary = knowledge_store.get_categories_summary()

    try:
        selected_ids = await knowledge_analyzer.analyze_knowledge_need(chat_history, summary)
        formatted = knowledge_store.get_formatted_knowledge(selected_ids) if selected_ids else ""
    except Exception:
        logger.exception("Failed to retrieve relevant knowledge")
        formatted = ""
    return formatted
class KnowledgeLearner:
    """Extract user-profile knowledge from recent chat messages and store it.

    Messages are buffered with :meth:`add_messages`. :meth:`learn` renders the
    buffered messages into a prompt, asks the LLM for a JSON array of
    ``{"category_id", "content"}`` entries and writes the valid ones to the
    shared knowledge store.
    """

    # Only this many of the most recent messages are ever read when building
    # the excerpt, so the buffer is capped at the same size. Without the cap it
    # would grow for the whole lifetime of the session.
    _MAX_CACHED_MESSAGES = 30

    def __init__(self, session_id: str) -> None:
        """Create a learner bound to one chat session.

        Args:
            session_id: Identifier recorded in the metadata of stored entries.
        """
        self._session_id = session_id
        self._store = get_knowledge_store()
        self._llm = LLMServiceClient(task_name="utils", request_type="maisaka.knowledge.learn")
        # Serialises learn() so concurrent triggers do not overlap.
        self._learning_lock = asyncio.Lock()
        self._messages_cache: List[SessionMessage] = []

    def add_messages(self, messages: List[SessionMessage]) -> None:
        """Buffer messages for a later learning pass.

        Only the most recent ``_MAX_CACHED_MESSAGES`` messages are retained.
        """
        self._messages_cache.extend(messages)
        # No-op while the buffer is at or below the cap.
        del self._messages_cache[: -self._MAX_CACHED_MESSAGES]

    def get_cache_size(self) -> int:
        """Return the number of buffered messages (at most ``_MAX_CACHED_MESSAGES``)."""
        return len(self._messages_cache)

    async def learn(self) -> int:
        """Extract knowledge from the buffered messages and persist it.

        Returns:
            Number of entries newly added to the store. ``0`` when there is
            nothing to learn from, the model call fails, or the model output
            contains no usable entry.
        """
        if not self._messages_cache:
            return 0

        async with self._learning_lock:
            chat_excerpt = self._build_chat_excerpt()
            if not chat_excerpt:
                return 0

            prompt = self._build_learning_prompt(chat_excerpt)
            try:
                result = await self._llm.generate_response(
                    prompt=prompt,
                    options=LLMGenerationOptions(
                        temperature=0.1,
                        max_tokens=512,
                    ),
                )
            except Exception:
                logger.exception("Knowledge learning model call failed")
                return 0

            knowledge_items = self._parse_learning_result(result.response or "")
            if not knowledge_items:
                logger.debug("Knowledge learning finished without extracted entries")
                return 0

            added_count = 0
            for item in knowledge_items:
                category_id = str(item.get("category_id", "")).strip()
                content = str(item.get("content", "")).strip()
                if not category_id or not content:
                    continue

                # add_knowledge returns False for duplicates, so only new
                # entries are counted.
                if self._store.add_knowledge(
                    category_id=category_id,
                    content=content,
                    metadata={
                        "session_id": self._session_id,
                        "source": "maisaka_learning",
                    },
                ):
                    added_count += 1

            if added_count > 0:
                logger.info(
                    f"Maisaka knowledge learning finished: session_id={self._session_id} added={added_count}"
                )
            else:
                logger.debug(
                    f"Maisaka knowledge learning finished without new entries: session_id={self._session_id}"
                )
            return added_count

    def _build_chat_excerpt(self) -> str:
        """Render the most recent buffered messages as ``speaker: text`` lines.

        Messages with the ``assistant`` or ``tool`` role, messages sent by the
        bot itself, and messages without text are skipped.
        """
        lines: List[str] = []
        for message in self._messages_cache[-self._MAX_CACHED_MESSAGES:]:
            if get_message_role(message) in ("assistant", "tool"):
                continue
            if is_bot_self(message.platform, message.message_info.user_info.user_id):
                continue

            raw_text = get_message_text(message).strip()
            if not raw_text:
                continue

            speaker_name, body = parse_speaker_content(raw_text)
            # Fall back to the raw text when no separate body was parsed.
            visible_text = (body or raw_text).strip()
            if not visible_text:
                continue

            speaker = speaker_name or message.message_info.user_info.user_nickname or "用户"
            lines.append(f"{speaker}: {visible_text}")

        return "\n".join(lines)

    def _build_learning_prompt(self, chat_excerpt: str) -> str:
        """Build the extraction prompt listing all categories and the excerpt."""
        categories_text = "\n".join(
            f"{category_id}. {category_name}" for category_id, category_name in KNOWLEDGE_CATEGORIES.items()
        )
        return (
            "你是一个用户画像知识提取器,需要从聊天记录里提取稳定、可复用的用户事实。\n"
            "只提取用户明确表达或高置信度可归纳的信息,不要猜测,不要提取一次性情绪,不要重复表述。\n"
            "如果没有可提取内容,返回空数组 []。\n"
            "输出必须是 JSON 数组,每项格式为 "
            '{"category_id":"分类编号","content":"简洁中文陈述"}。\n'
            "分类如下:\n"
            f"{categories_text}\n\n"
            "聊天记录:\n"
            f"{chat_excerpt}"
        )

    def _parse_learning_result(self, result: str) -> List[Dict[str, str]]:
        """Parse the model output into validated, de-duplicated entries.

        Args:
            result: Raw model output; Markdown code fences are stripped first.

        Returns:
            A list of ``{"category_id", "content"}`` dicts whose category is a
            key of ``KNOWLEDGE_CATEGORIES`` and whose content is non-empty with
            whitespace collapsed. An empty list when the output is not a JSON
            array.
        """
        normalized = result.strip()
        if not normalized:
            return []

        if "```" in normalized:
            normalized = normalized.replace("```json", "").replace("```JSON", "").replace("```", "").strip()

        try:
            parsed = json.loads(normalized)
        except json.JSONDecodeError:
            logger.warning("Knowledge learning result is not valid JSON")
            return []

        if not isinstance(parsed, list):
            return []

        normalized_items: List[Dict[str, str]] = []
        seen_pairs: set[tuple[str, str]] = set()
        for item in parsed:
            if not isinstance(item, dict):
                continue

            category_id = str(item.get("category_id", "")).strip()
            content = " ".join(str(item.get("content", "")).strip().split())
            if category_id not in KNOWLEDGE_CATEGORIES:
                continue
            if not content:
                continue

            pair = (category_id, content)
            if pair in seen_pairs:
                continue

            seen_pairs.add(pair)
            normalized_items.append(
                {
                    "category_id": category_id,
                    "content": content,
                }
            )

        return normalized_items

View File

@@ -0,0 +1,197 @@
"""
MaiSaka knowledge store.
"""
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
import json
# The data directory "mai_knowledge" lives under the project root.
# parents[2] goes three directory levels up from this file's resolved path.
# NOTE(review): assumes this module sits two package levels below the project
# root (e.g. src/know_u/); confirm if the module is moved.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
KNOWLEDGE_DATA_DIR = PROJECT_ROOT / "mai_knowledge"
KNOWLEDGE_FILE = KNOWLEDGE_DATA_DIR / "knowledge.json"
# Predefined user-profile categories: category id (as a string) -> display name.
KNOWLEDGE_CATEGORIES = {
    "1": "性别",
    "2": "性格",
    "3": "饮食口味",
    "4": "交友喜好",
    "5": "情绪/理性倾向",
    "6": "兴趣爱好",
    "7": "职业/专业",
    "8": "生活习惯",
    "9": "价值观",
    "10": "沟通风格",
    "11": "学习方式",
    "12": "压力应对方式",
}


class KnowledgeStore:
    """Simple JSON-backed store for categorised user-profile knowledge.

    Features:
    - persisted to ``KNOWLEDGE_FILE`` as JSON
    - entries grouped by the ids in ``KNOWLEDGE_CATEGORIES``
    - basic de-duplication on whitespace-normalised content
    """

    def __init__(self) -> None:
        """Create the data directory if needed and load any saved knowledge."""
        self._knowledge: Dict[str, List[Dict[str, Any]]] = self._empty_knowledge()
        self._ensure_data_dir()
        self._load()

    @staticmethod
    def _empty_knowledge() -> Dict[str, List[Dict[str, Any]]]:
        """Return a fresh mapping with an empty list for every category."""
        return {category_id: [] for category_id in KNOWLEDGE_CATEGORIES}

    def _ensure_data_dir(self) -> None:
        """Make sure the data directory exists."""
        KNOWLEDGE_DATA_DIR.mkdir(parents=True, exist_ok=True)

    def _load(self) -> None:
        """Load knowledge from disk, keeping only well-formed data.

        A missing, unreadable or malformed file leaves the store empty.
        Unknown categories and non-dict items are dropped.
        """
        self._knowledge = self._empty_knowledge()
        if not KNOWLEDGE_FILE.exists():
            return

        try:
            with open(KNOWLEDGE_FILE, "r", encoding="utf-8") as file:
                loaded = json.load(file)
        except Exception:
            # Deliberate best-effort: a corrupt or unreadable file must not
            # prevent start-up, so fall back to an empty store.
            return

        if not isinstance(loaded, dict):
            return

        for category_id in KNOWLEDGE_CATEGORIES:
            category_items = loaded.get(category_id, [])
            if isinstance(category_items, list):
                self._knowledge[category_id] = [item for item in category_items if isinstance(item, dict)]

    def _save(self) -> None:
        """Persist all knowledge to ``KNOWLEDGE_FILE``.

        The data is written to a sibling temporary file and then moved over the
        target, so an interrupted write cannot leave a truncated JSON file.

        Raises:
            OSError: If the file cannot be written or replaced.
        """
        temp_file = KNOWLEDGE_FILE.with_name(f"{KNOWLEDGE_FILE.name}.tmp")
        with open(temp_file, "w", encoding="utf-8") as file:
            json.dump(self._knowledge, file, ensure_ascii=False, indent=2)
        temp_file.replace(KNOWLEDGE_FILE)

    @staticmethod
    def _normalize_content(content: str) -> str:
        """Collapse all whitespace runs to single spaces and trim the ends."""
        return " ".join(str(content).strip().split())

    def add_knowledge(
        self,
        category_id: str,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """Add one knowledge entry and persist the store.

        Args:
            category_id: Category id; must be a key of ``KNOWLEDGE_CATEGORIES``.
            content: Knowledge text; stored in whitespace-normalised form.
            metadata: Optional extra data stored with the entry.

        Returns:
            ``True`` when a new entry was stored; ``False`` for an unknown
            category, empty content, or a duplicate of an existing entry.

        Raises:
            OSError: If saving fails; the in-memory entry is removed again so
                memory and disk stay consistent.
        """
        if category_id not in KNOWLEDGE_CATEGORIES:
            return False

        normalized_content = self._normalize_content(content)
        if not normalized_content:
            return False

        category_items = self._knowledge.setdefault(category_id, [])
        for item in category_items:
            if self._normalize_content(str(item.get("content", ""))) == normalized_content:
                return False

        knowledge_item = {
            "id": f"know_{category_id}_{datetime.now().timestamp()}",
            "content": normalized_content,
            "metadata": metadata or {},
            "created_at": datetime.now().isoformat(),
        }
        category_items.append(knowledge_item)
        try:
            self._save()
        except Exception:
            # Roll back so a failed save does not leave an unsaved entry that
            # would later be reported as a duplicate.
            category_items.pop()
            raise
        return True

    def get_category_knowledge(self, category_id: str) -> List[Dict[str, Any]]:
        """Return all entries of one category (empty list if unknown)."""
        return self._knowledge.get(category_id, [])

    def get_all_knowledge(self) -> Dict[str, List[Dict[str, Any]]]:
        """Return the full category -> entries mapping."""
        return self._knowledge

    def get_category_name(self, category_id: str) -> str:
        """Return the display name of a category, or a placeholder if unknown."""
        return KNOWLEDGE_CATEGORIES.get(category_id, "未知分类")

    def get_categories_summary(self) -> str:
        """Return one line per category with its entry count, for the model."""
        lines: List[str] = []
        for category_id, category_name in KNOWLEDGE_CATEGORIES.items():
            count = len(self._knowledge.get(category_id, []))
            count_text = f"{count}" if count > 0 else "无数据"
            lines.append(f"{category_id}. {category_name} ({count_text})")
        return "\n".join(lines)

    def get_formatted_knowledge(self, category_ids: List[str], limit_per_category: int = 5) -> str:
        """Format the most recent entries of the given categories as text.

        Args:
            category_ids: Category ids to include, in output order.
            limit_per_category: Maximum number of (most recent) entries per
                category. A value of zero or less yields no entries.

        Returns:
            A category-name line followed by ``- content`` lines for each
            non-empty category, joined by newlines; ``""`` when nothing matches.
        """
        # items[-0:] would be the whole list, so a non-positive limit has to be
        # handled explicitly.
        if limit_per_category <= 0:
            return ""

        parts: List[str] = []
        for category_id in category_ids:
            items = self.get_category_knowledge(category_id)
            if not items:
                continue

            category_name = self.get_category_name(category_id)
            parts.append(f"{category_name}")
            for item in items[-limit_per_category:]:
                content = str(item.get("content", "")).strip()
                if content:
                    parts.append(f"- {content}")

        return "\n".join(parts)

    def get_stats(self) -> Dict[str, Any]:
        """Return counts and data-file information for the store."""
        total_items = sum(len(items) for items in self._knowledge.values())
        data_exists = KNOWLEDGE_FILE.exists()
        return {
            "total_categories": len(KNOWLEDGE_CATEGORIES),
            "total_items": total_items,
            "data_file": str(KNOWLEDGE_FILE),
            "data_exists": data_exists,
            "data_size_kb": KNOWLEDGE_FILE.stat().st_size / 1024 if data_exists else 0,
        }
# Process-wide store instance, created on first use.
_knowledge_store_instance: Optional[KnowledgeStore] = None


def get_knowledge_store() -> KnowledgeStore:
    """Return the shared ``KnowledgeStore``, creating it on the first call."""
    global _knowledge_store_instance
    store = _knowledge_store_instance
    if store is None:
        store = KnowledgeStore()
        _knowledge_store_instance = store
    return store

View File

@@ -15,18 +15,18 @@ from rich.pretty import Pretty
from rich.text import Text from rich.text import Text
from src.chat.message_receive.message import SessionMessage from src.chat.message_receive.message import SessionMessage
from src.cli.console import console
from src.common.data_models.llm_service_data_models import LLMGenerationOptions from src.common.data_models.llm_service_data_models import LLMGenerationOptions
from src.common.logger import get_logger from src.common.logger import get_logger
from src.common.prompt_i18n import load_prompt from src.common.prompt_i18n import load_prompt
from src.config.config import global_config from src.config.config import global_config
from src.know_u.knowledge import extract_category_ids_from_result
from src.llm_models.model_client.base_client import BaseClient from src.llm_models.model_client.base_client import BaseClient
from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType from src.llm_models.payload_content.message import Message, MessageBuilder, RoleType
from src.llm_models.payload_content.tool_option import ToolCall, ToolDefinitionInput, ToolOption, normalize_tool_options from src.llm_models.payload_content.tool_option import ToolCall, ToolDefinitionInput, ToolOption, normalize_tool_options
from src.services.llm_service import LLMServiceClient from src.services.llm_service import LLMServiceClient
from .builtin_tools import get_builtin_tools from .builtin_tools import get_builtin_tools
from .console import console
from .knowledge import extract_category_ids_from_result
from .message_adapter import ( from .message_adapter import (
build_message, build_message,
format_speaker_content, format_speaker_content,

View File

@@ -1,58 +0,0 @@
"""
MaiSaka knowledge retrieval helpers.
"""
from typing import List
from src.chat.message_receive.message import SessionMessage
from .knowledge_store import KNOWLEDGE_CATEGORIES, get_knowledge_store
# Chinese phrases (written as escapes) that mean "nothing relevant".
NO_RESULT_KEYWORDS = [
    "\u65e0",
    "\u6ca1\u6709",
    "\u4e0d\u9002\u7528",
    "\u65e0\u9700",
    "\u65e0\u76f8\u5173",
]


def extract_category_ids_from_result(result: str) -> List[str]:
    """Extract valid category ids from an LLM result string.

    Returns the ids that are keys of ``KNOWLEDGE_CATEGORIES`` in first-seen
    order without duplicates, or an empty list when the input is empty or
    contains an English or Chinese "no result" phrase.
    """
    normalized = result.strip() if result else ""
    if not normalized:
        return []

    lowered = normalized.lower()
    english_markers = ["none", "no relevant", "no_need", "no need"]
    for marker in english_markers:
        if marker in lowered:
            return []
    for marker in NO_RESULT_KEYWORDS:
        if marker in normalized:
            return []

    # Map ASCII comma, full-width comma and newline to spaces in one pass.
    separators = str.maketrans({",": " ", "\uff0c": " ", "\n": " "})
    tokens = normalized.translate(separators).split()

    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(token for token in tokens if token in KNOWLEDGE_CATEGORIES))
async def retrieve_relevant_knowledge(
    knowledge_analyzer,
    chat_history: List[SessionMessage],
) -> str:
    """Return formatted knowledge snippets relevant to the current chat history.

    The analyzer chooses the categories from a summary of the store. Any error
    is swallowed and yields an empty string.
    """
    knowledge_store = get_knowledge_store()
    summary = knowledge_store.get_categories_summary()

    try:
        selected_ids = await knowledge_analyzer.analyze_knowledge_need(chat_history, summary)
        formatted = knowledge_store.get_formatted_knowledge(selected_ids) if selected_ids else ""
    except Exception:
        formatted = ""
    return formatted

View File

@@ -1,190 +0,0 @@
"""
MaiSaka - 了解列表持久化存储
存储用户个人特征信息,支持层级结构和本地持久化。
"""
import json
import os
from pathlib import Path
from typing import Dict, List, Optional, Any
from datetime import datetime
# Data directory "mai_knowledge".
# NOTE(review): the original comment placed it under the project root, but
# PROJECT_ROOT below is the directory containing this module. Confirm which
# location is intended.
PROJECT_ROOT = Path(os.path.dirname(os.path.abspath(__file__)))
KNOWLEDGE_DATA_DIR = PROJECT_ROOT / "mai_knowledge"
KNOWLEDGE_FILE = KNOWLEDGE_DATA_DIR / "knowledge.json"
# Predefined personal-trait categories: category id (as a string) -> name.
KNOWLEDGE_CATEGORIES = {
    "1": "性别",
    "2": "性格",
    "3": "饮食口味",
    "4": "交友喜好",
    "5": "情绪/理性倾向",
    "6": "兴趣爱好",
    "7": "职业/专业",
    "8": "生活习惯",
    "9": "价值观",
    "10": "沟通风格",
    "11": "学习方式",
    "12": "压力应对方式",
}
class KnowledgeStore:
    """Persistent store for what is known about the user.

    Features:
    - persisted to a JSON file
    - entries grouped by category
    - incremental additions
    - loaded automatically on construction
    """

    def __init__(self):
        """Create the data directory if needed and load saved knowledge."""
        self._knowledge: Dict[str, List[Dict[str, Any]]] = {category_id: [] for category_id in KNOWLEDGE_CATEGORIES}
        self._ensure_data_dir()
        self._load()

    def _ensure_data_dir(self):
        """Make sure the data directory exists."""
        KNOWLEDGE_DATA_DIR.mkdir(parents=True, exist_ok=True)

    def _load(self):
        """Load knowledge from the JSON file, falling back to an empty store."""
        if not KNOWLEDGE_FILE.exists():
            self._knowledge = {category_id: [] for category_id in KNOWLEDGE_CATEGORIES}
            return

        try:
            with open(KNOWLEDGE_FILE, "r", encoding="utf-8") as handle:
                data = json.load(handle)
            # Make sure every predefined category has a bucket.
            for category_id in KNOWLEDGE_CATEGORIES:
                if category_id not in data:
                    data[category_id] = []
            self._knowledge = data
        except Exception as exc:
            print(f"[warning]加载了解数据失败: {exc}[/warning]")
            self._knowledge = {category_id: [] for category_id in KNOWLEDGE_CATEGORIES}

    def _save(self):
        """Write all knowledge to the JSON file; failures are only printed."""
        try:
            with open(KNOWLEDGE_FILE, "w", encoding="utf-8") as handle:
                json.dump(self._knowledge, handle, ensure_ascii=False, indent=2)
        except Exception as exc:
            print(f"[warning]保存了解数据失败: {exc}[/warning]")

    def add_knowledge(
        self,
        category_id: str,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """Add one knowledge entry and save the store.

        Args:
            category_id: Category id; must be a key of ``KNOWLEDGE_CATEGORIES``.
            content: Knowledge text.
            metadata: Optional extra data stored with the entry.

        Returns:
            ``True`` when the entry was added; ``False`` for an unknown
            category or when an exception occurred.
        """
        if category_id not in KNOWLEDGE_CATEGORIES:
            return False

        try:
            entry = {
                "id": f"know_{category_id}_{datetime.now().timestamp()}",
                "content": content,
                "metadata": metadata or {},
                "created_at": datetime.now().isoformat(),
            }
            self._knowledge[category_id].append(entry)
            self._save()
        except Exception:
            return False
        return True

    def get_category_knowledge(self, category_id: str) -> List[Dict[str, Any]]:
        """Return all entries of one category (empty list if unknown).

        Args:
            category_id: Category id to look up.
        """
        entries = self._knowledge.get(category_id, [])
        return entries

    def get_all_knowledge(self) -> Dict[str, List[Dict[str, Any]]]:
        """Return the full category -> entries mapping."""
        return self._knowledge

    def get_category_name(self, category_id: str) -> str:
        """Return the display name of a category, or a placeholder if unknown."""
        return KNOWLEDGE_CATEGORIES.get(category_id, "未知分类")

    def get_categories_summary(self) -> str:
        """Return one line per category with its entry count, for the LLM."""
        lines = []
        for category_id, category_name in KNOWLEDGE_CATEGORIES.items():
            count = len(self._knowledge.get(category_id, []))
            line = (
                f"{category_id}. {category_name} ({count}条)"
                if count > 0
                else f"{category_id}. {category_name} (无数据)"
            )
            lines.append(line)
        return "\n".join(lines)

    def get_formatted_knowledge(self, category_ids: List[str]) -> str:
        """Format all entries of the given categories as text.

        Args:
            category_ids: Category ids to include, in output order.

        Returns:
            The formatted text, or a fixed placeholder sentence when none of
            the categories has entries.
        """
        parts = []
        for category_id in category_ids:
            category_name = self.get_category_name(category_id)
            items = self.get_category_knowledge(category_id)
            if not items:
                continue
            parts.append(f"{category_name}")
            parts.extend(f"  - {item.get('content', '')}" for item in items)

        if not parts:
            return "暂无相关了解信息"
        return "\n".join(parts)

    def get_stats(self) -> Dict[str, Any]:
        """Return counts and data-file information for the store."""
        total_items = 0
        for items in self._knowledge.values():
            total_items += len(items)

        return {
            "total_categories": len(KNOWLEDGE_CATEGORIES),
            "total_items": total_items,
            "data_file": str(KNOWLEDGE_FILE),
            "data_exists": KNOWLEDGE_FILE.exists(),
            "data_size_kb": KNOWLEDGE_FILE.stat().st_size / 1024 if KNOWLEDGE_FILE.exists() else 0,
        }
# Global singleton, created on first use.
_knowledge_store_instance: Optional[KnowledgeStore] = None


def get_knowledge_store() -> KnowledgeStore:
    """Return the shared ``KnowledgeStore`` instance (singleton)."""
    global _knowledge_store_instance
    store = _knowledge_store_instance
    if store is None:
        store = KnowledgeStore()
        _knowledge_store_instance = store
    return store

View File

@@ -13,9 +13,10 @@ from src.common.data_models.mai_message_data_model import GroupInfo, UserInfo
from src.common.logger import get_logger from src.common.logger import get_logger
from src.common.utils.utils_config import ExpressionConfigUtils from src.common.utils.utils_config import ExpressionConfigUtils
from src.config.config import global_config from src.config.config import global_config
from src.mcp_module import MCPManager from src.know_u.knowledge import KnowledgeLearner
from src.learners.expression_learner import ExpressionLearner from src.learners.expression_learner import ExpressionLearner
from src.learners.jargon_miner import JargonMiner from src.learners.jargon_miner import JargonMiner
from src.mcp_module import MCPManager
from .chat_loop_service import MaisakaChatLoopService from .chat_loop_service import MaisakaChatLoopService
from .reasoning_engine import MaisakaReasoningEngine from .reasoning_engine import MaisakaReasoningEngine
@@ -66,9 +67,11 @@ class MaisakaHeartFlowChatting:
self._enable_jargon_learning = jargon_learn self._enable_jargon_learning = jargon_learn
self._min_messages_for_extraction = 10 self._min_messages_for_extraction = 10
self._min_extraction_interval = 30 self._min_extraction_interval = 30
self._last_extraction_time = 0.0 self._last_expression_extraction_time = 0.0
self._last_knowledge_extraction_time = 0.0
self._expression_learner = ExpressionLearner(session_id) self._expression_learner = ExpressionLearner(session_id)
self._jargon_miner = JargonMiner(session_id, session_name=session_name) self._jargon_miner = JargonMiner(session_id, session_name=session_name)
self._knowledge_learner = KnowledgeLearner(session_id)
self._reasoning_engine = MaisakaReasoningEngine(self) self._reasoning_engine = MaisakaReasoningEngine(self)
@@ -157,7 +160,7 @@ class MaisakaHeartFlowChatting:
if not cached_messages: if not cached_messages:
break break
await self._internal_turn_queue.put(cached_messages) await self._internal_turn_queue.put(cached_messages)
asyncio.create_task(self._trigger_expression_learning(cached_messages)) asyncio.create_task(self._trigger_batch_learning(cached_messages))
except asyncio.CancelledError: except asyncio.CancelledError:
logger.info(f"{self.log_prefix} Maisaka runtime loop cancelled") logger.info(f"{self.log_prefix} Maisaka runtime loop cancelled")
@@ -223,6 +226,18 @@ class MaisakaHeartFlowChatting:
self._agent_state = self._STATE_STOP self._agent_state = self._STATE_STOP
self._wait_until = None self._wait_until = None
async def _trigger_batch_learning(self, messages: list[SessionMessage]) -> None:
    """Run the expression and knowledge learning triggers on one message batch.

    Both triggers run concurrently. An exception raised by either one is
    logged here instead of propagating.

    Args:
        messages: The batch of messages just collected.
    """
    outcomes = await asyncio.gather(
        self._trigger_expression_learning(messages),
        self._trigger_knowledge_learning(messages),
        return_exceptions=True,
    )
    expression_outcome, knowledge_outcome = outcomes

    # With return_exceptions=True failures come back as values, not raises.
    if isinstance(expression_outcome, Exception):
        logger.error(f"{self.log_prefix} expression learning task crashed: {expression_outcome}")
    if isinstance(knowledge_outcome, Exception):
        logger.error(f"{self.log_prefix} knowledge learning task crashed: {knowledge_outcome}")
async def _trigger_expression_learning(self, messages: list[SessionMessage]) -> None: async def _trigger_expression_learning(self, messages: list[SessionMessage]) -> None:
"""Trigger expression learning from the newly collected batch.""" """Trigger expression learning from the newly collected batch."""
self._expression_learner.add_messages(messages) self._expression_learner.add_messages(messages)
@@ -231,7 +246,7 @@ class MaisakaHeartFlowChatting:
logger.debug(f"{self.log_prefix} expression learning disabled, skip this batch") logger.debug(f"{self.log_prefix} expression learning disabled, skip this batch")
return return
elapsed = time.time() - self._last_extraction_time elapsed = time.time() - self._last_expression_extraction_time
if elapsed < self._min_extraction_interval: if elapsed < self._min_extraction_interval:
logger.debug( logger.debug(
f"{self.log_prefix} expression learning interval not reached: " f"{self.log_prefix} expression learning interval not reached: "
@@ -248,7 +263,7 @@ class MaisakaHeartFlowChatting:
) )
return return
self._last_extraction_time = time.time() self._last_expression_extraction_time = time.time()
logger.info( logger.info(
f"{self.log_prefix} starting expression learning: " f"{self.log_prefix} starting expression learning: "
f"new_batch={len(messages)} learner_cache={cache_size} " f"new_batch={len(messages)} learner_cache={cache_size} "
@@ -266,6 +281,47 @@ class MaisakaHeartFlowChatting:
except Exception: except Exception:
logger.exception(f"{self.log_prefix} expression learning failed") logger.exception(f"{self.log_prefix} expression learning failed")
async def _trigger_knowledge_learning(self, messages: list[SessionMessage]) -> None:
    """Feed a new batch to the knowledge learner and run learning when due.

    The batch is always cached. Learning runs only when the knowledge module
    is enabled, the minimum interval since the last extraction has elapsed and
    the learner cache holds enough messages. Failures are logged, not raised.

    Args:
        messages: The batch of messages just collected.
    """
    learner = self._knowledge_learner
    learner.add_messages(messages)

    if not global_config.maisaka.enable_knowledge_module:
        logger.debug(f"{self.log_prefix} knowledge learning disabled, skip this batch")
        return

    min_interval = self._min_extraction_interval
    elapsed = time.time() - self._last_knowledge_extraction_time
    if elapsed < min_interval:
        logger.debug(
            f"{self.log_prefix} knowledge learning interval not reached: "
            f"elapsed={elapsed:.2f}s threshold={self._min_extraction_interval}s"
        )
        return

    cache_size = learner.get_cache_size()
    if cache_size < self._min_messages_for_extraction:
        logger.debug(
            f"{self.log_prefix} knowledge learning skipped due to cache size: "
            f"learner_cache={cache_size} threshold={self._min_messages_for_extraction} "
            f"message_cache_total={len(self.message_cache)}"
        )
        return

    # Record the start time before awaiting so later batches respect the interval.
    self._last_knowledge_extraction_time = time.time()
    logger.info(
        f"{self.log_prefix} starting knowledge learning: "
        f"new_batch={len(messages)} learner_cache={cache_size} "
        f"message_cache_total={len(self.message_cache)}"
    )

    try:
        added_count = await learner.learn()
        if added_count > 0:
            logger.info(f"{self.log_prefix} knowledge learning finished: added={added_count}")
        else:
            logger.debug(f"{self.log_prefix} knowledge learning finished without usable result")
    except Exception:
        logger.exception(f"{self.log_prefix} knowledge learning failed")
async def _init_mcp(self) -> None: async def _init_mcp(self) -> None:
"""Initialize MCP tools and inject them into the planner.""" """Initialize MCP tools and inject them into the planner."""
config_path = Path(__file__).resolve().parents[2] / "config" / "mcp_config.json" config_path = Path(__file__).resolve().parents[2] / "config" / "mcp_config.json"

View File

@@ -12,10 +12,10 @@ import os
from rich.panel import Panel from rich.panel import Panel
from src.chat.message_receive.message import SessionMessage from src.chat.message_receive.message import SessionMessage
from src.cli.console import console
from src.cli.input_reader import InputReader
from src.llm_models.payload_content.tool_option import ToolCall from src.llm_models.payload_content.tool_option import ToolCall
from .console import console
from .input_reader import InputReader
from .message_adapter import build_message from .message_adapter import build_message
if TYPE_CHECKING: if TYPE_CHECKING:

View File

@@ -27,7 +27,7 @@ from typing import Optional
import json import json
import os import os
from src.maisaka.console import console from src.cli.console import console
DEFAULT_MCP_CONFIG_PATH = Path(__file__).resolve().parents[2] / "config" / "mcp_config.json" DEFAULT_MCP_CONFIG_PATH = Path(__file__).resolve().parents[2] / "config" / "mcp_config.json"

View File

@@ -6,7 +6,7 @@ MaiSaka - 单个 MCP 服务器连接管理
from contextlib import AsyncExitStack from contextlib import AsyncExitStack
from typing import Any, Optional from typing import Any, Optional
from src.maisaka.console import console from src.cli.console import console
from .config import MCPServerConfig from .config import MCPServerConfig

View File

@@ -5,7 +5,7 @@ MaiSaka - MCP 管理器
from typing import Optional from typing import Optional
from src.maisaka.console import console from src.cli.console import console
from .config import DEFAULT_MCP_CONFIG_PATH, MCPServerConfig, load_mcp_config from .config import DEFAULT_MCP_CONFIG_PATH, MCPServerConfig, load_mcp_config
from .connection import MCPConnection, MCP_AVAILABLE from .connection import MCPConnection, MCP_AVAILABLE