"""
MaiSaka - persistent storage for the knowledge list.

Stores personal traits learned about the user, organized hierarchically
(by category) and persisted locally.
"""

import json
import os
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any


# Data directory: "mai_knowledge" inside the directory that contains this
# module. Note that PROJECT_ROOT is derived from this file's own location.
PROJECT_ROOT = Path(os.path.dirname(os.path.abspath(__file__)))
KNOWLEDGE_DATA_DIR = PROJECT_ROOT / "mai_knowledge"
# Single JSON file that holds every category's items.
KNOWLEDGE_FILE = KNOWLEDGE_DATA_DIR / "knowledge.json"


# Predefined personal-trait categories: category id (string) -> display name.
# The ids are the keys used in the persisted JSON file.
KNOWLEDGE_CATEGORIES = {
    "1": "性别",
    "2": "性格",
    "3": "饮食口味",
    "4": "交友喜好",
    "5": "情绪/理性倾向",
    "6": "兴趣爱好",
    "7": "职业/专业",
    "8": "生活习惯",
    "9": "价值观",
    "10": "沟通风格",
    "11": "学习方式",
    "12": "压力应对方式",
}


class KnowledgeStore:
    """
    Store for the knowledge list (facts learned about the user).

    Features:
    - persisted to a JSON file
    - hierarchical storage (one list of items per category id)
    - incremental updates (every successful add is written to disk)
    - loaded automatically on construction
    """

    def __init__(
        self,
        data_file: Optional[Path] = None,
        categories: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize the store and load any existing data.

        Args:
            data_file: JSON file to persist to. Defaults to KNOWLEDGE_FILE.
            categories: Mapping of category id to display name. Defaults to
                KNOWLEDGE_CATEGORIES.
        """
        self._categories: Dict[str, str] = (
            KNOWLEDGE_CATEGORIES if categories is None else dict(categories)
        )
        self._file: Path = KNOWLEDGE_FILE if data_file is None else Path(data_file)
        self._knowledge: Dict[str, List[Dict[str, Any]]] = self._empty_knowledge()
        self._ensure_data_dir()
        self._load()

    def _empty_knowledge(self) -> Dict[str, List[Dict[str, Any]]]:
        """Return a fresh mapping with an empty list for every category."""
        return {category_id: [] for category_id in self._categories}

    def _ensure_data_dir(self):
        """Make sure the directory that holds the data file exists."""
        self._file.parent.mkdir(parents=True, exist_ok=True)

    def _load(self):
        """
        Load knowledge data from the data file.

        A missing file yields an empty store. An unreadable file, invalid
        JSON, or a top-level value that is not an object prints a warning and
        yields an empty store. A category whose value is not a list is reset
        to an empty list (with a warning) so later appends and counts work.
        """
        try:
            with open(self._file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if not isinstance(loaded, dict):
                raise ValueError("top-level JSON value is not an object")
        except FileNotFoundError:
            # First run: nothing persisted yet.
            self._knowledge = self._empty_knowledge()
            return
        except (OSError, ValueError, RecursionError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"[warning]加载了解数据失败: {e}[/warning]")
            self._knowledge = self._empty_knowledge()
            return

        for category_id, items in list(loaded.items()):
            if not isinstance(items, list):
                print(f"[warning]了解数据格式无效,已重置分类 {category_id}[/warning]")
                loaded[category_id] = []

        # Make sure every known category is present.
        for category_id in self._categories:
            loaded.setdefault(category_id, [])
        self._knowledge = loaded

    def _save(self) -> bool:
        """
        Write the knowledge data to the data file.

        The JSON is written to a temporary file in the same directory and then
        moved over the data file with os.replace, so a failure while
        serializing or writing leaves the previous file untouched.
        NOTE(review): the temporary file is created by tempfile, so the
        resulting data file carries that file's permissions.

        Returns:
            True if the data was written, False otherwise (a warning is
            printed in that case).
        """
        tmp_name = None
        try:
            self._file.parent.mkdir(parents=True, exist_ok=True)
            with tempfile.NamedTemporaryFile(
                "w",
                encoding="utf-8",
                dir=str(self._file.parent),
                suffix=".tmp",
                delete=False,
            ) as tmp:
                tmp_name = tmp.name
                json.dump(self._knowledge, tmp, ensure_ascii=False, indent=2)
            os.replace(tmp_name, self._file)
            return True
        except (OSError, TypeError, ValueError) as e:
            # TypeError / ValueError: non-serializable or circular data.
            print(f"[warning]保存了解数据失败: {e}[/warning]")
            if tmp_name is not None:
                try:
                    os.unlink(tmp_name)
                except OSError:
                    # Best-effort cleanup; the temp file may already be gone.
                    pass
            return False

    def add_knowledge(
        self,
        category_id: str,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """
        Add one knowledge item and persist it.

        Args:
            category_id: Category id.
            content: Knowledge content.
            metadata: Optional metadata stored with the item.

        Returns:
            True if the item was added and saved. False if the category id is
            unknown or the data could not be saved; in the latter case the
            item is removed again so memory matches the file on disk.
        """
        if category_id not in self._categories:
            return False

        bucket = self._knowledge.setdefault(category_id, [])
        if not isinstance(bucket, list):
            return False

        # One clock reading so "id" and "created_at" describe the same instant.
        now = datetime.now()
        base_id = f"know_{category_id}_{now.timestamp()}"
        item_id = base_id
        suffix = 1
        # Two adds within one clock tick would otherwise share an id.
        while any(
            isinstance(existing, dict) and existing.get("id") == item_id
            for existing in bucket
        ):
            item_id = f"{base_id}_{suffix}"
            suffix += 1

        knowledge_item = {
            "id": item_id,
            "content": content,
            "metadata": metadata or {},
            "created_at": now.isoformat(),
        }
        bucket.append(knowledge_item)
        if not self._save():
            # Roll back: an unsaved (possibly unserializable) item would make
            # every later save fail as well.
            bucket.pop()
            return False
        return True

    def get_category_knowledge(self, category_id: str) -> List[Dict[str, Any]]:
        """
        Get all knowledge items of one category.

        Args:
            category_id: Category id.

        Returns:
            The stored list for that category (the internal list, not a
            copy), or a new empty list if the id is not present.
        """
        return self._knowledge.get(category_id, [])

    def get_all_knowledge(self) -> Dict[str, List[Dict[str, Any]]]:
        """Get all knowledge items (the internal mapping, not a copy)."""
        return self._knowledge

    def get_category_name(self, category_id: str) -> str:
        """Get a category's display name, or "未知分类" if the id is unknown."""
        return self._categories.get(category_id, "未知分类")

    def get_categories_summary(self) -> str:
        """Get a one-line-per-category summary with item counts (for the LLM)."""
        lines = []
        for category_id, category_name in self._categories.items():
            count = len(self._knowledge.get(category_id, []))
            label = f"{count}条" if count > 0 else "无数据"
            lines.append(f"{category_id}. {category_name} ({label})")
        return "\n".join(lines)

    def get_formatted_knowledge(self, category_ids: List[str]) -> str:
        """
        Get the knowledge of the given categories formatted as text.

        Args:
            category_ids: List of category ids.

        Returns:
            A header line per non-empty category followed by one line per
            item, or a placeholder message if none of them has items.
        """
        parts = []
        for category_id in category_ids:
            items = self.get_category_knowledge(category_id)
            if not items:
                continue
            parts.append(f"【{self.get_category_name(category_id)}】")
            parts.extend(f" - {item.get('content', '')}" for item in items)
        return "\n".join(parts) if parts else "暂无相关了解信息"

    def get_stats(self) -> Dict[str, Any]:
        """Get statistics about the stored data and the data file."""
        total_items = sum(len(items) for items in self._knowledge.values())
        # Single stat() call instead of exists() followed by stat(), so the
        # file disappearing in between cannot raise.
        try:
            size_bytes = self._file.stat().st_size
            exists = True
        except OSError:
            size_bytes = 0
            exists = False
        return {
            "total_categories": len(self._categories),
            "total_items": total_items,
            "data_file": str(self._file),
            "data_exists": exists,
            "data_size_kb": size_bytes / 1024 if exists else 0,
        }


# Module-level singleton; created on the first call to get_knowledge_store().
_knowledge_store_instance: Optional[KnowledgeStore] = None


def get_knowledge_store() -> KnowledgeStore:
    """Return the shared KnowledgeStore instance, creating it on first use."""
    global _knowledge_store_instance
    if _knowledge_store_instance is None:
        _knowledge_store_instance = KnowledgeStore()
    return _knowledge_store_instance