diff --git a/.gitignore b/.gitignore
index a70c66cd..26510818 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,7 @@ mongodb/
NapCat.Framework.Windows.Once/
log/
src/plugins/memory
-src/plugins/chat/bot_config.toml
+config/bot_config.toml
/test
message_queue_content.txt
message_queue_content.bat
diff --git a/README.md b/README.md
index 3e2da652..a85fcc4e 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
-
+


@@ -16,11 +16,19 @@
基于llm、napcat、nonebot和mongodb的专注于群聊天的qqbot
+
+
> ⚠️ **警告**:代码可能随时更改,目前版本不一定是稳定版本
> ⚠️ **警告**:请自行了解qqbot的风险,麦麦有时候一天被腾讯肘七八次
> ⚠️ **警告**:由于麦麦一直在迭代,所以可能存在一些bug,请自行测试,包括胡言乱语(
-关于麦麦的开发和部署相关的讨论群(不建议发布无关消息)这里不会有麦麦发言!
+关于麦麦的开发和建议相关的讨论群(不建议发布无关消息)这里不会有麦麦发言!
## 开发计划TODO:LIST
@@ -29,6 +37,10 @@
- 对思考链长度限制
- 修复已知bug
- 完善文档
+- 修复转发
+- config自动生成和检测
+- log别用print
+- 给发送消息写专门的类
@@ -52,11 +64,9 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart
#### 手动运行
1. **创建Python环境**
- 推荐使用conda或其他环境管理来管理你的python环境
+ 推荐使用conda或其他虚拟环境进行依赖安装,防止出现依赖版本冲突问题
```bash
- # 安装requirements(还没检查好,可能有包漏了)
- conda activate 你的环境
- cd 对应路径
+ # 安装requirements
pip install -r requirements.txt
```
2. **MongoDB设置**
@@ -68,8 +78,8 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart
- 在Napcat的网络设置中添加ws反向代理:ws://localhost:8080/onebot/v11/ws
4. **配置文件设置**
- - 把env.example改成.env,并填上你的apikey(硅基流动或deepseekapi)
- - 把bot_config_toml改名为bot_config.toml,并填写相关内容,不然无法正常运行
+ - 将.env文件打开,填上你的apikey(硅基流动或deepseekapi)
+ - 将bot_config.toml文件打开,并填写相关内容,不然无法正常运行
#### .env 文件配置说明
```ini
@@ -92,14 +102,10 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart
MONGODB_PASSWORD="" # MongoDB密码(可选)
MONGODB_AUTH_SOURCE="" # MongoDB认证源(可选)
- # API密钥配置
- CHAT_ANY_WHERE_KEY= # ChatAnyWhere API密钥
- SILICONFLOW_KEY= # 硅基流动 API密钥(必填)
- DEEP_SEEK_KEY= # DeepSeek API密钥(必填)
-
- # API地址配置
- CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1
+ #api配置项,建议siliconflow必填,识图需要这个
+ SILICONFLOW_KEY=
SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1/
+ DEEP_SEEK_KEY=
DEEP_SEEK_BASE_URL=https://api.deepseek.com/v1
```
@@ -158,9 +164,8 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart
```
5. **运行麦麦**
+ 在含有bot.py程序的目录下运行(如果使用了虚拟环境需要先进入虚拟环境)
```bash
- conda activate 你的环境
- cd 对应路径
nb run
```
6. **运行其他组件**
@@ -205,3 +210,13 @@ NAPCAT_UID=$(id -u) NAPCAT_GID=$(id -g) docker compose restart
纯编程外行,面向cursor编程,很多代码史一样多多包涵
> ⚠️ **警告**:本应用生成内容来自人工智能模型,由 AI 生成,请仔细甄别,请勿用于违反法律的用途,AI生成内容不代表本人观点和立场。
+
+## 致谢
+[nonebot2](https://github.com/nonebot/nonebot2): 跨平台 Python 异步聊天机器人框架
+[NapCat](https://github.com/NapNeko/NapCatQQ): 现代化的基于 NTQQ 的 Bot 协议端实现
+
+### 贡献者
+
+感谢各位大佬!
+
+[](https://github.com/SengokuCola/MaiMBot/graphs/contributors)
diff --git a/src/plugins/chat/bot_config_toml b/config/bot_config_toml
similarity index 65%
rename from src/plugins/chat/bot_config_toml
rename to config/bot_config_toml
index fe6b702d..b5011c7f 100644
--- a/src/plugins/chat/bot_config_toml
+++ b/config/bot_config_toml
@@ -7,8 +7,8 @@ password = "" # 默认空值
auth_source = "" # 默认空值
[bot]
-qq = #填入你的机器人QQ
-nickname = "麦麦"
+qq = 123456 #填入你的机器人QQ
+nickname = "麦麦" #你希望bot被称呼的名字
[message]
min_text_length = 2 # 与麦麦聊天时麦麦只会回答文本大于等于此数的消息
@@ -24,11 +24,16 @@ enable_pic_translate = false
[response]
-api_using = "siliconflow" # 选择大模型API
+api_using = "siliconflow" # 选择大模型API,可选值为siliconflow,deepseek,建议使用siliconflow,因为识图api目前只支持siliconflow的deepseek-vl2模型
model_r1_probability = 0.8 # 麦麦回答时选择R1模型的概率
model_v3_probability = 0.1 # 麦麦回答时选择V3模型的概率
model_r1_distill_probability = 0.1 # 麦麦回答时选择R1蒸馏模型的概率
+[memory]
+build_memory_interval = 300 # 记忆构建间隔
+
+
+
[others]
enable_advance_output = true # 开启后输出更多日志,false关闭true开启
@@ -36,13 +41,13 @@ enable_advance_output = true # 开启后输出更多日志,false关闭true开启
[groups]
talk_allowed = [
- #可以回复消息的群
-]
+ 123456,12345678
+] #可以回复消息的群
talk_frequency_down = [
- #降低回复频率的群
-]
+ 123456,12345678
+] #降低回复频率的群
ban_user_id = [
- #禁止回复消息的QQ号
-]
+ 123456,12345678
+] #禁止回复消息的QQ号
diff --git a/env.example b/config/env.example
similarity index 82%
rename from env.example
rename to config/env.example
index c8ed650d..9988d58f 100644
--- a/env.example
+++ b/config/env.example
@@ -15,10 +15,8 @@ MONGODB_USERNAME = "" # 默认空值
MONGODB_PASSWORD = "" # 默认空值
MONGODB_AUTH_SOURCE = "" # 默认空值
-#key and url
-CHAT_ANY_WHERE_KEY=
+#api配置项
SILICONFLOW_KEY=
-CHAT_ANY_WHERE_BASE_URL=https://api.chatanywhere.tech/v1
SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1/
DEEP_SEEK_KEY=
DEEP_SEEK_BASE_URL=https://api.deepseek.com/v1
diff --git a/knowledge.bat b/knowledge.bat
deleted file mode 100644
index e6ad209e..00000000
--- a/knowledge.bat
+++ /dev/null
@@ -1,5 +0,0 @@
-call conda activate niuniu
-cd "C:\GitHub\MegMeg-bot"
-
-REM 执行nb run命令
-nb run
\ No newline at end of file
diff --git a/src/plugins/chat/__init__.py b/src/plugins/chat/__init__.py
index 7a3e2c75..1c25a24f 100644
--- a/src/plugins/chat/__init__.py
+++ b/src/plugins/chat/__init__.py
@@ -1,3 +1,4 @@
+from loguru import logger
from nonebot import on_message, on_command, require, get_driver
from nonebot.adapters.onebot.v11 import Bot, GroupMessageEvent, Message, MessageSegment
from nonebot.typing import T_State
@@ -10,7 +11,6 @@ from .relationship_manager import relationship_manager
from ..schedule.schedule_generator import bot_schedule
from .willing_manager import willing_manager
-
# 获取驱动器
driver = get_driver()
@@ -19,8 +19,7 @@ Database.initialize(
global_config.MONGODB_PORT,
global_config.DATABASE_NAME
)
-
-print("\033[1;32m[初始化配置和数据库完成]\033[0m")
+print("\033[1;32m[初始化数据库完成]\033[0m")
# 导入其他模块
@@ -28,6 +27,7 @@ from .bot import ChatBot
from .emoji_manager import emoji_manager
from .message_send_control import message_sender
from .relationship_manager import relationship_manager
+from ..memory_system.memory import memory_graph,hippocampus
# 初始化表情管理器
emoji_manager.initialize()
@@ -35,21 +35,26 @@ emoji_manager.initialize()
print(f"\033[1;32m正在唤醒{global_config.BOT_NICKNAME}......\033[0m")
# 创建机器人实例
chat_bot = ChatBot(global_config)
-
# 注册消息处理器
group_msg = on_message()
-
# 创建定时任务
scheduler = require("nonebot_plugin_apscheduler").scheduler
-# 启动后台任务
+
+
@driver.on_startup
async def start_background_tasks():
"""启动后台任务"""
# 只启动表情包管理任务
asyncio.create_task(emoji_manager.start_periodic_check(interval_MINS=global_config.EMOJI_CHECK_INTERVAL))
-
bot_schedule.print_schedule()
+
+@driver.on_startup
+async def init_relationships():
+ """在 NoneBot2 启动时初始化关系管理器"""
+ print("\033[1;32m[初始化]\033[0m 正在加载用户关系数据...")
+ await relationship_manager.load_all_relationships()
+ asyncio.create_task(relationship_manager._start_relationship_manager())
@driver.on_bot_connect
async def _(bot: Bot):
@@ -64,19 +69,23 @@ async def _(bot: Bot):
print("\033[1;38;5;208m-----------开始偷表情包!-----------\033[0m")
# 启动消息发送控制任务
-@driver.on_startup
-async def init_relationships():
- """在 NoneBot2 启动时初始化关系管理器"""
- print("\033[1;32m[初始化]\033[0m 正在加载用户关系数据...")
- await relationship_manager.load_all_relationships()
- asyncio.create_task(relationship_manager._start_relationship_manager())
-
@group_msg.handle()
async def _(bot: Bot, event: GroupMessageEvent, state: T_State):
await chat_bot.handle_message(event, bot)
-
+
+'''
@scheduler.scheduled_job("interval", seconds=300000, id="monitor_relationships")
async def monitor_relationships():
"""每15秒打印一次关系数据"""
relationship_manager.print_all_relationships()
+'''
+# 添加build_memory定时任务
+@scheduler.scheduled_job("interval", seconds=global_config.build_memory_interval, id="build_memory")
+async def build_memory_task():
+ """每30秒执行一次记忆构建"""
+ print("\033[1;32m[记忆构建]\033[0m 开始构建记忆...")
+ hippocampus.build_memory(chat_size=12)
+ print("\033[1;32m[记忆构建]\033[0m 记忆构建完成")
+
+
diff --git a/src/plugins/chat/bot.py b/src/plugins/chat/bot.py
index efa8e101..1b520164 100644
--- a/src/plugins/chat/bot.py
+++ b/src/plugins/chat/bot.py
@@ -5,7 +5,7 @@ from .storage import MessageStorage
from .llm_generator import LLMResponseGenerator
from .message_stream import MessageStream, MessageStreamContainer
from .topic_identifier import topic_identifier
-from random import random
+from random import random, choice
from .emoji_manager import emoji_manager # 导入表情包管理器
import time
import os
@@ -15,6 +15,7 @@ from .message import Message_Thinking # 导入 Message_Thinking 类
from .relationship_manager import relationship_manager
from .willing_manager import willing_manager # 导入意愿管理器
from .utils import is_mentioned_bot_in_txt, calculate_typing_time
+from ..memory_system.memory import memory_graph
class ChatBot:
def __init__(self, config: BotConfig):
@@ -82,7 +83,7 @@ class ChatBot:
await relationship_manager.update_relationship(user_id = event.user_id, data = sender_info)
await relationship_manager.update_relationship_value(user_id = event.user_id, relationship_value = 0.5)
- print(f"\033[1;32m[关系管理]\033[0m 更新关系值: {relationship_manager.get_relationship(event.user_id).relationship_value}")
+ # print(f"\033[1;32m[关系管理]\033[0m 更新关系值: {relationship_manager.get_relationship(event.user_id).relationship_value}")
message = Message(
@@ -99,9 +100,19 @@ class ChatBot:
topic = topic_identifier.identify_topic_jieba(message.processed_plain_text)
print(f"\033[1;32m[主题识别]\033[0m 主题: {topic}")
+ all_num = 0
+ interested_num = 0
+ if topic:
+ for current_topic in topic:
+ all_num += 1
+ first_layer_items, second_layer_items = memory_graph.get_related_item(current_topic, depth=2)
+ if first_layer_items:
+ interested_num += 1
+ print(f"\033[1;32m[前额叶]\033[0m 对|{current_topic}|有印象")
+ interested_rate = interested_num / all_num if all_num > 0 else 0
+
await self.storage.store_message(message, topic[0] if topic else None)
-
is_mentioned = is_mentioned_bot_in_txt(message.processed_plain_text)
@@ -111,7 +122,8 @@ class ChatBot:
is_mentioned,
self.config,
event.user_id,
- message.is_emoji
+ message.is_emoji,
+ interested_rate
)
current_willing = willing_manager.get_willing(event.group_id)
@@ -182,7 +194,8 @@ class ChatBot:
user_nickname=global_config.BOT_NICKNAME,
group_name=message.group_name,
time=bot_response_time,
- is_emoji=True
+ is_emoji=True,
+ translate_cq=False
)
message_sender.send_temp_container.add_message(bot_message)
diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py
index b9965470..05d49278 100644
--- a/src/plugins/chat/config.py
+++ b/src/plugins/chat/config.py
@@ -5,6 +5,9 @@ from nonebot.log import logger, default_format
import logging
import configparser
import tomli
+import sys
+from loguru import logger
+from dotenv import load_dotenv
@@ -20,7 +23,7 @@ class BotConfig:
MONGODB_PASSWORD: Optional[str] = None # 默认空值
MONGODB_AUTH_SOURCE: Optional[str] = None # 默认空值
- BOT_QQ: Optional[int] = None
+ BOT_QQ: Optional[int] = 1
BOT_NICKNAME: Optional[str] = None
# 消息处理相关配置
@@ -34,6 +37,7 @@ class BotConfig:
talk_frequency_down_groups = set()
ban_user_id = set()
+ build_memory_interval: int = 60 # 记忆构建间隔(秒)
EMOJI_CHECK_INTERVAL: int = 120 # 表情包检查间隔(分钟)
EMOJI_REGISTER_INTERVAL: int = 10 # 表情包注册间隔(分钟)
@@ -44,9 +48,21 @@ class BotConfig:
enable_advance_output: bool = False # 是否启用高级输出
+ @staticmethod
+ def get_default_config_path() -> str:
+ """获取默认配置文件路径"""
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..'))
+ config_dir = os.path.join(root_dir, 'config')
+ return os.path.join(config_dir, 'bot_config.toml')
+
@classmethod
- def load_config(cls, config_path: str = "bot_config.toml") -> "BotConfig":
+ def load_config(cls, config_path: str = None) -> "BotConfig":
"""从TOML配置文件加载配置"""
+ if config_path is None:
+ config_path = cls.get_default_config_path()
+ logger.info(f"使用默认配置文件路径: {config_path}")
+
config = cls()
if os.path.exists(config_path):
with open(config_path, "rb") as f:
@@ -92,6 +108,10 @@ class BotConfig:
config.MAX_CONTEXT_SIZE = msg_config.get("max_context_size", config.MAX_CONTEXT_SIZE)
config.emoji_chance = msg_config.get("emoji_chance", config.emoji_chance)
+ if "memory" in toml_dict:
+ memory_config = toml_dict["memory"]
+ config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
+
# 群组配置
if "groups" in toml_dict:
groups_config = toml_dict["groups"]
@@ -103,16 +123,26 @@ class BotConfig:
others_config = toml_dict["others"]
config.enable_advance_output = others_config.get("enable_advance_output", config.enable_advance_output)
- print(f"\033[1;32m成功加载配置文件: {config_path}\033[0m")
+ logger.success(f"成功加载配置文件: {config_path}")
return config
-global_config = BotConfig.load_config("./src/plugins/chat/bot_config.toml")
+# 获取配置文件路径
+bot_config_path = BotConfig.get_default_config_path()
+config_dir = os.path.dirname(bot_config_path)
+env_path = os.path.join(config_dir, '.env')
-from dotenv import load_dotenv
-current_dir = os.path.dirname(os.path.abspath(__file__))
-root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..'))
-load_dotenv(os.path.join(root_dir, '.env'))
+logger.info(f"尝试从 {bot_config_path} 加载机器人配置")
+global_config = BotConfig.load_config(config_path=bot_config_path)
+
+# 加载环境变量
+
+logger.info(f"尝试从 {env_path} 加载环境变量配置")
+if os.path.exists(env_path):
+ load_dotenv(env_path)
+ logger.success("成功加载环境变量配置")
+else:
+ logger.error(f"环境变量配置文件不存在: {env_path}")
@dataclass
class LLMConfig:
@@ -131,10 +161,5 @@ llm_config.DEEP_SEEK_BASE_URL = os.getenv('DEEP_SEEK_BASE_URL')
if not global_config.enable_advance_output:
- logger.remove()
-
- logging.getLogger('nonebot').handlers.clear()
- console_handler = logging.StreamHandler()
- console_handler.setLevel(logging.WARNING) # 只输出 WARNING 及以上级别
- logging.getLogger('nonebot').addHandler(console_handler)
- logging.getLogger('nonebot').setLevel(logging.WARNING)
+ # logger.remove()
+ pass
diff --git a/src/plugins/chat/llm_generator.py b/src/plugins/chat/llm_generator.py
index bb68d361..2ea4d7f2 100644
--- a/src/plugins/chat/llm_generator.py
+++ b/src/plugins/chat/llm_generator.py
@@ -4,7 +4,7 @@ import asyncio
import requests
from functools import partial
from .message import Message
-from .config import BotConfig
+from .config import BotConfig, global_config
from ...common.database import Database
import random
import time
@@ -255,4 +255,4 @@ class LLMResponseGenerator:
return processed_response, emotion_tags
# 创建全局实例
-llm_response = LLMResponseGenerator(config=BotConfig())
\ No newline at end of file
+llm_response = LLMResponseGenerator(global_config)
\ No newline at end of file
diff --git a/src/plugins/chat/message.py b/src/plugins/chat/message.py
index 2e91f530..f5ea0db0 100644
--- a/src/plugins/chat/message.py
+++ b/src/plugins/chat/message.py
@@ -6,17 +6,13 @@ import os
from datetime import datetime
from ...common.database import Database
from PIL import Image
-from .config import BotConfig, global_config
+from .config import global_config
import urllib3
from .utils_user import get_user_nickname
from .utils_cq import parse_cq_code
from .cq_code import cq_code_tool,CQCode
Message = ForwardRef('Message') # 添加这行
-
-# 加载配置
-bot_config = BotConfig.load_config()
-
# 禁用SSL警告
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -48,6 +44,8 @@ class Message:
is_emoji: bool = False # 是否是表情包
has_emoji: bool = False # 是否包含表情包
+
+ translate_cq: bool = True # 是否翻译cq码
reply_benefits: float = 0.0
@@ -99,7 +97,7 @@ class Message:
- cq_code_list:分割出的聊天对象,包括文本和CQ码
- trans_list:翻译后的对象列表
"""
- print(f"\033[1;34m[调试信息]\033[0m 正在处理消息: {message}")
+ # print(f"\033[1;34m[调试信息]\033[0m 正在处理消息: {message}")
cq_code_dict_list = []
trans_list = []
diff --git a/src/plugins/chat/message_send_control.py b/src/plugins/chat/message_send_control.py
index c5bd122f..0ddb79c5 100644
--- a/src/plugins/chat/message_send_control.py
+++ b/src/plugins/chat/message_send_control.py
@@ -184,7 +184,7 @@ class MessageSendControl:
message.update_thinking_time()
thinking_time = message.thinking_time
if thinking_time < 90: # 最少思考2秒
- if int(thinking_time) % 10 == 0:
+ if int(thinking_time) % 15 == 0:
print(f"\033[1;34m[调试]\033[0m 消息正在思考中,已思考{thinking_time:.1f}秒")
return
else:
@@ -208,7 +208,15 @@ class MessageSendControl:
print(f"\033[1;34m[调试]\033[0m 消息发送时间: {cost_time}秒")
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(message.time))
print(f"\033[1;32m群 {group_id} 消息, 用户 {global_config.BOT_NICKNAME}, 时间: {current_time}:\033[0m {str(message.processed_plain_text)}")
- await self.storage.store_message(message, None)
+
+ if message.is_emoji:
+ message.processed_plain_text = "[表情包]"
+ await self.storage.store_message(message, None)
+ else:
+ await self.storage.store_message(message, None)
+
+
+
queue.update_send_time()
if queue.has_messages():
await asyncio.sleep(
diff --git a/src/plugins/chat/prompt_builder.py b/src/plugins/chat/prompt_builder.py
index 5572ae1d..da9037cf 100644
--- a/src/plugins/chat/prompt_builder.py
+++ b/src/plugins/chat/prompt_builder.py
@@ -6,6 +6,9 @@ import os
from .utils import get_embedding, combine_messages, get_recent_group_detailed_plain_text
from ...common.database import Database
from .config import global_config
+from .topic_identifier import topic_identifier
+from ..memory_system.memory import memory_graph
+from random import choice
# 获取当前文件的绝对路径
current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -35,6 +38,59 @@ class PromptBuilder:
Returns:
str: 构建好的prompt
"""
+
+
+ memory_prompt = ''
+ start_time = time.time() # 记录开始时间
+ topic = topic_identifier.identify_topic_jieba(message_txt)
+ # print(f"\033[1;32m[pb主题识别]\033[0m 主题: {topic}")
+
+ all_first_layer_items = [] # 存储所有第一层记忆
+ all_second_layer_items = {} # 用字典存储每个topic的第二层记忆
+ overlapping_second_layer = set() # 存储重叠的第二层记忆
+
+ if topic:
+ # 遍历所有topic
+ for current_topic in topic:
+ first_layer_items, second_layer_items = memory_graph.get_related_item(current_topic, depth=2)
+ # if first_layer_items:
+ # print(f"\033[1;32m[前额叶]\033[0m 主题 '{current_topic}' 的第一层记忆: {first_layer_items}")
+
+ # 记录第一层数据
+ all_first_layer_items.extend(first_layer_items)
+
+ # 记录第二层数据
+ all_second_layer_items[current_topic] = second_layer_items
+
+ # 检查是否有重叠的第二层数据
+ for other_topic, other_second_layer in all_second_layer_items.items():
+ if other_topic != current_topic:
+ # 找到重叠的记忆
+ overlap = set(second_layer_items) & set(other_second_layer)
+ if overlap:
+ # print(f"\033[1;32m[前额叶]\033[0m 发现主题 '{current_topic}' 和 '{other_topic}' 有共同的第二层记忆: {overlap}")
+ overlapping_second_layer.update(overlap)
+
+ # 合并所有需要的记忆
+ if all_first_layer_items:
+ print(f"\033[1;32m[前额叶]\033[0m 合并所有需要的记忆1: {all_first_layer_items}")
+ if overlapping_second_layer:
+ print(f"\033[1;32m[前额叶]\033[0m 合并所有需要的记忆2: {list(overlapping_second_layer)}")
+
+ all_memories = all_first_layer_items + list(overlapping_second_layer)
+
+ if all_memories: # 只在列表非空时选择随机项
+ random_item = choice(all_memories)
+ memory_prompt = f"看到这些聊天,你想起来{random_item}\n"
+ else:
+ memory_prompt = "" # 如果没有记忆,则返回空字符串
+
+ end_time = time.time() # 记录结束时间
+ print(f"\033[1;32m[回忆耗时]\033[0m 耗时: {(end_time - start_time):.3f}秒") # 输出耗时
+
+
+
+
#先禁用关系
if 0 > 30:
relation_prompt = "关系特别特别好,你很喜欢喜欢他"
@@ -54,26 +110,33 @@ class PromptBuilder:
bot_schedule_now_time,bot_schedule_now_activity = bot_schedule.get_current_task()
prompt_date = f'''今天是{current_date},现在是{current_time},你今天的日程是:\n{bot_schedule.today_schedule}\n你现在正在{bot_schedule_now_activity}\n'''
- #知识构建(暂时禁用,因为知识库太少了)
+ #知识构建
+ start_time = time.time()
+
prompt_info = ''
promt_info_prompt = ''
- prompt_info = self.get_prompt_info(message_txt)
+ prompt_info = self.get_prompt_info(message_txt,threshold=0.5)
if prompt_info:
- prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]:
- \n{prompt_info}\n
- 请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n'''
+ prompt_info = f'''\n----------------------------------------------------\n你有以下这些[知识]:\n{prompt_info}\n请你记住上面的[知识],之后可能会用到\n----------------------------------------------------\n'''
promt_info_prompt = '你有一些[知识],在上面可以参考。'
+
+ end_time = time.time()
+ print(f"\033[1;32m[知识检索]\033[0m 耗时: {(end_time - start_time):.3f}秒")
+ # print(f"\033[1;34m[调试]\033[0m 获取知识库内容结果: {prompt_info}")
+
- print(f"\033[1;34m[调试信息]\033[0m 正在构建聊天上下文")
+ # print(f"\033[1;34m[调试信息]\033[0m 正在构建聊天上下文")
chat_talking_prompt = ''
if group_id:
chat_talking_prompt = get_recent_group_detailed_plain_text(self.db, group_id, limit=global_config.MAX_CONTEXT_SIZE,combine = True)
- print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
+
+ chat_talking_prompt = f"以下是群里正在聊天的内容:\n{chat_talking_prompt}"
+ # print(f"\033[1;34m[调试]\033[0m 已从数据库获取群 {group_id} 的消息记录:{chat_talking_prompt}")
#激活prompt构建
activate_prompt = ''
- activate_prompt = f"以上是群里正在进行的聊天,昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2}。"
+ activate_prompt = f"以上是群里正在进行的聊天,{memory_prompt} 现在昵称为 '{sender_name}' 的用户说的:{message_txt}。引起了你的注意,你和他{relation_prompt},你想要{relation_prompt_2}。"
#检测机器人相关词汇
bot_keywords = ['人机', 'bot', '机器', '入机', 'robot', '机器人']
@@ -87,13 +150,12 @@ class PromptBuilder:
prompt_personality = ''
personality_choice = random.random()
if personality_choice < 4/6: # 第一种人格
- prompt_personality = f'''你的网名叫{global_config.BOT_NICKNAME},是一个学习地质的女大学生,喜欢摄影,你会刷贴吧,你正在浏览qq群,{promt_info_prompt},
- {activate_prompt}
+ prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},曾经是一个学习地质的女大学生,现在学习心理学和脑科学,你会刷贴吧,你正在浏览qq群,{promt_info_prompt},
现在请你给出日常且口语化的回复,平淡一些,尽量简短一些。{is_bot_prompt}
请注意把握群里的聊天内容,不要回复的太有条理,可以有个性。'''
elif personality_choice < 1: # 第二种人格
- prompt_personality = f'''你的网名叫{global_config.BOT_NICKNAME},是一个女大学生,你有黑色头发,你会刷小红书, 你正在浏览qq群,{promt_info_prompt},
- {activate_prompt}
+ prompt_personality = f'''{activate_prompt}你的网名叫{global_config.BOT_NICKNAME},是一个女大学生,你有黑色头发,你会刷小红书, 你正在浏览qq群,{promt_info_prompt},
+
现在请你给出日常且口语化的回复,请表现你自己的见解,不要一昧迎合,尽量简短一些。{is_bot_prompt}
请你表达自己的见解和观点。可以有个性。'''
@@ -108,15 +170,18 @@ class PromptBuilder:
#额外信息要求
- extra_info = '''但是记得回复平淡一些,简短一些,不要过多提及自身的背景, 记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容'''
+ extra_info = '''但是记得回复平淡一些,简短一些,记住不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只需要输出回复内容就好,不要输出其他任何内容'''
#合并prompt
prompt = ""
- # prompt += f"{prompt_info}\n"
+ prompt += f"{prompt_info}\n"
prompt += f"{prompt_date}\n"
- prompt += f"{chat_talking_prompt}\n"
+ prompt += f"{chat_talking_prompt}\n"
+
+ # prompt += f"{memory_prompt}\n"
+
# prompt += f"{activate_prompt}\n"
prompt += f"{prompt_personality}\n"
prompt += f"{prompt_ger}\n"
@@ -124,31 +189,23 @@ class PromptBuilder:
return prompt
- def get_prompt_info(self,message:str):
+ def get_prompt_info(self,message:str,threshold:float):
related_info = ''
if len(message) > 10:
message_segments = [message[i:i+10] for i in range(0, len(message), 10)]
for segment in message_segments:
embedding = get_embedding(segment)
- related_info += self.get_info_from_db(embedding)
+ related_info += self.get_info_from_db(embedding,threshold=threshold)
else:
embedding = get_embedding(message)
- related_info += self.get_info_from_db(embedding)
+ related_info += self.get_info_from_db(embedding,threshold=threshold)
+ return related_info
+
def get_info_from_db(self, query_embedding: list, limit: int = 1, threshold: float = 0.5) -> str:
- """
- 从知识库中查找与输入向量最相似的内容
- Args:
- query_embedding: 查询向量
- limit: 返回结果数量,默认为2
- threshold: 相似度阈值,默认为0.5
- Returns:
- str: 找到的相关信息,如果相似度低于阈值则返回空字符串
- """
if not query_embedding:
return ''
-
# 使用余弦相似度计算
pipeline = [
{
@@ -206,6 +263,7 @@ class PromptBuilder:
]
results = list(self.db.db.knowledges.aggregate(pipeline))
+ # print(f"\033[1;34m[调试]\033[0m获取知识库内容结果: {results}")
if not results:
return ''
diff --git a/src/plugins/chat/utils.py b/src/plugins/chat/utils.py
index 58e2280c..4e223580 100644
--- a/src/plugins/chat/utils.py
+++ b/src/plugins/chat/utils.py
@@ -7,6 +7,8 @@ import numpy as np
from .config import llm_config, global_config
import re
from typing import Dict
+from collections import Counter
+import math
def combine_messages(messages: List[Message]) -> str:
@@ -81,6 +83,39 @@ def cosine_similarity(v1, v2):
norm2 = np.linalg.norm(v2)
return dot_product / (norm1 * norm2)
+def calculate_information_content(text):
+ """计算文本的信息量(熵)"""
+ # 统计字符频率
+ char_count = Counter(text)
+ total_chars = len(text)
+
+ # 计算熵
+ entropy = 0
+ for count in char_count.values():
+ probability = count / total_chars
+ entropy -= probability * math.log2(probability)
+
+ return entropy
+
+def get_cloest_chat_from_db(db, length: int, timestamp: str):
+ # 从数据库中根据时间戳获取离其最近的聊天记录
+ chat_text = ''
+ closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出
+ # print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")
+
+ if closest_record:
+ closest_time = closest_record['time']
+ group_id = closest_record['group_id'] # 获取groupid
+ # 获取该时间戳之后的length条消息,且groupid相同
+ chat_record = list(db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length))
+ for record in chat_record:
+ time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time'])))
+ chat_text += f'[{time_str}] {record["user_nickname"] or "用户" + str(record["user_id"])}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息
+ return chat_text
+
+ return [] # 如果没有找到记录,返回空列表
+
+
def get_recent_group_messages(db, group_id: int, limit: int = 12) -> list:
"""从数据库获取群组最近的消息记录
diff --git a/src/plugins/chat/utils_image.py b/src/plugins/chat/utils_image.py
index e1a88234..9fe2c40c 100644
--- a/src/plugins/chat/utils_image.py
+++ b/src/plugins/chat/utils_image.py
@@ -4,11 +4,9 @@ import hashlib
import time
import os
from ...common.database import Database
-from .config import BotConfig
import zlib # 用于 CRC32
import base64
-
-bot_config = BotConfig.load_config()
+from .config import global_config
def storage_image(image_data: bytes,type: str, max_size: int = 200) -> bytes:
@@ -39,12 +37,12 @@ def storage_compress_image(image_data: bytes, max_size: int = 200) -> bytes:
# 连接数据库
db = Database(
- host=bot_config.MONGODB_HOST,
- port=bot_config.MONGODB_PORT,
- db_name=bot_config.DATABASE_NAME,
- username=bot_config.MONGODB_USERNAME,
- password=bot_config.MONGODB_PASSWORD,
- auth_source=bot_config.MONGODB_AUTH_SOURCE
+ host=global_config.MONGODB_HOST,
+ port=global_config.MONGODB_PORT,
+ db_name=global_config.DATABASE_NAME,
+ username=global_config.MONGODB_USERNAME,
+ password=global_config.MONGODB_PASSWORD,
+ auth_source=global_config.MONGODB_AUTH_SOURCE
)
# 检查是否已存在相同哈希值的图片
diff --git a/src/plugins/chat/willing_manager.py b/src/plugins/chat/willing_manager.py
index df41ba42..037c2d51 100644
--- a/src/plugins/chat/willing_manager.py
+++ b/src/plugins/chat/willing_manager.py
@@ -22,22 +22,31 @@ class WillingManager:
"""设置指定群组的回复意愿"""
self.group_reply_willing[group_id] = willing
- def change_reply_willing_received(self, group_id: int, topic: str, is_mentioned_bot: bool, config, user_id: int = None, is_emoji: bool = False) -> float:
+ def change_reply_willing_received(self, group_id: int, topic: str, is_mentioned_bot: bool, config, user_id: int = None, is_emoji: bool = False, interested_rate: float = 0) -> float:
"""改变指定群组的回复意愿并返回回复概率"""
current_willing = self.group_reply_willing.get(group_id, 0)
- if topic and current_willing < 1:
- current_willing += 0.2
- elif topic:
- current_willing += 0.05
+ print(f"初始意愿: {current_willing}")
+
+ # if topic and current_willing < 1:
+ # current_willing += 0.2
+ # elif topic:
+ # current_willing += 0.05
if is_mentioned_bot and current_willing < 1.0:
current_willing += 0.9
+ print(f"被提及, 当前意愿: {current_willing}")
elif is_mentioned_bot:
current_willing += 0.05
+ print(f"被重复提及, 当前意愿: {current_willing}")
if is_emoji:
- current_willing *= 0.2
+ current_willing *= 0.15
+ print(f"表情包, 当前意愿: {current_willing}")
+
+ if interested_rate > 0.6:
+ print(f"兴趣度: {interested_rate}, 当前意愿: {current_willing}")
+ current_willing += interested_rate-0.45
self.group_reply_willing[group_id] = min(current_willing, 3.0)
@@ -55,15 +64,15 @@ class WillingManager:
return reply_probability
def change_reply_willing_sent(self, group_id: int):
- """发送消息后降低群组的回复意愿"""
+ """开始思考后降低群组的回复意愿"""
current_willing = self.group_reply_willing.get(group_id, 0)
- self.group_reply_willing[group_id] = max(0, current_willing - 1.8)
+ self.group_reply_willing[group_id] = max(0, current_willing - 2)
def change_reply_willing_after_sent(self, group_id: int):
"""发送消息后提高群组的回复意愿"""
current_willing = self.group_reply_willing.get(group_id, 0)
if current_willing < 1:
- self.group_reply_willing[group_id] = min(1, current_willing + 0.4)
+ self.group_reply_willing[group_id] = min(1, current_willing + 0.3)
async def ensure_started(self):
"""确保衰减任务已启动"""
diff --git a/src/plugins/knowledege/knowledge_library.py b/src/plugins/knowledege/knowledge_library.py
new file mode 100644
index 00000000..40756b41
--- /dev/null
+++ b/src/plugins/knowledege/knowledge_library.py
@@ -0,0 +1,186 @@
+import os
+import sys
+import numpy as np
+import requests
+import time
+
+# 添加项目根目录到 Python 路径
+root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
+sys.path.append(root_path)
+
+from src.common.database import Database
+from src.plugins.chat.config import llm_config
+
+# 直接配置数据库连接信息
+Database.initialize(
+ "127.0.0.1", # MongoDB 主机
+ 27017, # MongoDB 端口
+ "MegBot" # 数据库名称
+)
+
+class KnowledgeLibrary:
+ def __init__(self):
+ self.db = Database.get_instance()
+ self.raw_info_dir = "data/raw_info"
+ self._ensure_dirs()
+
+ def _ensure_dirs(self):
+ """确保必要的目录存在"""
+ os.makedirs(self.raw_info_dir, exist_ok=True)
+
+ def get_embedding(self, text: str) -> list:
+ """获取文本的embedding向量"""
+ url = "https://api.siliconflow.cn/v1/embeddings"
+ payload = {
+ "model": "BAAI/bge-m3",
+ "input": text,
+ "encoding_format": "float"
+ }
+ headers = {
+ "Authorization": f"Bearer {llm_config.SILICONFLOW_API_KEY}",
+ "Content-Type": "application/json"
+ }
+
+ response = requests.post(url, json=payload, headers=headers)
+ if response.status_code != 200:
+ print(f"获取embedding失败: {response.text}")
+ return None
+
+ return response.json()['data'][0]['embedding']
+
+ def process_files(self):
+ """处理raw_info目录下的所有txt文件"""
+ for filename in os.listdir(self.raw_info_dir):
+ if filename.endswith('.txt'):
+ file_path = os.path.join(self.raw_info_dir, filename)
+ self.process_single_file(file_path)
+
+ def process_single_file(self, file_path: str):
+ """处理单个文件"""
+ try:
+ # 检查文件是否已处理
+ if self.db.db.processed_files.find_one({"file_path": file_path}):
+ print(f"文件已处理过,跳过: {file_path}")
+ return
+
+ with open(file_path, 'r', encoding='utf-8') as f:
+ content = f.read()
+
+ # 按1024字符分段
+ segments = [content[i:i+300] for i in range(0, len(content), 300)]
+
+ # 处理每个分段
+ for segment in segments:
+ if not segment.strip(): # 跳过空段
+ continue
+
+ # 获取embedding
+ embedding = self.get_embedding(segment)
+ if not embedding:
+ continue
+
+ # 存储到数据库
+ doc = {
+ "content": segment,
+ "embedding": embedding,
+ "file_path": file_path,
+ "segment_length": len(segment)
+ }
+
+ # 使用文本内容的哈希值作为唯一标识
+ content_hash = hash(segment)
+
+ # 更新或插入文档
+ self.db.db.knowledges.update_one(
+ {"content_hash": content_hash},
+ {"$set": doc},
+ upsert=True
+ )
+
+ # 记录文件已处理
+ self.db.db.processed_files.insert_one({
+ "file_path": file_path,
+ "processed_time": time.time()
+ })
+
+ print(f"成功处理文件: {file_path}")
+
+ except Exception as e:
+ print(f"处理文件 {file_path} 时出错: {str(e)}")
+
+ def search_similar_segments(self, query: str, limit: int = 5) -> list:
+ """搜索与查询文本相似的片段"""
+ query_embedding = self.get_embedding(query)
+ if not query_embedding:
+ return []
+
+ # 使用余弦相似度计算
+ pipeline = [
+ {
+ "$addFields": {
+ "dotProduct": {
+ "$reduce": {
+ "input": {"$range": [0, {"$size": "$embedding"}]},
+ "initialValue": 0,
+ "in": {
+ "$add": [
+ "$$value",
+ {"$multiply": [
+ {"$arrayElemAt": ["$embedding", "$$this"]},
+ {"$arrayElemAt": [query_embedding, "$$this"]}
+ ]}
+ ]
+ }
+ }
+ },
+ "magnitude1": {
+ "$sqrt": {
+ "$reduce": {
+ "input": "$embedding",
+ "initialValue": 0,
+ "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}
+ }
+ }
+ },
+ "magnitude2": {
+ "$sqrt": {
+ "$reduce": {
+ "input": query_embedding,
+ "initialValue": 0,
+ "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}
+ }
+ }
+ }
+ }
+ },
+ {
+ "$addFields": {
+ "similarity": {
+ "$divide": ["$dotProduct", {"$multiply": ["$magnitude1", "$magnitude2"]}]
+ }
+ }
+ },
+ {"$sort": {"similarity": -1}},
+ {"$limit": limit},
+ {"$project": {"content": 1, "similarity": 1, "file_path": 1}}
+ ]
+
+ results = list(self.db.db.knowledges.aggregate(pipeline))
+ return results
+
+# 创建单例实例
+knowledge_library = KnowledgeLibrary()
+
+if __name__ == "__main__":
+ # 测试知识库功能
+ print("开始处理知识库文件...")
+ knowledge_library.process_files()
+
+ # 测试搜索功能
+ test_query = "麦麦评价一下僕と花"
+ print(f"\n搜索与'{test_query}'相似的内容:")
+ results = knowledge_library.search_similar_segments(test_query)
+ for result in results:
+ print(f"相似度: {result['similarity']:.4f}")
+ print(f"内容: {result['content'][:100]}...")
+ print("-" * 50)
diff --git a/src/plugins/memory_system/draw_memory.py b/src/plugins/memory_system/draw_memory.py
new file mode 100644
index 00000000..651d5fbc
--- /dev/null
+++ b/src/plugins/memory_system/draw_memory.py
@@ -0,0 +1,264 @@
+# -*- coding: utf-8 -*-
+import sys
+import jieba
+from llm_module import LLMModel
+import networkx as nx
+import matplotlib.pyplot as plt
+import math
+from collections import Counter
+import datetime
+import random
+import time
+# from chat.config import global_config
+import sys
+sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径
+from src.common.database import Database # 使用正确的导入语法
+
+class Memory_graph:
+ def __init__(self):
+ self.G = nx.Graph() # 使用 networkx 的图结构
+ self.db = Database.get_instance()
+
+ def connect_dot(self, concept1, concept2):
+ self.G.add_edge(concept1, concept2)
+
+ def add_dot(self, concept, memory):
+ if concept in self.G:
+ # 如果节点已存在,将新记忆添加到现有列表中
+ if 'memory_items' in self.G.nodes[concept]:
+ if not isinstance(self.G.nodes[concept]['memory_items'], list):
+ # 如果当前不是列表,将其转换为列表
+ self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']]
+ self.G.nodes[concept]['memory_items'].append(memory)
+ else:
+ self.G.nodes[concept]['memory_items'] = [memory]
+ else:
+ # 如果是新节点,创建新的记忆列表
+ self.G.add_node(concept, memory_items=[memory])
+
+ def get_dot(self, concept):
+ # 检查节点是否存在于图中
+ if concept in self.G:
+ # 从图中获取节点数据
+ node_data = self.G.nodes[concept]
+ # print(node_data)
+ # 创建新的Memory_dot对象
+ return concept,node_data
+ return None
+
+ def get_related_item(self, topic, depth=1):
+ if topic not in self.G:
+ return [], []
+
+ first_layer_items = []
+ second_layer_items = []
+
+ # 获取相邻节点
+ neighbors = list(self.G.neighbors(topic))
+ # print(f"第一层: {topic}")
+
+ # 获取当前节点的记忆项
+ node_data = self.get_dot(topic)
+ if node_data:
+ concept, data = node_data
+ if 'memory_items' in data:
+ memory_items = data['memory_items']
+ if isinstance(memory_items, list):
+ first_layer_items.extend(memory_items)
+ else:
+ first_layer_items.append(memory_items)
+
+ # 只在depth=2时获取第二层记忆
+ if depth >= 2:
+ # 获取相邻节点的记忆项
+ for neighbor in neighbors:
+ # print(f"第二层: {neighbor}")
+ node_data = self.get_dot(neighbor)
+ if node_data:
+ concept, data = node_data
+ if 'memory_items' in data:
+ memory_items = data['memory_items']
+ if isinstance(memory_items, list):
+ second_layer_items.extend(memory_items)
+ else:
+ second_layer_items.append(memory_items)
+
+ return first_layer_items, second_layer_items
+
+ def store_memory(self):
+ for node in self.G.nodes():
+ dot_data = {
+ "concept": node
+ }
+ self.db.db.store_memory_dots.insert_one(dot_data)
+
+ @property
+ def dots(self):
+ # 返回所有节点对应的 Memory_dot 对象
+ return [self.get_dot(node) for node in self.G.nodes()]
+
+
+ def get_random_chat_from_db(self, length: int, timestamp: str):
+ # 从数据库中根据时间戳获取离其最近的聊天记录
+ chat_text = ''
+ closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出
+ print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")
+
+ if closest_record:
+ closest_time = closest_record['time']
+ group_id = closest_record['group_id'] # 获取groupid
+ # 获取该时间戳之后的length条消息,且groupid相同
+ chat_record = list(self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length))
+ for record in chat_record:
+ time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time'])))
+ chat_text += f'[{time_str}] {record["user_nickname"] or "用户" + str(record["user_id"])}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息
+ return chat_text
+
+ return [] # 如果没有找到记录,返回空列表
+
+ def save_graph_to_db(self):
+ # 清空现有的图数据
+ self.db.db.graph_data.delete_many({})
+ # 保存节点
+ for node in self.G.nodes(data=True):
+ node_data = {
+ 'concept': node[0],
+ 'memory_items': node[1].get('memory_items', []) # 默认为空列表
+ }
+ self.db.db.graph_data.nodes.insert_one(node_data)
+ # 保存边
+ for edge in self.G.edges():
+ edge_data = {
+ 'source': edge[0],
+ 'target': edge[1]
+ }
+ self.db.db.graph_data.edges.insert_one(edge_data)
+
+ def load_graph_from_db(self):
+ # 清空当前图
+ self.G.clear()
+ # 加载节点
+ nodes = self.db.db.graph_data.nodes.find()
+ for node in nodes:
+ memory_items = node.get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+ self.G.add_node(node['concept'], memory_items=memory_items)
+ # 加载边
+ edges = self.db.db.graph_data.edges.find()
+ for edge in edges:
+ self.G.add_edge(edge['source'], edge['target'])
+
+
+def main():
+ # 初始化数据库
+ Database.initialize(
+ "127.0.0.1",
+ 27017,
+ "MegBot"
+ )
+
+ memory_graph = Memory_graph()
+ # 创建LLM模型实例
+
+ memory_graph.load_graph_from_db()
+ # 展示两种不同的可视化方式
+ print("\n按连接数量着色的图谱:")
+ visualize_graph(memory_graph, color_by_memory=False)
+
+ print("\n按记忆数量着色的图谱:")
+ visualize_graph(memory_graph, color_by_memory=True)
+
+ # memory_graph.save_graph_to_db()
+
+ while True:
+ query = input("请输入新的查询概念(输入'退出'以结束):")
+ if query.lower() == '退出':
+ break
+ items_list = memory_graph.get_related_item(query)
+ if items_list:
+ # print(items_list)
+ for memory_item in items_list:
+ print(memory_item)
+ else:
+ print("未找到相关记忆。")
+
+
+def segment_text(text):
+ seg_text = list(jieba.cut(text))
+ return seg_text
+
+def find_topic(text, topic_num):
+ prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个话题,帮我列出来,用逗号隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要告诉我其他内容。'
+ return prompt
+
+def topic_what(text, topic):
+ prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好'
+ return prompt
+
+def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False):
+ # 设置中文字体
+ plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
+ plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
+
+ G = memory_graph.G
+
+ # 保存图到本地
+ nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式
+
+ # 根据连接条数或记忆数量设置节点颜色
+ node_colors = []
+ nodes = list(G.nodes()) # 获取图中实际的节点列表
+
+ if color_by_memory:
+ # 计算每个节点的记忆数量
+ memory_counts = []
+ for node in nodes:
+ memory_items = G.nodes[node].get('memory_items', [])
+ if isinstance(memory_items, list):
+ count = len(memory_items)
+ else:
+ count = 1 if memory_items else 0
+ memory_counts.append(count)
+ max_memories = max(memory_counts) if memory_counts else 1
+
+ for count in memory_counts:
+ # 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少
+ if max_memories > 0:
+ intensity = min(1.0, count / max_memories)
+ color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色
+ else:
+ color = (0, 0, 1) # 如果没有记忆,则为蓝色
+ node_colors.append(color)
+ else:
+ # 使用原来的连接数量着色方案
+ max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1
+ for node in nodes:
+ degree = G.degree(node)
+ if max_degree > 0:
+ red = min(1.0, degree / max_degree)
+ blue = 1.0 - red
+ color = (red, 0, blue)
+ else:
+ color = (0, 0, 1)
+ node_colors.append(color)
+
+ # 绘制图形
+ plt.figure(figsize=(12, 8))
+ pos = nx.spring_layout(G, k=1, iterations=50)
+ nx.draw(G, pos,
+ with_labels=True,
+ node_color=node_colors,
+ node_size=2000,
+ font_size=10,
+ font_family='SimHei',
+ font_weight='bold')
+
+ title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色')
+ plt.title(title, fontsize=16, fontfamily='SimHei')
+ plt.show()
+
+if __name__ == "__main__":
+ main()
+
+
diff --git a/src/plugins/memory_system/llm_module.py b/src/plugins/memory_system/llm_module.py
index a5516012..fa879afd 100644
--- a/src/plugins/memory_system/llm_module.py
+++ b/src/plugins/memory_system/llm_module.py
@@ -2,6 +2,7 @@ import os
import requests
from dotenv import load_dotenv
from typing import Tuple, Union
+import time
# 加载环境变量
load_dotenv()
@@ -32,16 +33,34 @@ class LLMModel:
# 发送请求到完整的chat/completions端点
api_url = f"{self.base_url.rstrip('/')}/chat/completions"
- try:
- response = requests.post(api_url, headers=headers, json=data)
- response.raise_for_status() # 检查响应状态
-
- result = response.json()
- if "choices" in result and len(result["choices"]) > 0:
- content = result["choices"][0]["message"]["content"]
- reasoning_content = result["choices"][0]["message"].get("reasoning_content", "")
- return content, reasoning_content # 返回内容和推理内容
- return "没有返回结果", "" # 返回两个值
-
- except requests.exceptions.RequestException as e:
- return f"请求失败: {str(e)}", "" # 返回错误信息和空字符串
\ No newline at end of file
+ max_retries = 3
+ base_wait_time = 15 # 基础等待时间(秒)
+
+ for retry in range(max_retries):
+ try:
+ response = requests.post(api_url, headers=headers, json=data)
+
+ if response.status_code == 429:
+ wait_time = base_wait_time * (2 ** retry) # 指数退避
+ print(f"遇到请求限制(429),等待{wait_time}秒后重试...")
+ time.sleep(wait_time)
+ continue
+
+ response.raise_for_status() # 检查其他响应状态
+
+ result = response.json()
+ if "choices" in result and len(result["choices"]) > 0:
+ content = result["choices"][0]["message"]["content"]
+ reasoning_content = result["choices"][0]["message"].get("reasoning_content", "")
+ return content, reasoning_content
+ return "没有返回结果", ""
+
+ except requests.exceptions.RequestException as e:
+ if retry < max_retries - 1: # 如果还有重试机会
+ wait_time = base_wait_time * (2 ** retry)
+ print(f"请求失败,等待{wait_time}秒后重试... 错误: {str(e)}")
+ time.sleep(wait_time)
+ else:
+ return f"请求失败: {str(e)}", ""
+
+ return "达到最大重试次数,请求仍然失败", ""
\ No newline at end of file
diff --git a/src/plugins/memory_system/llm_module_memory_make.py b/src/plugins/memory_system/llm_module_memory_make.py
new file mode 100644
index 00000000..1abfdb2c
--- /dev/null
+++ b/src/plugins/memory_system/llm_module_memory_make.py
@@ -0,0 +1,82 @@
+import os
+import requests
+from dotenv import load_dotenv
+from typing import Tuple, Union
+import time
+from ..chat.config import BotConfig
+
+# 获取当前文件的绝对路径
+current_dir = os.path.dirname(os.path.abspath(__file__))
+root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..'))
+env_path = os.path.join(root_dir, 'config', '.env')
+
+# 加载环境变量
+print(f"尝试从 {env_path} 加载环境变量配置")
+if os.path.exists(env_path):
+ load_dotenv(env_path)
+ print("成功加载环境变量配置")
+else:
+ print(f"环境变量配置文件不存在: {env_path}")
+
+class LLMModel:
+ # def __init__(self, model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", **kwargs):
+ def __init__(self, model_name="Pro/deepseek-ai/DeepSeek-V3", **kwargs):
+ self.model_name = model_name
+ self.params = kwargs
+ self.api_key = os.getenv("SILICONFLOW_KEY")
+ self.base_url = os.getenv("SILICONFLOW_BASE_URL")
+
+ if not self.api_key or not self.base_url:
+ raise ValueError("环境变量未正确加载:SILICONFLOW_KEY 或 SILICONFLOW_BASE_URL 未设置")
+
+ print(f"API URL: {self.base_url}") # 打印 base_url 用于调试
+
+ def generate_response(self, prompt: str) -> Tuple[str, str]:
+ """根据输入的提示生成模型的响应"""
+ headers = {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json"
+ }
+
+ # 构建请求体
+ data = {
+ "model": self.model_name,
+ "messages": [{"role": "user", "content": prompt}],
+ "temperature": 0.5,
+ **self.params
+ }
+
+ # 发送请求到完整的chat/completions端点
+ api_url = f"{self.base_url.rstrip('/')}/chat/completions"
+
+ max_retries = 3
+ base_wait_time = 15 # 基础等待时间(秒)
+
+ for retry in range(max_retries):
+ try:
+ response = requests.post(api_url, headers=headers, json=data)
+
+ if response.status_code == 429:
+ wait_time = base_wait_time * (2 ** retry) # 指数退避
+ print(f"遇到请求限制(429),等待{wait_time}秒后重试...")
+ time.sleep(wait_time)
+ continue
+
+ response.raise_for_status() # 检查其他响应状态
+
+ result = response.json()
+ if "choices" in result and len(result["choices"]) > 0:
+ content = result["choices"][0]["message"]["content"]
+ reasoning_content = result["choices"][0]["message"].get("reasoning_content", "")
+ return content, reasoning_content
+ return "没有返回结果", ""
+
+ except requests.exceptions.RequestException as e:
+ if retry < max_retries - 1: # 如果还有重试机会
+ wait_time = base_wait_time * (2 ** retry)
+ print(f"请求失败,等待{wait_time}秒后重试... 错误: {str(e)}")
+ time.sleep(wait_time)
+ else:
+ return f"请求失败: {str(e)}", ""
+
+ return "达到最大重试次数,请求仍然失败", ""
\ No newline at end of file
diff --git a/src/plugins/memory_system/memory.py b/src/plugins/memory_system/memory.py
index d8f644d7..af6aab39 100644
--- a/src/plugins/memory_system/memory.py
+++ b/src/plugins/memory_system/memory.py
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
-import sys
import jieba
-from llm_module import LLMModel
+from .llm_module import LLMModel
import networkx as nx
import matplotlib.pyplot as plt
import math
@@ -9,10 +8,10 @@ from collections import Counter
import datetime
import random
import time
-
+from ..chat.config import global_config
import sys
-sys.path.append("C:/GitHub/MegMeg-bot") # 添加项目根目录到 Python 路径
-from src.common.database import Database # 使用正确的导入语法
+from ...common.database import Database # 使用正确的导入语法
+from ..chat.utils import calculate_information_content, get_cloest_chat_from_db
class Memory_graph:
def __init__(self):
@@ -23,93 +22,128 @@ class Memory_graph:
self.G.add_edge(concept1, concept2)
def add_dot(self, concept, memory):
- self.G.add_node(concept, memory_items=memory)
+ if concept in self.G:
+ # 如果节点已存在,将新记忆添加到现有列表中
+ if 'memory_items' in self.G.nodes[concept]:
+ if not isinstance(self.G.nodes[concept]['memory_items'], list):
+ # 如果当前不是列表,将其转换为列表
+ self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']]
+ self.G.nodes[concept]['memory_items'].append(memory)
+ else:
+ self.G.nodes[concept]['memory_items'] = [memory]
+ else:
+ # 如果是新节点,创建新的记忆列表
+ self.G.add_node(concept, memory_items=[memory])
def get_dot(self, concept):
# 检查节点是否存在于图中
if concept in self.G:
# 从图中获取节点数据
node_data = self.G.nodes[concept]
- print(node_data)
+ # print(node_data)
# 创建新的Memory_dot对象
return concept,node_data
return None
def get_related_item(self, topic, depth=1):
if topic not in self.G:
- return set()
+ return [], []
- items_set = set()
+ first_layer_items = []
+ second_layer_items = []
+
# 获取相邻节点
neighbors = list(self.G.neighbors(topic))
- print(f"第一层: {topic}")
+ # print(f"第一层: {topic}")
# 获取当前节点的记忆项
node_data = self.get_dot(topic)
if node_data:
concept, data = node_data
if 'memory_items' in data:
- items_set.add(data['memory_items'])
+ memory_items = data['memory_items']
+ if isinstance(memory_items, list):
+ first_layer_items.extend(memory_items)
+ else:
+ first_layer_items.append(memory_items)
- # 获取相邻节点的记忆项
- for neighbor in neighbors:
- print(f"第二层: {neighbor}")
- node_data = self.get_dot(neighbor)
- if node_data:
- concept, data = node_data
- if 'memory_items' in data:
- items_set.add(data['memory_items'])
+ # 只在depth=2时获取第二层记忆
+ if depth >= 2:
+ # 获取相邻节点的记忆项
+ for neighbor in neighbors:
+ # print(f"第二层: {neighbor}")
+ node_data = self.get_dot(neighbor)
+ if node_data:
+ concept, data = node_data
+ if 'memory_items' in data:
+ memory_items = data['memory_items']
+ if isinstance(memory_items, list):
+ second_layer_items.extend(memory_items)
+ else:
+ second_layer_items.append(memory_items)
- return items_set
-
- def store_memory(self):
- for node in self.G.nodes():
- dot_data = {
- "concept": node
- }
- self.db.db.store_memory_dots.insert_one(dot_data)
+ return first_layer_items, second_layer_items
@property
def dots(self):
# 返回所有节点对应的 Memory_dot 对象
return [self.get_dot(node) for node in self.G.nodes()]
-
-
- def get_random_chat_from_db(self, length: int, timestamp: str):
- # 从数据库中根据时间戳获取离其最近的聊天记录
- chat_text = ''
- closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出
- print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")
-
- if closest_record:
- closest_time = closest_record['time']
- group_id = closest_record['group_id'] # 获取groupid
- # 获取该时间戳之后的length条消息,且groupid相同
- chat_record = list(self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length))
- for record in chat_record:
- time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time'])))
- chat_text += f'[{time_str}] {record["user_nickname"] or "用户" + str(record["user_id"])}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息
- return chat_text
-
- return [] # 如果没有找到记录,返回空列表
def save_graph_to_db(self):
- # 清空现有的图数据
- self.db.db.graph_data.delete_many({})
# 保存节点
for node in self.G.nodes(data=True):
- node_data = {
- 'concept': node[0],
- 'memory_items': node[1].get('memory_items', None)
- }
- self.db.db.graph_data.nodes.insert_one(node_data)
+ concept = node[0]
+ memory_items = node[1].get('memory_items', [])
+
+ # 查找是否存在同名节点
+ existing_node = self.db.db.graph_data.nodes.find_one({'concept': concept})
+ if existing_node:
+ # 如果存在,合并memory_items并去重
+ existing_items = existing_node.get('memory_items', [])
+ if not isinstance(existing_items, list):
+ existing_items = [existing_items] if existing_items else []
+
+ # 合并并去重
+ all_items = list(set(existing_items + memory_items))
+
+ # 更新节点
+ self.db.db.graph_data.nodes.update_one(
+ {'concept': concept},
+ {'$set': {'memory_items': all_items}}
+ )
+ else:
+ # 如果不存在,创建新节点
+ node_data = {
+ 'concept': concept,
+ 'memory_items': memory_items
+ }
+ self.db.db.graph_data.nodes.insert_one(node_data)
+
# 保存边
for edge in self.G.edges():
- edge_data = {
- 'source': edge[0],
- 'target': edge[1]
- }
- self.db.db.graph_data.edges.insert_one(edge_data)
+ source, target = edge
+
+ # 查找是否存在同样的边
+ existing_edge = self.db.db.graph_data.edges.find_one({
+ 'source': source,
+ 'target': target
+ })
+
+ if existing_edge:
+ # 如果存在,增加num属性
+ num = existing_edge.get('num', 1) + 1
+ self.db.db.graph_data.edges.update_one(
+ {'source': source, 'target': target},
+ {'$set': {'num': num}}
+ )
+ else:
+ # 如果不存在,创建新边
+ edge_data = {
+ 'source': source,
+ 'target': target,
+ 'num': 1
+ }
+ self.db.db.graph_data.edges.insert_one(edge_data)
def load_graph_from_db(self):
# 清空当前图
@@ -117,127 +151,99 @@ class Memory_graph:
# 加载节点
nodes = self.db.db.graph_data.nodes.find()
for node in nodes:
- self.G.add_node(node['concept'], memory_items=node['memory_items'])
+ memory_items = node.get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+ self.G.add_node(node['concept'], memory_items=memory_items)
# 加载边
edges = self.db.db.graph_data.edges.find()
for edge in edges:
- self.G.add_edge(edge['source'], edge['target'])
-
-def calculate_information_content(text):
-
- """计算文本的信息量(熵)"""
- # 统计字符频率
- char_count = Counter(text)
- total_chars = len(text)
-
- # 计算熵
- entropy = 0
- for count in char_count.values():
- probability = count / total_chars
- entropy -= probability * math.log2(probability)
-
- return entropy
-
-def main():
- # 初始化数据库
- Database.initialize(
- "127.0.0.1",
- 27017,
- "MegBot"
- )
-
- memory_graph = Memory_graph()
- # 创建LLM模型实例
- llm_model = LLMModel()
- llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5")
-
- # 使用当前时间戳进行测试
- current_timestamp = datetime.datetime.now().timestamp()
- chat_text = []
-
- chat_size =30
-
- for _ in range(60): # 循环10次
- random_time = current_timestamp - random.randint(1, 3600*3) # 随机时间
- print(f"随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}")
- chat_ = memory_graph.get_random_chat_from_db(chat_size, random_time)
- chat_text.append(chat_) # 拼接所有text
+ self.G.add_edge(edge['source'], edge['target'], num=edge.get('num', 1))
-
- for input_text in chat_text:
- print(input_text)
- first_memory = set()
- first_memory = memory_compress(input_text, llm_model_small, llm_model_small, rate=2.5)
+
+
+
+# 海马体
+class Hippocampus:
+ def __init__(self,memory_graph:Memory_graph):
+ self.memory_graph = memory_graph
+ self.llm_model = LLMModel()
+ self.llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5")
- #将记忆加入到图谱中
- for topic, memory in first_memory:
- topics = segment_text(topic)
- print(f"话题: {topic},节点: {topics}, 记忆: {memory}")
- for split_topic in topics:
- memory_graph.add_dot(split_topic,memory)
- for split_topic in topics:
- for other_split_topic in topics:
- if split_topic != other_split_topic:
- memory_graph.connect_dot(split_topic, other_split_topic)
+ def get_memory_sample(self,chat_size=20,time_frequency:dict={'near':2,'mid':4,'far':3}):
+ current_timestamp = datetime.datetime.now().timestamp()
+ chat_text = []
+ #短期:1h 中期:4h 长期:24h
+ for _ in range(time_frequency.get('near')): # 循环10次
+ random_time = current_timestamp - random.randint(1, 3600) # 随机时间
+ # print(f"获得 最近 随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}")
+ chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ chat_text.append(chat_)
+ for _ in range(time_frequency.get('mid')): # 循环10次
+ random_time = current_timestamp - random.randint(3600, 3600*4) # 随机时间
+ # print(f"获得 最近 随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}")
+ chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ chat_text.append(chat_)
+ for _ in range(time_frequency.get('far')): # 循环10次
+ random_time = current_timestamp - random.randint(3600*4, 3600*24) # 随机时间
+ # print(f"获得 最近 随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}")
+ chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+ chat_text.append(chat_)
+ return chat_text
- # memory_graph.store_memory()
- visualize_graph(memory_graph)
-
- memory_graph.save_graph_to_db()
- # memory_graph.load_graph_from_db()
-
- while True:
- query = input("请输入新的查询概念(输入'退出'以结束):")
- if query.lower() == '退出':
- break
- items_list = memory_graph.get_related_item(query)
- if items_list:
- # print(items_list)
- for memory_item in items_list:
- print(memory_item)
- else:
- print("未找到相关记忆。")
+ def build_memory(self,chat_size=12):
+ #最近消息获取频率
+ time_frequency = {'near':1,'mid':2,'far':2}
+ memory_sample = self.get_memory_sample(chat_size,time_frequency)
+ # print(f"\033[1;32m[记忆构建]\033[0m 获取记忆样本: {memory_sample}")
+
+
+ for i, input_text in enumerate(memory_sample, 1):
+ #加载进度可视化
+ progress = (i / len(memory_sample)) * 100
+ bar_length = 30
+ filled_length = int(bar_length * i // len(memory_sample))
+ bar = '█' * filled_length + '-' * (bar_length - filled_length)
+ print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})")
- while True:
- query = input("请输入问题:")
-
- if query.lower() == '退出':
- break
-
- topic_prompt = find_topic(query, 3)
- topic_response = llm_model.generate_response(topic_prompt)
+ # 生成压缩后记忆
+ first_memory = set()
+ first_memory = self.memory_compress(input_text, 2.5)
+ # 延时防止访问超频
+ # time.sleep(5)
+ #将记忆加入到图谱中
+ for topic, memory in first_memory:
+ topics = segment_text(topic)
+ print(f"\033[1;34m话题\033[0m: {topic},节点: {topics}, 记忆: {memory}")
+ for split_topic in topics:
+ self.memory_graph.add_dot(split_topic,memory)
+ for split_topic in topics:
+ for other_split_topic in topics:
+ if split_topic != other_split_topic:
+ self.memory_graph.connect_dot(split_topic, other_split_topic)
+
+ self.memory_graph.save_graph_to_db()
+
+ def memory_compress(self, input_text, rate=1):
+ information_content = calculate_information_content(input_text)
+ print(f"文本的信息量(熵): {information_content:.4f} bits")
+ topic_num = max(1, min(5, int(information_content * rate / 4)))
+ # print(topic_num)
+ topic_prompt = find_topic(input_text, topic_num)
+ topic_response = self.llm_model.generate_response(topic_prompt)
# 检查 topic_response 是否为元组
if isinstance(topic_response, tuple):
topics = topic_response[0].split(",") # 假设第一个元素是我们需要的字符串
else:
topics = topic_response.split(",")
- print(topics)
-
- for keyword in topics:
- items_list = memory_graph.get_related_item(keyword)
- if items_list:
- print(items_list)
-
-def memory_compress(input_text, llm_model, llm_model_small, rate=1):
- information_content = calculate_information_content(input_text)
- print(f"文本的信息量(熵): {information_content:.4f} bits")
- topic_num = max(1, min(5, int(information_content * rate / 4)))
- print(topic_num)
- topic_prompt = find_topic(input_text, topic_num)
- topic_response = llm_model.generate_response(topic_prompt)
- # 检查 topic_response 是否为元组
- if isinstance(topic_response, tuple):
- topics = topic_response[0].split(",") # 假设第一个元素是我们需要的字符串
- else:
- topics = topic_response.split(",")
- print(topics)
- compressed_memory = set()
- for topic in topics:
- topic_what_prompt = topic_what(input_text,topic)
- topic_what_response = llm_model_small.generate_response(topic_what_prompt)
- compressed_memory.add((topic.strip(), topic_what_response[0])) # 将话题和记忆作为元组存储
- return compressed_memory
+ # print(topics)
+ compressed_memory = set()
+ for topic in topics:
+ topic_what_prompt = topic_what(input_text,topic)
+ topic_what_response = self.llm_model_small.generate_response(topic_what_prompt)
+ compressed_memory.add((topic.strip(), topic_what_response[0])) # 将话题和记忆作为元组存储
+ return compressed_memory
def segment_text(text):
@@ -252,48 +258,21 @@ def topic_what(text, topic):
prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好'
return prompt
-def visualize_graph(memory_graph: Memory_graph):
- # 设置中文字体
- plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
- plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
-
- G = memory_graph.G
-
-
- # 保存图到本地
- nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式
-
- # 根据连接条数设置节点颜色
- node_colors = []
- nodes = list(G.nodes()) # 获取图中实际的节点列表
- max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1 # 获取最大连接数
-
- for node in nodes:
- degree = G.degree(node) # 获取节点的度
- # 计算颜色,使用渐变效果
- if max_degree > 0:
- red = min(1.0, degree / max_degree) # 红色分量随连接数增加而增加
- blue = 1.0 - red # 蓝色分量随连接数增加而减少
- color = (red, 0, blue)
- else:
- color = (0, 0, 1) # 如果没有连接,则为蓝色
- node_colors.append(color)
-
- # 绘制图形
- plt.figure(figsize=(12, 8))
- pos = nx.spring_layout(G, k=1, iterations=50) # 使用弹簧布局,调整参数使布局更合理
- nx.draw(G, pos,
- with_labels=True,
- node_color=node_colors,
- node_size=2000,
- font_size=10,
- font_family='SimHei', # 设置节点标签的字体
- font_weight='bold')
-
- plt.title('记忆图谱可视化', fontsize=16, fontfamily='SimHei')
- plt.show()
-
-if __name__ == "__main__":
- main()
+start_time = time.time()
+
+Database.initialize(
+ global_config.MONGODB_HOST,
+ global_config.MONGODB_PORT,
+ global_config.DATABASE_NAME
+)
+#创建记忆图
+memory_graph = Memory_graph()
+#加载数据库中存储的记忆图
+memory_graph.load_graph_from_db()
+#创建海马体
+hippocampus = Hippocampus(memory_graph)
+
+end_time = time.time()
+print(f"\033[32m[加载海马体耗时: {end_time - start_time:.2f} 秒]\033[0m")
\ No newline at end of file
diff --git a/src/plugins/memory_system/memory_make.py b/src/plugins/memory_system/memory_make.py
new file mode 100644
index 00000000..244838e2
--- /dev/null
+++ b/src/plugins/memory_system/memory_make.py
@@ -0,0 +1,428 @@
+# -*- coding: utf-8 -*-
+import sys
+import jieba
+import networkx as nx
+import matplotlib.pyplot as plt
+import math
+from collections import Counter
+import datetime
+import random
+import time
+import os
+from dotenv import load_dotenv
+# from chat.config import global_config
+sys.path.append("C:/GitHub/MaiMBot") # 添加项目根目录到 Python 路径
+from src.common.database import Database # 使用正确的导入语法
+from src.plugins.memory_system.llm_module import LLMModel
+
+class Memory_graph:
+ def __init__(self):
+ self.G = nx.Graph() # 使用 networkx 的图结构
+ self.db = Database.get_instance()
+
+ def connect_dot(self, concept1, concept2):
+ self.G.add_edge(concept1, concept2)
+
+ def add_dot(self, concept, memory):
+ if concept in self.G:
+ # 如果节点已存在,将新记忆添加到现有列表中
+ if 'memory_items' in self.G.nodes[concept]:
+ if not isinstance(self.G.nodes[concept]['memory_items'], list):
+ # 如果当前不是列表,将其转换为列表
+ self.G.nodes[concept]['memory_items'] = [self.G.nodes[concept]['memory_items']]
+ self.G.nodes[concept]['memory_items'].append(memory)
+ else:
+ self.G.nodes[concept]['memory_items'] = [memory]
+ else:
+ # 如果是新节点,创建新的记忆列表
+ self.G.add_node(concept, memory_items=[memory])
+
+ def get_dot(self, concept):
+ # 检查节点是否存在于图中
+ if concept in self.G:
+ # 从图中获取节点数据
+ node_data = self.G.nodes[concept]
+ # print(node_data)
+ # 创建新的Memory_dot对象
+ return concept,node_data
+ return None
+
+ def get_related_item(self, topic, depth=1):
+ if topic not in self.G:
+ return [], []
+
+ first_layer_items = []
+ second_layer_items = []
+
+ # 获取相邻节点
+ neighbors = list(self.G.neighbors(topic))
+ # print(f"第一层: {topic}")
+
+ # 获取当前节点的记忆项
+ node_data = self.get_dot(topic)
+ if node_data:
+ concept, data = node_data
+ if 'memory_items' in data:
+ memory_items = data['memory_items']
+ if isinstance(memory_items, list):
+ first_layer_items.extend(memory_items)
+ else:
+ first_layer_items.append(memory_items)
+
+ # 只在depth=2时获取第二层记忆
+ if depth >= 2:
+ # 获取相邻节点的记忆项
+ for neighbor in neighbors:
+ # print(f"第二层: {neighbor}")
+ node_data = self.get_dot(neighbor)
+ if node_data:
+ concept, data = node_data
+ if 'memory_items' in data:
+ memory_items = data['memory_items']
+ if isinstance(memory_items, list):
+ second_layer_items.extend(memory_items)
+ else:
+ second_layer_items.append(memory_items)
+
+ return first_layer_items, second_layer_items
+
+ def store_memory(self):
+ for node in self.G.nodes():
+ dot_data = {
+ "concept": node
+ }
+ self.db.db.store_memory_dots.insert_one(dot_data)
+
+ @property
+ def dots(self):
+ # 返回所有节点对应的 Memory_dot 对象
+ return [self.get_dot(node) for node in self.G.nodes()]
+
+
+ def get_random_chat_from_db(self, length: int, timestamp: str):
+ # 从数据库中根据时间戳获取离其最近的聊天记录
+ chat_text = ''
+ closest_record = self.db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) # 调试输出
+ print(f"距离time最近的消息时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(closest_record['time'])))}")
+
+ if closest_record:
+ closest_time = closest_record['time']
+ group_id = closest_record['group_id'] # 获取groupid
+ # 获取该时间戳之后的length条消息,且groupid相同
+ chat_record = list(self.db.db.messages.find({"time": {"$gt": closest_time}, "group_id": group_id}).sort('time', 1).limit(length))
+ for record in chat_record:
+ time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(record['time'])))
+ chat_text += f'[{time_str}] {record["user_nickname"] or "用户" + str(record["user_id"])}: {record["processed_plain_text"]}\n' # 添加发送者和时间信息
+ return chat_text
+
+ return [] # 如果没有找到记录,返回空列表
+
+ def save_graph_to_db(self):
+ # 保存节点
+ for node in self.G.nodes(data=True):
+ concept = node[0]
+ memory_items = node[1].get('memory_items', [])
+
+ # 查找是否存在同名节点
+ existing_node = self.db.db.graph_data.nodes.find_one({'concept': concept})
+ if existing_node:
+ # 如果存在,合并memory_items并去重
+ existing_items = existing_node.get('memory_items', [])
+ if not isinstance(existing_items, list):
+ existing_items = [existing_items] if existing_items else []
+
+ # 合并并去重
+ all_items = list(set(existing_items + memory_items))
+
+ # 更新节点
+ self.db.db.graph_data.nodes.update_one(
+ {'concept': concept},
+ {'$set': {'memory_items': all_items}}
+ )
+ else:
+ # 如果不存在,创建新节点
+ node_data = {
+ 'concept': concept,
+ 'memory_items': memory_items
+ }
+ self.db.db.graph_data.nodes.insert_one(node_data)
+
+ # 保存边
+ for edge in self.G.edges():
+ source, target = edge
+
+ # 查找是否存在同样的边
+ existing_edge = self.db.db.graph_data.edges.find_one({
+ 'source': source,
+ 'target': target
+ })
+
+ if existing_edge:
+ # 如果存在,增加num属性
+ num = existing_edge.get('num', 1) + 1
+ self.db.db.graph_data.edges.update_one(
+ {'source': source, 'target': target},
+ {'$set': {'num': num}}
+ )
+ else:
+ # 如果不存在,创建新边
+ edge_data = {
+ 'source': source,
+ 'target': target,
+ 'num': 1
+ }
+ self.db.db.graph_data.edges.insert_one(edge_data)
+
+ def load_graph_from_db(self):
+ # 清空当前图
+ self.G.clear()
+ # 加载节点
+ nodes = self.db.db.graph_data.nodes.find()
+ for node in nodes:
+ memory_items = node.get('memory_items', [])
+ if not isinstance(memory_items, list):
+ memory_items = [memory_items] if memory_items else []
+ self.G.add_node(node['concept'], memory_items=memory_items)
+ # 加载边
+ edges = self.db.db.graph_data.edges.find()
+ for edge in edges:
+ self.G.add_edge(edge['source'], edge['target'], num=edge.get('num', 1))
+
+def calculate_information_content(text):
+
+ """计算文本的信息量(熵)"""
+ # 统计字符频率
+ char_count = Counter(text)
+ total_chars = len(text)
+
+ # 计算熵
+ entropy = 0
+ for count in char_count.values():
+ probability = count / total_chars
+ entropy -= probability * math.log2(probability)
+
+ return entropy
+
+
+# Database.initialize(
+# global_config.MONGODB_HOST,
+# global_config.MONGODB_PORT,
+# global_config.DATABASE_NAME
+# )
+# memory_graph = Memory_graph()
+
+# llm_model = LLMModel()
+# llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5")
+
+# memory_graph.load_graph_from_db()
+
+
+
+def main():
+ # 获取当前文件的绝对路径
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ root_dir = os.path.abspath(os.path.join(current_dir, '..', '..', '..'))
+ env_path = os.path.join(root_dir, 'config', '.env')
+
+ # 加载环境变量
+ print(f"尝试从 {env_path} 加载环境变量配置")
+ if os.path.exists(env_path):
+ load_dotenv(env_path)
+ print("成功加载环境变量配置")
+ else:
+ print(f"环境变量配置文件不存在: {env_path}")
+
+ # 初始化数据库
+ Database.initialize(
+ "127.0.0.1",
+ 27017,
+ "MegBot"
+ )
+
+ memory_graph = Memory_graph()
+ # 创建LLM模型实例
+ llm_model = LLMModel()
+ llm_model_small = LLMModel(model_name="deepseek-ai/DeepSeek-V2.5")
+
+ # 使用当前时间戳进行测试
+ current_timestamp = datetime.datetime.now().timestamp()
+ chat_text = []
+
+ chat_size =25
+
+ for _ in range(30): # 循环10次
+ random_time = current_timestamp - random.randint(1, 3600*10) # 随机时间
+ print(f"随机时间戳对应的时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(random_time))}")
+ chat_ = memory_graph.get_random_chat_from_db(chat_size, random_time)
+ chat_text.append(chat_) # 拼接所有text
+ # time.sleep(1)
+
+
+
+ for i, input_text in enumerate(chat_text, 1):
+
+ progress = (i / len(chat_text)) * 100
+ bar_length = 30
+ filled_length = int(bar_length * i // len(chat_text))
+ bar = '█' * filled_length + '-' * (bar_length - filled_length)
+ print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(chat_text)})")
+
+ # print(input_text)
+ first_memory = set()
+ first_memory = memory_compress(input_text, llm_model_small, llm_model_small, rate=2.5)
+ # time.sleep(5)
+
+ #将记忆加入到图谱中
+ for topic, memory in first_memory:
+ topics = segment_text(topic)
+ print(f"\033[1;34m话题\033[0m: {topic},节点: {topics}, 记忆: {memory}")
+ for split_topic in topics:
+ memory_graph.add_dot(split_topic,memory)
+ for split_topic in topics:
+ for other_split_topic in topics:
+ if split_topic != other_split_topic:
+ memory_graph.connect_dot(split_topic, other_split_topic)
+
+ # memory_graph.store_memory()
+
+ # 展示两种不同的可视化方式
+ print("\n按连接数量着色的图谱:")
+ visualize_graph(memory_graph, color_by_memory=False)
+
+ print("\n按记忆数量着色的图谱:")
+ visualize_graph(memory_graph, color_by_memory=True)
+
+ memory_graph.save_graph_to_db()
+ # memory_graph.load_graph_from_db()
+
+ while True:
+ query = input("请输入新的查询概念(输入'退出'以结束):")
+ if query.lower() == '退出':
+ break
+ items_list = memory_graph.get_related_item(query)
+ if items_list:
+ # print(items_list)
+ for memory_item in items_list:
+ print(memory_item)
+ else:
+ print("未找到相关记忆。")
+
+ while True:
+ query = input("请输入问题:")
+
+ if query.lower() == '退出':
+ break
+
+ topic_prompt = find_topic(query, 3)
+ topic_response = llm_model.generate_response(topic_prompt)
+ # 检查 topic_response 是否为元组
+ if isinstance(topic_response, tuple):
+ topics = topic_response[0].split(",") # 假设第一个元素是我们需要的字符串
+ else:
+ topics = topic_response.split(",")
+ print(topics)
+
+ for keyword in topics:
+ items_list = memory_graph.get_related_item(keyword)
+ if items_list:
+ print(items_list)
+
+def memory_compress(input_text, llm_model, llm_model_small, rate=1):
+ information_content = calculate_information_content(input_text)
+ print(f"文本的信息量(熵): {information_content:.4f} bits")
+ topic_num = max(1, min(5, int(information_content * rate / 4)))
+ print(topic_num)
+ topic_prompt = find_topic(input_text, topic_num)
+ topic_response = llm_model.generate_response(topic_prompt)
+ # 检查 topic_response 是否为元组
+ if isinstance(topic_response, tuple):
+ topics = topic_response[0].split(",") # 假设第一个元素是我们需要的字符串
+ else:
+ topics = topic_response.split(",")
+ print(topics)
+ compressed_memory = set()
+ for topic in topics:
+ topic_what_prompt = topic_what(input_text,topic)
+ topic_what_response = llm_model_small.generate_response(topic_what_prompt)
+ compressed_memory.add((topic.strip(), topic_what_response[0])) # 将话题和记忆作为元组存储
+ return compressed_memory
+
+
+def segment_text(text):
+ seg_text = list(jieba.cut(text))
+ return seg_text
+
+def find_topic(text, topic_num):
+ prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个话题,帮我列出来,用逗号隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要告诉我其他内容。'
+ return prompt
+
+def topic_what(text, topic):
+ prompt = f'这是一段文字:{text}。我想知道这记忆里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物。只输出这句话就好'
+ return prompt
+
+def visualize_graph(memory_graph: Memory_graph, color_by_memory: bool = False):
+ # 设置中文字体
+ plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
+ plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
+
+ G = memory_graph.G
+
+ # 保存图到本地
+ nx.write_gml(G, "memory_graph.gml") # 保存为 GML 格式
+
+ # 根据连接条数或记忆数量设置节点颜色
+ node_colors = []
+ nodes = list(G.nodes()) # 获取图中实际的节点列表
+
+ if color_by_memory:
+ # 计算每个节点的记忆数量
+ memory_counts = []
+ for node in nodes:
+ memory_items = G.nodes[node].get('memory_items', [])
+ if isinstance(memory_items, list):
+ count = len(memory_items)
+ else:
+ count = 1 if memory_items else 0
+ memory_counts.append(count)
+ max_memories = max(memory_counts) if memory_counts else 1
+
+ for count in memory_counts:
+ # 使用不同的颜色方案:红色表示记忆多,蓝色表示记忆少
+ if max_memories > 0:
+ intensity = min(1.0, count / max_memories)
+ color = (intensity, 0, 1.0 - intensity) # 从蓝色渐变到红色
+ else:
+ color = (0, 0, 1) # 如果没有记忆,则为蓝色
+ node_colors.append(color)
+ else:
+ # 使用原来的连接数量着色方案
+ max_degree = max(G.degree(), key=lambda x: x[1])[1] if G.degree() else 1
+ for node in nodes:
+ degree = G.degree(node)
+ if max_degree > 0:
+ red = min(1.0, degree / max_degree)
+ blue = 1.0 - red
+ color = (red, 0, blue)
+ else:
+ color = (0, 0, 1)
+ node_colors.append(color)
+
+ # 绘制图形
+ plt.figure(figsize=(12, 8))
+ pos = nx.spring_layout(G, k=1, iterations=50)
+ nx.draw(G, pos,
+ with_labels=True,
+ node_color=node_colors,
+ node_size=2000,
+ font_size=10,
+ font_family='SimHei',
+ font_weight='bold')
+
+ title = '记忆图谱可视化 - ' + ('按记忆数量着色' if color_by_memory else '按连接数量着色')
+ plt.title(title, fontsize=16, fontfamily='SimHei')
+ plt.show()
+
+if __name__ == "__main__":
+ main()
+
+
diff --git a/src/plugins/schedule/schedule_generator.py b/src/plugins/schedule/schedule_generator.py
index b1d07214..d1778d87 100644
--- a/src/plugins/schedule/schedule_generator.py
+++ b/src/plugins/schedule/schedule_generator.py
@@ -62,7 +62,7 @@ class ScheduleGenerator:
elif read_only == False:
print(f"{date_str}的日程不存在,准备生成新的日程。")
- prompt = f"""我是{global_config.BOT_NICKNAME},一个地质学大二女大学生,喜欢刷qq,贴吧,知乎和小红书,请为我生成{date_str}({weekday})的日程安排,包括:
+ prompt = f"""我是{global_config.BOT_NICKNAME},一个曾经学习地质,现在学习心理学和脑科学的女大学生,喜欢刷qq,贴吧,知乎和小红书,请为我生成{date_str}({weekday})的日程安排,包括:
1. 早上的学习和工作安排
2. 下午的活动和任务
3. 晚上的计划和休息时间