feat:记忆系统重出江湖,移除了即时记忆和定期记忆
This commit is contained in:
@@ -385,12 +385,6 @@ class HeartFChatting:
|
||||
async with global_prompt_manager.async_message_scope(self.chat_stream.context.get_template_name()):
|
||||
await self.expression_learner.trigger_learning_for_chat()
|
||||
|
||||
# # 记忆构建:为当前chat_id构建记忆
|
||||
# try:
|
||||
# await hippocampus_manager.build_memory_for_chat(self.stream_id)
|
||||
# except Exception as e:
|
||||
# logger.error(f"{self.log_prefix} 记忆构建失败: {e}")
|
||||
|
||||
available_actions: Dict[str, ActionInfo] = {}
|
||||
if random.random() > self.focus_value_control.get_current_focus_value() and mode == ChatMode.FOCUS:
|
||||
# 如果激活度没有激活,并且聊天活跃度低,有可能不进行plan,相当于不在电脑前,不进行认真思考
|
||||
@@ -445,8 +439,8 @@ class HeartFChatting:
|
||||
available_actions=available_actions,
|
||||
)
|
||||
|
||||
for action in action_to_use_info:
|
||||
print(action.action_type)
|
||||
# for action in action_to_use_info:
|
||||
# print(action.action_type)
|
||||
|
||||
# 3. 并行执行所有动作
|
||||
action_tasks = [
|
||||
|
||||
@@ -18,6 +18,7 @@ from src.config.config import global_config, model_config
|
||||
from src.common.data_models.database_data_model import DatabaseMessages
|
||||
from src.common.database.database_model import GraphNodes, GraphEdges # Peewee Models导入
|
||||
from src.common.logger import get_logger
|
||||
from src.chat.utils.utils import cut_key_words
|
||||
from src.chat.utils.chat_message_builder import (
|
||||
build_readable_messages,
|
||||
get_raw_msg_by_timestamp_with_chat_inclusive,
|
||||
@@ -98,19 +99,23 @@ class MemoryGraph:
|
||||
current_weight = self.G.nodes[concept].get("weight", 0.0)
|
||||
self.G.nodes[concept]["weight"] = current_weight + 1.0
|
||||
logger.debug(f"节点 {concept} 记忆整合成功,权重增加到 {current_weight + 1.0}")
|
||||
logger.info(f"节点 {concept} 记忆内容已更新:{integrated_memory}")
|
||||
except Exception as e:
|
||||
logger.error(f"LLM整合记忆失败: {e}")
|
||||
# 降级到简单连接
|
||||
new_memory_str = f"{existing_memory} | {memory}"
|
||||
self.G.nodes[concept]["memory_items"] = new_memory_str
|
||||
logger.info(f"节点 {concept} 记忆内容已简单拼接并更新:{new_memory_str}")
|
||||
else:
|
||||
new_memory_str = str(memory)
|
||||
self.G.nodes[concept]["memory_items"] = new_memory_str
|
||||
logger.info(f"节点 {concept} 记忆内容已直接更新:{new_memory_str}")
|
||||
else:
|
||||
self.G.nodes[concept]["memory_items"] = str(memory)
|
||||
# 如果节点存在但没有memory_items,说明是第一次添加memory,设置created_time
|
||||
if "created_time" not in self.G.nodes[concept]:
|
||||
self.G.nodes[concept]["created_time"] = current_time
|
||||
logger.info(f"节点 {concept} 创建新记忆:{str(memory)}")
|
||||
# 更新最后修改时间
|
||||
self.G.nodes[concept]["last_modified"] = current_time
|
||||
else:
|
||||
@@ -122,6 +127,7 @@ class MemoryGraph:
|
||||
created_time=current_time, # 添加创建时间
|
||||
last_modified=current_time,
|
||||
) # 添加最后修改时间
|
||||
logger.info(f"新节点 {concept} 已添加,记忆内容已写入:{str(memory)}")
|
||||
|
||||
def get_dot(self, concept):
|
||||
# 检查节点是否存在于图中
|
||||
@@ -402,9 +408,7 @@ class Hippocampus:
|
||||
text_length = len(text)
|
||||
topic_num: int | list[int] = 0
|
||||
|
||||
words = jieba.cut(text)
|
||||
keywords_lite = [word for word in words if len(word) > 1]
|
||||
keywords_lite = list(set(keywords_lite))
|
||||
keywords_lite = cut_key_words(text)
|
||||
if keywords_lite:
|
||||
logger.debug(f"提取关键词极简版: {keywords_lite}")
|
||||
|
||||
@@ -1159,6 +1163,131 @@ class ParahippocampalGyrus:
|
||||
|
||||
return compressed_memory, similar_topics_dict
|
||||
|
||||
def get_similar_topics_from_keywords(
|
||||
self,
|
||||
keywords: list[str] | str,
|
||||
top_k: int = 3,
|
||||
threshold: float = 0.7,
|
||||
) -> dict[str, list[tuple[str, float]]]:
|
||||
"""基于输入的关键词,返回每个关键词对应的相似主题列表。
|
||||
|
||||
Args:
|
||||
keywords: 关键词列表或以逗号/空格/顿号分隔的字符串。
|
||||
top_k: 每个关键词返回的相似主题数量上限。
|
||||
threshold: 相似度阈值,低于该值的主题将被过滤。
|
||||
|
||||
Returns:
|
||||
dict[str, list[tuple[str, float]]]: {keyword: [(topic, similarity), ...]}
|
||||
"""
|
||||
# 规范化输入为列表[str]
|
||||
if isinstance(keywords, str):
|
||||
# 支持中英文逗号、顿号、空格分隔
|
||||
parts = (
|
||||
keywords.replace(",", ",").replace("、", ",").replace(" ", ",").strip(", ")
|
||||
)
|
||||
keyword_list = [p.strip() for p in parts.split(",") if p.strip()]
|
||||
else:
|
||||
keyword_list = [k.strip() for k in keywords if isinstance(k, str) and k.strip()]
|
||||
|
||||
if not keyword_list:
|
||||
return {}
|
||||
|
||||
existing_topics = list(self.memory_graph.G.nodes())
|
||||
result: dict[str, list[tuple[str, float]]] = {}
|
||||
|
||||
for kw in keyword_list:
|
||||
kw_words = set(jieba.cut(kw))
|
||||
similar_topics: list[tuple[str, float]] = []
|
||||
|
||||
for topic in existing_topics:
|
||||
topic_words = set(jieba.cut(topic))
|
||||
all_words = kw_words | topic_words
|
||||
if not all_words:
|
||||
continue
|
||||
v1 = [1 if w in kw_words else 0 for w in all_words]
|
||||
v2 = [1 if w in topic_words else 0 for w in all_words]
|
||||
sim = cosine_similarity(v1, v2)
|
||||
if sim >= threshold:
|
||||
similar_topics.append((topic, sim))
|
||||
|
||||
similar_topics.sort(key=lambda x: x[1], reverse=True)
|
||||
result[kw] = similar_topics[:top_k]
|
||||
|
||||
return result
|
||||
|
||||
async def add_memory_with_similar(
    self,
    memory_item: str,
    similar_topics_dict: dict[str, list[tuple[str, float]]],
) -> bool:
    """Write one memory under several topics, link similar topics, then sync to the DB.

    Each key of ``similar_topics_dict`` becomes (or updates) a topic node that
    receives ``memory_item``. For every ``(similar_topic, similarity)`` pair an
    edge is added with ``strength = int(similarity * 10)``; pairs that are
    malformed, self-referential, or yield a non-positive strength are skipped.

    Args:
        memory_item: Memory text stored on every topic node.
        similar_topics_dict: ``{topic: [(similar_topic, similarity), ...]}``.

    Returns:
        True when the graph was updated and the database sync completed,
        False on invalid input or on any error (which is logged).
    """
    try:
        if not memory_item or not isinstance(memory_item, str):
            return False
        if not similar_topics_dict or not isinstance(similar_topics_dict, dict):
            return False

        current_time = time.time()

        for topic, similar_list in similar_topics_dict.items():
            if not topic or not isinstance(topic, str):
                continue

            await self.hippocampus.memory_graph.add_dot(topic, memory_item, self.hippocampus)

            if not isinstance(similar_list, list):
                continue

            for item in similar_list:
                try:
                    similar_topic, similarity = item
                except (TypeError, ValueError):
                    # Not a 2-item pair.
                    continue
                if not isinstance(similar_topic, str) or topic == similar_topic:
                    continue

                # A bad similarity value must only drop this one edge; letting it
                # escape would abort the whole write before the DB sync.
                try:
                    strength = int(max(0.0, float(similarity)) * 10)
                except (TypeError, ValueError, OverflowError):
                    continue
                if strength <= 0:
                    continue

                graph = self.memory_graph.G
                if similar_topic not in graph:
                    # Create the neighbour explicitly so it carries the same
                    # attributes as other nodes instead of being a bare node.
                    graph.add_node(
                        similar_topic,
                        memory_items="",
                        weight=1.0,
                        created_time=current_time,
                        last_modified=current_time,
                    )
                graph.add_edge(
                    topic,
                    similar_topic,
                    strength=strength,
                    created_time=current_time,
                    last_modified=current_time,
                )

        await self.hippocampus.entorhinal_cortex.sync_memory_to_db()
        return True
    except Exception as e:
        logger.error(f"添加记忆节点失败: {e}")
        return False
|
||||
|
||||
async def operation_forget_topic(self, percentage=0.005):
|
||||
start_time = time.time()
|
||||
logger.info("[遗忘] 开始检查数据库...")
|
||||
@@ -1325,7 +1454,6 @@ class HippocampusManager:
|
||||
logger.info(f"""
|
||||
--------------------------------
|
||||
记忆系统参数配置:
|
||||
构建频率: {global_config.memory.memory_build_frequency}秒|压缩率: {global_config.memory.memory_compress_rate}
|
||||
遗忘间隔: {global_config.memory.forget_memory_interval}秒|遗忘比例: {global_config.memory.memory_forget_percentage}|遗忘: {global_config.memory.memory_forget_time}小时之后
|
||||
记忆图统计信息: 节点数量: {node_count}, 连接数量: {edge_count}
|
||||
--------------------------------""") # noqa: E501
|
||||
@@ -1343,61 +1471,6 @@ class HippocampusManager:
|
||||
raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法")
|
||||
return await self._hippocampus.parahippocampal_gyrus.operation_forget_topic(percentage)
|
||||
|
||||
async def build_memory_for_chat(self, chat_id: str):
    """Build graph memories for one chat when its segment manager says it is due.

    Raises:
        RuntimeError: If the manager has not been initialised.

    Returns:
        True when memories were built and synced to the database; False when
        no build was due, the random gate skipped it, or an error was logged.
    """
    if not self._initialized:
        raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法")

    try:
        logger.info(f"为 {chat_id} 构建记忆")
        if not memory_segment_manager.check_and_build_memory_for_chat(chat_id):
            return False

        logger.info(f"为 {chat_id} 构建记忆,需要构建记忆")
        messages = memory_segment_manager.get_messages_for_memory_build(chat_id, 50)

        # Random gate scaled by the configured build frequency.
        build_probability = 0.3 * global_config.memory.memory_build_frequency
        if not (messages and random.random() < build_probability):
            return False

        logger.info(f"为 {chat_id} 构建记忆,消息数量: {len(messages)}")

        # Compress the messages into (topic, memory) pairs plus similar-topic hints.
        compress_result = await self._hippocampus.parahippocampal_gyrus.memory_compress(
            messages, global_config.memory.memory_compress_rate
        )
        compressed_memory, similar_topics_dict = compress_result

        current_time = time.time()
        for topic, memory in compressed_memory:
            await self._hippocampus.memory_graph.add_dot(topic, memory, self._hippocampus)

            if topic not in similar_topics_dict:
                continue
            # Link the topic to each similar topic; strength is similarity scaled to an int.
            for similar_topic, similarity in similar_topics_dict[topic]:
                if topic == similar_topic:
                    continue
                self._hippocampus.memory_graph.G.add_edge(
                    topic,
                    similar_topic,
                    strength=int(similarity * 10),
                    created_time=current_time,
                    last_modified=current_time,
                )

        await self._hippocampus.entorhinal_cortex.sync_memory_to_db()
        logger.info(f"为 {chat_id} 构建记忆完成")
        return True

    except Exception as e:
        logger.error(f"为 {chat_id} 构建记忆失败: {e}")
        return False
|
||||
|
||||
async def get_memory_from_topic(
|
||||
self, valid_keywords: list[str], max_memory_num: int = 3, max_memory_length: int = 2, max_depth: int = 3
|
||||
) -> list:
|
||||
@@ -1441,89 +1514,3 @@ class HippocampusManager:
|
||||
|
||||
# 创建全局实例
|
||||
hippocampus_manager = HippocampusManager()
|
||||
|
||||
|
||||
# 在Hippocampus类中添加新的记忆构建管理器
|
||||
class MemoryBuilder:
    """Per-chat trigger state for memory building.

    Tracks when messages of one ``chat_id`` were last consumed and decides,
    from elapsed time and message count, whether a new build should run.
    """

    def __init__(self, chat_id: str):
        self.chat_id = chat_id
        # Start of the next message window; advanced whenever messages are handed out.
        self.last_update_time: float = time.time()
        self.last_processed_time: float = 0.0

    def should_trigger_memory_build(self) -> bool:
        """Return True when enough time has passed and enough messages have arrived."""
        now = time.time()
        elapsed = now - self.last_update_time

        # Both thresholds shrink as the configured build frequency grows.
        if elapsed < 600 / global_config.memory.memory_build_frequency:
            return False

        recent = get_raw_msg_by_timestamp_with_chat_inclusive(
            chat_id=self.chat_id,
            timestamp_start=self.last_update_time,
            timestamp_end=now,
        )
        logger.info(f"最近消息数量: {len(recent)},间隔时间: {elapsed}")

        too_few = not recent or len(recent) < 30 / global_config.memory.memory_build_frequency
        return not too_few

    def get_messages_for_memory_build(self, threshold: int = 25) -> "List[DatabaseMessages]":
        """Fetch up to ``threshold`` messages since the last window and advance the window.

        The window markers only move when at least one message was fetched.
        """
        window_end = time.time()
        fetched = get_raw_msg_by_timestamp_with_chat_inclusive(
            chat_id=self.chat_id,
            timestamp_start=self.last_update_time,
            timestamp_end=window_end,
            limit=threshold,
        )
        if not fetched:
            return []

        self.last_processed_time = window_end
        self.last_update_time = window_end
        return fetched
|
||||
|
||||
|
||||
class MemorySegmentManager:
    """Registry of ``MemoryBuilder`` instances keyed by chat id."""

    def __init__(self):
        self.builders: Dict[str, MemoryBuilder] = {}

    def get_or_create_builder(self, chat_id: str) -> "MemoryBuilder":
        """Return the builder for ``chat_id``, creating and registering it on first use."""
        try:
            return self.builders[chat_id]
        except KeyError:
            created = MemoryBuilder(chat_id)
            self.builders[chat_id] = created
            return created

    def check_and_build_memory_for_chat(self, chat_id: str) -> bool:
        """Return True when the chat's builder reports that a memory build is due."""
        return self.get_or_create_builder(chat_id).should_trigger_memory_build()

    def get_messages_for_memory_build(self, chat_id: str, threshold: int = 25) -> "List[DatabaseMessages]":
        """Return the pending messages of a known chat, or an empty list for an unknown one."""
        if chat_id in self.builders:
            return self.builders[chat_id].get_messages_for_memory_build(threshold)
        return []
|
||||
|
||||
|
||||
# 创建全局实例
|
||||
memory_segment_manager = MemorySegmentManager()
|
||||
|
||||
@@ -1,254 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import time
|
||||
import re
|
||||
import json
|
||||
import ast
|
||||
import traceback
|
||||
|
||||
from json_repair import repair_json
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.common.logger import get_logger
|
||||
from src.common.database.database_model import Memory # Peewee Models导入
|
||||
from src.config.config import model_config, global_config
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class MemoryItem:
    """In-memory record of one instant memory before it is persisted."""

    def __init__(self, memory_id: str, chat_id: str, memory_text: str, keywords: list[str]):
        # Read the clock once so a fresh item has identical creation and view times.
        now = time.time()
        self.memory_id = memory_id
        self.chat_id = chat_id
        self.memory_text: str = memory_text
        self.keywords: list[str] = keywords
        self.create_time: float = now
        self.last_view_time: float = now
|
||||
|
||||
|
||||
class MemoryManager:
    """Placeholder manager; it currently keeps no state."""

    def __init__(self):
        # Intentionally empty: no attributes are set on the instance.
        return None
|
||||
|
||||
|
||||
class InstantMemory:
    """LLM-assisted short-term memory for a single chat.

    Uses a summary model to decide whether text is worth remembering, to
    condense it into a memory with keywords, and to turn a message into a
    keyword/time query against the stored ``Memory`` rows of this chat.
    """

    def __init__(self, chat_id):
        self.chat_id = chat_id
        self.last_view_time = time.time()
        self.summary_model = LLMRequest(
            model_set=model_config.model_task_config.utils,
            request_type="memory.summary",
        )

    @staticmethod
    def _split_keywords(keywords) -> list:
        """Normalise the model's ``keywords`` field (slash-separated string or list) to a list."""
        if isinstance(keywords, str):
            return [k.strip() for k in keywords.split("/") if k.strip()]
        if isinstance(keywords, list):
            return keywords
        return []

    @staticmethod
    def _parse_stored_keywords(raw) -> list:
        """Decode the keywords of a stored memory row into a list of stripped strings.

        Accepts an actual list or the string repr of a list. Anything empty,
        malformed or not a list yields ``[]`` instead of raising, so one bad
        row cannot abort a whole lookup.
        """
        if isinstance(raw, list):
            parsed = raw
        elif isinstance(raw, str) and raw.strip():
            try:
                parsed = ast.literal_eval(raw)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                return []
        else:
            return []
        if not isinstance(parsed, list):
            return []
        return [str(k).strip() for k in parsed if str(k).strip()]

    async def if_need_build(self, text: str):
        """Ask the model whether ``text`` is worth remembering; False on any error."""
        prompt = f"""
请判断以下内容中是否有值得记忆的信息,如果有,请输出1,否则输出0
{text}
请只输出1或0就好
"""

        try:
            response, _ = await self.summary_model.generate_response_async(prompt, temperature=0.5)
            if global_config.debug.show_prompt:
                print(prompt)
                print(response)

            return "1" in response
        except Exception as e:
            logger.error(f"判断是否需要记忆出现错误:{str(e)} {traceback.format_exc()}")
            return False

    async def build_memory(self, text):
        """Summarise ``text`` into ``{"memory_text": str, "keywords": list}``.

        Returns None when the model gives no response or its output cannot be parsed.
        """
        prompt = f"""
以下内容中存在值得记忆的信息,请你从中总结出一段值得记忆的信息,并输出
{text}
请以json格式输出一段概括的记忆内容和关键词
{{
"memory_text": "记忆内容",
"keywords": "关键词,用/划分"
}}
"""
        try:
            response, _ = await self.summary_model.generate_response_async(prompt, temperature=0.5)
            if not response:
                return None
            try:
                result = json.loads(repair_json(response))
                memory_text = result.get("memory_text", "")
                keywords_list = self._split_keywords(result.get("keywords", ""))
                return {"memory_text": memory_text, "keywords": keywords_list}
            except Exception as parse_e:
                logger.error(f"解析记忆json失败:{str(parse_e)} {traceback.format_exc()}")
                return None
        except Exception as e:
            logger.error(f"构建记忆出现错误:{str(e)} {traceback.format_exc()}")
            return None

    async def create_and_store_memory(self, text: str):
        """Store a memory built from ``text`` if the model judges it worth remembering."""
        if_need = await self.if_need_build(text)
        if if_need:
            logger.info(f"需要记忆:{text}")
            memory = await self.build_memory(text)
            if memory and memory.get("memory_text"):
                memory_id = f"{self.chat_id}_{time.time()}"
                memory_item = MemoryItem(
                    memory_id=memory_id,
                    chat_id=self.chat_id,
                    memory_text=memory["memory_text"],
                    keywords=memory.get("keywords", []),
                )
                await self.store_memory(memory_item)
        else:
            logger.info(f"不需要记忆:{text}")

    async def store_memory(self, memory_item: "MemoryItem"):
        """Persist ``memory_item`` as a ``Memory`` row."""
        memory = Memory(
            memory_id=memory_item.memory_id,
            chat_id=memory_item.chat_id,
            memory_text=memory_item.memory_text,
            keywords=memory_item.keywords,
            create_time=memory_item.create_time,
            last_view_time=memory_item.last_view_time,
        )
        memory.save()

    async def get_memory(self, target: str):
        """Retrieve stored memory texts of this chat relevant to ``target``.

        The model turns ``target`` into keywords and an optional time hint. Rows
        inside the parsed time range (or all rows of the chat when no range was
        parsed) are kept when they share at least one keyword with the query.

        Returns:
            A list of distinct memory texts, or None when the model gives no
            response or an error occurs (the error is logged).
        """
        prompt = f"""
请根据以下发言内容,判断是否需要提取记忆
{target}
请用json格式输出,包含以下字段:
其中,time的要求是:
可以选择具体日期时间,格式为YYYY-MM-DD HH:MM:SS,或者大致时间,格式为YYYY-MM-DD
可以选择相对时间,例如:今天,昨天,前天,5天前,1个月前
可以选择留空进行模糊搜索
{{
"need_memory": 1,
"keywords": "希望获取的记忆关键词,用/划分",
"time": "希望获取的记忆大致时间"
}}
请只输出json格式,不要输出其他多余内容
"""
        try:
            response, _ = await self.summary_model.generate_response_async(prompt, temperature=0.5)
            if global_config.debug.show_prompt:
                print(prompt)
                print(response)
            if not response:
                return None
            try:
                result = json.loads(repair_json(response))
                keywords_list = self._split_keywords(result.get("keywords", ""))

                # The model may emit null or a non-string for "time"; coerce before stripping.
                time_str = str(result.get("time") or "").strip()
                start_time, end_time = self._parse_time_range(time_str)
                logger.info(f"start_time: {start_time}, end_time: {end_time}")

                # An unparsed range comes back with start == 0, which is falsy,
                # so the query then covers every row of this chat.
                if start_time and end_time:
                    start_ts = start_time.timestamp()
                    end_ts = end_time.timestamp()
                    query = Memory.select().where(
                        (Memory.chat_id == self.chat_id)
                        & (Memory.create_time >= start_ts)  # type: ignore
                        & (Memory.create_time < end_ts)  # type: ignore
                    )
                else:
                    query = Memory.select().where(Memory.chat_id == self.chat_id)

                memories_set = set()
                for mem in query:
                    mem_keywords = self._parse_stored_keywords(mem.keywords)
                    if any(kw in mem_keywords for kw in keywords_list):
                        memories_set.add(mem.memory_text)
                return list(memories_set)
            except Exception as parse_e:
                logger.error(f"解析记忆json失败:{str(parse_e)} {traceback.format_exc()}")
                return None
        except Exception as e:
            logger.error(f"获取记忆出现错误:{str(e)} {traceback.format_exc()}")
            return None

    def _parse_time_range(self, time_str):
        """Turn a time hint into a ``(start, end)`` pair of naive datetimes.

        Supported hints:
        - ``YYYY-MM-DD HH:MM:SS``: a one-hour window starting at that instant.
        - ``YYYY-MM-DD``: that whole day.
        - ``今天`` / ``昨天`` / ``前天`` / ``N天前`` / ``N个月前``: one whole day,
          with a month approximated as 30 days.

        An empty or unrecognised hint returns ``(0, now)``; the falsy start
        signals to the caller that no time filter applies.
        """
        now = datetime.now()
        if not time_str:
            return 0, now
        time_str = time_str.strip()

        # Absolute forms: exact timestamp first, then date only.
        absolute_formats = (
            ("%Y-%m-%d %H:%M:%S", timedelta(hours=1)),
            ("%Y-%m-%d", timedelta(days=1)),
        )
        for fmt, span in absolute_formats:
            try:
                dt = datetime.strptime(time_str, fmt)
                return dt, dt + span
            except (ValueError, OverflowError):
                continue

        # Relative forms all resolve to "how many days back".
        named_days = {"今天": 0, "昨天": 1, "前天": 2}
        days_back = named_days.get(time_str)
        if days_back is None:
            if m := re.match(r"(\d+)天前", time_str):
                days_back = int(m.group(1))
            elif m := re.match(r"(\d+)个月前", time_str):
                # Approximate each month as 30 days.
                days_back = int(m.group(1)) * 30

        if days_back is not None:
            try:
                midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
                start = midnight - timedelta(days=days_back)
                return start, start + timedelta(days=1)
            except OverflowError:
                # Absurdly large offsets are treated like an unparseable hint.
                pass

        return 0, now
|
||||
@@ -26,7 +26,6 @@ from src.chat.utils.chat_message_builder import (
|
||||
)
|
||||
from src.chat.express.expression_selector import expression_selector
|
||||
from src.chat.memory_system.memory_activator import MemoryActivator
|
||||
from src.chat.memory_system.instant_memory import InstantMemory
|
||||
from src.mood.mood_manager import mood_manager
|
||||
from src.person_info.person_info import Person, is_person_known
|
||||
from src.plugin_system.base.component_types import ActionInfo, EventType
|
||||
@@ -147,7 +146,6 @@ class DefaultReplyer:
|
||||
self.is_group_chat, self.chat_target_info = get_chat_type_and_target_info(self.chat_stream.stream_id)
|
||||
self.heart_fc_sender = HeartFCSender()
|
||||
self.memory_activator = MemoryActivator()
|
||||
self.instant_memory = InstantMemory(chat_id=self.chat_stream.stream_id)
|
||||
|
||||
from src.plugin_system.core.tool_use import ToolExecutor # 延迟导入ToolExecutor,不然会循环依赖
|
||||
|
||||
@@ -375,20 +373,11 @@ class DefaultReplyer:
|
||||
|
||||
instant_memory = None
|
||||
|
||||
# running_memories = await self.memory_activator.activate_memory_with_chat_history(
|
||||
# target_message=target, chat_history=chat_history
|
||||
# )
|
||||
running_memories = await self.memory_activator.activate_memory_with_chat_history(
|
||||
target_message=target, chat_history=chat_history
|
||||
)
|
||||
running_memories = None
|
||||
|
||||
if global_config.memory.enable_instant_memory:
|
||||
chat_history_str = build_readable_messages(
|
||||
messages=chat_history, replace_bot_name=True, timestamp_mode="normal"
|
||||
)
|
||||
asyncio.create_task(self.instant_memory.create_and_store_memory(chat_history_str))
|
||||
|
||||
instant_memory = await self.instant_memory.get_memory(target)
|
||||
logger.info(f"即时记忆:{instant_memory}")
|
||||
|
||||
if not running_memories:
|
||||
return ""
|
||||
|
||||
|
||||
@@ -834,3 +834,79 @@ def parse_keywords_string(keywords_input) -> list[str]:
|
||||
|
||||
# 如果没有分隔符,返回单个关键词
|
||||
return [keywords_str] if keywords_str else []
|
||||
|
||||
|
||||
|
||||
|
||||
def cut_key_words(concept_name: str, *, tokenizer=None) -> list[str]:
    """Tokenise a concept name and return its meaningful keywords in first-seen order.

    Steps:
    1. Tokenise, strip whitespace, and drop blank or punctuation-only tokens.
    2. Merge a conjunction with its neighbours into one token when both
       neighbours are content words.
    3. Drop remaining conjunctions and stop words, and remove duplicates.

    Args:
        concept_name: Text to tokenise. Empty or blank text yields ``[]``.
        tokenizer: Optional callable ``str -> iterable[str]``; defaults to
            ``jieba.cut``.

    Returns:
        The filtered, de-duplicated keyword list.
    """
    if not concept_name or not concept_name.strip():
        return []
    if tokenizer is None:
        tokenizer = jieba.cut

    conjunctions = {"和", "与", "及", "跟", "以及", "并且", "而且", "或", "或者", "并"}
    stop_words = {
        "的", "了", "呢", "吗", "吧", "啊", "哦", "恩", "嗯", "呀", "嘛", "哇",
        "在", "是", "很", "也", "又", "就", "都", "还", "更", "最", "被", "把",
        "给", "对", "和", "与", "及", "跟", "并", "而且", "或者", "或", "以及",
    }
    # Words that never count as keywords and never anchor a merge.
    filler = conjunctions | stop_words

    punctuation = set(
        ",.!?;:()[]<>'\"/\\-"  # ASCII
        "。、【】《》“”‘’—…·"  # CJK
        "\uff0c\uff01\uff1f\uff1b\uff1a\uff08\uff09"  # full-width , ! ? ; : ( )
    )

    def is_punctuation(token: str) -> bool:
        return all(ch in punctuation for ch in token)

    tokens = []
    for raw in tokenizer(concept_name):
        token = raw.strip()
        if token and not is_punctuation(token):
            tokens.append(token)

    # Merge "<left><conjunction><right>" into one token when both sides are content words.
    merged: list[str] = []
    i = 0
    count = len(tokens)
    while i < count:
        token = tokens[i]
        if token in conjunctions and merged and i + 1 < count:
            left, right = merged[-1], tokens[i + 1]
            if left not in filler and right not in filler:
                merged[-1] = f"{left}{token}{right}"
                i += 2  # the right-hand word has been consumed
                continue
        merged.append(token)
        i += 1

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(token for token in merged if token not in filler))
|
||||
Reference in New Issue
Block a user