feat:将记忆配置项添加到配置文件

This commit is contained in:
SengokuCola
2025-10-08 18:45:06 +08:00
parent e0a5cd5922
commit 16ae212adc
6 changed files with 253 additions and 282 deletions

View File

@@ -184,16 +184,16 @@ class HeartFChatting:
)
question_probability = 0
if time.time() - self.last_active_time > 1200:
question_probability = 0.04
elif time.time() - self.last_active_time > 600:
question_probability = 0.02
elif time.time() - self.last_active_time > 300:
if time.time() - self.last_active_time > 3600:
question_probability = 0.01
elif time.time() - self.last_active_time > 1200:
question_probability = 0.005
else:
elif time.time() - self.last_active_time > 600:
question_probability = 0.001
else:
question_probability = 0.0003
question_probability = question_probability * global_config.chat.auto_chat_value
question_probability = question_probability * global_config.chat.get_auto_chat_value(self.stream_id)
# print(f"{self.log_prefix} questioned: {self.questioned},len: {len(global_conflict_tracker.get_questions_by_chat_id(self.stream_id))}")
if question_probability > 0 and not self.questioned and len(global_conflict_tracker.get_questions_by_chat_id(self.stream_id)) == 0: #长久没有回复,可以试试主动发言,提问概率随着时间增加
@@ -335,8 +335,6 @@ class HeartFChatting:
await global_memory_chest.build_running_content(chat_id=self.stream_id)
cycle_timers, thinking_id = self.start_cycle()
logger.info(f"{self.log_prefix} 开始第{self._cycle_counter}次思考")

View File

@@ -926,202 +926,6 @@ async def build_anonymous_messages(messages: List[DatabaseMessages]) -> str:
return formatted_string
def build_readable_messages_anonymized(
    messages: List[DatabaseMessages],
    timestamp_mode: str = "relative",
    show_actions: bool = False,
    show_pic: bool = True,
    replace_bot_name: bool = True,
    remove_emoji_stickers: bool = False,
) -> Tuple[str, Dict[str, str]]:
    """
    Build a readable chat transcript with anonymized user names.

    Modelled on ``build_readable_messages``:
    - every user is renamed to 用户A, 用户B, ..., 用户Z, 用户AA, 用户AB, ...
      (labels are handed out in order of first appearance and never run out);
    - references handled by ``replace_user_references`` inside message
      content are rewritten through the same anonymous names;
    - when ``replace_bot_name`` is true, the bot account is shown as
      ``"<bot nickname>(你)"`` instead of receiving a letter label.

    Args:
        messages: Messages to render. An empty list yields ``("", {})``.
        timestamp_mode: Passed to ``translate_timestamp_to_human_readable``.
        show_actions: Also insert action records of the same chat that fall
            inside the messages' time range, plus the first one after it.
        show_pic: Replace ``[picid:...]`` markers with ``[图片]``; when false
            the markers are removed entirely.
        replace_bot_name: Render the bot as ``"<nickname>(你)"``.
        remove_emoji_stickers: Strip ``[表情包:...]`` markers and drop messages
            that are empty once the markers are removed.

    Returns:
        ``(formatted_string, mapping)`` where ``mapping`` maps each original
        display name to its anonymous name. If two users share a display
        name, the first one seen keeps the entry. The generic fallback
        placeholder ("某人") is never recorded, because it is not a real name
        and would otherwise cause every literal "某人" in the chat text to be
        rewritten by the final replacement pass.
    """
    if not messages:
        return "", {}

    # Function-scope imports keep this block free of new file-level dependencies.
    import itertools
    import string

    # Compiled once instead of once per message.
    sticker_re = re.compile(r"\[表情包:[^\]]+\]")
    pic_id_re = re.compile(r"\[picid:([^\]]+)\]")
    pic_replacement = "[图片]" if show_pic else ""

    def alphabet_labels() -> Iterable[str]:
        """Yield A..Z, AA..ZZ, AAA.. without end (same order as before for the first 702)."""
        letters = string.ascii_uppercase
        for width in itertools.count(1):
            for combo in itertools.product(letters, repeat=width):
                yield "".join(combo)

    label_iter = alphabet_labels()
    user_to_label: Dict[Tuple[str, str], str] = {}
    name_mapping: Dict[str, str] = {}

    def get_display_name(
        platform: str, user_id: str, user_nickname: str, user_cardname: Optional[str]
    ) -> Optional[str]:
        """Return the user's real display name, or None when only a placeholder is available."""
        person = Person(platform=platform, user_id=user_id)
        display = person.person_name or f"{user_nickname}"
        if display:
            return display
        if user_cardname:
            return f"昵称:{user_cardname}"
        return None

    def remember_name(
        platform: str, user_id: str, user_nickname: str, user_cardname: Optional[str], anon: str
    ) -> None:
        """Record original display name -> anonymous name; the first writer wins."""
        original_display = get_display_name(platform, user_id, user_nickname, user_cardname)
        if original_display is not None and original_display not in name_mapping:
            name_mapping[original_display] = anon

    def get_anon_name(platform: str, user_id: str, user_nickname: str, user_cardname: Optional[str]) -> str:
        """Return the stable anonymous name for a user, allocating a new label if needed."""
        if replace_bot_name and user_id == global_config.bot.qq_account:
            # The bot is shown under its own nickname rather than a letter label.
            anon = f"{global_config.bot.nickname}(你)"
        else:
            key = (platform or "", user_id or "")
            if key not in user_to_label:
                user_to_label[key] = f"用户{next(label_iter)}"
            anon = user_to_label[key]
        remember_name(platform, user_id, user_nickname, user_cardname, anon)
        return anon

    # Drop messages that contain nothing but sticker markers.
    if remove_emoji_stickers:
        messages = [
            msg
            for msg in messages
            if sticker_re.sub("", msg.display_message or msg.processed_plain_text or "").strip()
        ]

    # Convert to the intermediate model, stripping sticker markers if requested.
    copy_messages: List[MessageAndActionModel] = []
    for msg in messages:
        model = MessageAndActionModel.from_DatabaseMessages(msg)
        if remove_emoji_stickers:
            if model.display_message:
                model.display_message = sticker_re.sub("", model.display_message)
            if model.processed_plain_text:
                model.processed_plain_text = sticker_re.sub("", model.processed_plain_text)
        copy_messages.append(model)

    if show_actions and copy_messages:
        min_time = min(msg.time or 0 for msg in copy_messages)
        max_time = max(msg.time or 0 for msg in copy_messages)
        chat_id = messages[0].chat_id if messages else None

        actions_in_range = (
            ActionRecords.select()
            .where(
                (ActionRecords.time >= min_time)
                & (ActionRecords.time <= max_time)
                & (ActionRecords.chat_id == chat_id)
            )
            .order_by(ActionRecords.time)
        )
        # Also pick up the first action recorded after the newest message.
        action_after_latest = (
            ActionRecords.select()
            .where((ActionRecords.time > max_time) & (ActionRecords.chat_id == chat_id))
            .order_by(ActionRecords.time)
            .limit(1)
        )
        actions: List[ActionRecords] = list(actions_in_range) + list(action_after_latest)

        for action in actions:
            if action.action_build_into_prompt:
                action_msg = MessageAndActionModel(
                    time=float(action.time),  # type: ignore
                    user_id=global_config.bot.qq_account,
                    user_platform=global_config.bot.platform,
                    user_nickname=global_config.bot.nickname,
                    user_cardname="",
                    processed_plain_text=f"{action.action_prompt_display}",
                    display_message=f"{action.action_prompt_display}",
                    chat_info_platform=str(action.chat_info_platform),
                    is_action_record=True,
                    action_name=str(action.action_name),
                )
                copy_messages.append(action_msg)

        # Interleave the appended actions with the messages chronologically.
        copy_messages.sort(key=lambda x: x.time or 0)

    def process_pic_ids(content: Optional[str]) -> str:
        """Replace (or remove) [picid:...] markers; None becomes an empty string."""
        if content is None:
            return ""
        return pic_id_re.sub(pic_replacement, content)

    def anon_name_resolver(platform: str, user_id: str) -> str:
        """Resolver handed to replace_user_references for in-content user references."""
        try:
            # Treat the bot the same way as the main flow does.
            if replace_bot_name and user_id == global_config.bot.qq_account:
                return f"{global_config.bot.nickname}(你)"
            return get_anon_name(platform, user_id, "", None)
        except Exception:
            # Best effort: a failed lookup must not break the whole transcript.
            return "用户?"

    # Entries are (timestamp, anonymous name, content, is_action_record).
    detailed: List[Tuple[float, str, str, bool]] = []
    for m in copy_messages:
        if m.is_action_record:
            detailed.append((m.time or 0.0, "", process_pic_ids(m.display_message), True))
            continue

        content = process_pic_ids(m.display_message or m.processed_plain_text or "")
        anon_name = get_anon_name(m.user_platform, m.user_id, m.user_nickname, m.user_cardname)
        try:
            content = replace_user_references(content, m.user_platform, anon_name_resolver, replace_bot_name=False)
        except Exception:
            # Deliberate best effort: keep the content unchanged if the
            # reference rewrite fails rather than dropping the message.
            pass
        detailed.append((m.time or 0.0, anon_name, content, False))

    if not detailed:
        return "", name_mapping

    # Still needed when show_actions is false and the input was not pre-sorted.
    detailed.sort(key=lambda x: x[0])

    output_lines: List[str] = []
    for ts, name, content, is_action in detailed:
        readable_time = translate_timestamp_to_human_readable(ts, mode=timestamp_mode)
        if is_action:
            output_lines.append(f"{readable_time}, {content}")
        else:
            output_lines.append(f"{readable_time}, {name}: {content}")
    formatted_string = "\n".join(output_lines).strip()

    # Final pass: replace original names that appear literally in message text.
    # Longest names go first so a short name cannot split a longer one.
    for original_name, anon_name in sorted(name_mapping.items(), key=lambda x: len(x[0]), reverse=True):
        if original_name:
            formatted_string = formatted_string.replace(original_name, anon_name)

    return formatted_string, name_mapping
async def get_person_id_list(messages: List[Dict[str, Any]]) -> List[str]:
"""

View File

@@ -108,6 +108,23 @@ class ChatConfig(ConfigBase):
时间区间支持跨夜,例如 "23:00-02:00"
"""
auto_chat_value_rules: list[dict] = field(default_factory=lambda: [])
"""
自动聊天频率规则列表,支持按聊天流/按日内时段配置。
规则格式:{ target="platform:id:type""", time="HH:MM-HH:MM", value=0.5 }
示例:
[
["", "00:00-08:59", 0.2], # 全局规则:凌晨到早上更安静
["", "09:00-22:59", 1.0], # 全局规则:白天正常
["qq:1919810:group", "20:00-23:59", 0.6], # 指定群在晚高峰降低发言
["qq:114514:private", "00:00-23:59", 0.3],# 指定私聊全时段较安静
]
匹配优先级: 先匹配指定 chat 流规则,再匹配全局规则(\"\").
时间区间支持跨夜,例如 "23:00-02:00"
"""
def _parse_stream_config_to_chat_id(self, stream_config_str: str) -> Optional[str]:
"""与 ChatStream.get_stream_id 一致地从 "platform:id:type" 生成 chat_id。"""
try:
@@ -213,6 +230,61 @@ class ChatConfig(ConfigBase):
# 3) 未命中规则返回基础值
return self.talk_value
def get_auto_chat_value(self, chat_id: Optional[str]) -> float:
    """Return the auto_chat_value that applies to ``chat_id`` right now.

    Each entry of ``self.auto_chat_value_rules`` must be a dict with the keys
    ``target`` (a stream config string, or ``""`` for a global rule), ``time``
    (a range string understood by ``self._parse_range``) and ``value``
    (anything ``float()`` accepts). Entries that are not dicts, lack a string
    ``time``, have no ``target`` key, have an unparsable range or an
    unconvertible value are skipped.

    Lookup order:
      1. rules whose target resolves to ``chat_id`` (only when ``chat_id`` is truthy);
      2. global rules (``target == ""``);
      3. the base ``self.auto_chat_value``.

    Within a tier, the first rule whose time range contains the current
    minute of the day wins.

    Args:
        chat_id: Chat stream id to match against, or None/empty to use only
            the global rules.

    Returns:
        The matched rule value as a float, or ``self.auto_chat_value``.
    """
    rules = self.auto_chat_value_rules
    if not rules:
        return self.auto_chat_value

    now_min = self._now_minutes()

    def is_chat_rule(rule: dict) -> bool:
        # A missing or empty target denotes a global rule; those belong to tier 2.
        target = rule.get("target", "")
        if target == "":
            return False
        return self._parse_stream_config_to_chat_id(str(target)) == chat_id

    def is_global_rule(rule: dict) -> bool:
        # Only an explicit empty string counts; a missing target is ignored.
        return rule.get("target") == ""

    def first_active_value(target_matches) -> Optional[float]:
        """Return the value of the first matching rule that is active now, else None."""
        for rule in rules:
            if not isinstance(rule, dict):
                continue
            time_range = rule.get("time", "")
            if not isinstance(time_range, str):
                continue
            if not target_matches(rule):
                continue
            parsed = self._parse_range(time_range)
            if not parsed:
                continue
            start_min, end_min = parsed
            if not self._in_range(now_min, start_min, end_min):
                continue
            try:
                return float(rule.get("value"))
            except (TypeError, ValueError, OverflowError):
                # Missing or malformed value: skip it and try the next rule.
                continue
        return None

    # 1) Rules for this specific chat take priority over global ones.
    if chat_id:
        matched = first_active_value(is_chat_rule)
        if matched is not None:
            return matched

    # 2) Global rules.
    matched = first_active_value(is_global_rule)
    if matched is not None:
        return matched

    # 3) Nothing matched: fall back to the base value.
    return self.auto_chat_value
@dataclass
class MessageReceiveConfig(ConfigBase):
@@ -231,8 +303,8 @@ class MemoryConfig(ConfigBase):
max_memory_number: int = 100
"""记忆最大数量"""
max_memory_size: int = 2048
"""记忆最大大小"""
memory_build_frequency: int = 1
"""记忆构建频率"""
@dataclass
class ExpressionConfig(ConfigBase):

View File

@@ -37,8 +37,6 @@ class MemoryChest:
request_type="memory_chest_build",
)
self.memory_build_threshold = 20
self.memory_size_limit = global_config.memory.max_memory_size
self.running_content_list = {} # {chat_id: {"content": running_content, "last_update_time": timestamp, "create_time": timestamp}}
self.fetched_memory_list = [] # [(chat_id, (question, answer, timestamp)), ...]
@@ -54,7 +52,19 @@ class MemoryChest:
Returns:
str: 构建后的运行内容
"""
# 检查是否需要更新:上次更新时间和现在时间的消息数量大于30
# 检查是否需要更新:基于消息数量和最新消息时间差的智能更新机制
#
# 更新机制说明(以下消息数量阈值均会除以 memory_build_frequency;该值 <= 0 时不触发更新):
# 1. 消息数量 > 100:直接触发更新(高频消息场景)
# 2. 消息数量 > 70 且最新消息时间差 > 30秒:触发更新(中高频消息场景)
# 3. 消息数量 > 50 且最新消息时间差 > 60秒:触发更新(中频消息场景)
# 4. 消息数量 > 30 且最新消息时间差 > 300秒:触发更新(低频消息场景)
#
# 设计理念:
# - 消息越密集,时间阈值越短,确保及时更新记忆
# - 消息越稀疏,时间阈值越长,避免频繁无意义的更新
# - 通过最新消息时间差判断消息活跃度,而非简单的总时间差
# - 平衡更新频率与性能,在保证记忆及时性的同时减少计算开销
if chat_id not in self.running_content_list:
self.running_content_list[chat_id] = {
"content": "",
@@ -75,16 +85,51 @@ class MemoryChest:
)
new_messages_count = len(message_list)
time_diff_minutes = (current_time - last_update_time) / 60
# 检查是否满足强制构建条件超过15分钟且至少有5条新消息
forced_update = time_diff_minutes > 15 and new_messages_count >= 5
should_update = new_messages_count > self.memory_build_threshold or forced_update
if forced_update:
logger.debug(f"chat_id {chat_id} 距离上次更新已 {time_diff_minutes:.1f} 分钟,有 {new_messages_count} 条新消息,强制构建")
else:
logger.debug(f"chat_id {chat_id} 自上次更新后有 {new_messages_count} 条新消息,{'需要' if should_update else '不需要'}更新")
# 获取最新消息的时间戳
latest_message_time = last_update_time
if message_list:
# 假设消息列表按时间排序,取最后一条消息的时间戳
latest_message = message_list[-1]
if hasattr(latest_message, 'timestamp'):
latest_message_time = latest_message.timestamp
elif isinstance(latest_message, dict) and 'timestamp' in latest_message:
latest_message_time = latest_message['timestamp']
# 计算最新消息时间与现在时间的差(秒)
latest_message_time_diff = current_time - latest_message_time
# 智能更新条件判断 - 按优先级从高到低检查
should_update = False
update_reason = ""
if global_config.memory.memory_build_frequency > 0:
if new_messages_count > 100/global_config.memory.memory_build_frequency:
# 条件1消息数量 > 100直接触发更新
# 适用场景:群聊刷屏、高频讨论等消息密集场景
# 无需时间限制,确保重要信息不被遗漏
should_update = True
update_reason = f"消息数量 {new_messages_count} > 100直接触发更新"
elif new_messages_count > 70/global_config.memory.memory_build_frequency and latest_message_time_diff > 30:
# 条件2消息数量 > 70 且最新消息时间差 > 30秒
# 适用场景:中高频讨论,但需要确保消息流已稳定
# 30秒的时间差确保不是正在进行的实时对话
should_update = True
update_reason = f"消息数量 {new_messages_count} > 70 且最新消息时间差 {latest_message_time_diff:.1f}s > 30s"
elif new_messages_count > 50/global_config.memory.memory_build_frequency and latest_message_time_diff > 60:
# 条件3消息数量 > 50 且最新消息时间差 > 60秒
# 适用场景中等频率讨论等待1分钟确保对话告一段落
# 平衡及时性与稳定性
should_update = True
update_reason = f"消息数量 {new_messages_count} > 50 且最新消息时间差 {latest_message_time_diff:.1f}s > 60s"
elif new_messages_count > 30/global_config.memory.memory_build_frequency and latest_message_time_diff > 300:
# 条件4消息数量 > 30 且最新消息时间差 > 300秒5分钟
# 适用场景:低频但有一定信息量的讨论
# 5分钟的时间差确保对话完全结束避免频繁更新
should_update = True
update_reason = f"消息数量 {new_messages_count} > 30 且最新消息时间差 {latest_message_time_diff:.1f}s > 300s"
logger.debug(f"chat_id {chat_id} 更新检查: {update_reason if should_update else f'消息数量 {new_messages_count},最新消息时间差 {latest_message_time_diff:.1f}s不满足更新条件'}")
if should_update:
@@ -98,11 +143,6 @@ class MemoryChest:
remove_emoji_stickers=True,
)
current_running_content = ""
if chat_id and chat_id in self.running_content_list:
current_running_content = self.running_content_list[chat_id]["content"]
# 随机从格式示例列表中选取若干行用于提示
format_candidates = [
"[概念] 是 [概念的含义(简短描述,不超过十个字)]",
@@ -129,18 +169,13 @@ class MemoryChest:
format_section = "\n".join(selected_lines) + "\n......(不要包含中括号)"
prompt = f"""
以下是你的记忆内容和新的聊天记录,请你将他们整合和修改
记忆内容:
<memory_content>
{current_running_content}
</memory_content>
以下是一段你参与的聊天记录,请你在其中总结出记忆
<聊天记录>
{message_str}
</聊天记录>
聊天记录中可能包含有效信息,也可能信息密度很低,请你根据聊天记录中的信息,修改<part1>中的内容与<part2>中的内容
聊天记录中可能包含有效信息,也可能信息密度很低,请你根据聊天记录中的信息,总结出记忆内容
--------------------------------
请将上面的新聊天记录内的有用的信息进行整合到现有的记忆中
对[图片]的处理:
1.除非与文本有关,不要将[图片]的内容整合到记忆中
2.如果图片与某个概念相关,将图片中的关键内容也整合到记忆中,不要写入图片原文,例如:
@@ -178,29 +213,9 @@ class MemoryChest:
print(f"prompt: {prompt}\n记忆仓库构建运行内容: {running_content}")
# 如果有chat_id更新对应的running_content
# 直接保存:每次构建后立即入库,并刷新时间戳窗口
if chat_id and running_content:
current_time = time.time()
# 保留原有的create_time如果没有则使用当前时间
create_time = self.running_content_list[chat_id].get("create_time", current_time)
self.running_content_list[chat_id] = {
"content": running_content,
"last_update_time": current_time,
"create_time": create_time
}
# 检查running_content长度是否大于限制
if len(running_content) > self.memory_size_limit:
await self._save_to_database_and_clear(chat_id, running_content)
# 检查是否需要强制保存create_time超过1800秒且内容大小达到max_memory_size的30%
elif (current_time - create_time > 1800 and
len(running_content) >= self.memory_size_limit * 0.3):
logger.info(f"chat_id {chat_id} 内容创建时间已超过 {(current_time - create_time)/60:.1f} 分钟,"
f"内容大小 {len(running_content)} 达到限制的 {int(self.memory_size_limit * 0.3)} 字符,强制保存")
await self._save_to_database_and_clear(chat_id, running_content)
await self._save_to_database_and_clear(chat_id, running_content)
return running_content
@@ -400,10 +415,15 @@ class MemoryChest:
)
logger.info(f"已保存记忆仓库内容,标题: {title.strip()}, chat_id: {chat_id}")
# 清空对应chat_id的running_content
# 清空内容并刷新时间戳,但保留条目用于增量计算
if chat_id in self.running_content_list:
del self.running_content_list[chat_id]
logger.info(f"已清空chat_id {chat_id} 的running_content")
current_time = time.time()
self.running_content_list[chat_id] = {
"content": "",
"last_update_time": current_time,
"create_time": current_time
}
logger.info(f"已保存并刷新chat_id {chat_id} 的时间戳,准备下一次增量构建")
else:
logger.warning(f"生成标题失败chat_id: {chat_id}")