This commit is contained in:
春河晴
2025-06-10 16:13:31 +09:00
parent 440e8bf7f3
commit 8d9a88a903
70 changed files with 1598 additions and 1642 deletions

View File

@@ -11,7 +11,6 @@ from datetime import datetime
from typing import Dict
from src.llm_models.utils_model import LLMRequest
from src.config.config import global_config
import asyncio
logger = get_logger("processor")

View File

@@ -22,7 +22,7 @@ from src.chat.utils.chat_message_builder import get_raw_msg_by_timestamp_with_ch
# 配置常量:是否启用小模型即时信息提取
# 开启时:使用小模型并行即时提取,速度更快,但精度可能略低
# 关闭时:使用原来的异步模式,精度更高但速度较慢
ENABLE_INSTANT_INFO_EXTRACTION = True
ENABLE_INSTANT_INFO_EXTRACTION = True
logger = get_logger("processor")
@@ -63,7 +63,7 @@ def init_prompt():
"""
Prompt(relationship_prompt, "relationship_prompt")
fetch_info_prompt = """
{name_block}
@@ -84,7 +84,6 @@ def init_prompt():
Prompt(fetch_info_prompt, "fetch_info_prompt")
class RelationshipProcessor(BaseProcessor):
log_prefix = "关系"
@@ -92,8 +91,10 @@ class RelationshipProcessor(BaseProcessor):
super().__init__()
self.subheartflow_id = subheartflow_id
self.info_fetching_cache: List[Dict[str, any]] = []
self.info_fetched_cache: Dict[str, Dict[str, any]] = {} # {person_id: {"info": str, "ttl": int, "start_time": float}}
self.info_fetching_cache: List[Dict[str, any]] = []
self.info_fetched_cache: Dict[
str, Dict[str, any]
] = {} # {person_id: {"info": str, "ttl": int, "start_time": float}}
self.person_engaged_cache: List[Dict[str, any]] = [] # [{person_id: str, start_time: float, rounds: int}]
self.grace_period_rounds = 5
@@ -101,7 +102,7 @@ class RelationshipProcessor(BaseProcessor):
model=global_config.model.relation,
request_type="focus.relationship",
)
# 小模型用于即时信息提取
if ENABLE_INSTANT_INFO_EXTRACTION:
self.instant_llm_model = LLMRequest(
@@ -156,26 +157,27 @@ class RelationshipProcessor(BaseProcessor):
for record in list(self.person_engaged_cache):
record["rounds"] += 1
time_elapsed = current_time - record["start_time"]
message_count = len(get_raw_msg_by_timestamp_with_chat(self.subheartflow_id, record["start_time"], current_time))
message_count = len(
get_raw_msg_by_timestamp_with_chat(self.subheartflow_id, record["start_time"], current_time)
)
print(record)
# 根据消息数量和时间设置不同的触发条件
should_trigger = (
message_count >= 50 or # 50条消息必定满足
(message_count >= 35 and time_elapsed >= 300) or # 35条且10分钟
(message_count >= 25 and time_elapsed >= 900) or # 25条且30分钟
(message_count >= 10 and time_elapsed >= 2000) # 10条且1小时
message_count >= 50 # 50条消息必定满足
or (message_count >= 35 and time_elapsed >= 300) # 35条且10分钟
or (message_count >= 25 and time_elapsed >= 900) # 25条且30分钟
or (message_count >= 10 and time_elapsed >= 2000) # 10条且1小时
)
if should_trigger:
logger.info(f"{self.log_prefix} 用户 {record['person_id']} 满足关系构建条件,开始构建关系。消息数:{message_count},时长:{time_elapsed:.0f}")
logger.info(
f"{self.log_prefix} 用户 {record['person_id']} 满足关系构建条件,开始构建关系。消息数:{message_count},时长:{time_elapsed:.0f}"
)
asyncio.create_task(
self.update_impression_on_cache_expiry(
record["person_id"],
self.subheartflow_id,
record["start_time"],
current_time
record["person_id"], self.subheartflow_id, record["start_time"], current_time
)
)
self.person_engaged_cache.remove(record)
@@ -187,20 +189,24 @@ class RelationshipProcessor(BaseProcessor):
if self.info_fetched_cache[person_id][info_type]["ttl"] <= 0:
# 在删除前查找匹配的info_fetching_cache记录
matched_record = None
min_time_diff = float('inf')
min_time_diff = float("inf")
for record in self.info_fetching_cache:
if (record["person_id"] == person_id and
record["info_type"] == info_type and
not record["forget"]):
time_diff = abs(record["start_time"] - self.info_fetched_cache[person_id][info_type]["start_time"])
if (
record["person_id"] == person_id
and record["info_type"] == info_type
and not record["forget"]
):
time_diff = abs(
record["start_time"] - self.info_fetched_cache[person_id][info_type]["start_time"]
)
if time_diff < min_time_diff:
min_time_diff = time_diff
matched_record = record
if matched_record:
matched_record["forget"] = True
logger.info(f"{self.log_prefix} 用户 {person_id}{info_type} 信息已过期,标记为遗忘。")
del self.info_fetched_cache[person_id][info_type]
if not self.info_fetched_cache[person_id]:
del self.info_fetched_cache[person_id]
@@ -208,7 +214,7 @@ class RelationshipProcessor(BaseProcessor):
# 5. 为需要处理的人员准备LLM prompt
nickname_str = ",".join(global_config.bot.alias_names)
name_block = f"你的名字是{global_config.bot.nickname},你的昵称有{nickname_str},有人也会用这些昵称称呼你。"
info_cache_block = ""
if self.info_fetching_cache:
for info_fetching in self.info_fetching_cache:
@@ -223,7 +229,7 @@ class RelationshipProcessor(BaseProcessor):
chat_observe_info=chat_observe_info,
info_cache_block=info_cache_block,
)
try:
logger.debug(f"{self.log_prefix} 人物信息prompt: \n{prompt}\n")
content, _ = await self.llm_model.generate_response_async(prompt=prompt)
@@ -234,45 +240,47 @@ class RelationshipProcessor(BaseProcessor):
# 收集即时提取任务
instant_tasks = []
async_tasks = []
for person_name, info_type in content_json.items():
person_id = person_info_manager.get_person_id_by_person_name(person_name)
if person_id:
self.info_fetching_cache.append({
"person_id": person_id,
"person_name": person_name,
"info_type": info_type,
"start_time": time.time(),
"forget": False,
})
self.info_fetching_cache.append(
{
"person_id": person_id,
"person_name": person_name,
"info_type": info_type,
"start_time": time.time(),
"forget": False,
}
)
if len(self.info_fetching_cache) > 20:
self.info_fetching_cache.pop(0)
else:
logger.warning(f"{self.log_prefix} 未找到用户 {person_name} 的ID跳过调取信息。")
continue
logger.info(f"{self.log_prefix} 调取用户 {person_name}{info_type} 信息。")
# 检查person_engaged_cache中是否已存在该person_id
person_exists = any(record["person_id"] == person_id for record in self.person_engaged_cache)
if not person_exists:
self.person_engaged_cache.append({
"person_id": person_id,
"start_time": time.time(),
"rounds": 0
})
self.person_engaged_cache.append(
{"person_id": person_id, "start_time": time.time(), "rounds": 0}
)
if ENABLE_INSTANT_INFO_EXTRACTION:
# 收集即时提取任务
instant_tasks.append((person_id, info_type, time.time()))
else:
# 使用原来的异步模式
async_tasks.append(asyncio.create_task(self.fetch_person_info(person_id, [info_type], start_time=time.time())))
async_tasks.append(
asyncio.create_task(self.fetch_person_info(person_id, [info_type], start_time=time.time()))
)
# 执行即时提取任务
if ENABLE_INSTANT_INFO_EXTRACTION and instant_tasks:
await self._execute_instant_extraction_batch(instant_tasks)
# 启动异步任务(如果不是即时模式)
if async_tasks:
# 异步任务不需要等待完成
@@ -300,7 +308,7 @@ class RelationshipProcessor(BaseProcessor):
person_infos_str += f"你不了解{person_name}有关[{info_type}]的信息,不要胡乱回答,你可以直接说你不知道,或者你忘记了;"
if person_infos_str:
persons_infos_str += f"你对 {person_name} 的了解:{person_infos_str}\n"
# 处理正在调取但还没有结果的项目(只在非即时提取模式下显示)
if not ENABLE_INSTANT_INFO_EXTRACTION:
pending_info_dict = {}
@@ -312,50 +320,47 @@ class RelationshipProcessor(BaseProcessor):
person_id = record["person_id"]
person_name = record["person_name"]
info_type = record["info_type"]
# 检查是否已经在info_fetched_cache中有结果
if (person_id in self.info_fetched_cache and
info_type in self.info_fetched_cache[person_id]):
if person_id in self.info_fetched_cache and info_type in self.info_fetched_cache[person_id]:
continue
# 按人物组织正在调取的信息
if person_name not in pending_info_dict:
pending_info_dict[person_name] = []
pending_info_dict[person_name].append(info_type)
# 添加正在调取的信息到返回字符串
for person_name, info_types in pending_info_dict.items():
info_types_str = "".join(info_types)
persons_infos_str += f"你正在识图回忆有关 {person_name}{info_types_str} 信息,稍等一下再回答...\n"
return persons_infos_str
async def _execute_instant_extraction_batch(self, instant_tasks: list):
"""
批量执行即时提取任务
"""
if not instant_tasks:
return
logger.info(f"{self.log_prefix} [即时提取] 开始批量提取 {len(instant_tasks)} 个信息")
# 创建所有提取任务
extraction_tasks = []
for person_id, info_type, start_time in instant_tasks:
# 检查缓存中是否已存在且未过期的信息
if (person_id in self.info_fetched_cache and
info_type in self.info_fetched_cache[person_id]):
if person_id in self.info_fetched_cache and info_type in self.info_fetched_cache[person_id]:
logger.info(f"{self.log_prefix} 用户 {person_id}{info_type} 信息已存在且未过期,跳过调取。")
continue
task = asyncio.create_task(self._fetch_single_info_instant(person_id, info_type, start_time))
extraction_tasks.append(task)
# 并行执行所有提取任务并等待完成
if extraction_tasks:
await asyncio.gather(*extraction_tasks, return_exceptions=True)
logger.info(f"{self.log_prefix} [即时提取] 批量提取完成")
async def _fetch_single_info_instant(self, person_id: str, info_type: str, start_time: float):
"""
@@ -363,24 +368,21 @@ class RelationshipProcessor(BaseProcessor):
"""
nickname_str = ",".join(global_config.bot.alias_names)
name_block = f"你的名字是{global_config.bot.nickname},你的昵称有{nickname_str},有人也会用这些昵称称呼你。"
person_name = await person_info_manager.get_value(person_id, "person_name")
person_impression = await person_info_manager.get_value(person_id, "impression")
if not person_impression:
impression_block = "你对ta没有什么深刻的印象"
else:
impression_block = f"{person_impression}"
points = await person_info_manager.get_value(person_id, "points")
if points:
points_text = "\n".join([
f"{point[2]}:{point[0]}"
for point in points
])
points_text = "\n".join([f"{point[2]}:{point[0]}" for point in points])
else:
points_text = "你不记得ta最近发生了什么"
prompt = (await global_prompt_manager.get_prompt_async("fetch_info_prompt")).format(
name_block=name_block,
info_type=info_type,
@@ -393,9 +395,9 @@ class RelationshipProcessor(BaseProcessor):
try:
# 使用小模型进行即时提取
content, _ = await self.instant_llm_model.generate_response_async(prompt=prompt)
logger.info(f"{self.log_prefix} [即时提取] {person_name}{info_type} 结果: {content}")
if content:
content_json = json.loads(repair_json(content))
if info_type in content_json:
@@ -410,7 +412,9 @@ class RelationshipProcessor(BaseProcessor):
"person_name": person_name,
"unknow": False,
}
logger.info(f"{self.log_prefix} [即时提取] 成功获取 {person_name}{info_type}: {info_content}")
logger.info(
f"{self.log_prefix} [即时提取] 成功获取 {person_name}{info_type}: {info_content}"
)
else:
if person_id not in self.info_fetched_cache:
self.info_fetched_cache[person_id] = {}
@@ -423,59 +427,55 @@ class RelationshipProcessor(BaseProcessor):
}
logger.info(f"{self.log_prefix} [即时提取] {person_name}{info_type} 信息不明确")
else:
logger.warning(f"{self.log_prefix} [即时提取] 小模型返回空结果,获取 {person_name}{info_type} 信息失败。")
logger.warning(
f"{self.log_prefix} [即时提取] 小模型返回空结果,获取 {person_name}{info_type} 信息失败。"
)
except Exception as e:
logger.error(f"{self.log_prefix} [即时提取] 执行小模型请求获取用户信息时出错: {e}")
logger.error(traceback.format_exc())
async def fetch_person_info(self, person_id: str, info_types: list[str], start_time: float):
"""
获取某个人的信息
"""
# 检查缓存中是否已存在且未过期的信息
info_types_to_fetch = []
for info_type in info_types:
if (person_id in self.info_fetched_cache and
info_type in self.info_fetched_cache[person_id]):
if person_id in self.info_fetched_cache and info_type in self.info_fetched_cache[person_id]:
logger.info(f"{self.log_prefix} 用户 {person_id}{info_type} 信息已存在且未过期,跳过调取。")
continue
info_types_to_fetch.append(info_type)
if not info_types_to_fetch:
return
nickname_str = ",".join(global_config.bot.alias_names)
name_block = f"你的名字是{global_config.bot.nickname},你的昵称有{nickname_str},有人也会用这些昵称称呼你。"
person_name = await person_info_manager.get_value(person_id, "person_name")
info_type_str = ""
info_json_str = ""
for info_type in info_types_to_fetch:
info_type_str += f"{info_type},"
info_json_str += f"\"{info_type}\": \"信息内容\","
info_json_str += f'"{info_type}": "信息内容",'
info_type_str = info_type_str[:-1]
info_json_str = info_json_str[:-1]
person_impression = await person_info_manager.get_value(person_id, "impression")
if not person_impression:
impression_block = "你对ta没有什么深刻的印象"
else:
impression_block = f"{person_impression}"
points = await person_info_manager.get_value(person_id, "points")
if points:
points_text = "\n".join([
f"{point[2]}:{point[0]}"
for point in points
])
points_text = "\n".join([f"{point[2]}:{point[0]}" for point in points])
else:
points_text = "你不记得ta最近发生了什么"
prompt = (await global_prompt_manager.get_prompt_async("fetch_info_prompt")).format(
name_block=name_block,
info_type=info_type_str,
@@ -487,10 +487,10 @@ class RelationshipProcessor(BaseProcessor):
try:
content, _ = await self.llm_model.generate_response_async(prompt=prompt)
# logger.info(f"{self.log_prefix} fetch_person_info prompt: \n{prompt}\n")
logger.info(f"{self.log_prefix} fetch_person_info 结果: {content}")
if content:
try:
content_json = json.loads(repair_json(content))
@@ -508,9 +508,9 @@ class RelationshipProcessor(BaseProcessor):
else:
if person_id not in self.info_fetched_cache:
self.info_fetched_cache[person_id] = {}
self.info_fetched_cache[person_id][info_type] = {
"info":"unknow",
"info": "unknow",
"ttl": 10,
"start_time": start_time,
"person_name": person_name,
@@ -525,16 +525,12 @@ class RelationshipProcessor(BaseProcessor):
logger.error(f"{self.log_prefix} 执行LLM请求获取用户信息时出错: {e}")
logger.error(traceback.format_exc())
async def update_impression_on_cache_expiry(
self, person_id: str, chat_id: str, start_time: float, end_time: float
):
async def update_impression_on_cache_expiry(self, person_id: str, chat_id: str, start_time: float, end_time: float):
"""
在缓存过期时,获取聊天记录并更新用户印象
"""
logger.info(f"缓存过期,开始为 {person_id} 更新印象。时间范围:{start_time} -> {end_time}")
try:
impression_messages = get_raw_msg_by_timestamp_with_chat(chat_id, start_time, end_time)
if impression_messages:
logger.info(f"{person_id} 获取到 {len(impression_messages)} 条消息用于印象更新。")

View File

@@ -122,9 +122,7 @@ class SelfProcessor(BaseProcessor):
)
# 获取聊天内容
chat_observe_info = observation.get_observe_info()
person_list = observation.person_list
if isinstance(observation, HFCloopObservation):
# hfcloop_observe_info = observation.get_observe_info()
pass
nickname_str = ""
@@ -133,9 +131,7 @@ class SelfProcessor(BaseProcessor):
name_block = f"你的名字是{global_config.bot.nickname},你的昵称有{nickname_str},有人也会用这些昵称称呼你。"
personality_block = individuality.get_personality_prompt(x_person=2, level=2)
identity_block = individuality.get_identity_prompt(x_person=2, level=2)
prompt = (await global_prompt_manager.get_prompt_async("indentify_prompt")).format(

View File

@@ -118,7 +118,7 @@ class ToolProcessor(BaseProcessor):
is_group_chat = observation.is_group_chat
chat_observe_info = observation.get_observe_info()
person_list = observation.person_list
# person_list = observation.person_list
memory_str = ""
if running_memorys:
@@ -141,9 +141,7 @@ class ToolProcessor(BaseProcessor):
# 调用LLM专注于工具使用
# logger.info(f"开始执行工具调用{prompt}")
response, other_info = await self.llm_model.generate_response_async(
prompt=prompt, tools=tools
)
response, other_info = await self.llm_model.generate_response_async(prompt=prompt, tools=tools)
if len(other_info) == 3:
reasoning_content, model_name, tool_calls = other_info

View File

@@ -118,9 +118,7 @@ class WorkingMemoryProcessor(BaseProcessor):
memory_str=memory_choose_str,
)
# print(f"prompt: {prompt}")
# 调用LLM处理记忆
content = ""