This commit is contained in:
墨梓柒
2025-11-19 23:35:14 +08:00
parent 2f58605644
commit 44f427dc64
42 changed files with 1742 additions and 2062 deletions

View File

@@ -17,6 +17,7 @@ from .query_lpmm_knowledge import register_tool as register_lpmm_knowledge
from .query_person_info import register_tool as register_query_person_info
from src.config.config import global_config
def init_all_tools():
"""初始化并注册所有记忆检索工具"""
register_query_jargon()

View File

@@ -15,13 +15,10 @@ logger = get_logger("memory_retrieval_tools")
async def query_chat_history(
chat_id: str,
keyword: Optional[str] = None,
time_range: Optional[str] = None,
fuzzy: bool = True
chat_id: str, keyword: Optional[str] = None, time_range: Optional[str] = None, fuzzy: bool = True
) -> str:
"""根据时间或关键词在chat_history表中查询聊天记录概述
Args:
chat_id: 聊天ID
keyword: 关键词(可选,支持多个关键词,可用空格、逗号等分隔)
@@ -31,7 +28,7 @@ async def query_chat_history(
fuzzy: 是否使用模糊匹配模式默认True
- True: 模糊匹配只要包含任意一个关键词即匹配OR关系
- False: 全匹配必须包含所有关键词才匹配AND关系
Returns:
str: 查询结果
"""
@@ -39,10 +36,10 @@ async def query_chat_history(
# 检查参数
if not keyword and not time_range:
return "未指定查询参数需要提供keyword或time_range之一"
# 构建查询条件
query = ChatHistory.select().where(ChatHistory.chat_id == chat_id)
# 时间过滤条件
if time_range:
# 判断是时间点还是时间范围
@@ -50,79 +47,79 @@ async def query_chat_history(
# 时间范围:查询与时间范围有交集的记录
start_timestamp, end_timestamp = parse_time_range(time_range)
# 交集条件start_time < end_timestamp AND end_time > start_timestamp
time_filter = (
(ChatHistory.start_time < end_timestamp) &
(ChatHistory.end_time > start_timestamp)
)
time_filter = (ChatHistory.start_time < end_timestamp) & (ChatHistory.end_time > start_timestamp)
else:
# 时间点查询包含该时间点的记录start_time <= time_point <= end_time
target_timestamp = parse_datetime_to_timestamp(time_range)
time_filter = (
(ChatHistory.start_time <= target_timestamp) &
(ChatHistory.end_time >= target_timestamp)
)
time_filter = (ChatHistory.start_time <= target_timestamp) & (ChatHistory.end_time >= target_timestamp)
query = query.where(time_filter)
# 执行查询
records = list(query.order_by(ChatHistory.start_time.desc()).limit(50))
# 如果有关键词,进一步过滤
if keyword:
# 解析多个关键词(支持空格、逗号等分隔符)
keywords_list = parse_keywords_string(keyword)
if not keywords_list:
keywords_list = [keyword.strip()] if keyword.strip() else []
# 转换为小写以便匹配
keywords_lower = [kw.lower() for kw in keywords_list if kw.strip()]
if not keywords_lower:
return "关键词为空"
filtered_records = []
for record in records:
# 在theme、keywords、summary、original_text中搜索
theme = (record.theme or "").lower()
summary = (record.summary or "").lower()
original_text = (record.original_text or "").lower()
# 解析record中的keywords JSON
record_keywords_list = []
if record.keywords:
try:
keywords_data = json.loads(record.keywords) if isinstance(record.keywords, str) else record.keywords
keywords_data = (
json.loads(record.keywords) if isinstance(record.keywords, str) else record.keywords
)
if isinstance(keywords_data, list):
record_keywords_list = [str(k).lower() for k in keywords_data]
except (json.JSONDecodeError, TypeError, ValueError):
pass
# 根据匹配模式检查关键词
matched = False
if fuzzy:
# 模糊匹配只要包含任意一个关键词即匹配OR关系
for kw in keywords_lower:
if (kw in theme or
kw in summary or
kw in original_text or
any(kw in k for k in record_keywords_list)):
if (
kw in theme
or kw in summary
or kw in original_text
or any(kw in k for k in record_keywords_list)
):
matched = True
break
else:
# 全匹配必须包含所有关键词才匹配AND关系
matched = True
for kw in keywords_lower:
kw_matched = (kw in theme or
kw in summary or
kw in original_text or
any(kw in k for k in record_keywords_list))
kw_matched = (
kw in theme
or kw in summary
or kw in original_text
or any(kw in k for k in record_keywords_list)
)
if not kw_matched:
matched = False
break
if matched:
filtered_records.append(record)
if not filtered_records:
keywords_str = "".join(keywords_list)
match_mode = "包含任意一个关键词" if fuzzy else "包含所有关键词"
@@ -130,9 +127,9 @@ async def query_chat_history(
return f"未找到{match_mode}'{keywords_str}'且在指定时间范围内的聊天记录概述"
else:
return f"未找到{match_mode}'{keywords_str}'的聊天记录概述"
records = filtered_records
# 如果没有记录(可能是时间范围查询但没有匹配的记录)
if not records:
if time_range:
@@ -148,22 +145,23 @@ async def query_chat_history(
record.count = (record.count or 0) + 1
except Exception as update_error:
logger.error(f"更新聊天记录概述计数失败: {update_error}")
# 构建结果文本
results = []
for record in records_to_use: # 最多返回3条记录
result_parts = []
# 添加主题
if record.theme:
result_parts.append(f"主题:{record.theme}")
# 添加时间范围
from datetime import datetime
start_str = datetime.fromtimestamp(record.start_time).strftime("%Y-%m-%d %H:%M:%S")
end_str = datetime.fromtimestamp(record.end_time).strftime("%Y-%m-%d %H:%M:%S")
result_parts.append(f"时间:{start_str} - {end_str}")
# 添加概括优先使用summary如果没有则使用original_text的前200字符
if record.summary:
result_parts.append(f"概括:{record.summary}")
@@ -172,18 +170,18 @@ async def query_chat_history(
if len(record.original_text) > 200:
text_preview += "..."
result_parts.append(f"内容:{text_preview}")
results.append("\n".join(result_parts))
if not results:
return "未找到相关聊天记录概述"
response_text = "\n\n---\n\n".join(results)
if len(records) > len(records_to_use):
omitted_count = len(records) - len(records_to_use)
response_text += f"\n\n(还有{omitted_count}条历史记录已省略)"
return response_text
except Exception as e:
logger.error(f"查询聊天历史概述失败: {e}")
return f"查询失败: {str(e)}"
@@ -199,20 +197,20 @@ def register_tool():
"name": "keyword",
"type": "string",
"description": "关键词(可选,支持多个关键词,可用空格、逗号、斜杠等分隔,如:'麦麦 百度网盘''麦麦,百度网盘'。用于在主题、关键词、概括、原文中搜索)",
"required": False
"required": False,
},
{
"name": "time_range",
"type": "string",
"description": "时间范围或时间点(可选)。格式:'YYYY-MM-DD HH:MM:SS - YYYY-MM-DD HH:MM:SS'(时间范围,查询与时间范围有交集的记录)或 'YYYY-MM-DD HH:MM:SS'(时间点,查询包含该时间点的记录)",
"required": False
"required": False,
},
{
"name": "fuzzy",
"type": "boolean",
"description": "是否使用模糊匹配模式默认True。True表示模糊匹配只要包含任意一个关键词即匹配OR关系False表示全匹配必须包含所有关键词才匹配AND关系",
"required": False
}
"required": False,
},
],
execute_func=query_chat_history
execute_func=query_chat_history,
)

View File

@@ -73,5 +73,3 @@ def register_tool():
],
execute_func=query_lpmm_knowledge,
)

View File

@@ -14,23 +14,25 @@ logger = get_logger("memory_retrieval_tools")
def _format_group_nick_names(group_nick_name_field) -> str:
"""格式化群昵称信息
Args:
group_nick_name_field: 群昵称字段可能是字符串JSON或None
Returns:
str: 格式化后的群昵称信息字符串
"""
if not group_nick_name_field:
return ""
try:
# 解析JSON格式的群昵称列表
group_nick_names_data = json.loads(group_nick_name_field) if isinstance(group_nick_name_field, str) else group_nick_name_field
group_nick_names_data = (
json.loads(group_nick_name_field) if isinstance(group_nick_name_field, str) else group_nick_name_field
)
if not isinstance(group_nick_names_data, list) or not group_nick_names_data:
return ""
# 格式化群昵称列表
group_nick_list = []
for item in group_nick_names_data:
@@ -41,7 +43,7 @@ def _format_group_nick_names(group_nick_name_field) -> str:
elif isinstance(item, str):
# 兼容旧格式(如果存在)
group_nick_list.append(f" - {item}")
if group_nick_list:
return "群昵称:\n" + "\n".join(group_nick_list)
return ""
@@ -58,10 +60,10 @@ def _format_group_nick_names(group_nick_name_field) -> str:
async def query_person_info(person_name: str) -> str:
"""根据person_name查询用户信息使用模糊查询
Args:
person_name: 用户名称person_name字段
Returns:
str: 查询结果,包含用户的所有信息
"""
@@ -69,37 +71,35 @@ async def query_person_info(person_name: str) -> str:
person_name = str(person_name).strip()
if not person_name:
return "用户名称为空"
# 构建查询条件(使用模糊查询)
query = PersonInfo.select().where(
PersonInfo.person_name.contains(person_name)
)
query = PersonInfo.select().where(PersonInfo.person_name.contains(person_name))
# 执行查询
records = list(query.limit(20)) # 最多返回20条记录
if not records:
return f"未找到模糊匹配'{person_name}'的用户信息"
# 区分精确匹配和模糊匹配的结果
exact_matches = []
fuzzy_matches = []
for record in records:
# 检查是否是精确匹配
if record.person_name and record.person_name.strip() == person_name:
exact_matches.append(record)
else:
fuzzy_matches.append(record)
# 构建结果文本
results = []
# 先处理精确匹配的结果
for record in exact_matches:
result_parts = []
result_parts.append("【精确匹配】") # 标注为精确匹配
# 基本信息
if record.person_name:
result_parts.append(f"用户名称:{record.person_name}")
@@ -111,19 +111,19 @@ async def query_person_info(person_name: str) -> str:
result_parts.append(f"平台:{record.platform}")
if record.user_id:
result_parts.append(f"平台用户ID{record.user_id}")
# 群昵称信息
group_nick_name_str = _format_group_nick_names(getattr(record, "group_nick_name", None))
if group_nick_name_str:
result_parts.append(group_nick_name_str)
# 名称设定原因
if record.name_reason:
result_parts.append(f"名称设定原因:{record.name_reason}")
# 认识状态
result_parts.append(f"是否已认识:{'' if record.is_known else ''}")
# 时间信息
if record.know_since:
know_since_str = datetime.fromtimestamp(record.know_since).strftime("%Y-%m-%d %H:%M:%S")
@@ -133,11 +133,15 @@ async def query_person_info(person_name: str) -> str:
result_parts.append(f"最后认识时间:{last_know_str}")
if record.know_times:
result_parts.append(f"认识次数:{int(record.know_times)}")
# 记忆点memory_points
if record.memory_points:
try:
memory_points_data = json.loads(record.memory_points) if isinstance(record.memory_points, str) else record.memory_points
memory_points_data = (
json.loads(record.memory_points)
if isinstance(record.memory_points, str)
else record.memory_points
)
if isinstance(memory_points_data, list) and memory_points_data:
# 解析记忆点格式category:content:weight
memory_list = []
@@ -151,7 +155,7 @@ async def query_person_info(person_name: str) -> str:
memory_list.append(f" - [{category}] {content} (权重: {weight})")
else:
memory_list.append(f" - {memory_point}")
if memory_list:
result_parts.append("记忆点:\n" + "\n".join(memory_list))
except (json.JSONDecodeError, TypeError, ValueError) as e:
@@ -161,14 +165,14 @@ async def query_person_info(person_name: str) -> str:
if len(str(record.memory_points)) > 200:
memory_preview += "..."
result_parts.append(f"记忆点(原始数据):{memory_preview}")
results.append("\n".join(result_parts))
# 再处理模糊匹配的结果
for record in fuzzy_matches:
result_parts = []
result_parts.append("【模糊匹配】") # 标注为模糊匹配
# 基本信息
if record.person_name:
result_parts.append(f"用户名称:{record.person_name}")
@@ -180,19 +184,19 @@ async def query_person_info(person_name: str) -> str:
result_parts.append(f"平台:{record.platform}")
if record.user_id:
result_parts.append(f"平台用户ID{record.user_id}")
# 群昵称信息
group_nick_name_str = _format_group_nick_names(getattr(record, "group_nick_name", None))
if group_nick_name_str:
result_parts.append(group_nick_name_str)
# 名称设定原因
if record.name_reason:
result_parts.append(f"名称设定原因:{record.name_reason}")
# 认识状态
result_parts.append(f"是否已认识:{'' if record.is_known else ''}")
# 时间信息
if record.know_since:
know_since_str = datetime.fromtimestamp(record.know_since).strftime("%Y-%m-%d %H:%M:%S")
@@ -202,11 +206,15 @@ async def query_person_info(person_name: str) -> str:
result_parts.append(f"最后认识时间:{last_know_str}")
if record.know_times:
result_parts.append(f"认识次数:{int(record.know_times)}")
# 记忆点memory_points
if record.memory_points:
try:
memory_points_data = json.loads(record.memory_points) if isinstance(record.memory_points, str) else record.memory_points
memory_points_data = (
json.loads(record.memory_points)
if isinstance(record.memory_points, str)
else record.memory_points
)
if isinstance(memory_points_data, list) and memory_points_data:
# 解析记忆点格式category:content:weight
memory_list = []
@@ -220,7 +228,7 @@ async def query_person_info(person_name: str) -> str:
memory_list.append(f" - [{category}] {content} (权重: {weight})")
else:
memory_list.append(f" - {memory_point}")
if memory_list:
result_parts.append("记忆点:\n" + "\n".join(memory_list))
except (json.JSONDecodeError, TypeError, ValueError) as e:
@@ -230,20 +238,20 @@ async def query_person_info(person_name: str) -> str:
if len(str(record.memory_points)) > 200:
memory_preview += "..."
result_parts.append(f"记忆点(原始数据):{memory_preview}")
results.append("\n".join(result_parts))
# 组合所有结果
if not results:
return f"未找到匹配'{person_name}'的用户信息"
response_text = "\n\n---\n\n".join(results)
# 添加统计信息
total_count = len(records)
exact_count = len(exact_matches)
fuzzy_count = len(fuzzy_matches)
# 显示精确匹配和模糊匹配的统计
if exact_count > 0 or fuzzy_count > 0:
stats_parts = []
@@ -257,13 +265,13 @@ async def query_person_info(person_name: str) -> str:
response_text = f"找到 {total_count} 条匹配的用户信息:\n\n{response_text}"
else:
response_text = f"找到用户信息:\n\n{response_text}"
# 如果结果数量达到限制,添加提示
if total_count >= 20:
response_text += "\n\n(已显示前20条结果可能还有更多匹配记录)"
return response_text
except Exception as e:
logger.error(f"查询用户信息失败: {e}")
return f"查询失败: {str(e)}"
@@ -275,13 +283,7 @@ def register_tool():
name="query_person_info",
description="根据查询某个用户的所有信息。名称、昵称、平台、用户ID、qq号、群昵称等",
parameters=[
{
"name": "person_name",
"type": "string",
"description": "用户名称,用于查询用户信息",
"required": True
}
{"name": "person_name", "type": "string", "description": "用户名称,用于查询用户信息", "required": True}
],
execute_func=query_person_info
execute_func=query_person_info,
)

View File

@@ -47,10 +47,10 @@ class MemoryRetrievalTool:
async def execute(self, **kwargs) -> str:
"""Run this tool by awaiting ``self.execute_func`` with the given keyword arguments and return its result."""
# The keyword arguments are forwarded unchanged; this method performs no validation of its own.
return await self.execute_func(**kwargs)
def get_tool_definition(self) -> Dict[str, Any]:
"""获取工具定义用于LLM function calling
Returns:
Dict[str, Any]: 工具定义字典格式与BaseTool一致
格式: {"name": str, "description": str, "parameters": List[Tuple]}
@@ -58,14 +58,14 @@ class MemoryRetrievalTool:
# 转换参数格式为元组列表格式与BaseTool一致
# 格式: [("param_name", ToolParamType, "description", required, enum_values)]
param_tuples = []
for param in self.parameters:
param_name = param.get("name", "")
param_type_str = param.get("type", "string").lower()
param_desc = param.get("description", "")
is_required = param.get("required", False)
enum_values = param.get("enum", None)
# 转换类型字符串到ToolParamType
type_mapping = {
"string": ToolParamType.STRING,
@@ -76,18 +76,14 @@ class MemoryRetrievalTool:
"bool": ToolParamType.BOOLEAN,
}
param_type = type_mapping.get(param_type_str, ToolParamType.STRING)
# 构建参数元组
param_tuple = (param_name, param_type, param_desc, is_required, enum_values)
param_tuples.append(param_tuple)
# 构建工具定义格式与BaseTool.get_tool_definition()一致
tool_def = {
"name": self.name,
"description": self.description,
"parameters": param_tuples
}
tool_def = {"name": self.name, "description": self.description, "parameters": param_tuples}
return tool_def
@@ -126,10 +122,10 @@ class MemoryRetrievalToolRegistry:
action_types.append("final_answer")
action_types.append("no_answer")
return "".join([f'"{at}"' for at in action_types])
def get_tool_definitions(self) -> List[Dict[str, Any]]:
"""获取所有工具的定义列表用于LLM function calling
Returns:
List[Dict[str, Any]]: 工具定义列表,每个元素是一个工具定义字典
"""