feat:lpmm可选接入memory agent,将memory agent改为标准工具格式,修改llm_utils以兼容

This commit is contained in:
SengokuCola
2025-11-13 18:55:37 +08:00
parent e52a81e90b
commit f2819be5e9
18 changed files with 868 additions and 432 deletions

View File

@@ -166,6 +166,57 @@ class LLMRequest:
time_cost=time.time() - start_time,
)
return content or "", (reasoning_content, model_info.name, tool_calls)
async def generate_response_with_message_async(
    self,
    message_factory: Callable[[BaseClient], List[Message]],
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    tools: Optional[List[Dict[str, Any]]] = None,
    raise_when_empty: bool = True,
) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
    """
    Asynchronously generate a response from a caller-supplied message factory.

    Args:
        message_factory: Callable that takes a client and returns the list of
            messages to send; it is forwarded unchanged to ``_execute_request``.
        temperature: Sampling temperature forwarded to the request (optional).
        max_tokens: Maximum token count forwarded to the request (optional).
        tools: Tool definitions; passed through ``_build_tool_options`` and
            the result is sent as ``tool_options``.
        raise_when_empty: Accepted by the signature but not consulted anywhere
            in this method's body; an empty response is returned as ``""``.

    Returns:
        A nested tuple ``(content, (reasoning_content, model_name, tool_calls))``.
        ``content`` is never ``None`` (falls back to an empty string).
    """
    start_time = time.time()

    built_tool_options = self._build_tool_options(tools)
    response, model_info = await self._execute_request(
        request_type=RequestType.RESPONSE,
        message_factory=message_factory,
        temperature=temperature,
        max_tokens=max_tokens,
        tool_options=built_tool_options,
    )

    logger.debug(f"LLM请求总耗时: {time.time() - start_time}")
    logger.debug(f"LLM生成内容: {response}")

    answer_text = response.content
    reasoning_text = response.reasoning_content or ""
    requested_tool_calls = response.tool_calls

    # The response carried no separate reasoning field: fall back to
    # extracting the reasoning from the content itself.
    if not reasoning_text and answer_text:
        answer_text, reasoning_text = self._extract_reasoning(answer_text)

    # Usage is only persisted when the response actually reports it.
    usage = response.usage
    if usage:
        llm_usage_recorder.record_usage_to_database(
            model_info=model_info,
            model_usage=usage,
            user_id="system",
            request_type=self.request_type,
            endpoint="/chat/completions",
            time_cost=time.time() - start_time,
        )

    if not answer_text:
        answer_text = ""
    return answer_text, (reasoning_text, model_info.name, requested_tool_calls)
async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
"""