feat：lpmm可选接入memory agent，将memory agent改为标准工具格式，修改llm_utils以兼容

2025-11-13 18:55:37 +08:00
parent e52a81e90b
commit f2819be5e9
18 changed files with 868 additions and 432 deletions
--- a/src/llm_models/utils_model.py
+++ b/src/llm_models/utils_model.py
@@ -166,6 +166,57 @@ class LLMRequest:
                time_cost=time.time() - start_time,
            )
        return content or "", (reasoning_content, model_info.name, tool_calls)
+    
+    async def generate_response_with_message_async(
+        self,
+        message_factory: Callable[[BaseClient], List[Message]],
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        raise_when_empty: bool = True,
+    ) -> Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]:
+        """
+        Asynchronously generate a response from a caller-supplied message factory.
+        Args:
+            message_factory (Callable[[BaseClient], List[Message]]): pre-built factory that yields the messages
+            temperature (float, optional): temperature, forwarded to _execute_request
+            max_tokens (int, optional): maximum token count, forwarded to _execute_request
+            tools (Optional[List[Dict[str, Any]]]): tool list, converted via _build_tool_options
+            raise_when_empty (bool): NOTE(review): accepted but never read in this method body
+        Returns:
+            Tuple[str, Tuple[str, str, Optional[List[ToolCall]]]]: (content, (reasoning, model name, tool calls))
+        """
+        start_time = time.time()  # reference point for the debug log and the recorded time_cost
+
+        tool_built = self._build_tool_options(tools)
+
+        response, model_info = await self._execute_request(
+            request_type=RequestType.RESPONSE,
+            message_factory=message_factory,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            tool_options=tool_built,
+        )
+
+        logger.debug(f"LLM请求总耗时: {time.time() - start_time}")
+        logger.debug(f"LLM生成内容: {response}")
+
+        content = response.content
+        reasoning_content = response.reasoning_content or ""
+        tool_calls = response.tool_calls
+        if not reasoning_content and content:  # no separate reasoning: let _extract_reasoning split content
+            content, extracted_reasoning = self._extract_reasoning(content)
+            reasoning_content = extracted_reasoning
+        if usage := response.usage:  # usage is only recorded when the response carries it
+            llm_usage_recorder.record_usage_to_database(
+                model_info=model_info,
+                model_usage=usage,
+                user_id="system",
+                request_type=self.request_type,
+                endpoint="/chat/completions",
+                time_cost=time.time() - start_time,
+            )
+        return content or "", (reasoning_content, model_info.name, tool_calls)  # falsy content becomes ""

    async def get_embedding(self, embedding_input: str) -> Tuple[List[float], str]:
        """