feat: Enhance OpenAI compatibility and introduce unified LLM service data models

- Refactored model fetching logic to support various authentication methods for OpenAI-compatible APIs.
- Introduced new data models for LLM service requests and responses to standardize interactions across layers.
- Added an adapter base class for unified request execution across different providers.
- Implemented utility functions for building OpenAI-compatible client configurations and request overrides.
2026-03-26 16:15:42 +08:00
parent 6e7daae55d
commit 777d4cb0d2
48 changed files with 5443 additions and 2945 deletions
--- a/src/chat/knowledge/embedding_store.py
+++ b/src/chat/knowledge/embedding_store.py
@@ -139,14 +139,14 @@ class EmbeddingStore:
        asyncio.set_event_loop(loop)

        try:
-            # 创建新的LLMRequest实例
-            from src.llm_models.utils_model import LLMRequest
-            from src.config.config import model_config
+            # 创建新的服务层实例
+            from src.services.llm_service import LLMServiceClient

-            llm = LLMRequest(model_set=model_config.model_task_config.embedding, request_type="embedding")
+            llm = LLMServiceClient(task_name="embedding", request_type="embedding")

            # 使用新的事件循环运行异步方法
-            embedding, _ = loop.run_until_complete(llm.get_embedding(s))
+            embedding_result = loop.run_until_complete(llm.embed_text(s))
+            embedding = embedding_result.embedding

            if embedding and len(embedding) > 0:
                return embedding
@@ -195,13 +195,12 @@ class EmbeddingStore:
            start_idx, chunk_strs = chunk_data
            chunk_results = []

-            # 为每个线程创建独立的LLMRequest实例
-            from src.llm_models.utils_model import LLMRequest
-            from src.config.config import model_config
+            # 为每个线程创建独立的服务层实例
+            from src.services.llm_service import LLMServiceClient

            try:
-                # 创建线程专用的LLM实例
-                llm = LLMRequest(model_set=model_config.model_task_config.embedding, request_type="embedding")
+                # 创建线程专用的服务层实例
+                llm = LLMServiceClient(task_name="embedding", request_type="embedding")

                for i, s in enumerate(chunk_strs):
                    try:
@@ -209,7 +208,8 @@ class EmbeddingStore:
                        loop = asyncio.new_event_loop()
                        asyncio.set_event_loop(loop)
                        try:
-                            embedding = loop.run_until_complete(llm.get_embedding(s))
+                            embedding_result = loop.run_until_complete(llm.embed_text(s))
+                            embedding = embedding_result.embedding
                        finally:
                            loop.close()

--- a/src/chat/knowledge/ie_process.py
+++ b/src/chat/knowledge/ie_process.py
@@ -1,18 +1,27 @@
 import asyncio
 import json
 import time
-from typing import List, Union
+from typing import Dict, List, Tuple, Union

-from .global_logger import logger
-from . import prompt_template
-from . import INVALID_ENTITY
-from src.llm_models.utils_model import LLMRequest
 from json_repair import repair_json

+from src.services.llm_service import LLMServiceClient

-def _extract_json_from_text(text: str):
+from . import INVALID_ENTITY
+from . import prompt_template
+from .global_logger import logger
+
+
+def _extract_json_from_text(text: str) -> List[str] | List[List[str]] | Dict[str, object]:
    # sourcery skip: assign-if-exp, extract-method
-    """从文本中提取JSON数据的高容错方法"""
+    """从文本中提取 JSON 数据。
+
+    Args:
+        text: 原始模型输出文本。
+
+    Returns:
+        List[str] | List[List[str]] | Dict[str, object]: 修复并解析后的 JSON 结果。
+    """
    if text is None:
        logger.error("输入文本为None")
        return []
@@ -46,20 +55,30 @@ def _extract_json_from_text(text: str):
        return []


-def _entity_extract(llm_req: LLMRequest, paragraph: str) -> List[str]:
+def _entity_extract(llm_req: LLMServiceClient, paragraph: str) -> List[str]:
    # sourcery skip: reintroduce-else, swap-if-else-branches, use-named-expression
-    """对段落进行实体提取，返回提取出的实体列表（JSON格式）"""
+    """对单段文本执行实体提取。
+
+    Args:
+        llm_req: LLM 服务门面实例。
+        paragraph: 待提取实体的原始段落文本。
+
+    Returns:
+        List[str]: 提取出的实体列表。
+    """
    entity_extract_context = prompt_template.build_entity_extract_context(paragraph)

    # 使用 asyncio.run 来运行异步方法
    try:
        # 如果当前已有事件循环在运行，使用它
        loop = asyncio.get_running_loop()
-        future = asyncio.run_coroutine_threadsafe(llm_req.generate_response_async(entity_extract_context), loop)
-        response, _ = future.result()
+        future = asyncio.run_coroutine_threadsafe(llm_req.generate_response(entity_extract_context), loop)
+        generation_result = future.result()
+        response = generation_result.response
    except RuntimeError:
        # 如果没有运行中的事件循环，直接使用 asyncio.run
-        response, _ = asyncio.run(llm_req.generate_response_async(entity_extract_context))
+        generation_result = asyncio.run(llm_req.generate_response(entity_extract_context))
+        response = generation_result.response

    # 添加调试日志
    logger.debug(f"LLM返回的原始响应: {response}")
@@ -92,8 +111,21 @@ def _entity_extract(llm_req: LLMRequest, paragraph: str) -> List[str]:
    return entity_extract_result


-def _rdf_triple_extract(llm_req: LLMRequest, paragraph: str, entities: list) -> List[List[str]]:
-    """对段落进行实体提取，返回提取出的实体列表（JSON格式）"""
+def _rdf_triple_extract(
+    llm_req: LLMServiceClient,
+    paragraph: str,
+    entities: List[str],
+) -> List[List[str]]:
+    """对单段文本执行 RDF 三元组提取。
+
+    Args:
+        llm_req: LLM 服务门面实例。
+        paragraph: 待提取的原始段落文本。
+        entities: 已识别出的实体列表。
+
+    Returns:
+        List[List[str]]: 提取出的三元组列表。
+    """
    rdf_extract_context = prompt_template.build_rdf_triple_extract_context(
        paragraph, entities=json.dumps(entities, ensure_ascii=False)
    )
@@ -102,11 +134,13 @@ def _rdf_triple_extract(llm_req: LLMRequest, paragraph: str, entities: list) ->
    try:
        # 如果当前已有事件循环在运行，使用它
        loop = asyncio.get_running_loop()
-        future = asyncio.run_coroutine_threadsafe(llm_req.generate_response_async(rdf_extract_context), loop)
-        response, _ = future.result()
+        future = asyncio.run_coroutine_threadsafe(llm_req.generate_response(rdf_extract_context), loop)
+        generation_result = future.result()
+        response = generation_result.response
    except RuntimeError:
        # 如果没有运行中的事件循环，直接使用 asyncio.run
-        response, _ = asyncio.run(llm_req.generate_response_async(rdf_extract_context))
+        generation_result = asyncio.run(llm_req.generate_response(rdf_extract_context))
+        response = generation_result.response

    # 添加调试日志
    logger.debug(f"RDF LLM返回的原始响应: {response}")
@@ -140,8 +174,21 @@ def _rdf_triple_extract(llm_req: LLMRequest, paragraph: str, entities: list) ->


 def info_extract_from_str(
-    llm_client_for_ner: LLMRequest, llm_client_for_rdf: LLMRequest, paragraph: str
-) -> Union[tuple[None, None], tuple[list[str], list[list[str]]]]:
+    llm_client_for_ner: LLMServiceClient,
+    llm_client_for_rdf: LLMServiceClient,
+    paragraph: str,
+) -> Union[Tuple[None, None], Tuple[List[str], List[List[str]]]]:
+    """从文本中提取实体与三元组信息。
+
+    Args:
+        llm_client_for_ner: 实体提取使用的 LLM 服务门面。
+        llm_client_for_rdf: RDF 三元组提取使用的 LLM 服务门面。
+        paragraph: 原始段落文本。
+
+    Returns:
+        Union[Tuple[None, None], Tuple[List[str], List[List[str]]]]: 成功时返回
+        ``(实体列表, 三元组列表)``，失败时返回 ``(None, None)``。
+    """
    try_count = 0
    while True:
        try:
@@ -176,17 +223,30 @@ def info_extract_from_str(


 class IEProcess:
-    """
-    信息抽取处理器类，提供更方便的批次处理接口。
-    """
+    """信息抽取处理器。"""

-    def __init__(self, llm_ner: LLMRequest, llm_rdf: LLMRequest = None):
+    def __init__(
+        self,
+        llm_ner: LLMServiceClient,
+        llm_rdf: LLMServiceClient | None = None,
+    ) -> None:
+        """初始化信息抽取处理器。
+
+        Args:
+            llm_ner: 实体提取使用的 LLM 服务门面。
+            llm_rdf: RDF 三元组提取使用的 LLM 服务门面；为空时复用 `llm_ner`。
+        """
        self.llm_ner = llm_ner
        self.llm_rdf = llm_rdf or llm_ner

-    async def process_paragraphs(self, paragraphs: List[str]) -> List[dict]:
-        """
-        异步处理多个段落。
+    async def process_paragraphs(self, paragraphs: List[str]) -> List[Dict[str, object]]:
+        """异步处理多个段落。
+
+        Args:
+            paragraphs: 待处理的段落列表。
+
+        Returns:
+            List[Dict[str, object]]: 每个成功段落对应的抽取结果。
        """
        from .utils.hash import get_sha256

--- a/src/chat/knowledge/lpmm_ops.py
+++ b/src/chat/knowledge/lpmm_ops.py
@@ -91,13 +91,14 @@ class LPMMOperations:

            # 2. 实体与三元组抽取 (内部调用大模型)
            from src.chat.knowledge.ie_process import IEProcess
-            from src.llm_models.utils_model import LLMRequest
-            from src.config.config import model_config
+            from src.services.llm_service import LLMServiceClient

-            llm_ner = LLMRequest(
-                model_set=model_config.model_task_config.lpmm_entity_extract, request_type="lpmm.entity_extract"
+            llm_ner = LLMServiceClient(
+                task_name="lpmm_entity_extract", request_type="lpmm.entity_extract"
+            )
+            llm_rdf = LLMServiceClient(
+                task_name="lpmm_rdf_build", request_type="lpmm.rdf_build"
            )
-            llm_rdf = LLMRequest(model_set=model_config.model_task_config.lpmm_rdf_build, request_type="lpmm.rdf_build")
            ie_process = IEProcess(llm_ner, llm_rdf)

            logger.info(f"[Plugin API] 正在对 {len(paragraphs)} 段文本执行信息抽取...")