feat: Enhance OpenAI compatibility and introduce unified LLM service data models

- Refactored model fetching logic to support various authentication methods for OpenAI-compatible APIs.
- Introduced new data models for LLM service requests and responses to standardize interactions across layers.
- Added an adapter base class for unified request execution across different providers.
- Implemented utility functions for building OpenAI-compatible client configurations and request overrides.
This commit is contained in:
DrSmoothl
2026-03-26 16:15:42 +08:00
parent 6e7daae55d
commit 777d4cb0d2
48 changed files with 5443 additions and 2945 deletions
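The diffs below all apply one migration: calls to the old LLMRequest(model_set=...).generate_response_async(...), which returned a (response, (reasoning, model_name, _)) tuple, are replaced by LLMServiceClient(task_name=..., request_type=...).generate_response(...), which takes an LLMGenerationOptions and returns a result object. A minimal sketch of the new call pattern, reconstructed only from the call sites in this commit (any field of the result object other than response is an assumption, not shown in these hunks):

# Sketch of the unified call pattern this commit migrates to. Inferred from the
# call sites below; the internals of LLMServiceClient are not shown in this diff.
from src.common.data_models.llm_service_data_models import LLMGenerationOptions
from src.services.llm_service import LLMServiceClient

judge_llm = LLMServiceClient(task_name="tool_use", request_type="expression_check")

async def evaluate(prompt: str) -> str:
    # generate_response takes sampling parameters via LLMGenerationOptions
    # instead of loose keyword arguments, and returns a result object.
    generation_result = await judge_llm.generate_response(
        prompt=prompt,
        options=LLMGenerationOptions(temperature=0.6, max_tokens=1024),
    )
    return generation_result.response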

View File

@@ -20,8 +20,8 @@ from src.common.database.database import get_db_session
from src.common.database.database_model import Expression
from src.common.logger import get_logger
from src.config.config import global_config
-from src.config.config import model_config
-from src.llm_models.utils_model import LLMRequest
+from src.common.data_models.llm_service_data_models import LLMGenerationOptions
+from src.services.llm_service import LLMServiceClient
from src.manager.async_task_manager import AsyncTask
logger = get_logger("expression_auto_check_task")
@@ -76,7 +76,7 @@ def create_evaluation_prompt(situation: str, style: str) -> str:
return prompt
-judge_llm = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="expression_check")
+judge_llm = LLMServiceClient(task_name="tool_use", request_type="expression_check")
async def single_expression_check(situation: str, style: str) -> tuple[bool, str, str | None]:
@@ -94,10 +94,11 @@ async def single_expression_check(situation: str, style: str) -> tuple[bool, str
prompt = create_evaluation_prompt(situation, style)
logger.debug(f"正在评估表达方式: situation={situation}, style={style}")
-response, (reasoning, model_name, _) = await judge_llm.generate_response_async(
-prompt=prompt, temperature=0.6, max_tokens=1024
-)
+generation_result = await judge_llm.generate_response(
+prompt=prompt,
+options=LLMGenerationOptions(temperature=0.6, max_tokens=1024),
+)
+response = generation_result.response
logger.debug(f"LLM响应: {response}")
# 解析JSON响应

View File

@@ -7,8 +7,9 @@ import difflib
import json
import re
-from src.llm_models.utils_model import LLMRequest
-from src.config.config import model_config, global_config
+from src.common.data_models.llm_service_data_models import LLMGenerationOptions
+from src.services.llm_service import LLMServiceClient
+from src.config.config import global_config
from src.prompt.prompt_manager import prompt_manager
from src.common.logger import get_logger
from src.common.database.database_model import Expression
@@ -26,10 +27,11 @@ if TYPE_CHECKING:
logger = get_logger("expressor")
-# TODO: switch to the new model-calling approach once the LLM refactor is done
-express_learn_model = LLMRequest(model_set=model_config.model_task_config.utils, request_type="expression.learner")
-summary_model = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="expression.summary")
-check_model = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="expression.check")
+express_learn_model = LLMServiceClient(
+task_name="utils", request_type="expression.learner"
+)
+summary_model = LLMServiceClient(task_name="tool_use", request_type="expression.summary")
+check_model = LLMServiceClient(task_name="tool_use", request_type="expression.check")
class ExpressionLearner:
@@ -74,7 +76,10 @@ class ExpressionLearner:
# Call the LLM to learn expressions
try:
-response, _ = await express_learn_model.generate_response_async(prompt, temperature=0.3)
+generation_result = await express_learn_model.generate_response(
+prompt, options=LLMGenerationOptions(temperature=0.3)
+)
+response = generation_result.response
except Exception as e:
logger.error(f"学习表达方式失败,模型生成出错:{e}")
return
@@ -413,7 +418,10 @@ class ExpressionLearner:
"只输出概括内容。"
)
try:
-summary, _ = await summary_model.generate_response_async(prompt, temperature=0.2)
+summary_result = await summary_model.generate_response(
+prompt, options=LLMGenerationOptions(temperature=0.2)
+)
+summary = summary_result.response
if summary := summary.strip():
return summary
except Exception as e:

View File

@@ -4,10 +4,11 @@ import time
from typing import List, Dict, Optional, Any, Tuple
from json_repair import repair_json
-from src.llm_models.utils_model import LLMRequest
-from src.config.config import global_config, model_config
+from src.services.llm_service import LLMServiceClient
+from src.config.config import global_config
from src.common.logger import get_logger
from src.common.database.database_model import Expression
from src.common.utils.utils_session import SessionUtils
from src.prompt.prompt_manager import prompt_manager
from src.learners.learner_utils_old import weighted_sample
from src.chat.utils.common_utils import TempMethodsExpression
@@ -17,8 +18,8 @@ logger = get_logger("expression_selector")
class ExpressionSelector:
def __init__(self):
-self.llm_model = LLMRequest(
-model_set=model_config.model_task_config.tool_use, request_type="expression.selector"
+self.llm_model = LLMServiceClient(
+task_name="tool_use", request_type="expression.selector"
)
def can_use_expression_for_chat(self, chat_id: str) -> bool:
@@ -383,8 +384,8 @@ class ExpressionSelector:
prompt = await prompt_manager.render_prompt(prompt_template)
# 4. Call the LLM
-content, (reasoning_content, model_name, _) = await self.llm_model.generate_response_async(prompt=prompt)
+generation_result = await self.llm_model.generate_response(prompt=prompt)
+content = generation_result.response
# print(prompt)
# print(content)

View File

@@ -1,19 +1,40 @@
from json_repair import repair_json
-from typing import Tuple, Optional, List
+from typing import Any, List, Optional, Tuple
import json
import re
-from src.config.config import model_config
from src.config.config import global_config
-from src.llm_models.utils_model import LLMRequest
+from src.common.data_models.llm_service_data_models import LLMGenerationOptions
+from src.services.llm_service import LLMServiceClient
from src.prompt.prompt_manager import prompt_manager
from src.common.logger import get_logger
logger = get_logger("expression_utils")
-# TODO: switch to the new model-calling approach once the LLM refactor is done
-judge_llm = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="expression_check")
+judge_llm = LLMServiceClient(task_name="tool_use", request_type="expression_check")
+def _normalize_repair_json_result(repaired_result: Any) -> str:
+"""Normalize the return value of repair_json into a JSON string.
+Args:
+repaired_result: Return value of `repair_json`; may be a string or a tuple carrying extra information.
+Returns:
+str: A JSON string that `json.loads` can parse further.
+Raises:
+TypeError: Raised when the return value cannot be normalized into a string.
+"""
+if isinstance(repaired_result, str):
+return repaired_result
+if isinstance(repaired_result, tuple) and repaired_result:
+first_item = repaired_result[0]
+if isinstance(first_item, str):
+return first_item
+return json.dumps(first_item, ensure_ascii=False)
+raise TypeError(f"repair_json returned an unhandled result type: {type(repaired_result)}")
async def check_expression_suitability(situation: str, style: str) -> Tuple[bool, str, Optional[str]]:
@@ -51,7 +72,11 @@ async def check_expression_suitability(situation: str, style: str) -> Tuple[bool
logger.info(f"正在评估表达方式: situation={situation}, style={style}")
-response, _ = await judge_llm.generate_response_async(prompt=prompt, temperature=0.6, max_tokens=1024)
+generation_result = await judge_llm.generate_response(
+prompt=prompt,
+options=LLMGenerationOptions(temperature=0.6, max_tokens=1024),
+)
+response = generation_result.response
logger.debug(f"评估结果: {response}")
@@ -59,7 +84,7 @@ async def check_expression_suitability(situation: str, style: str) -> Tuple[bool
evaluation = json.loads(response)
except json.JSONDecodeError:
try:
-response_repaired = repair_json(response)
+response_repaired = _normalize_repair_json_result(repair_json(response))
evaluation = json.loads(response_repaired)
except Exception as e:
raise ValueError(f"Could not parse LLM response as JSON: {response}") from e
@@ -74,7 +99,7 @@ async def check_expression_suitability(situation: str, style: str) -> Tuple[bool
return False, f"评估结果格式错误: {e}", str(e)
-def fix_chinese_quotes_in_json(text):
+def fix_chinese_quotes_in_json(text: str) -> str:
"""使用状态机修复 JSON 字符串值中的中文引号"""
result = []
i = 0
@@ -201,12 +226,12 @@ def is_single_char_jargon(content: str) -> bool:
)
-def _try_parse(text):
+def _try_parse(text: str) -> Any:
try:
return json.loads(text)
except Exception:
try:
-repaired = repair_json(text)
+repaired = _normalize_repair_json_result(repair_json(text))
return json.loads(repaired)
except Exception:
return None
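For context on the _normalize_repair_json_result helper added above: repair_json from the json_repair package does not always hand back a plain string (per the docstring, it may return a tuple carrying extra information), so both check_expression_suitability and _try_parse now funnel its output through the helper before json.loads. A small usage sketch with an invented malformed input:

# Invented example input: unquoted keys and a trailing comma.
raw = '{suitable: "yes", reason: "fits the style",}'

# repair_json may return a str or a tuple; the helper collapses both
# cases into a string that json.loads can parse.
repaired = _normalize_repair_json_result(repair_json(raw))
evaluation = json.loads(repaired)
assert evaluation["suitable"] == "yes"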

View File

@@ -4,8 +4,9 @@ from typing import List, Dict, Optional, Any
from src.common.logger import get_logger
from src.common.database.database_model import Jargon
-from src.llm_models.utils_model import LLMRequest
-from src.config.config import model_config, global_config
+from src.common.data_models.llm_service_data_models import LLMGenerationOptions
+from src.services.llm_service import LLMServiceClient
+from src.config.config import global_config
from src.prompt.prompt_manager import prompt_manager
from src.learners.jargon_miner_old import search_jargon
from src.learners.learner_utils_old import (
@@ -23,8 +24,8 @@ class JargonExplainer:
def __init__(self, chat_id: str) -> None:
self.chat_id = chat_id
-self.llm = LLMRequest(
-model_set=model_config.model_task_config.tool_use,
+self.llm = LLMServiceClient(
+task_name="tool_use",
request_type="jargon.explain",
)
@@ -206,7 +207,10 @@ class JargonExplainer:
prompt_of_summarize.add_context("jargon_explanations", lambda _: explanations_text)
summarize_prompt = await prompt_manager.render_prompt(prompt_of_summarize)
-summary, _ = await self.llm.generate_response_async(summarize_prompt, temperature=0.3)
+summary_result = await self.llm.generate_response(
+summarize_prompt, options=LLMGenerationOptions(temperature=0.3)
+)
+summary = summary_result.response
if not summary:
# If LLM summarization fails, return the raw explanations directly
return f"Jargon explanations from the context:\n{explanations_text}"

View File

@@ -12,17 +12,17 @@ from src.common.data_models.jargon_data_model import MaiJargon
from src.common.database.database import get_db_session
from src.common.database.database_model import Jargon
from src.common.logger import get_logger
-from src.config.config import global_config, model_config
-from src.llm_models.utils_model import LLMRequest
+from src.config.config import global_config
+from src.common.data_models.llm_service_data_models import LLMGenerationOptions
+from src.services.llm_service import LLMServiceClient
from src.prompt.prompt_manager import prompt_manager
from .expression_utils import is_single_char_jargon
logger = get_logger("jargon")
-# TODO: switch to the new model-calling approach once the LLM refactor is done
-llm_extract = LLMRequest(model_set=model_config.model_task_config.utils, request_type="jargon.extract")
-llm_inference = LLMRequest(model_set=model_config.model_task_config.utils, request_type="jargon.inference")
+llm_extract = LLMServiceClient(task_name="utils", request_type="jargon.extract")
+llm_inference = LLMServiceClient(task_name="utils", request_type="jargon.inference")
class JargonEntry(TypedDict):
@@ -100,7 +100,10 @@ class JargonMiner:
prompt1_template.add_context("previous_meaning_instruction", previous_meaning_instruction)
prompt1 = await prompt_manager.render_prompt(prompt1_template)
-llm_response_1, _ = await llm_inference.generate_response_async(prompt1, temperature=0.3)
+generation_result_1 = await llm_inference.generate_response(
+prompt1, options=LLMGenerationOptions(temperature=0.3)
+)
+llm_response_1 = generation_result_1.response
if not llm_response_1:
logger.warning(f"jargon {content} 推断1失败无响应")
return
@@ -129,7 +132,10 @@ class JargonMiner:
prompt2_template.add_context("content", content)
prompt2 = await prompt_manager.render_prompt(prompt2_template)
-llm_response_2, _ = await llm_inference.generate_response_async(prompt2, temperature=0.3)
+generation_result_2 = await llm_inference.generate_response(
+prompt2, options=LLMGenerationOptions(temperature=0.3)
+)
+llm_response_2 = generation_result_2.response
if not llm_response_2:
logger.warning(f"jargon {content} 推断2失败无响应")
return
@@ -153,7 +159,10 @@ class JargonMiner:
if global_config.debug.show_jargon_prompt:
logger.info(f"jargon {content} 比较提示词: {prompt3}")
-llm_response_3, _ = await llm_inference.generate_response_async(prompt3, temperature=0.3)
+generation_result_3 = await llm_inference.generate_response(
+prompt3, options=LLMGenerationOptions(temperature=0.3)
+)
+llm_response_3 = generation_result_3.response
if not llm_response_3:
logger.warning(f"jargon {content} 比较失败:无响应")
return
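Not shown in the hunks above are two items from the commit message: the adapter base class for unified request execution and the utilities for building OpenAI-compatible client configurations. Purely as a hypothetical sketch of that shape — every name below is invented for illustration and does not come from this diff:

from abc import ABC, abstractmethod
from dataclasses import dataclass, field

@dataclass
class OpenAICompatibleConfig:
    # Hypothetical config for an OpenAI-compatible endpoint; supporting
    # multiple authentication methods is what the commit message describes.
    base_url: str
    api_key: str = ""
    auth_header: str = "Authorization"  # e.g. "Authorization" or "api-key"
    extra_headers: dict = field(default_factory=dict)

class BaseProviderAdapter(ABC):
    """Hypothetical adapter base class: one entry point per provider."""

    def __init__(self, config: OpenAICompatibleConfig) -> None:
        self.config = config

    @abstractmethod
    async def execute_request(self, prompt: str, options):
        """Run one generation request and return a provider-agnostic result."""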