feat: Enhance OpenAI compatibility and introduce unified LLM service data models
- Refactored model fetching logic to support various authentication methods for OpenAI-compatible APIs.
- Introduced new data models for LLM service requests and responses to standardize interactions across layers.
- Added an adapter base class for unified request execution across different providers.
- Implemented utility functions for building OpenAI-compatible client configurations and request overrides.
This commit is contained in:
@@ -20,8 +20,8 @@ from src.common.database.database import get_db_session
|
||||
from src.common.database.database_model import Expression
|
||||
from src.common.logger import get_logger
|
||||
from src.config.config import global_config
|
||||
from src.config.config import model_config
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.common.data_models.llm_service_data_models import LLMGenerationOptions
|
||||
from src.services.llm_service import LLMServiceClient
|
||||
from src.manager.async_task_manager import AsyncTask
|
||||
|
||||
logger = get_logger("expression_auto_check_task")
|
||||
@@ -76,7 +76,7 @@ def create_evaluation_prompt(situation: str, style: str) -> str:
|
||||
return prompt
|
||||
|
||||
|
||||
judge_llm = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="expression_check")
|
||||
judge_llm = LLMServiceClient(task_name="tool_use", request_type="expression_check")
|
||||
|
||||
|
||||
async def single_expression_check(situation: str, style: str) -> tuple[bool, str, str | None]:
|
||||
@@ -94,10 +94,11 @@ async def single_expression_check(situation: str, style: str) -> tuple[bool, str
|
||||
prompt = create_evaluation_prompt(situation, style)
|
||||
logger.debug(f"正在评估表达方式: situation={situation}, style={style}")
|
||||
|
||||
response, (reasoning, model_name, _) = await judge_llm.generate_response_async(
|
||||
prompt=prompt, temperature=0.6, max_tokens=1024
|
||||
generation_result = await judge_llm.generate_response(
|
||||
prompt=prompt,
|
||||
options=LLMGenerationOptions(temperature=0.6, max_tokens=1024),
|
||||
)
|
||||
|
||||
response = generation_result.response
|
||||
logger.debug(f"LLM响应: {response}")
|
||||
|
||||
# 解析JSON响应
|
||||
|
||||
@@ -7,8 +7,9 @@ import difflib
|
||||
import json
|
||||
import re
|
||||
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.config.config import model_config, global_config
|
||||
from src.common.data_models.llm_service_data_models import LLMGenerationOptions
|
||||
from src.services.llm_service import LLMServiceClient
|
||||
from src.config.config import global_config
|
||||
from src.prompt.prompt_manager import prompt_manager
|
||||
from src.common.logger import get_logger
|
||||
from src.common.database.database_model import Expression
|
||||
@@ -26,10 +27,11 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = get_logger("expressor")
|
||||
|
||||
# TODO: 重构完LLM相关内容后,替换成新的模型调用方式
|
||||
express_learn_model = LLMRequest(model_set=model_config.model_task_config.utils, request_type="expression.learner")
|
||||
summary_model = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="expression.summary")
|
||||
check_model = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="expression.check")
|
||||
express_learn_model = LLMServiceClient(
|
||||
task_name="utils", request_type="expression.learner"
|
||||
)
|
||||
summary_model = LLMServiceClient(task_name="tool_use", request_type="expression.summary")
|
||||
check_model = LLMServiceClient(task_name="tool_use", request_type="expression.check")
|
||||
|
||||
|
||||
class ExpressionLearner:
|
||||
@@ -74,7 +76,10 @@ class ExpressionLearner:
|
||||
|
||||
# 调用 LLM 学习表达方式
|
||||
try:
|
||||
response, _ = await express_learn_model.generate_response_async(prompt, temperature=0.3)
|
||||
generation_result = await express_learn_model.generate_response(
|
||||
prompt, options=LLMGenerationOptions(temperature=0.3)
|
||||
)
|
||||
response = generation_result.response
|
||||
except Exception as e:
|
||||
logger.error(f"学习表达方式失败,模型生成出错:{e}")
|
||||
return
|
||||
@@ -413,7 +418,10 @@ class ExpressionLearner:
|
||||
"只输出概括内容。"
|
||||
)
|
||||
try:
|
||||
summary, _ = await summary_model.generate_response_async(prompt, temperature=0.2)
|
||||
summary_result = await summary_model.generate_response(
|
||||
prompt, options=LLMGenerationOptions(temperature=0.2)
|
||||
)
|
||||
summary = summary_result.response
|
||||
if summary := summary.strip():
|
||||
return summary
|
||||
except Exception as e:
|
||||
|
||||
@@ -4,10 +4,11 @@ import time
|
||||
from typing import List, Dict, Optional, Any, Tuple
|
||||
from json_repair import repair_json
|
||||
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.config.config import global_config, model_config
|
||||
from src.services.llm_service import LLMServiceClient
|
||||
from src.config.config import global_config
|
||||
from src.common.logger import get_logger
|
||||
from src.common.database.database_model import Expression
|
||||
from src.common.utils.utils_session import SessionUtils
|
||||
from src.prompt.prompt_manager import prompt_manager
|
||||
from src.learners.learner_utils_old import weighted_sample
|
||||
from src.chat.utils.common_utils import TempMethodsExpression
|
||||
@@ -17,8 +18,8 @@ logger = get_logger("expression_selector")
|
||||
|
||||
class ExpressionSelector:
|
||||
def __init__(self):
|
||||
self.llm_model = LLMRequest(
|
||||
model_set=model_config.model_task_config.tool_use, request_type="expression.selector"
|
||||
self.llm_model = LLMServiceClient(
|
||||
task_name="tool_use", request_type="expression.selector"
|
||||
)
|
||||
|
||||
def can_use_expression_for_chat(self, chat_id: str) -> bool:
|
||||
@@ -383,8 +384,8 @@ class ExpressionSelector:
|
||||
prompt = await prompt_manager.render_prompt(prompt_template)
|
||||
|
||||
# 4. 调用LLM
|
||||
content, (reasoning_content, model_name, _) = await self.llm_model.generate_response_async(prompt=prompt)
|
||||
|
||||
generation_result = await self.llm_model.generate_response(prompt=prompt)
|
||||
content = generation_result.response
|
||||
# print(prompt)
|
||||
# print(content)
|
||||
|
||||
|
||||
@@ -1,19 +1,40 @@
|
||||
from json_repair import repair_json
|
||||
from typing import Tuple, Optional, List
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from src.config.config import model_config
|
||||
from src.config.config import global_config
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.common.data_models.llm_service_data_models import LLMGenerationOptions
|
||||
from src.services.llm_service import LLMServiceClient
|
||||
from src.prompt.prompt_manager import prompt_manager
|
||||
from src.common.logger import get_logger
|
||||
|
||||
logger = get_logger("expression_utils")
|
||||
|
||||
# TODO: 重构完LLM相关内容后,替换成新的模型调用方式
|
||||
judge_llm = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="expression_check")
|
||||
judge_llm = LLMServiceClient(task_name="tool_use", request_type="expression_check")
|
||||
|
||||
|
||||
def _normalize_repair_json_result(repaired_result: Any) -> str:
|
||||
"""将 repair_json 的返回值规范化为 JSON 字符串。
|
||||
|
||||
Args:
|
||||
repaired_result: `repair_json` 的返回值,可能是字符串或带附加信息的元组。
|
||||
|
||||
Returns:
|
||||
str: 可供 `json.loads` 继续解析的 JSON 字符串。
|
||||
|
||||
Raises:
|
||||
TypeError: 当返回值无法规范化为字符串时抛出。
|
||||
"""
|
||||
if isinstance(repaired_result, str):
|
||||
return repaired_result
|
||||
if isinstance(repaired_result, tuple) and repaired_result:
|
||||
first_item = repaired_result[0]
|
||||
if isinstance(first_item, str):
|
||||
return first_item
|
||||
return json.dumps(first_item, ensure_ascii=False)
|
||||
raise TypeError(f"repair_json 返回了无法处理的结果类型: {type(repaired_result)}")
|
||||
|
||||
|
||||
async def check_expression_suitability(situation: str, style: str) -> Tuple[bool, str, Optional[str]]:
|
||||
@@ -51,7 +72,11 @@ async def check_expression_suitability(situation: str, style: str) -> Tuple[bool
|
||||
|
||||
logger.info(f"正在评估表达方式: situation={situation}, style={style}")
|
||||
|
||||
response, _ = await judge_llm.generate_response_async(prompt=prompt, temperature=0.6, max_tokens=1024)
|
||||
generation_result = await judge_llm.generate_response(
|
||||
prompt=prompt,
|
||||
options=LLMGenerationOptions(temperature=0.6, max_tokens=1024),
|
||||
)
|
||||
response = generation_result.response
|
||||
|
||||
logger.debug(f"评估结果: {response}")
|
||||
|
||||
@@ -59,7 +84,7 @@ async def check_expression_suitability(situation: str, style: str) -> Tuple[bool
|
||||
evaluation = json.loads(response)
|
||||
except json.JSONDecodeError:
|
||||
try:
|
||||
response_repaired = repair_json(response)
|
||||
response_repaired = _normalize_repair_json_result(repair_json(response))
|
||||
evaluation = json.loads(response_repaired)
|
||||
except Exception as e:
|
||||
raise ValueError(f"无法解析LLM响应为JSON: {response}") from e
|
||||
@@ -74,7 +99,7 @@ async def check_expression_suitability(situation: str, style: str) -> Tuple[bool
|
||||
return False, f"评估结果格式错误: {e}", str(e)
|
||||
|
||||
|
||||
def fix_chinese_quotes_in_json(text):
|
||||
def fix_chinese_quotes_in_json(text: str) -> str:
|
||||
"""使用状态机修复 JSON 字符串值中的中文引号"""
|
||||
result = []
|
||||
i = 0
|
||||
@@ -201,12 +226,12 @@ def is_single_char_jargon(content: str) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def _try_parse(text: str) -> Any:
    """Parse *text* as JSON, repairing it on failure.

    Tries a direct ``json.loads`` first; if that fails, runs the text
    through ``repair_json`` (normalized to a string) and retries.

    Args:
        text: Candidate JSON text, possibly malformed.

    Returns:
        The parsed value, or ``None`` when even the repaired text is
        unparseable.
    """
    try:
        return json.loads(text)
    except Exception:
        pass
    # Direct parse failed — attempt a repair pass before giving up.
    try:
        fixed = _normalize_repair_json_result(repair_json(text))
        return json.loads(fixed)
    except Exception:
        return None
|
||||
|
||||
@@ -4,8 +4,9 @@ from typing import List, Dict, Optional, Any
|
||||
|
||||
from src.common.logger import get_logger
|
||||
from src.common.database.database_model import Jargon
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.config.config import model_config, global_config
|
||||
from src.common.data_models.llm_service_data_models import LLMGenerationOptions
|
||||
from src.services.llm_service import LLMServiceClient
|
||||
from src.config.config import global_config
|
||||
from src.prompt.prompt_manager import prompt_manager
|
||||
from src.learners.jargon_miner_old import search_jargon
|
||||
from src.learners.learner_utils_old import (
|
||||
@@ -23,8 +24,8 @@ class JargonExplainer:
|
||||
|
||||
def __init__(self, chat_id: str) -> None:
|
||||
self.chat_id = chat_id
|
||||
self.llm = LLMRequest(
|
||||
model_set=model_config.model_task_config.tool_use,
|
||||
self.llm = LLMServiceClient(
|
||||
task_name="tool_use",
|
||||
request_type="jargon.explain",
|
||||
)
|
||||
|
||||
@@ -206,7 +207,10 @@ class JargonExplainer:
|
||||
prompt_of_summarize.add_context("jargon_explanations", lambda _: explanations_text)
|
||||
summarize_prompt = await prompt_manager.render_prompt(prompt_of_summarize)
|
||||
|
||||
summary, _ = await self.llm.generate_response_async(summarize_prompt, temperature=0.3)
|
||||
summary_result = await self.llm.generate_response(
|
||||
summarize_prompt, options=LLMGenerationOptions(temperature=0.3)
|
||||
)
|
||||
summary = summary_result.response
|
||||
if not summary:
|
||||
# 如果LLM概括失败,直接返回原始解释
|
||||
return f"上下文中的黑话解释:\n{explanations_text}"
|
||||
|
||||
@@ -12,17 +12,17 @@ from src.common.data_models.jargon_data_model import MaiJargon
|
||||
from src.common.database.database import get_db_session
|
||||
from src.common.database.database_model import Jargon
|
||||
from src.common.logger import get_logger
|
||||
from src.config.config import global_config, model_config
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.config.config import global_config
|
||||
from src.common.data_models.llm_service_data_models import LLMGenerationOptions
|
||||
from src.services.llm_service import LLMServiceClient
|
||||
from src.prompt.prompt_manager import prompt_manager
|
||||
|
||||
from .expression_utils import is_single_char_jargon
|
||||
|
||||
logger = get_logger("jargon")
|
||||
|
||||
# TODO: 重构完LLM相关内容后,替换成新的模型调用方式
|
||||
llm_extract = LLMRequest(model_set=model_config.model_task_config.utils, request_type="jargon.extract")
|
||||
llm_inference = LLMRequest(model_set=model_config.model_task_config.utils, request_type="jargon.inference")
|
||||
llm_extract = LLMServiceClient(task_name="utils", request_type="jargon.extract")
|
||||
llm_inference = LLMServiceClient(task_name="utils", request_type="jargon.inference")
|
||||
|
||||
|
||||
class JargonEntry(TypedDict):
|
||||
@@ -100,7 +100,10 @@ class JargonMiner:
|
||||
prompt1_template.add_context("previous_meaning_instruction", previous_meaning_instruction)
|
||||
prompt1 = await prompt_manager.render_prompt(prompt1_template)
|
||||
|
||||
llm_response_1, _ = await llm_inference.generate_response_async(prompt1, temperature=0.3)
|
||||
generation_result_1 = await llm_inference.generate_response(
|
||||
prompt1, options=LLMGenerationOptions(temperature=0.3)
|
||||
)
|
||||
llm_response_1 = generation_result_1.response
|
||||
if not llm_response_1:
|
||||
logger.warning(f"jargon {content} 推断1失败:无响应")
|
||||
return
|
||||
@@ -129,7 +132,10 @@ class JargonMiner:
|
||||
prompt2_template.add_context("content", content)
|
||||
prompt2 = await prompt_manager.render_prompt(prompt2_template)
|
||||
|
||||
llm_response_2, _ = await llm_inference.generate_response_async(prompt2, temperature=0.3)
|
||||
generation_result_2 = await llm_inference.generate_response(
|
||||
prompt2, options=LLMGenerationOptions(temperature=0.3)
|
||||
)
|
||||
llm_response_2 = generation_result_2.response
|
||||
if not llm_response_2:
|
||||
logger.warning(f"jargon {content} 推断2失败:无响应")
|
||||
return
|
||||
@@ -153,7 +159,10 @@ class JargonMiner:
|
||||
if global_config.debug.show_jargon_prompt:
|
||||
logger.info(f"jargon {content} 比较提示词: {prompt3}")
|
||||
|
||||
llm_response_3, _ = await llm_inference.generate_response_async(prompt3, temperature=0.3)
|
||||
generation_result_3 = await llm_inference.generate_response(
|
||||
prompt3, options=LLMGenerationOptions(temperature=0.3)
|
||||
)
|
||||
llm_response_3 = generation_result_3.response
|
||||
if not llm_response_3:
|
||||
logger.warning(f"jargon {content} 比较失败:无响应")
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user