feat: unify slow-model warnings across tasks, and make each task's slow-request threshold configurable in model_config.toml
@@ -839,8 +839,6 @@ class DefaultReplyer:
                 continue
 
             timing_logs.append(f"{chinese_name}: {duration:.1f}s")
-            if duration > 12:
-                logger.warning(f"Info gathering before reply generation took too long: {chinese_name} took {duration:.1f}s, please switch to a faster model")
         logger.info(f"Reply preparation: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s")
 
         expression_habits_block, selected_expressions = results_dict["expression_habits"]
@@ -760,8 +760,6 @@ class PrivateReplyer:
                 continue
 
             timing_logs.append(f"{chinese_name}: {duration:.1f}s")
-            if duration > 12:
-                logger.warning(f"Info gathering before reply generation took too long: {chinese_name} took {duration:.1f}s, please switch to a faster model")
         logger.info(f"Reply preparation: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s")
 
         expression_habits_block, selected_expressions = results_dict["expression_habits"]
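Note that both replyer hunks only delete code: the hard-coded 12-second check duplicated in DefaultReplyer and PrivateReplyer is dropped, since the commit moves the slow-request warning into LLMRequest, where the threshold is configurable per task (see the hunks below).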
@@ -88,6 +88,9 @@ class TaskConfig(ConfigBase):
     temperature: float = 0.3
     """Model temperature"""
 
+    slow_threshold: float = 15.0
+    """Slow-request threshold in seconds; requests that exceed it log a warning"""
+
 
 @dataclass
 class ModelTaskConfig(ConfigBase):
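Since slow_threshold now lives on TaskConfig with a 15-second default, each task section in model_config.toml can presumably override it. Below is a minimal sketch of what that could look like; the [model_task_config.replyer] table name and surrounding keys are assumptions for illustration, not the project's actual schema — only slow_threshold and its 15.0 default come from the commit.

import tomllib  # stdlib TOML parser, Python 3.11+

# Hypothetical excerpt from model_config.toml; the table layout is assumed.
SAMPLE = """
[model_task_config.replyer]
temperature = 0.3
slow_threshold = 8.0    # stricter than the 15.0s default for this task

[model_task_config.tool_use]
temperature = 0.3       # slow_threshold omitted -> falls back to 15.0
"""

config = tomllib.loads(SAMPLE)
for task, options in config["model_task_config"].items():
    threshold = options.get("slow_threshold", 15.0)  # dataclass default
    print(f"{task}: warn when a request exceeds {threshold}s")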
@@ -47,6 +47,21 @@ class LLMRequest:
     }
     """Model usage records, used for load balancing; entries are (total_tokens, penalty, usage_penalty), where the penalty values allow deprioritizing a model that is underperforming or currently in use"""
 
+    def _check_slow_request(self, time_cost: float, model_name: str) -> None:
+        """Check whether a request was too slow and log a warning if so
+
+        Args:
+            time_cost: request duration in seconds
+            model_name: name of the model that served the request
+        """
+        threshold = self.model_for_task.slow_threshold
+        if time_cost > threshold:
+            request_type_display = self.request_type or "unknown task"
+            logger.warning(
+                f"LLM request took too long: {request_type_display} used model {model_name} and took {time_cost:.1f}s (threshold: {threshold}s); consider switching to a faster model\n"
+                f"  If this warning appears too often, adjust slow_threshold for the corresponding task in model_config.toml to a value that fits your setup"
+            )
+
     async def generate_response_for_image(
         self,
         prompt: str,
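To see the new check in isolation, here is a self-contained sketch of the same pattern — a hedged rewrite rather than the project's exact code (TaskConfig is reduced to the one relevant field, and check_slow_request is a free function standing in for LLMRequest._check_slow_request):

import logging
from dataclasses import dataclass

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

@dataclass
class TaskConfig:
    slow_threshold: float = 15.0  # mirrors the new field with its shipped default

def check_slow_request(task: TaskConfig, request_type: str | None,
                       model_name: str, time_cost: float) -> None:
    """Warn when a request exceeds the task's configurable threshold."""
    if time_cost > task.slow_threshold:
        display = request_type or "unknown task"
        logger.warning(
            f"LLM request took too long: {display} used model {model_name} "
            f"and took {time_cost:.1f}s (threshold: {task.slow_threshold}s)"
        )

# A 20.3s request against the default 15s threshold triggers the warning:
check_slow_request(TaskConfig(), "replyer", "some-model", 20.3)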
@@ -86,6 +101,8 @@ class LLMRequest:
         if not reasoning_content and content:
             content, extracted_reasoning = self._extract_reasoning(content)
             reasoning_content = extracted_reasoning
+        time_cost = time.time() - start_time
+        self._check_slow_request(time_cost, model_info.name)
         if usage := response.usage:
             llm_usage_recorder.record_usage_to_database(
                 model_info=model_info,
@@ -93,7 +110,7 @@ class LLMRequest:
                 user_id="system",
                 request_type=self.request_type,
                 endpoint="/chat/completions",
-                time_cost=time.time() - start_time,
+                time_cost=time_cost,
             )
         return content, (reasoning_content, model_info.name, tool_calls)
 
@@ -198,7 +215,8 @@ class LLMRequest:
             tool_options=tool_built,
         )
 
-        logger.debug(f"LLM request total time: {time.time() - start_time}")
+        time_cost = time.time() - start_time
+        logger.debug(f"LLM request total time: {time_cost}")
         logger.debug(f"LLM generated content: {response}")
 
         content = response.content
@@ -207,6 +225,7 @@ class LLMRequest:
         if not reasoning_content and content:
             content, extracted_reasoning = self._extract_reasoning(content)
             reasoning_content = extracted_reasoning
+        self._check_slow_request(time_cost, model_info.name)
         if usage := response.usage:
             llm_usage_recorder.record_usage_to_database(
                 model_info=model_info,
@@ -214,7 +233,7 @@ class LLMRequest:
                 user_id="system",
                 request_type=self.request_type,
                 endpoint="/chat/completions",
-                time_cost=time.time() - start_time,
+                time_cost=time_cost,
             )
         return content or "", (reasoning_content, model_info.name, tool_calls)
 
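The remaining hunks share one design choice: the elapsed time is computed once into time_cost, and that single value feeds both _check_slow_request and record_usage_to_database, so the warning and the persisted usage record can no longer disagree, as they could when time.time() - start_time was evaluated separately at each call site.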