Merge pull request #1390 from Ronifue/dev

fix & feat: 修复、修正一些问题
This commit is contained in:
墨梓柒
2025-11-30 18:07:53 +08:00
committed by GitHub
9 changed files with 171 additions and 49 deletions

View File

@@ -839,8 +839,6 @@ class DefaultReplyer:
continue
timing_logs.append(f"{chinese_name}: {duration:.1f}s")
if duration > 12:
logger.warning(f"回复生成前信息获取耗时过长: {chinese_name} 耗时: {duration:.1f}s请使用更快的模型")
logger.info(f"回复准备: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s")
expression_habits_block, selected_expressions = results_dict["expression_habits"]

View File

@@ -760,8 +760,6 @@ class PrivateReplyer:
continue
timing_logs.append(f"{chinese_name}: {duration:.1f}s")
if duration > 12:
logger.warning(f"回复生成前信息获取耗时过长: {chinese_name} 耗时: {duration:.1f}s请使用更快的模型")
logger.info(f"回复准备: {'; '.join(timing_logs)}; {almost_zero_str} <0.1s")
expression_habits_block, selected_expressions = results_dict["expression_habits"]

View File

@@ -88,6 +88,9 @@ class TaskConfig(ConfigBase):
temperature: float = 0.3
"""模型温度"""
slow_threshold: float = 15.0
"""慢请求阈值(秒),超过此值会输出警告日志"""
@dataclass
class ModelTaskConfig(ConfigBase):

View File

@@ -47,6 +47,21 @@ class LLMRequest:
}
"""模型使用量记录,用于进行负载均衡,对应为(total_tokens, penalty, usage_penalty),惩罚值是为了能在某个模型请求不给力或正在被使用的时候进行调整"""
def _check_slow_request(self, time_cost: float, model_name: str) -> None:
    """Log a warning when a request took longer than the task's slow threshold.

    The threshold is read from ``self.model_for_task.slow_threshold``; nothing
    is logged unless ``time_cost`` is strictly greater than it.

    Args:
        time_cost: Elapsed request time, in seconds.
        model_name: Name of the model that handled the request.
    """
    limit = self.model_for_task.slow_threshold
    # Expressed as a negated ">" so the comparison itself is left untouched.
    if not time_cost > limit:
        return

    # Fall back to a placeholder label when no request type was set.
    task_label = self.request_type or "未知任务"
    headline = f"LLM请求耗时过长: {task_label} 使用模型 {model_name} 耗时 {time_cost:.1f}s阈值: {limit}s请考虑使用更快的模型\n"
    tuning_hint = " 如果你认为该警告出现得过于频繁请调整model_config.toml中对应任务的slow_threshold至符合你实际情况的合理值"
    logger.warning(headline + tuning_hint)
async def generate_response_for_image(
self,
prompt: str,
@@ -86,6 +101,8 @@ class LLMRequest:
if not reasoning_content and content:
content, extracted_reasoning = self._extract_reasoning(content)
reasoning_content = extracted_reasoning
time_cost = time.time() - start_time
self._check_slow_request(time_cost, model_info.name)
if usage := response.usage:
llm_usage_recorder.record_usage_to_database(
model_info=model_info,
@@ -93,7 +110,7 @@ class LLMRequest:
user_id="system",
request_type=self.request_type,
endpoint="/chat/completions",
time_cost=time.time() - start_time,
time_cost=time_cost,
)
return content, (reasoning_content, model_info.name, tool_calls)
@@ -198,7 +215,8 @@ class LLMRequest:
tool_options=tool_built,
)
logger.debug(f"LLM请求总耗时: {time.time() - start_time}")
time_cost = time.time() - start_time
logger.debug(f"LLM请求总耗时: {time_cost}")
logger.debug(f"LLM生成内容: {response}")
content = response.content
@@ -207,6 +225,7 @@ class LLMRequest:
if not reasoning_content and content:
content, extracted_reasoning = self._extract_reasoning(content)
reasoning_content = extracted_reasoning
self._check_slow_request(time_cost, model_info.name)
if usage := response.usage:
llm_usage_recorder.record_usage_to_database(
model_info=model_info,
@@ -214,7 +233,7 @@ class LLMRequest:
user_id="system",
request_type=self.request_type,
endpoint="/chat/completions",
time_cost=time.time() - start_time,
time_cost=time_cost,
)
return content or "", (reasoning_content, model_info.name, tool_calls)
@@ -301,7 +320,7 @@ class LLMRequest:
message_list=(compressed_messages or message_list),
tool_options=tool_options,
max_tokens=self.model_for_task.max_tokens if max_tokens is None else max_tokens,
temperature=self.model_for_task.temperature if temperature is None else temperature,
temperature=temperature if temperature is not None else (model_info.extra_params or {}).get("temperature", self.model_for_task.temperature),
response_format=response_format,
stream_response_handler=stream_response_handler,
async_response_parser=async_response_parser,
@@ -323,24 +342,19 @@ class LLMRequest:
)
except EmptyResponseException as e:
# 空回复:通常为临时问题,单独记录并重试
original_error_info = self._get_original_error_info(e)
retry_remain -= 1
if retry_remain <= 0:
logger.error(f"模型 '{model_info.name}' 在多次出现空回复后仍然失败。")
logger.error(f"模型 '{model_info.name}' 在多次出现空回复后仍然失败。{original_error_info}")
raise ModelAttemptFailed(f"模型 '{model_info.name}' 重试耗尽", original_exception=e) from e
logger.warning(f"模型 '{model_info.name}' 返回空回复(可重试)。剩余重试次数: {retry_remain}")
logger.warning(f"模型 '{model_info.name}' 返回空回复(可重试){original_error_info}。剩余重试次数: {retry_remain}")
await asyncio.sleep(api_provider.retry_interval)
except NetworkConnectionError as e:
# 网络错误:单独记录并重试
# 尝试从链式异常中获取原始错误信息以诊断具体原因
original_error_info = ""
if e.__cause__:
original_error_type = type(e.__cause__).__name__
original_error_msg = str(e.__cause__)
original_error_info = (
f"\n 底层异常类型: {original_error_type}\n 底层异常信息: {original_error_msg}"
)
original_error_info = self._get_original_error_info(e)
retry_remain -= 1
if retry_remain <= 0:
@@ -356,15 +370,17 @@ class LLMRequest:
await asyncio.sleep(api_provider.retry_interval)
except RespNotOkException as e:
original_error_info = self._get_original_error_info(e)
# 可重试的HTTP错误
if e.status_code == 429 or e.status_code >= 500:
retry_remain -= 1
if retry_remain <= 0:
logger.error(f"模型 '{model_info.name}' 在遇到 {e.status_code} 错误并用尽重试次数后仍然失败。")
logger.error(f"模型 '{model_info.name}' 在遇到 {e.status_code} 错误并用尽重试次数后仍然失败。{original_error_info}")
raise ModelAttemptFailed(f"模型 '{model_info.name}' 重试耗尽", original_exception=e) from e
logger.warning(
f"模型 '{model_info.name}' 遇到可重试的HTTP错误: {str(e)}。剩余重试次数: {retry_remain}"
f"模型 '{model_info.name}' 遇到可重试的HTTP错误: {str(e)}{original_error_info}。剩余重试次数: {retry_remain}"
)
await asyncio.sleep(api_provider.retry_interval)
continue
@@ -377,13 +393,15 @@ class LLMRequest:
continue
# 不可重试的HTTP错误
logger.warning(f"模型 '{model_info.name}' 遇到不可重试的HTTP错误: {str(e)}")
logger.warning(f"模型 '{model_info.name}' 遇到不可重试的HTTP错误: {str(e)}{original_error_info}")
raise ModelAttemptFailed(f"模型 '{model_info.name}' 遇到硬错误", original_exception=e) from e
except Exception as e:
logger.error(traceback.format_exc())
logger.warning(f"模型 '{model_info.name}' 遇到未知的不可重试错误: {str(e)}")
original_error_info = self._get_original_error_info(e)
logger.warning(f"模型 '{model_info.name}' 遇到未知的不可重试错误: {str(e)}{original_error_info}")
raise ModelAttemptFailed(f"模型 '{model_info.name}' 遇到硬错误", original_exception=e) from e
raise ModelAttemptFailed(f"模型 '{model_info.name}' 未被尝试因为重试次数已配置为0或更少。")
@@ -497,3 +515,14 @@ class LLMRequest:
content = re.sub(r"(?:<think>)?.*?</think>", "", content, flags=re.DOTALL, count=1).strip()
reasoning = match[1].strip() if match else ""
return content, reasoning
@staticmethod
def _get_original_error_info(e: Exception) -> str:
    """Build a log suffix describing the exception that ``e`` was chained from.

    Args:
        e: The exception whose ``__cause__`` should be described.

    Returns:
        A string starting with a newline that names the type and message of
        ``e.__cause__``, or an empty string when ``e.__cause__`` is falsy.
    """
    cause = e.__cause__
    if not cause:
        return ""

    cause_type = type(cause).__name__
    cause_msg = str(cause)
    return f"\n 底层异常类型: {cause_type}\n 底层异常信息: {cause_msg}"

View File

@@ -15,7 +15,7 @@ class SearchKnowledgeFromLPMMTool(BaseTool):
description = "从知识库中搜索相关信息,如果你需要知识,就使用这个工具"
parameters = [
("query", ToolParamType.STRING, "搜索查询关键词", True, None),
("limit", ToolParamType.INTEGER, "希望返回的相关知识条数默认5", False, 5),
("limit", ToolParamType.INTEGER, "希望返回的相关知识条数默认5", False, None),
]
available_for_llm = global_config.lpmm_knowledge.enable

View File

@@ -11,8 +11,10 @@ from datetime import datetime
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from src.config.config import MMC_VERSION
from src.common.logger import get_logger
router = APIRouter(prefix="/system", tags=["system"])
logger = get_logger("webui_system")
# 记录启动时间
_start_time = time.time()
@@ -39,21 +41,22 @@ async def restart_maibot():
"""
重启麦麦主程序
使用 os.execv 重启当前进程,配置更改将在重启后生效。
请求重启当前进程,配置更改将在重启后生效。
注意:此操作会使麦麦暂时离线。
"""
import asyncio
try:
# 记录重启操作
print(f"[{datetime.now()}] WebUI 触发重启操作")
logger.info("WebUI 触发重启操作")
# 定义延迟重启的异步任务
async def delayed_restart():
await asyncio.sleep(0.5) # 延迟0.5秒,确保响应已发送
python = sys.executable
args = [python] + sys.argv
os.execv(python, args)
# 使用 os._exit(42) 退出当前进程,配合外部 runner 脚本进行重启
# 42 是约定的重启状态码
logger.info("WebUI 请求重启,退出代码 42")
os._exit(42)
# 创建后台任务执行重启
asyncio.create_task(delayed_restart())