feat: 模型选择现在可以使用完全随机的策略
Update model_config_template.toml
This commit is contained in:
@@ -97,6 +97,9 @@ class TaskConfig(ConfigBase):
|
|||||||
slow_threshold: float = 15.0
|
slow_threshold: float = 15.0
|
||||||
"""慢请求阈值(秒),超过此值会输出警告日志"""
|
"""慢请求阈值(秒),超过此值会输出警告日志"""
|
||||||
|
|
||||||
|
selection_strategy: str = field(default="balance")
|
||||||
|
"""模型选择策略:balance(负载均衡)或 random(随机选择)"""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ModelTaskConfig(ConfigBase):
|
class ModelTaskConfig(ConfigBase):
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
import asyncio
|
import asyncio
|
||||||
import time
|
import time
|
||||||
|
import random
|
||||||
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from rich.traceback import install
|
from rich.traceback import install
|
||||||
@@ -266,7 +267,7 @@ class LLMRequest:
|
|||||||
|
|
||||||
def _select_model(self, exclude_models: Optional[Set[str]] = None) -> Tuple[ModelInfo, APIProvider, BaseClient]:
|
def _select_model(self, exclude_models: Optional[Set[str]] = None) -> Tuple[ModelInfo, APIProvider, BaseClient]:
|
||||||
"""
|
"""
|
||||||
根据总tokens和惩罚值选择的模型
|
根据配置的策略选择模型:balance(负载均衡)或 random(随机选择)
|
||||||
"""
|
"""
|
||||||
available_models = {
|
available_models = {
|
||||||
model: scores
|
model: scores
|
||||||
@@ -276,15 +277,30 @@ class LLMRequest:
|
|||||||
if not available_models:
|
if not available_models:
|
||||||
raise RuntimeError("没有可用的模型可供选择。所有模型均已尝试失败。")
|
raise RuntimeError("没有可用的模型可供选择。所有模型均已尝试失败。")
|
||||||
|
|
||||||
least_used_model_name = min(
|
strategy = self.model_for_task.selection_strategy.lower()
|
||||||
available_models,
|
|
||||||
key=lambda k: available_models[k][0] + available_models[k][1] * 300 + available_models[k][2] * 1000,
|
if strategy == "random":
|
||||||
)
|
# 随机选择策略
|
||||||
model_info = model_config.get_model_info(least_used_model_name)
|
selected_model_name = random.choice(list(available_models.keys()))
|
||||||
|
elif strategy == "balance":
|
||||||
|
# 负载均衡策略:根据总tokens和惩罚值选择
|
||||||
|
selected_model_name = min(
|
||||||
|
available_models,
|
||||||
|
key=lambda k: available_models[k][0] + available_models[k][1] * 300 + available_models[k][2] * 1000,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# 默认使用负载均衡策略
|
||||||
|
logger.warning(f"未知的选择策略 '{strategy}',使用默认的负载均衡策略")
|
||||||
|
selected_model_name = min(
|
||||||
|
available_models,
|
||||||
|
key=lambda k: available_models[k][0] + available_models[k][1] * 300 + available_models[k][2] * 1000,
|
||||||
|
)
|
||||||
|
|
||||||
|
model_info = model_config.get_model_info(selected_model_name)
|
||||||
api_provider = model_config.get_provider(model_info.api_provider)
|
api_provider = model_config.get_provider(model_info.api_provider)
|
||||||
force_new_client = self.request_type == "embedding"
|
force_new_client = self.request_type == "embedding"
|
||||||
client = client_registry.get_client_class_instance(api_provider, force_new=force_new_client)
|
client = client_registry.get_client_class_instance(api_provider, force_new=force_new_client)
|
||||||
logger.debug(f"选择请求模型: {model_info.name}")
|
logger.debug(f"选择请求模型: {model_info.name} (策略: {strategy})")
|
||||||
total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
|
total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
|
||||||
self.model_usage[model_info.name] = (total_tokens, penalty, usage_penalty + 1)
|
self.model_usage[model_info.name] = (total_tokens, penalty, usage_penalty + 1)
|
||||||
return model_info, api_provider, client
|
return model_info, api_provider, client
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[inner]
|
[inner]
|
||||||
version = "1.10.1"
|
version = "1.11.0"
|
||||||
|
|
||||||
# 配置文件版本号迭代规则同bot_config.toml
|
# 配置文件版本号迭代规则同bot_config.toml
|
||||||
|
|
||||||
@@ -140,38 +140,45 @@ model_list = ["siliconflow-deepseek-v3.2"] # 使用的模型列表,每个子
|
|||||||
temperature = 0.2 # 模型温度,新V3建议0.1-0.3
|
temperature = 0.2 # 模型温度,新V3建议0.1-0.3
|
||||||
max_tokens = 4096 # 最大输出token数
|
max_tokens = 4096 # 最大输出token数
|
||||||
slow_threshold = 15.0 # 慢请求阈值(秒),模型等待回复时间超过此值会输出警告日志
|
slow_threshold = 15.0 # 慢请求阈值(秒),模型等待回复时间超过此值会输出警告日志
|
||||||
|
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
|
||||||
|
|
||||||
[model_task_config.tool_use] #功能模型,需要使用支持工具调用的模型,请使用较快的小模型(调用量较大)
|
[model_task_config.tool_use] #功能模型,需要使用支持工具调用的模型,请使用较快的小模型(调用量较大)
|
||||||
model_list = ["qwen3-30b","qwen3-next-80b"]
|
model_list = ["qwen3-30b","qwen3-next-80b"]
|
||||||
temperature = 0.7
|
temperature = 0.7
|
||||||
max_tokens = 1024
|
max_tokens = 1024
|
||||||
slow_threshold = 10.0
|
slow_threshold = 10.0
|
||||||
|
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
|
||||||
|
|
||||||
[model_task_config.replyer] # 首要回复模型,还用于表达方式学习
|
[model_task_config.replyer] # 首要回复模型,还用于表达方式学习
|
||||||
model_list = ["siliconflow-deepseek-v3.2","siliconflow-deepseek-v3.2-think","siliconflow-glm-4.6","siliconflow-glm-4.6-think"]
|
model_list = ["siliconflow-deepseek-v3.2","siliconflow-deepseek-v3.2-think","siliconflow-glm-4.6","siliconflow-glm-4.6-think"]
|
||||||
temperature = 0.3 # 模型温度,新V3建议0.1-0.3
|
temperature = 0.3 # 模型温度,新V3建议0.1-0.3
|
||||||
max_tokens = 2048
|
max_tokens = 2048
|
||||||
slow_threshold = 25.0
|
slow_threshold = 25.0
|
||||||
|
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
|
||||||
|
|
||||||
[model_task_config.planner] #决策:负责决定麦麦该什么时候回复的模型
|
[model_task_config.planner] #决策:负责决定麦麦该什么时候回复的模型
|
||||||
model_list = ["siliconflow-deepseek-v3.2"]
|
model_list = ["siliconflow-deepseek-v3.2"]
|
||||||
temperature = 0.3
|
temperature = 0.3
|
||||||
max_tokens = 800
|
max_tokens = 800
|
||||||
slow_threshold = 12.0
|
slow_threshold = 12.0
|
||||||
|
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
|
||||||
|
|
||||||
[model_task_config.vlm] # 图像识别模型
|
[model_task_config.vlm] # 图像识别模型
|
||||||
model_list = ["qwen3-vl-30"]
|
model_list = ["qwen3-vl-30"]
|
||||||
max_tokens = 256
|
max_tokens = 256
|
||||||
slow_threshold = 15.0
|
slow_threshold = 15.0
|
||||||
|
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
|
||||||
|
|
||||||
[model_task_config.voice] # 语音识别模型
|
[model_task_config.voice] # 语音识别模型
|
||||||
model_list = ["sensevoice-small"]
|
model_list = ["sensevoice-small"]
|
||||||
slow_threshold = 12.0
|
slow_threshold = 12.0
|
||||||
|
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
|
||||||
|
|
||||||
# 嵌入模型
|
# 嵌入模型
|
||||||
[model_task_config.embedding]
|
[model_task_config.embedding]
|
||||||
model_list = ["bge-m3"]
|
model_list = ["bge-m3"]
|
||||||
slow_threshold = 5.0
|
slow_threshold = 5.0
|
||||||
|
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
|
||||||
|
|
||||||
# ------------LPMM知识库模型------------
|
# ------------LPMM知识库模型------------
|
||||||
|
|
||||||
@@ -180,9 +187,11 @@ model_list = ["siliconflow-deepseek-v3.2"]
|
|||||||
temperature = 0.2
|
temperature = 0.2
|
||||||
max_tokens = 800
|
max_tokens = 800
|
||||||
slow_threshold = 20.0
|
slow_threshold = 20.0
|
||||||
|
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
|
||||||
|
|
||||||
[model_task_config.lpmm_rdf_build] # RDF构建模型
|
[model_task_config.lpmm_rdf_build] # RDF构建模型
|
||||||
model_list = ["siliconflow-deepseek-v3.2"]
|
model_list = ["siliconflow-deepseek-v3.2"]
|
||||||
temperature = 0.2
|
temperature = 0.2
|
||||||
max_tokens = 800
|
max_tokens = 800
|
||||||
slow_threshold = 20.0
|
slow_threshold = 20.0
|
||||||
|
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
|
||||||
Reference in New Issue
Block a user