This commit is contained in:
墨梓柒
2025-11-13 13:24:55 +08:00
parent e78a070fbd
commit 7839acd25d
52 changed files with 1322 additions and 1408 deletions

View File

@@ -250,7 +250,7 @@ def _build_stream_api_resp(
if fr:
reason = str(fr)
break
if str(reason).endswith("MAX_TOKENS"):
has_visible_output = bool(resp.content and resp.content.strip())
if has_visible_output:
@@ -281,8 +281,8 @@ async def _default_stream_response_handler(
_tool_calls_buffer: list[tuple[str, str, dict]] = [] # 工具调用缓冲区,用于存储接收到的工具调用
_usage_record = None # 使用情况记录
last_resp: GenerateContentResponse | None = None # 保存最后一个 chunk
resp = APIResponse()
resp = APIResponse()
def _insure_buffer_closed():
if _fc_delta_buffer and not _fc_delta_buffer.closed:
_fc_delta_buffer.close()
@@ -298,7 +298,7 @@ async def _default_stream_response_handler(
chunk,
_fc_delta_buffer,
_tool_calls_buffer,
resp=resp,
resp=resp,
)
if chunk.usage_metadata:
@@ -314,7 +314,7 @@ async def _default_stream_response_handler(
_fc_delta_buffer,
_tool_calls_buffer,
last_resp=last_resp,
resp=resp,
resp=resp,
), _usage_record
except Exception:
# 确保缓冲区被关闭

View File

@@ -239,7 +239,7 @@ def _build_stream_api_resp(
# 检查 max_tokens 截断(流式的告警改由处理函数统一输出,这里不再输出)
# 保留 finish_reason 仅用于上层判断
if not resp.content and not resp.tool_calls:
raise EmptyResponseException()
@@ -293,7 +293,7 @@ async def _default_stream_response_handler(
if hasattr(event.choices[0], "finish_reason") and event.choices[0].finish_reason:
finish_reason = event.choices[0].finish_reason
if hasattr(event, "model") and event.model and not _model_name:
_model_name = event.model # 记录模型名
@@ -341,10 +341,7 @@ async def _default_stream_response_handler(
model_dbg = None
# 统一日志格式
logger.info(
"模型%s因为超过最大max_token限制可能仅输出部分内容可视情况调整"
% (model_dbg or "")
)
logger.info("模型%s因为超过最大max_token限制可能仅输出部分内容可视情况调整" % (model_dbg or ""))
return resp, _usage_record
except Exception:
@@ -387,9 +384,7 @@ def _default_normal_response_parser(
raw_snippet = str(resp)[:300]
except Exception:
raw_snippet = "<unserializable>"
logger.debug(
f"empty choices: model={model_dbg} id={id_dbg} usage={usage_dbg} raw≈{raw_snippet}"
)
logger.debug(f"empty choices: model={model_dbg} id={id_dbg} usage={usage_dbg} raw≈{raw_snippet}")
except Exception:
# 日志采集失败不应影响控制流
pass
@@ -447,14 +442,11 @@ def _default_normal_response_parser(
# print(resp)
_model_name = resp.model
# 统一日志格式
logger.info(
"模型%s因为超过最大max_token限制可能仅输出部分内容可视情况调整"
% (_model_name or "")
)
logger.info("模型%s因为超过最大max_token限制可能仅输出部分内容可视情况调整" % (_model_name or ""))
return api_response, _usage_record
except Exception as e:
logger.debug(f"检查 MAX_TOKENS 截断时异常: {e}")
if not api_response.content and not api_response.tool_calls:
raise EmptyResponseException()

View File

@@ -277,9 +277,7 @@ class LLMRequest:
logger.error(f"模型 '{model_info.name}' 在多次出现空回复后仍然失败。")
raise ModelAttemptFailed(f"模型 '{model_info.name}' 重试耗尽", original_exception=e) from e
logger.warning(
f"模型 '{model_info.name}' 返回空回复(可重试)。剩余重试次数: {retry_remain}"
)
logger.warning(f"模型 '{model_info.name}' 返回空回复(可重试)。剩余重试次数: {retry_remain}")
await asyncio.sleep(api_provider.retry_interval)
except NetworkConnectionError as e:
@@ -289,9 +287,7 @@ class LLMRequest:
logger.error(f"模型 '{model_info.name}' 在网络错误重试用尽后仍然失败。")
raise ModelAttemptFailed(f"模型 '{model_info.name}' 重试耗尽", original_exception=e) from e
logger.warning(
f"模型 '{model_info.name}' 遇到网络错误(可重试): {str(e)}。剩余重试次数: {retry_remain}"
)
logger.warning(f"模型 '{model_info.name}' 遇到网络错误(可重试): {str(e)}。剩余重试次数: {retry_remain}")
await asyncio.sleep(api_provider.retry_interval)
except RespNotOkException as e: