feat:支持模型缓存和相关配置

This commit is contained in:
SengokuCola
2026-04-25 13:53:30 +08:00
parent 4b1bc2aba8
commit 9759018a0c
11 changed files with 195 additions and 5 deletions

View File

@@ -248,6 +248,33 @@ class MaisakaChatLoopService:
except (TypeError, ValueError):
return default
@staticmethod
def _log_prompt_cache_usage(
    *,
    request_kind: str,
    prompt_tokens: int,
    prompt_cache_hit_tokens: int,
    prompt_cache_miss_tokens: int,
) -> None:
    """Log the model's prompt KV-cache hit statistics.

    Args:
        request_kind: Label for the kind of request that produced this usage.
        prompt_tokens: Total prompt tokens reported for the request.
        prompt_cache_hit_tokens: Prompt tokens served from the KV cache.
        prompt_cache_miss_tokens: Prompt tokens not served from the cache.
            Some providers report 0 misses even when hits are positive; in
            that case the miss count is inferred as
            ``prompt_tokens - prompt_cache_hit_tokens`` (floored at 0).
    """
    # Derive the miss count when the provider omitted it but reported hits.
    if prompt_cache_miss_tokens == 0 and prompt_cache_hit_tokens > 0:
        prompt_cache_miss_tokens = max(prompt_tokens - prompt_cache_hit_tokens, 0)
    prompt_cache_total_tokens = prompt_cache_hit_tokens + prompt_cache_miss_tokens
    # Guard against division by zero when no cache accounting was reported.
    prompt_cache_hit_rate = (
        prompt_cache_hit_tokens / prompt_cache_total_tokens * 100
        if prompt_cache_total_tokens > 0
        else 0
    )
    # Lazy %-style arguments defer string formatting until the record is
    # actually emitted (no-op cost when INFO logging is disabled); the
    # rendered message is byte-identical to the previous f-string version.
    logger.info(
        "Maisaka KV cache usage - request_kind=%s, hit_tokens=%s, "
        "miss_tokens=%s, hit_rate=%.2f%%, prompt_tokens=%s",
        request_kind,
        prompt_cache_hit_tokens,
        prompt_cache_miss_tokens,
        prompt_cache_hit_rate,
        prompt_tokens,
    )
def _build_personality_prompt(self) -> str:
"""构造人格提示词。"""
@@ -554,6 +581,12 @@ class MaisakaChatLoopService:
interrupt_flag=self._interrupt_flag,
),
)
self._log_prompt_cache_usage(
request_kind=request_kind,
prompt_tokens=generation_result.prompt_tokens,
prompt_cache_hit_tokens=getattr(generation_result, "prompt_cache_hit_tokens", 0) or 0,
prompt_cache_miss_tokens=getattr(generation_result, "prompt_cache_miss_tokens", 0) or 0,
)
final_response = generation_result.response or ""
final_tool_calls = list(generation_result.tool_calls or [])