fix(i18n): 修复 PROMPT_EXTENSIONS 元组声明、消除重复代码、优化锁策略

- fix: PROMPT_EXTENSIONS = (".prompt") 是字符串非元组,改为 (".prompt",)
- refactor: 将 extract_placeholders/format_template 统一到 loaders.py,
  消除 formatting.py、prompt_i18n.py、i18n_validate.py 三处重复
- perf: _get_catalog 和 load_prompt 改为双重检查锁定,I/O 不再阻塞其他线程
- perf: _log_once 使用独立 _warning_lock,不再与 _cache_lock 竞争
- fix: _scan_legacy_prompt_directory 添加 prompts_root 参数,修正 relative_to 语义
- refactor: 合并 _supported_prompt_files 两个变体为单函数 + recursive 参数
- docs: i18n.md 强化 repository-specific 校验策略标注,修正时间表述冗余
- fix: 验证脚本错误消息移除 Crowdin 暗示,标注为仓库级校验策略

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
春河晴
2026-03-13 01:25:29 +09:00
parent 8f7f31a164
commit 55eb911dd3
6 changed files with 65 additions and 85 deletions

View File

@@ -87,7 +87,7 @@ Prompt 加载规则:
对于非 `zh-CN` 的目标 locale
- 下面这两条是当前仓库的额外校验策略,不是 Crowdin 默认行为。
- 下面这两条是本仓库特有(repository-specific)的校验策略,不是 Crowdin 默认行为。
- 不要手工把中文 source 文案直接复制进目标语言文件后提交。
- 英文 locale 文件中不应保留中文字符;这类残留会被校验脚本拦截。
@@ -130,7 +130,7 @@ python scripts/i18n_extract_candidates.py
项目根目录的 [`crowdin.yml`](../crowdin.yml) 使用 `locales/zh-CN/*.json` 作为 source。
现在也会把 `prompts/zh-CN/**/*.prompt` 作为单文件 Prompt 模板 source 上传到 Crowdin。
GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source并下载当时 Crowdin 中当前可用的翻译结果。
GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source,并下载当时 Crowdin 中可用的翻译结果。
常用命令:

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
from pathlib import Path
from string import Formatter
import re
import sys
@@ -17,25 +16,16 @@ from src.common.i18n.loaders import ( # noqa: E402
get_locales_root,
load_locale_catalog,
)
from src.common.i18n.loaders import extract_placeholders # noqa: E402
from src.common.prompt_i18n import ( # noqa: E402
PROMPT_EXTENSIONS,
extract_prompt_placeholders,
get_prompts_root,
)
FORMATTER = Formatter()
HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]")
def extract_placeholders(template: str) -> set[str]:
    """Collect the argument names referenced by a ``str.format``-style template.

    Attribute and index access is reduced to the base argument, so
    ``{user.name}`` and ``{items[0]}`` yield ``user`` and ``items``.
    Replacement fields nested inside a format spec (``{value:{width}}``) are
    collected as well, because ``str.format`` needs them to render the
    template. Auto-numbered fields (``{}``) carry no name and are ignored.

    Args:
        template: Format string to inspect.

    Returns:
        The set of base argument names; explicit positional indexes appear
        as digit strings such as ``"0"``.
    """
    placeholders: set[str] = set()
    pending = [template]
    while pending:
        for _, field_name, format_spec, _ in FORMATTER.parse(pending.pop()):
            if field_name:
                # Keep only the argument name: drop ".attr" and "[index]" suffixes.
                placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
            if format_spec:
                # A format spec may itself contain replacement fields; scan it too.
                pending.append(format_spec)
    return placeholders
def contains_han_characters(text: str) -> bool:
    """Report whether ``text`` contains a character matched by ``HAN_CHARACTER_PATTERN``."""
    match = HAN_CHARACTER_PATTERN.search(text)
    return match is not None
@@ -65,7 +55,7 @@ def validate_locale_content(
source_text == target_text and contains_han_characters(source_text)
for source_text, target_text in zip(source_texts, target_texts, strict=False)
):
errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案,请通过 Crowdin 提供目标语言翻译")
errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案(仓库级校验策略),请提供目标语言翻译")
if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts):
errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交")
@@ -218,6 +208,16 @@ def validate_prompt_templates(prompts_root: Path | None = None) -> tuple[list[st
return errors, warnings
def _print_warnings(warnings: list[str]) -> None:
if not warnings:
return
print(f"warnings ({len(warnings)}):")
for warning in warnings[:10]:
print(f" - {warning}")
if len(warnings) > 10:
print(f" - ... 另外还有 {len(warnings) - 10} 条 warning")
def main() -> int:
errors = validate_json_locales()
prompt_errors, prompt_warnings = validate_prompt_templates()
@@ -227,21 +227,11 @@ def main() -> int:
print("i18n validation failed:")
for error in errors:
print(f" - {error}")
if prompt_warnings:
print(f"warnings ({len(prompt_warnings)}):")
for warning in prompt_warnings[:10]:
print(f" - {warning}")
if len(prompt_warnings) > 10:
print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
_print_warnings(prompt_warnings)
return 1
print("i18n validation passed.")
if prompt_warnings:
print(f"warnings ({len(prompt_warnings)}):")
for warning in prompt_warnings[:10]:
print(f" - {warning}")
if len(prompt_warnings) > 10:
print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
_print_warnings(prompt_warnings)
return 0

View File

@@ -2,28 +2,14 @@ from __future__ import annotations
from datetime import date, datetime, time
from decimal import Decimal
from string import Formatter
from babel import Locale
from babel.dates import format_datetime as babel_format_datetime
from babel.numbers import format_decimal as babel_format_decimal
from .loaders import DEFAULT_LOCALE, to_babel_locale
from .loaders import DEFAULT_LOCALE, extract_placeholders, format_template, to_babel_locale
FORMATTER = Formatter()
def extract_placeholders(template: str) -> set[str]:
    """Return the base argument names used by a ``str.format`` template.

    ``{user.name}`` and ``{items[0]}`` are reported as ``user`` and ``items``.
    Fields nested in a format spec, as in ``{value:{width}}``, are reported
    too, since ``str.format`` requires them. Auto-numbered ``{}`` fields have
    no name and are skipped.

    Args:
        template: Format string to inspect.

    Returns:
        The set of base argument names; explicit positional indexes appear
        as digit strings such as ``"0"``.
    """
    placeholders: set[str] = set()
    pending = [template]
    while pending:
        for _, field_name, format_spec, _ in FORMATTER.parse(pending.pop()):
            if field_name:
                # Strip ".attr" / "[index]" access so only the argument name remains.
                placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
            if format_spec:
                # Nested replacement fields live inside the format spec.
                pending.append(format_spec)
    return placeholders
def format_template(template: str, **kwargs: object) -> str:
    """Render ``template`` via ``str.format``, passing ``kwargs`` as named arguments."""
    rendered = template.format(**kwargs)
    return rendered
__all__ = ["extract_placeholders", "format_template"]
def select_plural_category(locale: str, count: int | float | Decimal) -> str:

View File

@@ -1,9 +1,12 @@
from __future__ import annotations
from pathlib import Path
from string import Formatter
import json
_FORMATTER = Formatter()
from .exceptions import (
DuplicateTranslationKeyError,
InvalidLocaleError,
@@ -122,3 +125,16 @@ def load_locale_catalog(locale: str, locales_root: Path | None = None) -> dict[s
)
merged_translations[key] = value
return merged_translations
def extract_placeholders(template: str) -> set[str]:
    """Collect the argument names a ``str.format`` template refers to.

    Attribute and index lookups collapse to their base argument
    (``{user.name}`` -> ``user``, ``{items[0]}`` -> ``items``). Replacement
    fields nested inside a format spec (``{value:{width}}``) are included,
    because ``str.format`` cannot render the template without them.
    Auto-numbered fields (``{}``) have no name and are ignored.

    Args:
        template: Format string to inspect.

    Returns:
        The set of base argument names; explicit positional indexes appear
        as digit strings such as ``"0"``.
    """
    placeholders: set[str] = set()
    pending = [template]
    while pending:
        for _, field_name, format_spec, _ in _FORMATTER.parse(pending.pop()):
            if field_name:
                # Reduce "name.attr" / "name[index]" to just "name".
                placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
            if format_spec:
                # The spec can hold further replacement fields; queue it for parsing.
                pending.append(format_spec)
    return placeholders
def format_template(template: str, **kwargs: object) -> str:
    """Fill the replacement fields of ``template`` from ``kwargs`` using ``str.format``."""
    result = template.format(**kwargs)
    return result

View File

@@ -26,6 +26,7 @@ class I18nManager:
self._locale_override: ContextVar[str | None] = ContextVar("maibot_locale", default=None)
self._warning_cache: set[tuple[str, str, str]] = set()
self._cache_lock = threading.RLock()
self._warning_lock = threading.Lock()
def set_locale(self, locale: str) -> str:
self._default_locale = normalize_locale(locale)
@@ -175,23 +176,26 @@ class I18nManager:
if normalized_locale in self._catalog_cache:
return self._catalog_cache[normalized_locale]
try:
catalog = load_locale_catalog(normalized_locale, self._locales_root)
except I18nError as exc:
self._log_once(
("load_failed", normalized_locale, exc.__class__.__name__),
logging.WARNING,
"加载 locale '%s' 失败: %s",
normalized_locale,
exc,
)
catalog = {}
try:
catalog = load_locale_catalog(normalized_locale, self._locales_root)
except I18nError as exc:
self._log_once(
("load_failed", normalized_locale, exc.__class__.__name__),
logging.WARNING,
"加载 locale '%s' 失败: %s",
normalized_locale,
exc,
)
catalog = {}
with self._cache_lock:
if normalized_locale in self._catalog_cache:
return self._catalog_cache[normalized_locale]
self._catalog_cache[normalized_locale] = catalog
return catalog
def _log_once(self, cache_key: tuple[str, str, str], level: int, message: str, *args: object) -> None:
with self._cache_lock:
with self._warning_lock:
if cache_key in self._warning_cache:
return
self._warning_cache.add(cache_key)

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
from pathlib import Path
from string import Formatter
import logging
import os
@@ -9,14 +8,13 @@ import re
import threading
from .i18n import get_locale, t
from .i18n.loaders import DEFAULT_LOCALE, normalize_locale
from .i18n.loaders import DEFAULT_LOCALE, extract_placeholders as extract_prompt_placeholders, normalize_locale
logger = logging.getLogger("maibot.prompt_i18n")
PROJECT_ROOT = Path(__file__).resolve().parents[2]
PROMPTS_ROOT = (PROJECT_ROOT / "prompts").resolve()
PROMPT_EXTENSIONS = (".prompt")
FORMATTER = Formatter()
PROMPT_EXTENSIONS = (".prompt",)
SAFE_SEGMENT_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$")
STRICT_ENV_KEYS = ("MAIBOT_PROMPT_I18N_STRICT", "MAIBOT_I18N_STRICT")
@@ -24,15 +22,6 @@ _prompt_cache: dict[Path, str] = {}
_cache_lock = threading.RLock()
def extract_prompt_placeholders(template: str) -> set[str]:
    """Collect the argument names referenced by a ``str.format``-style prompt template.

    Attribute and index access is reduced to the base argument, so
    ``{user.name}`` and ``{items[0]}`` yield ``user`` and ``items``.
    Replacement fields nested inside a format spec (``{value:{width}}``) are
    collected as well, because ``str.format`` needs them to render the
    template. Auto-numbered fields (``{}``) carry no name and are ignored.

    Args:
        template: Prompt template text to inspect.

    Returns:
        The set of base argument names; explicit positional indexes appear
        as digit strings such as ``"0"``.
    """
    placeholders: set[str] = set()
    pending = [template]
    while pending:
        for _, field_name, format_spec, _ in FORMATTER.parse(pending.pop()):
            if field_name:
                # Keep only the argument name: drop ".attr" and "[index]" suffixes.
                placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
            if format_spec:
                # A format spec may itself contain replacement fields; scan it too.
                pending.append(format_spec)
    return placeholders
def get_prompts_root(prompts_root: Path | None = None) -> Path:
    """Return the resolved prompts root, using ``PROMPTS_ROOT`` when no override is given."""
    base = prompts_root if prompts_root else PROMPTS_ROOT
    return base.resolve()
@@ -70,17 +59,11 @@ def is_strict_prompt_i18n_mode() -> bool:
return any(os.getenv(env_key, "").strip().lower() in {"1", "true", "yes", "on"} for env_key in STRICT_ENV_KEYS)
def _supported_prompt_files(directory: Path) -> list[Path]:
def _supported_prompt_files(directory: Path, recursive: bool = True) -> list[Path]:
search = directory.rglob if recursive else directory.glob
matched_files: list[Path] = []
for suffix in PROMPT_EXTENSIONS:
matched_files.extend(path for path in directory.rglob(f"*{suffix}") if path.is_file())
return sorted(set(matched_files))
def _supported_prompt_files_non_recursive(directory: Path) -> list[Path]:
matched_files: list[Path] = []
for suffix in PROMPT_EXTENSIONS:
matched_files.extend(path for path in directory.glob(f"*{suffix}") if path.is_file())
matched_files.extend(path for path in search(f"*{suffix}") if path.is_file())
return sorted(set(matched_files))
@@ -104,20 +87,20 @@ def _scan_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Pat
return prompt_paths
def _scan_legacy_prompt_directory(directory: Path) -> dict[str, Path]:
def _scan_legacy_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Path]:
prompt_paths: dict[str, Path] = {}
if not directory.exists():
return prompt_paths
for prompt_path in _supported_prompt_files_non_recursive(directory):
for prompt_path in _supported_prompt_files(directory, recursive=False):
prompt_name = prompt_path.stem
if prompt_name in prompt_paths:
raise ValueError(
t(
"prompt.duplicate_template_name",
name=prompt_name,
path_a=prompt_paths[prompt_name].relative_to(get_prompts_root(directory)),
path_b=prompt_path.relative_to(get_prompts_root(directory)),
path_a=prompt_paths[prompt_name].relative_to(prompts_root),
path_b=prompt_path.relative_to(prompts_root),
)
)
prompt_paths[prompt_name] = prompt_path
@@ -128,7 +111,7 @@ def list_prompt_templates(locale: str | None = None, prompts_root: Path | None =
resolved_prompts_root = get_prompts_root(prompts_root)
requested_locale = normalize_locale(locale or get_locale())
prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root)
prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root, resolved_prompts_root)
prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / DEFAULT_LOCALE, resolved_prompts_root))
if requested_locale != DEFAULT_LOCALE:
@@ -176,10 +159,11 @@ def load_prompt(
prompt_path = resolve_prompt_path(name=name, locale=locale, category=category, prompts_root=prompts_root)
with _cache_lock:
template = _prompt_cache.get(prompt_path)
if template is None:
with open(prompt_path, "r", encoding="utf-8") as prompt_file:
template = prompt_file.read()
_prompt_cache[prompt_path] = template
if template is None:
template = prompt_path.read_text(encoding="utf-8")
with _cache_lock:
_prompt_cache.setdefault(prompt_path, template)
template = _prompt_cache[prompt_path]
if not kwargs:
return template