fix(i18n): 修复 PROMPT_EXTENSIONS 元组声明、消除重复代码、优化锁策略

- fix: PROMPT_EXTENSIONS = (".prompt") 是字符串而非元组，改为 (".prompt",)
- refactor: 将 extract_placeholders/format_template 统一到 loaders.py，消除 formatting.py、prompt_i18n.py、i18n_validate.py 三处重复
- perf: _get_catalog 和 load_prompt 改为双重检查锁定（在锁外执行文件 I/O，写入缓存前再次检查），I/O 不再阻塞其他线程
- perf: _log_once 使用独立 _warning_lock，不再与 _cache_lock 竞争
- fix: _scan_legacy_prompt_directory 添加 prompts_root 参数，修正 relative_to 语义
- refactor: 合并 _supported_prompt_files 两个变体为单函数 + recursive 参数
- docs: i18n.md 强化 repository-specific 校验策略标注，修正时间表述冗余
- fix: 验证脚本错误消息移除 Crowdin 暗示，标注为仓库级校验策略

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 01:25:29 +09:00
parent 8f7f31a164
commit 55eb911dd3
6 changed files with 65 additions and 85 deletions
--- a/docs/i18n.md
+++ b/docs/i18n.md
@@ -87,7 +87,7 @@ Prompt 加载规则：

 对于非 `zh-CN` 的目标 locale：

- 下面这两条是当前仓库的额外校验策略，不是 Crowdin 默认行为。
+- 下面这两条是本仓库的 repository-specific 校验策略，不是 Crowdin 默认行为。
 - 不要手工把中文 source 文案直接复制进目标语言文件后提交。
 - 英文 locale 文件中不应保留中文字符；这类残留会被校验脚本拦截。

@@ -130,7 +130,7 @@ python scripts/i18n_extract_candidates.py
 项目根目录的 [`crowdin.yml`](../crowdin.yml) 使用 `locales/zh-CN/*.json` 作为 source。
 现在也会把 `prompts/zh-CN/**/*.prompt` 作为单文件 Prompt 模板 source 上传到 Crowdin。

-GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source，并下载当时 Crowdin 中当前可用的翻译结果。
+GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source，并下载当时 Crowdin 中可用的翻译结果。

 常用命令：

--- a/scripts/i18n_validate.py
+++ b/scripts/i18n_validate.py
@@ -1,7 +1,6 @@
 from __future__ import annotations

 from pathlib import Path
-from string import Formatter

 import re
 import sys
@@ -17,25 +16,16 @@ from src.common.i18n.loaders import (  # noqa: E402
    get_locales_root,
    load_locale_catalog,
 )
+from src.common.i18n.loaders import extract_placeholders  # noqa: E402
 from src.common.prompt_i18n import (  # noqa: E402
    PROMPT_EXTENSIONS,
    extract_prompt_placeholders,
    get_prompts_root,
 )

-FORMATTER = Formatter()
 HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]")


-def extract_placeholders(template: str) -> set[str]:
-    placeholders: set[str] = set()
-    for _, field_name, _, _ in FORMATTER.parse(template):
-        if not field_name:
-            continue
-        placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
-    return placeholders
-
-
 def contains_han_characters(text: str) -> bool:
    return HAN_CHARACTER_PATTERN.search(text) is not None

@@ -65,7 +55,7 @@ def validate_locale_content(
        source_text == target_text and contains_han_characters(source_text)
        for source_text, target_text in zip(source_texts, target_texts, strict=False)
    ):
-        errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案，请通过 Crowdin 提供目标语言翻译")
+        errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案（仓库级校验策略），请提供目标语言翻译")

    if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts):
        errors.append(f"[{locale}] key '{key}' 仍包含中文字符，请移除源语言残留后再提交")
@@ -218,6 +208,16 @@ def validate_prompt_templates(prompts_root: Path | None = None) -> tuple[list[st
    return errors, warnings


+def _print_warnings(warnings: list[str]) -> None:
+    if not warnings:
+        return
+    print(f"warnings ({len(warnings)}):")
+    for warning in warnings[:10]:
+        print(f"  - {warning}")
+    if len(warnings) > 10:
+        print(f"  - ... 另外还有 {len(warnings) - 10} 条 warning")
+
+
 def main() -> int:
    errors = validate_json_locales()
    prompt_errors, prompt_warnings = validate_prompt_templates()
@@ -227,21 +227,11 @@ def main() -> int:
        print("i18n validation failed:")
        for error in errors:
            print(f"  - {error}")
-        if prompt_warnings:
-            print(f"warnings ({len(prompt_warnings)}):")
-            for warning in prompt_warnings[:10]:
-                print(f"  - {warning}")
-            if len(prompt_warnings) > 10:
-                print(f"  - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
+        _print_warnings(prompt_warnings)
        return 1

    print("i18n validation passed.")
-    if prompt_warnings:
-        print(f"warnings ({len(prompt_warnings)}):")
-        for warning in prompt_warnings[:10]:
-            print(f"  - {warning}")
-        if len(prompt_warnings) > 10:
-            print(f"  - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
+    _print_warnings(prompt_warnings)
    return 0


--- a/src/common/i18n/formatting.py
+++ b/src/common/i18n/formatting.py
@@ -2,28 +2,14 @@ from __future__ import annotations

 from datetime import date, datetime, time
 from decimal import Decimal
-from string import Formatter

 from babel import Locale
 from babel.dates import format_datetime as babel_format_datetime
 from babel.numbers import format_decimal as babel_format_decimal

-from .loaders import DEFAULT_LOCALE, to_babel_locale
+from .loaders import DEFAULT_LOCALE, extract_placeholders, format_template, to_babel_locale

-FORMATTER = Formatter()
-
-
-def extract_placeholders(template: str) -> set[str]:
-    placeholders: set[str] = set()
-    for _, field_name, _, _ in FORMATTER.parse(template):
-        if not field_name:
-            continue
-        placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
-    return placeholders
-
-
-def format_template(template: str, **kwargs: object) -> str:
-    return template.format(**kwargs)
+__all__ = ["extract_placeholders", "format_template"]


 def select_plural_category(locale: str, count: int | float | Decimal) -> str:
--- a/src/common/i18n/loaders.py
+++ b/src/common/i18n/loaders.py
@@ -1,9 +1,12 @@
 from __future__ import annotations

 from pathlib import Path
+from string import Formatter

 import json

+_FORMATTER = Formatter()
+
 from .exceptions import (
    DuplicateTranslationKeyError,
    InvalidLocaleError,
@@ -122,3 +125,16 @@ def load_locale_catalog(locale: str, locales_root: Path | None = None) -> dict[s
                )
            merged_translations[key] = value
    return merged_translations
+
+
+def extract_placeholders(template: str) -> set[str]:
+    placeholders: set[str] = set()
+    for _, field_name, _, _ in _FORMATTER.parse(template):
+        if not field_name:
+            continue
+        placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
+    return placeholders
+
+
+def format_template(template: str, **kwargs: object) -> str:
+    return template.format(**kwargs)
--- a/src/common/i18n/manager.py
+++ b/src/common/i18n/manager.py
@@ -26,6 +26,7 @@ class I18nManager:
        self._locale_override: ContextVar[str | None] = ContextVar("maibot_locale", default=None)
        self._warning_cache: set[tuple[str, str, str]] = set()
        self._cache_lock = threading.RLock()
+        self._warning_lock = threading.Lock()

    def set_locale(self, locale: str) -> str:
        self._default_locale = normalize_locale(locale)
@@ -175,23 +176,26 @@ class I18nManager:
            if normalized_locale in self._catalog_cache:
                return self._catalog_cache[normalized_locale]

-            try:
-                catalog = load_locale_catalog(normalized_locale, self._locales_root)
-            except I18nError as exc:
-                self._log_once(
-                    ("load_failed", normalized_locale, exc.__class__.__name__),
-                    logging.WARNING,
-                    "加载 locale '%s' 失败: %s",
-                    normalized_locale,
-                    exc,
-                )
-                catalog = {}
+        try:
+            catalog = load_locale_catalog(normalized_locale, self._locales_root)
+        except I18nError as exc:
+            self._log_once(
+                ("load_failed", normalized_locale, exc.__class__.__name__),
+                logging.WARNING,
+                "加载 locale '%s' 失败: %s",
+                normalized_locale,
+                exc,
+            )
+            catalog = {}

+        with self._cache_lock:
+            if normalized_locale in self._catalog_cache:
+                return self._catalog_cache[normalized_locale]
            self._catalog_cache[normalized_locale] = catalog
            return catalog

    def _log_once(self, cache_key: tuple[str, str, str], level: int, message: str, *args: object) -> None:
-        with self._cache_lock:
+        with self._warning_lock:
            if cache_key in self._warning_cache:
                return
            self._warning_cache.add(cache_key)
--- a/src/common/prompt_i18n.py
+++ b/src/common/prompt_i18n.py
@@ -1,7 +1,6 @@
 from __future__ import annotations

 from pathlib import Path
-from string import Formatter

 import logging
 import os
@@ -9,14 +8,13 @@ import re
 import threading

 from .i18n import get_locale, t
-from .i18n.loaders import DEFAULT_LOCALE, normalize_locale
+from .i18n.loaders import DEFAULT_LOCALE, extract_placeholders as extract_prompt_placeholders, normalize_locale

 logger = logging.getLogger("maibot.prompt_i18n")

 PROJECT_ROOT = Path(__file__).resolve().parents[2]
 PROMPTS_ROOT = (PROJECT_ROOT / "prompts").resolve()
-PROMPT_EXTENSIONS = (".prompt")
-FORMATTER = Formatter()
+PROMPT_EXTENSIONS = (".prompt",)
 SAFE_SEGMENT_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$")
 STRICT_ENV_KEYS = ("MAIBOT_PROMPT_I18N_STRICT", "MAIBOT_I18N_STRICT")

@@ -24,15 +22,6 @@ _prompt_cache: dict[Path, str] = {}
 _cache_lock = threading.RLock()


-def extract_prompt_placeholders(template: str) -> set[str]:
-    placeholders: set[str] = set()
-    for _, field_name, _, _ in FORMATTER.parse(template):
-        if not field_name:
-            continue
-        placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
-    return placeholders
-
-
 def get_prompts_root(prompts_root: Path | None = None) -> Path:
    return (prompts_root or PROMPTS_ROOT).resolve()

@@ -70,17 +59,11 @@ def is_strict_prompt_i18n_mode() -> bool:
    return any(os.getenv(env_key, "").strip().lower() in {"1", "true", "yes", "on"} for env_key in STRICT_ENV_KEYS)


-def _supported_prompt_files(directory: Path) -> list[Path]:
+def _supported_prompt_files(directory: Path, recursive: bool = True) -> list[Path]:
+    search = directory.rglob if recursive else directory.glob
    matched_files: list[Path] = []
    for suffix in PROMPT_EXTENSIONS:
-        matched_files.extend(path for path in directory.rglob(f"*{suffix}") if path.is_file())
-    return sorted(set(matched_files))
-
-
-def _supported_prompt_files_non_recursive(directory: Path) -> list[Path]:
-    matched_files: list[Path] = []
-    for suffix in PROMPT_EXTENSIONS:
-        matched_files.extend(path for path in directory.glob(f"*{suffix}") if path.is_file())
+        matched_files.extend(path for path in search(f"*{suffix}") if path.is_file())
    return sorted(set(matched_files))


@@ -104,20 +87,20 @@ def _scan_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Pat
    return prompt_paths


-def _scan_legacy_prompt_directory(directory: Path) -> dict[str, Path]:
+def _scan_legacy_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Path]:
    prompt_paths: dict[str, Path] = {}
    if not directory.exists():
        return prompt_paths

-    for prompt_path in _supported_prompt_files_non_recursive(directory):
+    for prompt_path in _supported_prompt_files(directory, recursive=False):
        prompt_name = prompt_path.stem
        if prompt_name in prompt_paths:
            raise ValueError(
                t(
                    "prompt.duplicate_template_name",
                    name=prompt_name,
-                    path_a=prompt_paths[prompt_name].relative_to(get_prompts_root(directory)),
-                    path_b=prompt_path.relative_to(get_prompts_root(directory)),
+                    path_a=prompt_paths[prompt_name].relative_to(prompts_root),
+                    path_b=prompt_path.relative_to(prompts_root),
                )
            )
        prompt_paths[prompt_name] = prompt_path
@@ -128,7 +111,7 @@ def list_prompt_templates(locale: str | None = None, prompts_root: Path | None =
    resolved_prompts_root = get_prompts_root(prompts_root)
    requested_locale = normalize_locale(locale or get_locale())

-    prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root)
+    prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root, resolved_prompts_root)
    prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / DEFAULT_LOCALE, resolved_prompts_root))

    if requested_locale != DEFAULT_LOCALE:
@@ -176,10 +159,11 @@ def load_prompt(
    prompt_path = resolve_prompt_path(name=name, locale=locale, category=category, prompts_root=prompts_root)
    with _cache_lock:
        template = _prompt_cache.get(prompt_path)
-        if template is None:
-            with open(prompt_path, "r", encoding="utf-8") as prompt_file:
-                template = prompt_file.read()
-            _prompt_cache[prompt_path] = template
+    if template is None:
+        template = prompt_path.read_text(encoding="utf-8")
+        with _cache_lock:
+            _prompt_cache.setdefault(prompt_path, template)
+            template = _prompt_cache[prompt_path]

    if not kwargs:
        return template