diff --git a/docs/i18n.md b/docs/i18n.md index ddec2abe..49819886 100644 --- a/docs/i18n.md +++ b/docs/i18n.md @@ -87,7 +87,7 @@ Prompt 加载规则: 对于非 `zh-CN` 的目标 locale: -- 下面这两条是当前仓库的额外校验策略,不是 Crowdin 默认行为。 +- 下面这两条是本仓库的 repository-specific 校验策略,不是 Crowdin 默认行为。 - 不要手工把中文 source 文案直接复制进目标语言文件后提交。 - 英文 locale 文件中不应保留中文字符;这类残留会被校验脚本拦截。 @@ -130,7 +130,7 @@ python scripts/i18n_extract_candidates.py 项目根目录的 [`crowdin.yml`](../crowdin.yml) 使用 `locales/zh-CN/*.json` 作为 source。 现在也会把 `prompts/zh-CN/**/*.prompt` 作为单文件 Prompt 模板 source 上传到 Crowdin。 -GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source,并下载当时 Crowdin 中当前可用的翻译结果。 +GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source,并下载当时 Crowdin 中可用的翻译结果。 常用命令: diff --git a/scripts/i18n_validate.py b/scripts/i18n_validate.py index 9b3a4313..62e685c1 100644 --- a/scripts/i18n_validate.py +++ b/scripts/i18n_validate.py @@ -1,7 +1,6 @@ from __future__ import annotations from pathlib import Path -from string import Formatter import re import sys @@ -17,25 +16,16 @@ from src.common.i18n.loaders import ( # noqa: E402 get_locales_root, load_locale_catalog, ) +from src.common.i18n.loaders import extract_placeholders # noqa: E402 from src.common.prompt_i18n import ( # noqa: E402 PROMPT_EXTENSIONS, extract_prompt_placeholders, get_prompts_root, ) -FORMATTER = Formatter() HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]") -def extract_placeholders(template: str) -> set[str]: - placeholders: set[str] = set() - for _, field_name, _, _ in FORMATTER.parse(template): - if not field_name: - continue - placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0]) - return placeholders - - def contains_han_characters(text: str) -> bool: return HAN_CHARACTER_PATTERN.search(text) is not None @@ -65,7 +55,7 @@ def validate_locale_content( source_text == target_text and contains_han_characters(source_text) for source_text, target_text in zip(source_texts, target_texts, strict=False) ): - errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案,请通过 Crowdin 提供目标语言翻译") + errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案(仓库级校验策略),请提供目标语言翻译") if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts): errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交") @@ -218,6 +208,16 @@ def validate_prompt_templates(prompts_root: Path | None = None) -> tuple[list[st return errors, warnings +def _print_warnings(warnings: list[str]) -> None: + if not warnings: + return + print(f"warnings ({len(warnings)}):") + for warning in warnings[:10]: + print(f" - {warning}") + if len(warnings) > 10: + print(f" - ... 另外还有 {len(warnings) - 10} 条 warning") + + def main() -> int: errors = validate_json_locales() prompt_errors, prompt_warnings = validate_prompt_templates() @@ -227,21 +227,11 @@ def main() -> int: print("i18n validation failed:") for error in errors: print(f" - {error}") - if prompt_warnings: - print(f"warnings ({len(prompt_warnings)}):") - for warning in prompt_warnings[:10]: - print(f" - {warning}") - if len(prompt_warnings) > 10: - print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning") + _print_warnings(prompt_warnings) return 1 print("i18n validation passed.") - if prompt_warnings: - print(f"warnings ({len(prompt_warnings)}):") - for warning in prompt_warnings[:10]: - print(f" - {warning}") - if len(prompt_warnings) > 10: - print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning") + _print_warnings(prompt_warnings) return 0 diff --git a/src/common/i18n/formatting.py b/src/common/i18n/formatting.py index 71114f0c..a1b0fea5 100644 --- a/src/common/i18n/formatting.py +++ b/src/common/i18n/formatting.py @@ -2,28 +2,14 @@ from __future__ import annotations from datetime import date, datetime, time from decimal import Decimal -from string import Formatter from babel import Locale from babel.dates import format_datetime as babel_format_datetime from babel.numbers import format_decimal as babel_format_decimal -from .loaders import DEFAULT_LOCALE, to_babel_locale +from .loaders import DEFAULT_LOCALE, extract_placeholders, format_template, to_babel_locale -FORMATTER = Formatter() - - -def extract_placeholders(template: str) -> set[str]: - placeholders: set[str] = set() - for _, field_name, _, _ in FORMATTER.parse(template): - if not field_name: - continue - placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0]) - return placeholders - - -def format_template(template: str, **kwargs: object) -> str: - return template.format(**kwargs) +__all__ = ["extract_placeholders", "format_template"] def select_plural_category(locale: str, count: int | float | Decimal) -> str: diff --git a/src/common/i18n/loaders.py b/src/common/i18n/loaders.py index a205a44a..0710f7b5 100644 --- a/src/common/i18n/loaders.py +++ b/src/common/i18n/loaders.py @@ -1,9 +1,12 @@ from __future__ import annotations from pathlib import Path +from string import Formatter import json +_FORMATTER = Formatter() + from .exceptions import ( DuplicateTranslationKeyError, InvalidLocaleError, @@ -122,3 +125,16 @@ def load_locale_catalog(locale: str, locales_root: Path | None = None) -> dict[s ) merged_translations[key] = value return merged_translations + + +def extract_placeholders(template: str) -> set[str]: + placeholders: set[str] = set() + for _, field_name, _, _ in _FORMATTER.parse(template): + if not field_name: + continue + placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0]) + return placeholders + + +def format_template(template: str, **kwargs: object) -> str: + return template.format(**kwargs) diff --git a/src/common/i18n/manager.py b/src/common/i18n/manager.py index 2b943677..c206a471 100644 --- a/src/common/i18n/manager.py +++ b/src/common/i18n/manager.py @@ -26,6 +26,7 @@ class I18nManager: self._locale_override: ContextVar[str | None] = ContextVar("maibot_locale", default=None) self._warning_cache: set[tuple[str, str, str]] = set() self._cache_lock = threading.RLock() + self._warning_lock = threading.Lock() def set_locale(self, locale: str) -> str: self._default_locale = normalize_locale(locale) @@ -175,23 +176,26 @@ class I18nManager: if normalized_locale in self._catalog_cache: return self._catalog_cache[normalized_locale] - try: - catalog = load_locale_catalog(normalized_locale, self._locales_root) - except I18nError as exc: - self._log_once( - ("load_failed", normalized_locale, exc.__class__.__name__), - logging.WARNING, - "加载 locale '%s' 失败: %s", - normalized_locale, - exc, - ) - catalog = {} + try: + catalog = load_locale_catalog(normalized_locale, self._locales_root) + except I18nError as exc: + self._log_once( + ("load_failed", normalized_locale, exc.__class__.__name__), + logging.WARNING, + "加载 locale '%s' 失败: %s", + normalized_locale, + exc, + ) + catalog = {} + with self._cache_lock: + if normalized_locale in self._catalog_cache: + return self._catalog_cache[normalized_locale] self._catalog_cache[normalized_locale] = catalog return catalog def _log_once(self, cache_key: tuple[str, str, str], level: int, message: str, *args: object) -> None: - with self._cache_lock: + with self._warning_lock: if cache_key in self._warning_cache: return self._warning_cache.add(cache_key) diff --git a/src/common/prompt_i18n.py b/src/common/prompt_i18n.py index bdaa54d7..7ada030e 100644 --- a/src/common/prompt_i18n.py +++ b/src/common/prompt_i18n.py @@ -1,7 +1,6 @@ from __future__ import annotations from pathlib import Path -from string import Formatter import logging import os @@ -9,14 +8,13 @@ import re import threading from .i18n import get_locale, t -from .i18n.loaders import DEFAULT_LOCALE, normalize_locale +from .i18n.loaders import DEFAULT_LOCALE, extract_placeholders as extract_prompt_placeholders, normalize_locale logger = logging.getLogger("maibot.prompt_i18n") PROJECT_ROOT = Path(__file__).resolve().parents[2] PROMPTS_ROOT = (PROJECT_ROOT / "prompts").resolve() -PROMPT_EXTENSIONS = (".prompt") -FORMATTER = Formatter() +PROMPT_EXTENSIONS = (".prompt",) SAFE_SEGMENT_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$") STRICT_ENV_KEYS = ("MAIBOT_PROMPT_I18N_STRICT", "MAIBOT_I18N_STRICT") @@ -24,15 +22,6 @@ _prompt_cache: dict[Path, str] = {} _cache_lock = threading.RLock() -def extract_prompt_placeholders(template: str) -> set[str]: - placeholders: set[str] = set() - for _, field_name, _, _ in FORMATTER.parse(template): - if not field_name: - continue - placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0]) - return placeholders - - def get_prompts_root(prompts_root: Path | None = None) -> Path: return (prompts_root or PROMPTS_ROOT).resolve() @@ -70,17 +59,11 @@ def is_strict_prompt_i18n_mode() -> bool: return any(os.getenv(env_key, "").strip().lower() in {"1", "true", "yes", "on"} for env_key in STRICT_ENV_KEYS) -def _supported_prompt_files(directory: Path) -> list[Path]: +def _supported_prompt_files(directory: Path, recursive: bool = True) -> list[Path]: + search = directory.rglob if recursive else directory.glob matched_files: list[Path] = [] for suffix in PROMPT_EXTENSIONS: - matched_files.extend(path for path in directory.rglob(f"*{suffix}") if path.is_file()) - return sorted(set(matched_files)) - - -def _supported_prompt_files_non_recursive(directory: Path) -> list[Path]: - matched_files: list[Path] = [] - for suffix in PROMPT_EXTENSIONS: - matched_files.extend(path for path in directory.glob(f"*{suffix}") if path.is_file()) + matched_files.extend(path for path in search(f"*{suffix}") if path.is_file()) return sorted(set(matched_files)) @@ -104,20 +87,20 @@ def _scan_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Pat return prompt_paths -def _scan_legacy_prompt_directory(directory: Path) -> dict[str, Path]: +def _scan_legacy_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Path]: prompt_paths: dict[str, Path] = {} if not directory.exists(): return prompt_paths - for prompt_path in _supported_prompt_files_non_recursive(directory): + for prompt_path in _supported_prompt_files(directory, recursive=False): prompt_name = prompt_path.stem if prompt_name in prompt_paths: raise ValueError( t( "prompt.duplicate_template_name", name=prompt_name, - path_a=prompt_paths[prompt_name].relative_to(get_prompts_root(directory)), - path_b=prompt_path.relative_to(get_prompts_root(directory)), + path_a=prompt_paths[prompt_name].relative_to(prompts_root), + path_b=prompt_path.relative_to(prompts_root), ) ) prompt_paths[prompt_name] = prompt_path @@ -128,7 +111,7 @@ def list_prompt_templates(locale: str | None = None, prompts_root: Path | None = resolved_prompts_root = get_prompts_root(prompts_root) requested_locale = normalize_locale(locale or get_locale()) - prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root) + prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root, resolved_prompts_root) prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / DEFAULT_LOCALE, resolved_prompts_root)) if requested_locale != DEFAULT_LOCALE: @@ -176,10 +159,11 @@ def load_prompt( prompt_path = resolve_prompt_path(name=name, locale=locale, category=category, prompts_root=prompts_root) with _cache_lock: template = _prompt_cache.get(prompt_path) - if template is None: - with open(prompt_path, "r", encoding="utf-8") as prompt_file: - template = prompt_file.read() - _prompt_cache[prompt_path] = template + if template is None: + template = prompt_path.read_text(encoding="utf-8") + with _cache_lock: + _prompt_cache.setdefault(prompt_path, template) + template = _prompt_cache[prompt_path] if not kwargs: return template