fix(i18n): 修复 PROMPT_EXTENSIONS 元组声明、消除重复代码、优化锁策略

- fix: PROMPT_EXTENSIONS = (".prompt") 是字符串而非元组(for suffix in PROMPT_EXTENSIONS 会按字符逐个迭代),改为 (".prompt",)
- refactor: 将 extract_placeholders/format_template 统一到 loaders.py,
  消除 formatting.py、prompt_i18n.py、i18n_validate.py 三处重复
- perf: _get_catalog 和 load_prompt 改为双重检查锁定,I/O 不再阻塞其他线程
- perf: _log_once 使用独立 _warning_lock,不再与 _cache_lock 竞争
- fix: _scan_legacy_prompt_directory 添加 prompts_root 参数,relative_to 改为相对传入的 prompts_root 计算(此前使用 get_prompts_root(directory))
- refactor: 合并 _supported_prompt_files 两个变体为单函数 + recursive 参数
- docs: i18n.md 强化 repository-specific 校验策略标注,修正时间表述冗余
- fix: 验证脚本错误消息移除 Crowdin 暗示,标注为仓库级校验策略

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
春河晴
2026-03-13 01:25:29 +09:00
parent 8f7f31a164
commit 55eb911dd3
6 changed files with 65 additions and 85 deletions

View File

@@ -87,7 +87,7 @@ Prompt 加载规则:
对于非 `zh-CN` 的目标 locale 对于非 `zh-CN` 的目标 locale
- 下面这两条是当前仓库的额外校验策略,不是 Crowdin 默认行为。 - 下面这两条是仓库的 repository-specific 校验策略,不是 Crowdin 默认行为。
- 不要手工把中文 source 文案直接复制进目标语言文件后提交。 - 不要手工把中文 source 文案直接复制进目标语言文件后提交。
- 英文 locale 文件中不应保留中文字符;这类残留会被校验脚本拦截。 - 英文 locale 文件中不应保留中文字符;这类残留会被校验脚本拦截。
@@ -130,7 +130,7 @@ python scripts/i18n_extract_candidates.py
项目根目录的 [`crowdin.yml`](../crowdin.yml) 使用 `locales/zh-CN/*.json` 作为 source。 项目根目录的 [`crowdin.yml`](../crowdin.yml) 使用 `locales/zh-CN/*.json` 作为 source。
现在也会把 `prompts/zh-CN/**/*.prompt` 作为单文件 Prompt 模板 source 上传到 Crowdin。 现在也会把 `prompts/zh-CN/**/*.prompt` 作为单文件 Prompt 模板 source 上传到 Crowdin。
GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source并下载当时 Crowdin 中当前可用的翻译结果。 GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source并下载当时 Crowdin 中可用的翻译结果。
常用命令: 常用命令:

View File

@@ -1,7 +1,6 @@
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
from string import Formatter
import re import re
import sys import sys
@@ -17,25 +16,16 @@ from src.common.i18n.loaders import ( # noqa: E402
get_locales_root, get_locales_root,
load_locale_catalog, load_locale_catalog,
) )
from src.common.i18n.loaders import extract_placeholders # noqa: E402
from src.common.prompt_i18n import ( # noqa: E402 from src.common.prompt_i18n import ( # noqa: E402
PROMPT_EXTENSIONS, PROMPT_EXTENSIONS,
extract_prompt_placeholders, extract_prompt_placeholders,
get_prompts_root, get_prompts_root,
) )
FORMATTER = Formatter()
HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]") HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]")
def extract_placeholders(template: str) -> set[str]:
placeholders: set[str] = set()
for _, field_name, _, _ in FORMATTER.parse(template):
if not field_name:
continue
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
return placeholders
def contains_han_characters(text: str) -> bool: def contains_han_characters(text: str) -> bool:
return HAN_CHARACTER_PATTERN.search(text) is not None return HAN_CHARACTER_PATTERN.search(text) is not None
@@ -65,7 +55,7 @@ def validate_locale_content(
source_text == target_text and contains_han_characters(source_text) source_text == target_text and contains_han_characters(source_text)
for source_text, target_text in zip(source_texts, target_texts, strict=False) for source_text, target_text in zip(source_texts, target_texts, strict=False)
): ):
errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案,请通过 Crowdin 提供目标语言翻译") errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案(仓库级校验策略),请提供目标语言翻译")
if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts): if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts):
errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交") errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交")
@@ -218,6 +208,16 @@ def validate_prompt_templates(prompts_root: Path | None = None) -> tuple[list[st
return errors, warnings return errors, warnings
def _print_warnings(warnings: list[str]) -> None:
if not warnings:
return
print(f"warnings ({len(warnings)}):")
for warning in warnings[:10]:
print(f" - {warning}")
if len(warnings) > 10:
print(f" - ... 另外还有 {len(warnings) - 10} 条 warning")
def main() -> int: def main() -> int:
errors = validate_json_locales() errors = validate_json_locales()
prompt_errors, prompt_warnings = validate_prompt_templates() prompt_errors, prompt_warnings = validate_prompt_templates()
@@ -227,21 +227,11 @@ def main() -> int:
print("i18n validation failed:") print("i18n validation failed:")
for error in errors: for error in errors:
print(f" - {error}") print(f" - {error}")
if prompt_warnings: _print_warnings(prompt_warnings)
print(f"warnings ({len(prompt_warnings)}):")
for warning in prompt_warnings[:10]:
print(f" - {warning}")
if len(prompt_warnings) > 10:
print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
return 1 return 1
print("i18n validation passed.") print("i18n validation passed.")
if prompt_warnings: _print_warnings(prompt_warnings)
print(f"warnings ({len(prompt_warnings)}):")
for warning in prompt_warnings[:10]:
print(f" - {warning}")
if len(prompt_warnings) > 10:
print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
return 0 return 0

View File

@@ -2,28 +2,14 @@ from __future__ import annotations
from datetime import date, datetime, time from datetime import date, datetime, time
from decimal import Decimal from decimal import Decimal
from string import Formatter
from babel import Locale from babel import Locale
from babel.dates import format_datetime as babel_format_datetime from babel.dates import format_datetime as babel_format_datetime
from babel.numbers import format_decimal as babel_format_decimal from babel.numbers import format_decimal as babel_format_decimal
from .loaders import DEFAULT_LOCALE, to_babel_locale from .loaders import DEFAULT_LOCALE, extract_placeholders, format_template, to_babel_locale
FORMATTER = Formatter() __all__ = ["extract_placeholders", "format_template"]
def extract_placeholders(template: str) -> set[str]:
placeholders: set[str] = set()
for _, field_name, _, _ in FORMATTER.parse(template):
if not field_name:
continue
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
return placeholders
def format_template(template: str, **kwargs: object) -> str:
return template.format(**kwargs)
def select_plural_category(locale: str, count: int | float | Decimal) -> str: def select_plural_category(locale: str, count: int | float | Decimal) -> str:

View File

@@ -1,9 +1,12 @@
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
from string import Formatter
import json import json
_FORMATTER = Formatter()
from .exceptions import ( from .exceptions import (
DuplicateTranslationKeyError, DuplicateTranslationKeyError,
InvalidLocaleError, InvalidLocaleError,
@@ -122,3 +125,16 @@ def load_locale_catalog(locale: str, locales_root: Path | None = None) -> dict[s
) )
merged_translations[key] = value merged_translations[key] = value
return merged_translations return merged_translations
def extract_placeholders(template: str) -> set[str]:
placeholders: set[str] = set()
for _, field_name, _, _ in _FORMATTER.parse(template):
if not field_name:
continue
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
return placeholders
def format_template(template: str, **kwargs: object) -> str:
return template.format(**kwargs)

View File

@@ -26,6 +26,7 @@ class I18nManager:
self._locale_override: ContextVar[str | None] = ContextVar("maibot_locale", default=None) self._locale_override: ContextVar[str | None] = ContextVar("maibot_locale", default=None)
self._warning_cache: set[tuple[str, str, str]] = set() self._warning_cache: set[tuple[str, str, str]] = set()
self._cache_lock = threading.RLock() self._cache_lock = threading.RLock()
self._warning_lock = threading.Lock()
def set_locale(self, locale: str) -> str: def set_locale(self, locale: str) -> str:
self._default_locale = normalize_locale(locale) self._default_locale = normalize_locale(locale)
@@ -187,11 +188,14 @@ class I18nManager:
) )
catalog = {} catalog = {}
with self._cache_lock:
if normalized_locale in self._catalog_cache:
return self._catalog_cache[normalized_locale]
self._catalog_cache[normalized_locale] = catalog self._catalog_cache[normalized_locale] = catalog
return catalog return catalog
def _log_once(self, cache_key: tuple[str, str, str], level: int, message: str, *args: object) -> None: def _log_once(self, cache_key: tuple[str, str, str], level: int, message: str, *args: object) -> None:
with self._cache_lock: with self._warning_lock:
if cache_key in self._warning_cache: if cache_key in self._warning_cache:
return return
self._warning_cache.add(cache_key) self._warning_cache.add(cache_key)

View File

@@ -1,7 +1,6 @@
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
from string import Formatter
import logging import logging
import os import os
@@ -9,14 +8,13 @@ import re
import threading import threading
from .i18n import get_locale, t from .i18n import get_locale, t
from .i18n.loaders import DEFAULT_LOCALE, normalize_locale from .i18n.loaders import DEFAULT_LOCALE, extract_placeholders as extract_prompt_placeholders, normalize_locale
logger = logging.getLogger("maibot.prompt_i18n") logger = logging.getLogger("maibot.prompt_i18n")
PROJECT_ROOT = Path(__file__).resolve().parents[2] PROJECT_ROOT = Path(__file__).resolve().parents[2]
PROMPTS_ROOT = (PROJECT_ROOT / "prompts").resolve() PROMPTS_ROOT = (PROJECT_ROOT / "prompts").resolve()
PROMPT_EXTENSIONS = (".prompt") PROMPT_EXTENSIONS = (".prompt",)
FORMATTER = Formatter()
SAFE_SEGMENT_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$") SAFE_SEGMENT_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$")
STRICT_ENV_KEYS = ("MAIBOT_PROMPT_I18N_STRICT", "MAIBOT_I18N_STRICT") STRICT_ENV_KEYS = ("MAIBOT_PROMPT_I18N_STRICT", "MAIBOT_I18N_STRICT")
@@ -24,15 +22,6 @@ _prompt_cache: dict[Path, str] = {}
_cache_lock = threading.RLock() _cache_lock = threading.RLock()
def extract_prompt_placeholders(template: str) -> set[str]:
placeholders: set[str] = set()
for _, field_name, _, _ in FORMATTER.parse(template):
if not field_name:
continue
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
return placeholders
def get_prompts_root(prompts_root: Path | None = None) -> Path: def get_prompts_root(prompts_root: Path | None = None) -> Path:
return (prompts_root or PROMPTS_ROOT).resolve() return (prompts_root or PROMPTS_ROOT).resolve()
@@ -70,17 +59,11 @@ def is_strict_prompt_i18n_mode() -> bool:
return any(os.getenv(env_key, "").strip().lower() in {"1", "true", "yes", "on"} for env_key in STRICT_ENV_KEYS) return any(os.getenv(env_key, "").strip().lower() in {"1", "true", "yes", "on"} for env_key in STRICT_ENV_KEYS)
def _supported_prompt_files(directory: Path) -> list[Path]: def _supported_prompt_files(directory: Path, recursive: bool = True) -> list[Path]:
search = directory.rglob if recursive else directory.glob
matched_files: list[Path] = [] matched_files: list[Path] = []
for suffix in PROMPT_EXTENSIONS: for suffix in PROMPT_EXTENSIONS:
matched_files.extend(path for path in directory.rglob(f"*{suffix}") if path.is_file()) matched_files.extend(path for path in search(f"*{suffix}") if path.is_file())
return sorted(set(matched_files))
def _supported_prompt_files_non_recursive(directory: Path) -> list[Path]:
matched_files: list[Path] = []
for suffix in PROMPT_EXTENSIONS:
matched_files.extend(path for path in directory.glob(f"*{suffix}") if path.is_file())
return sorted(set(matched_files)) return sorted(set(matched_files))
@@ -104,20 +87,20 @@ def _scan_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Pat
return prompt_paths return prompt_paths
def _scan_legacy_prompt_directory(directory: Path) -> dict[str, Path]: def _scan_legacy_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Path]:
prompt_paths: dict[str, Path] = {} prompt_paths: dict[str, Path] = {}
if not directory.exists(): if not directory.exists():
return prompt_paths return prompt_paths
for prompt_path in _supported_prompt_files_non_recursive(directory): for prompt_path in _supported_prompt_files(directory, recursive=False):
prompt_name = prompt_path.stem prompt_name = prompt_path.stem
if prompt_name in prompt_paths: if prompt_name in prompt_paths:
raise ValueError( raise ValueError(
t( t(
"prompt.duplicate_template_name", "prompt.duplicate_template_name",
name=prompt_name, name=prompt_name,
path_a=prompt_paths[prompt_name].relative_to(get_prompts_root(directory)), path_a=prompt_paths[prompt_name].relative_to(prompts_root),
path_b=prompt_path.relative_to(get_prompts_root(directory)), path_b=prompt_path.relative_to(prompts_root),
) )
) )
prompt_paths[prompt_name] = prompt_path prompt_paths[prompt_name] = prompt_path
@@ -128,7 +111,7 @@ def list_prompt_templates(locale: str | None = None, prompts_root: Path | None =
resolved_prompts_root = get_prompts_root(prompts_root) resolved_prompts_root = get_prompts_root(prompts_root)
requested_locale = normalize_locale(locale or get_locale()) requested_locale = normalize_locale(locale or get_locale())
prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root) prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root, resolved_prompts_root)
prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / DEFAULT_LOCALE, resolved_prompts_root)) prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / DEFAULT_LOCALE, resolved_prompts_root))
if requested_locale != DEFAULT_LOCALE: if requested_locale != DEFAULT_LOCALE:
@@ -177,9 +160,10 @@ def load_prompt(
with _cache_lock: with _cache_lock:
template = _prompt_cache.get(prompt_path) template = _prompt_cache.get(prompt_path)
if template is None: if template is None:
with open(prompt_path, "r", encoding="utf-8") as prompt_file: template = prompt_path.read_text(encoding="utf-8")
template = prompt_file.read() with _cache_lock:
_prompt_cache[prompt_path] = template _prompt_cache.setdefault(prompt_path, template)
template = _prompt_cache[prompt_path]
if not kwargs: if not kwargs:
return template return template