fix(i18n): 修复 PROMPT_EXTENSIONS 元组声明、消除重复代码、优化锁策略
- fix: PROMPT_EXTENSIONS = (".prompt") 是字符串非元组,改为 (".prompt",)
- refactor: 将 extract_placeholders/format_template 统一到 loaders.py,
消除 formatting.py、prompt_i18n.py、i18n_validate.py 三处重复
- perf: _get_catalog 和 load_prompt 改为双重检查锁定,I/O 不再阻塞其他线程
- perf: _log_once 使用独立 _warning_lock,不再与 _cache_lock 竞争
- fix: _scan_legacy_prompt_directory 添加 prompts_root 参数,修正 relative_to 语义
- refactor: 合并 _supported_prompt_files 两个变体为单函数 + recursive 参数
- docs: i18n.md 强化 repository-specific 校验策略标注,修正时间表述冗余
- fix: 验证脚本错误消息移除 Crowdin 暗示,标注为仓库级校验策略
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -87,7 +87,7 @@ Prompt 加载规则:
|
||||
|
||||
对于非 `zh-CN` 的目标 locale:
|
||||
|
||||
- 下面这两条是当前仓库的额外校验策略,不是 Crowdin 默认行为。
|
||||
- 下面这两条是本仓库特有的(repository-specific)校验策略,不是 Crowdin 默认行为。
|
||||
- 不要手工把中文 source 文案直接复制进目标语言文件后提交。
|
||||
- 英文 locale 文件中不应保留中文字符;这类残留会被校验脚本拦截。
|
||||
|
||||
@@ -130,7 +130,7 @@ python scripts/i18n_extract_candidates.py
|
||||
项目根目录的 [`crowdin.yml`](../crowdin.yml) 使用 `locales/zh-CN/*.json` 作为 source。
|
||||
现在也会把 `prompts/zh-CN/**/*.prompt` 作为单文件 Prompt 模板 source 上传到 Crowdin。
|
||||
|
||||
GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source,并下载当时 Crowdin 中当前可用的翻译结果。
|
||||
GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source,并下载当时 Crowdin 中可用的翻译结果。
|
||||
|
||||
常用命令:
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from string import Formatter
|
||||
|
||||
import re
|
||||
import sys
|
||||
@@ -17,25 +16,16 @@ from src.common.i18n.loaders import ( # noqa: E402
|
||||
get_locales_root,
|
||||
load_locale_catalog,
|
||||
)
|
||||
from src.common.i18n.loaders import extract_placeholders # noqa: E402
|
||||
from src.common.prompt_i18n import ( # noqa: E402
|
||||
PROMPT_EXTENSIONS,
|
||||
extract_prompt_placeholders,
|
||||
get_prompts_root,
|
||||
)
|
||||
|
||||
FORMATTER = Formatter()
|
||||
HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]")
|
||||
|
||||
|
||||
def extract_placeholders(template: str) -> set[str]:
|
||||
placeholders: set[str] = set()
|
||||
for _, field_name, _, _ in FORMATTER.parse(template):
|
||||
if not field_name:
|
||||
continue
|
||||
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
|
||||
return placeholders
|
||||
|
||||
|
||||
def contains_han_characters(text: str) -> bool:
    """Report whether ``text`` has a character matched by ``HAN_CHARACTER_PATTERN``.

    Args:
        text: The string to scan.

    Returns:
        True when the module-level pattern matches anywhere in ``text``,
        False otherwise (including for the empty string).
    """
    first_match = HAN_CHARACTER_PATTERN.search(text)
    return first_match is not None
|
||||
|
||||
@@ -65,7 +55,7 @@ def validate_locale_content(
|
||||
source_text == target_text and contains_han_characters(source_text)
|
||||
for source_text, target_text in zip(source_texts, target_texts, strict=False)
|
||||
):
|
||||
errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案,请通过 Crowdin 提供目标语言翻译")
|
||||
errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案(仓库级校验策略),请提供目标语言翻译")
|
||||
|
||||
if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts):
|
||||
errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交")
|
||||
@@ -218,6 +208,16 @@ def validate_prompt_templates(prompts_root: Path | None = None) -> tuple[list[st
|
||||
return errors, warnings
|
||||
|
||||
|
||||
def _print_warnings(warnings: list[str]) -> None:
|
||||
if not warnings:
|
||||
return
|
||||
print(f"warnings ({len(warnings)}):")
|
||||
for warning in warnings[:10]:
|
||||
print(f" - {warning}")
|
||||
if len(warnings) > 10:
|
||||
print(f" - ... 另外还有 {len(warnings) - 10} 条 warning")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
errors = validate_json_locales()
|
||||
prompt_errors, prompt_warnings = validate_prompt_templates()
|
||||
@@ -227,21 +227,11 @@ def main() -> int:
|
||||
print("i18n validation failed:")
|
||||
for error in errors:
|
||||
print(f" - {error}")
|
||||
if prompt_warnings:
|
||||
print(f"warnings ({len(prompt_warnings)}):")
|
||||
for warning in prompt_warnings[:10]:
|
||||
print(f" - {warning}")
|
||||
if len(prompt_warnings) > 10:
|
||||
print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
|
||||
_print_warnings(prompt_warnings)
|
||||
return 1
|
||||
|
||||
print("i18n validation passed.")
|
||||
if prompt_warnings:
|
||||
print(f"warnings ({len(prompt_warnings)}):")
|
||||
for warning in prompt_warnings[:10]:
|
||||
print(f" - {warning}")
|
||||
if len(prompt_warnings) > 10:
|
||||
print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
|
||||
_print_warnings(prompt_warnings)
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
@@ -2,28 +2,14 @@ from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, time
|
||||
from decimal import Decimal
|
||||
from string import Formatter
|
||||
|
||||
from babel import Locale
|
||||
from babel.dates import format_datetime as babel_format_datetime
|
||||
from babel.numbers import format_decimal as babel_format_decimal
|
||||
|
||||
from .loaders import DEFAULT_LOCALE, to_babel_locale
|
||||
from .loaders import DEFAULT_LOCALE, extract_placeholders, format_template, to_babel_locale
|
||||
|
||||
FORMATTER = Formatter()
|
||||
|
||||
|
||||
def extract_placeholders(template: str) -> set[str]:
|
||||
placeholders: set[str] = set()
|
||||
for _, field_name, _, _ in FORMATTER.parse(template):
|
||||
if not field_name:
|
||||
continue
|
||||
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
|
||||
return placeholders
|
||||
|
||||
|
||||
def format_template(template: str, **kwargs: object) -> str:
|
||||
return template.format(**kwargs)
|
||||
__all__ = ["extract_placeholders", "format_template"]
|
||||
|
||||
|
||||
def select_plural_category(locale: str, count: int | float | Decimal) -> str:
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from string import Formatter
|
||||
|
||||
import json
|
||||
|
||||
_FORMATTER = Formatter()
|
||||
|
||||
from .exceptions import (
|
||||
DuplicateTranslationKeyError,
|
||||
InvalidLocaleError,
|
||||
@@ -122,3 +125,16 @@ def load_locale_catalog(locale: str, locales_root: Path | None = None) -> dict[s
|
||||
)
|
||||
merged_translations[key] = value
|
||||
return merged_translations
|
||||
|
||||
|
||||
def extract_placeholders(template: str) -> set[str]:
    """Collect the argument names referenced by a ``str.format`` template.

    Attribute and index access is reduced to the base name, so
    ``{user.name}`` and ``{items[0]}`` yield ``user`` and ``items``.
    Auto-numbered fields (``{}``) are skipped; explicit positions such as
    ``{0}`` are reported as ``"0"``. Replacement fields nested inside a
    format spec (``{value:{width}}``) are reported as well, because
    ``str.format`` needs those arguments too.

    Args:
        template: A format string using ``str.format`` syntax.

    Returns:
        The set of distinct placeholder names found in ``template``.

    Raises:
        ValueError: If ``template`` is not a well-formed format string.
    """
    placeholders: set[str] = set()
    # parse() only tokenizes the string, so a local Formatter is sufficient.
    parser = Formatter()
    pending = [template]
    while pending:
        for _, field_name, format_spec, _ in parser.parse(pending.pop()):
            if format_spec:
                # A format spec may itself contain replacement fields; scan it too.
                pending.append(format_spec)
            if not field_name:
                continue
            placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
    return placeholders
|
||||
|
||||
|
||||
def format_template(template: str, **kwargs: object) -> str:
    """Fill ``template`` via ``str.format`` using the supplied keyword arguments.

    Args:
        template: A format string using ``str.format`` syntax.
        **kwargs: Values made available to the template's named fields.

    Returns:
        The formatted string.
    """
    rendered = template.format(**kwargs)
    return rendered
|
||||
|
||||
@@ -26,6 +26,7 @@ class I18nManager:
|
||||
self._locale_override: ContextVar[str | None] = ContextVar("maibot_locale", default=None)
|
||||
self._warning_cache: set[tuple[str, str, str]] = set()
|
||||
self._cache_lock = threading.RLock()
|
||||
self._warning_lock = threading.Lock()
|
||||
|
||||
def set_locale(self, locale: str) -> str:
|
||||
self._default_locale = normalize_locale(locale)
|
||||
@@ -175,23 +176,26 @@ class I18nManager:
|
||||
if normalized_locale in self._catalog_cache:
|
||||
return self._catalog_cache[normalized_locale]
|
||||
|
||||
try:
|
||||
catalog = load_locale_catalog(normalized_locale, self._locales_root)
|
||||
except I18nError as exc:
|
||||
self._log_once(
|
||||
("load_failed", normalized_locale, exc.__class__.__name__),
|
||||
logging.WARNING,
|
||||
"加载 locale '%s' 失败: %s",
|
||||
normalized_locale,
|
||||
exc,
|
||||
)
|
||||
catalog = {}
|
||||
try:
|
||||
catalog = load_locale_catalog(normalized_locale, self._locales_root)
|
||||
except I18nError as exc:
|
||||
self._log_once(
|
||||
("load_failed", normalized_locale, exc.__class__.__name__),
|
||||
logging.WARNING,
|
||||
"加载 locale '%s' 失败: %s",
|
||||
normalized_locale,
|
||||
exc,
|
||||
)
|
||||
catalog = {}
|
||||
|
||||
with self._cache_lock:
|
||||
if normalized_locale in self._catalog_cache:
|
||||
return self._catalog_cache[normalized_locale]
|
||||
self._catalog_cache[normalized_locale] = catalog
|
||||
return catalog
|
||||
|
||||
def _log_once(self, cache_key: tuple[str, str, str], level: int, message: str, *args: object) -> None:
|
||||
with self._cache_lock:
|
||||
with self._warning_lock:
|
||||
if cache_key in self._warning_cache:
|
||||
return
|
||||
self._warning_cache.add(cache_key)
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from string import Formatter
|
||||
|
||||
import logging
|
||||
import os
|
||||
@@ -9,14 +8,13 @@ import re
|
||||
import threading
|
||||
|
||||
from .i18n import get_locale, t
|
||||
from .i18n.loaders import DEFAULT_LOCALE, normalize_locale
|
||||
from .i18n.loaders import DEFAULT_LOCALE, extract_placeholders as extract_prompt_placeholders, normalize_locale
|
||||
|
||||
logger = logging.getLogger("maibot.prompt_i18n")
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
||||
PROMPTS_ROOT = (PROJECT_ROOT / "prompts").resolve()
|
||||
PROMPT_EXTENSIONS = (".prompt")
|
||||
FORMATTER = Formatter()
|
||||
PROMPT_EXTENSIONS = (".prompt",)
|
||||
SAFE_SEGMENT_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$")
|
||||
STRICT_ENV_KEYS = ("MAIBOT_PROMPT_I18N_STRICT", "MAIBOT_I18N_STRICT")
|
||||
|
||||
@@ -24,15 +22,6 @@ _prompt_cache: dict[Path, str] = {}
|
||||
_cache_lock = threading.RLock()
|
||||
|
||||
|
||||
def extract_prompt_placeholders(template: str) -> set[str]:
|
||||
placeholders: set[str] = set()
|
||||
for _, field_name, _, _ in FORMATTER.parse(template):
|
||||
if not field_name:
|
||||
continue
|
||||
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
|
||||
return placeholders
|
||||
|
||||
|
||||
def get_prompts_root(prompts_root: Path | None = None) -> Path:
    """Return the resolved prompt root, defaulting to ``PROMPTS_ROOT``.

    Args:
        prompts_root: Optional override directory.

    Returns:
        ``prompts_root`` (or ``PROMPTS_ROOT`` when the argument is falsy)
        after ``Path.resolve()``.
    """
    base_dir = prompts_root if prompts_root else PROMPTS_ROOT
    return base_dir.resolve()
|
||||
|
||||
@@ -70,17 +59,11 @@ def is_strict_prompt_i18n_mode() -> bool:
|
||||
return any(os.getenv(env_key, "").strip().lower() in {"1", "true", "yes", "on"} for env_key in STRICT_ENV_KEYS)
|
||||
|
||||
|
||||
def _supported_prompt_files(directory: Path, recursive: bool = True) -> list[Path]:
    """List files under ``directory`` whose suffix is in ``PROMPT_EXTENSIONS``.

    Args:
        directory: Directory to search.
        recursive: When True (default) descend into subdirectories via
            ``rglob``; when False only match direct children via ``glob``.

    Returns:
        Sorted, de-duplicated list of matching paths for which
        ``Path.is_file()`` is true.
    """
    search = directory.rglob if recursive else directory.glob
    # A bare string would be iterated character by character (the old
    # ``(".prompt")`` declaration did exactly that), so treat it as one suffix.
    suffixes = (PROMPT_EXTENSIONS,) if isinstance(PROMPT_EXTENSIONS, str) else PROMPT_EXTENSIONS
    matched_files = {path for suffix in suffixes for path in search(f"*{suffix}") if path.is_file()}
    return sorted(matched_files)


def _supported_prompt_files_non_recursive(directory: Path) -> list[Path]:
    """Return direct-child prompt files; thin alias kept for existing callers."""
    return _supported_prompt_files(directory, recursive=False)
|
||||
|
||||
|
||||
@@ -104,20 +87,20 @@ def _scan_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Pat
|
||||
return prompt_paths
|
||||
|
||||
|
||||
def _scan_legacy_prompt_directory(directory: Path) -> dict[str, Path]:
|
||||
def _scan_legacy_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Path]:
|
||||
prompt_paths: dict[str, Path] = {}
|
||||
if not directory.exists():
|
||||
return prompt_paths
|
||||
|
||||
for prompt_path in _supported_prompt_files_non_recursive(directory):
|
||||
for prompt_path in _supported_prompt_files(directory, recursive=False):
|
||||
prompt_name = prompt_path.stem
|
||||
if prompt_name in prompt_paths:
|
||||
raise ValueError(
|
||||
t(
|
||||
"prompt.duplicate_template_name",
|
||||
name=prompt_name,
|
||||
path_a=prompt_paths[prompt_name].relative_to(get_prompts_root(directory)),
|
||||
path_b=prompt_path.relative_to(get_prompts_root(directory)),
|
||||
path_a=prompt_paths[prompt_name].relative_to(prompts_root),
|
||||
path_b=prompt_path.relative_to(prompts_root),
|
||||
)
|
||||
)
|
||||
prompt_paths[prompt_name] = prompt_path
|
||||
@@ -128,7 +111,7 @@ def list_prompt_templates(locale: str | None = None, prompts_root: Path | None =
|
||||
resolved_prompts_root = get_prompts_root(prompts_root)
|
||||
requested_locale = normalize_locale(locale or get_locale())
|
||||
|
||||
prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root)
|
||||
prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root, resolved_prompts_root)
|
||||
prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / DEFAULT_LOCALE, resolved_prompts_root))
|
||||
|
||||
if requested_locale != DEFAULT_LOCALE:
|
||||
@@ -176,10 +159,11 @@ def load_prompt(
|
||||
prompt_path = resolve_prompt_path(name=name, locale=locale, category=category, prompts_root=prompts_root)
|
||||
with _cache_lock:
|
||||
template = _prompt_cache.get(prompt_path)
|
||||
if template is None:
|
||||
with open(prompt_path, "r", encoding="utf-8") as prompt_file:
|
||||
template = prompt_file.read()
|
||||
_prompt_cache[prompt_path] = template
|
||||
if template is None:
|
||||
template = prompt_path.read_text(encoding="utf-8")
|
||||
with _cache_lock:
|
||||
_prompt_cache.setdefault(prompt_path, template)
|
||||
template = _prompt_cache[prompt_path]
|
||||
|
||||
if not kwargs:
|
||||
return template
|
||||
|
||||
Reference in New Issue
Block a user