fix(i18n): 修复 PROMPT_EXTENSIONS 元组声明、消除重复代码、优化锁策略
- fix: PROMPT_EXTENSIONS = (".prompt") 是字符串非元组,改为 (".prompt",)
- refactor: 将 extract_placeholders/format_template 统一到 loaders.py,消除 formatting.py、prompt_i18n.py、i18n_validate.py 三处重复
- perf: _get_catalog 和 load_prompt 改为双重检查锁定,I/O 不再阻塞其他线程
- perf: _log_once 使用独立 _warning_lock,不再与 _cache_lock 竞争
- fix: _scan_legacy_prompt_directory 添加 prompts_root 参数,修正 relative_to 语义
- refactor: 合并 _supported_prompt_files 两个变体为单函数 + recursive 参数
- docs: i18n.md 强化 repository-specific 校验策略标注,修正时间表述冗余
- fix: 验证脚本错误消息移除 Crowdin 暗示,标注为仓库级校验策略
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -87,7 +87,7 @@ Prompt 加载规则:
|
|||||||
|
|
||||||
对于非 `zh-CN` 的目标 locale:
|
对于非 `zh-CN` 的目标 locale:
|
||||||
|
|
||||||
- 下面这两条是当前仓库的额外校验策略,不是 Crowdin 默认行为。
|
- 下面这两条是本仓库的 repository-specific 校验策略,不是 Crowdin 默认行为。
|
||||||
- 不要手工把中文 source 文案直接复制进目标语言文件后提交。
|
- 不要手工把中文 source 文案直接复制进目标语言文件后提交。
|
||||||
- 英文 locale 文件中不应保留中文字符;这类残留会被校验脚本拦截。
|
- 英文 locale 文件中不应保留中文字符;这类残留会被校验脚本拦截。
|
||||||
|
|
||||||
@@ -130,7 +130,7 @@ python scripts/i18n_extract_candidates.py
|
|||||||
项目根目录的 [`crowdin.yml`](../crowdin.yml) 使用 `locales/zh-CN/*.json` 作为 source。
|
项目根目录的 [`crowdin.yml`](../crowdin.yml) 使用 `locales/zh-CN/*.json` 作为 source。
|
||||||
现在也会把 `prompts/zh-CN/**/*.prompt` 作为单文件 Prompt 模板 source 上传到 Crowdin。
|
现在也会把 `prompts/zh-CN/**/*.prompt` 作为单文件 Prompt 模板 source 上传到 Crowdin。
|
||||||
|
|
||||||
GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source,并下载当时 Crowdin 中当前可用的翻译结果。
|
GitHub Actions 中的 [`crowdin-sync.yml`](../.github/workflows/crowdin-sync.yml) 会在 workflow 运行时上传 source,并下载当时 Crowdin 中可用的翻译结果。
|
||||||
|
|
||||||
常用命令:
|
常用命令:
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from string import Formatter
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
@@ -17,25 +16,16 @@ from src.common.i18n.loaders import ( # noqa: E402
|
|||||||
get_locales_root,
|
get_locales_root,
|
||||||
load_locale_catalog,
|
load_locale_catalog,
|
||||||
)
|
)
|
||||||
|
from src.common.i18n.loaders import extract_placeholders # noqa: E402
|
||||||
from src.common.prompt_i18n import ( # noqa: E402
|
from src.common.prompt_i18n import ( # noqa: E402
|
||||||
PROMPT_EXTENSIONS,
|
PROMPT_EXTENSIONS,
|
||||||
extract_prompt_placeholders,
|
extract_prompt_placeholders,
|
||||||
get_prompts_root,
|
get_prompts_root,
|
||||||
)
|
)
|
||||||
|
|
||||||
FORMATTER = Formatter()
|
|
||||||
HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]")
|
HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]")
|
||||||
|
|
||||||
|
|
||||||
def extract_placeholders(template: str) -> set[str]:
|
|
||||||
placeholders: set[str] = set()
|
|
||||||
for _, field_name, _, _ in FORMATTER.parse(template):
|
|
||||||
if not field_name:
|
|
||||||
continue
|
|
||||||
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
|
|
||||||
return placeholders
|
|
||||||
|
|
||||||
|
|
||||||
def contains_han_characters(text: str) -> bool:
|
def contains_han_characters(text: str) -> bool:
|
||||||
return HAN_CHARACTER_PATTERN.search(text) is not None
|
return HAN_CHARACTER_PATTERN.search(text) is not None
|
||||||
|
|
||||||
@@ -65,7 +55,7 @@ def validate_locale_content(
|
|||||||
source_text == target_text and contains_han_characters(source_text)
|
source_text == target_text and contains_han_characters(source_text)
|
||||||
for source_text, target_text in zip(source_texts, target_texts, strict=False)
|
for source_text, target_text in zip(source_texts, target_texts, strict=False)
|
||||||
):
|
):
|
||||||
errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案,请通过 Crowdin 提供目标语言翻译")
|
errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案(仓库级校验策略),请提供目标语言翻译")
|
||||||
|
|
||||||
if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts):
|
if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts):
|
||||||
errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交")
|
errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交")
|
||||||
@@ -218,6 +208,16 @@ def validate_prompt_templates(prompts_root: Path | None = None) -> tuple[list[st
|
|||||||
return errors, warnings
|
return errors, warnings
|
||||||
|
|
||||||
|
|
||||||
|
def _print_warnings(warnings: list[str]) -> None:
|
||||||
|
if not warnings:
|
||||||
|
return
|
||||||
|
print(f"warnings ({len(warnings)}):")
|
||||||
|
for warning in warnings[:10]:
|
||||||
|
print(f" - {warning}")
|
||||||
|
if len(warnings) > 10:
|
||||||
|
print(f" - ... 另外还有 {len(warnings) - 10} 条 warning")
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
errors = validate_json_locales()
|
errors = validate_json_locales()
|
||||||
prompt_errors, prompt_warnings = validate_prompt_templates()
|
prompt_errors, prompt_warnings = validate_prompt_templates()
|
||||||
@@ -227,21 +227,11 @@ def main() -> int:
|
|||||||
print("i18n validation failed:")
|
print("i18n validation failed:")
|
||||||
for error in errors:
|
for error in errors:
|
||||||
print(f" - {error}")
|
print(f" - {error}")
|
||||||
if prompt_warnings:
|
_print_warnings(prompt_warnings)
|
||||||
print(f"warnings ({len(prompt_warnings)}):")
|
|
||||||
for warning in prompt_warnings[:10]:
|
|
||||||
print(f" - {warning}")
|
|
||||||
if len(prompt_warnings) > 10:
|
|
||||||
print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
|
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
print("i18n validation passed.")
|
print("i18n validation passed.")
|
||||||
if prompt_warnings:
|
_print_warnings(prompt_warnings)
|
||||||
print(f"warnings ({len(prompt_warnings)}):")
|
|
||||||
for warning in prompt_warnings[:10]:
|
|
||||||
print(f" - {warning}")
|
|
||||||
if len(prompt_warnings) > 10:
|
|
||||||
print(f" - ... 另外还有 {len(prompt_warnings) - 10} 条 warning")
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,28 +2,14 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from datetime import date, datetime, time
|
from datetime import date, datetime, time
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from string import Formatter
|
|
||||||
|
|
||||||
from babel import Locale
|
from babel import Locale
|
||||||
from babel.dates import format_datetime as babel_format_datetime
|
from babel.dates import format_datetime as babel_format_datetime
|
||||||
from babel.numbers import format_decimal as babel_format_decimal
|
from babel.numbers import format_decimal as babel_format_decimal
|
||||||
|
|
||||||
from .loaders import DEFAULT_LOCALE, to_babel_locale
|
from .loaders import DEFAULT_LOCALE, extract_placeholders, format_template, to_babel_locale
|
||||||
|
|
||||||
FORMATTER = Formatter()
|
__all__ = ["extract_placeholders", "format_template"]
|
||||||
|
|
||||||
|
|
||||||
def extract_placeholders(template: str) -> set[str]:
|
|
||||||
placeholders: set[str] = set()
|
|
||||||
for _, field_name, _, _ in FORMATTER.parse(template):
|
|
||||||
if not field_name:
|
|
||||||
continue
|
|
||||||
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
|
|
||||||
return placeholders
|
|
||||||
|
|
||||||
|
|
||||||
def format_template(template: str, **kwargs: object) -> str:
|
|
||||||
return template.format(**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
def select_plural_category(locale: str, count: int | float | Decimal) -> str:
|
def select_plural_category(locale: str, count: int | float | Decimal) -> str:
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from string import Formatter
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
_FORMATTER = Formatter()
|
||||||
|
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
DuplicateTranslationKeyError,
|
DuplicateTranslationKeyError,
|
||||||
InvalidLocaleError,
|
InvalidLocaleError,
|
||||||
@@ -122,3 +125,16 @@ def load_locale_catalog(locale: str, locales_root: Path | None = None) -> dict[s
|
|||||||
)
|
)
|
||||||
merged_translations[key] = value
|
merged_translations[key] = value
|
||||||
return merged_translations
|
return merged_translations
|
||||||
|
|
||||||
|
|
||||||
|
def extract_placeholders(template: str) -> set[str]:
|
||||||
|
placeholders: set[str] = set()
|
||||||
|
for _, field_name, _, _ in _FORMATTER.parse(template):
|
||||||
|
if not field_name:
|
||||||
|
continue
|
||||||
|
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
|
||||||
|
return placeholders
|
||||||
|
|
||||||
|
|
||||||
|
def format_template(template: str, **kwargs: object) -> str:
|
||||||
|
return template.format(**kwargs)
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ class I18nManager:
|
|||||||
self._locale_override: ContextVar[str | None] = ContextVar("maibot_locale", default=None)
|
self._locale_override: ContextVar[str | None] = ContextVar("maibot_locale", default=None)
|
||||||
self._warning_cache: set[tuple[str, str, str]] = set()
|
self._warning_cache: set[tuple[str, str, str]] = set()
|
||||||
self._cache_lock = threading.RLock()
|
self._cache_lock = threading.RLock()
|
||||||
|
self._warning_lock = threading.Lock()
|
||||||
|
|
||||||
def set_locale(self, locale: str) -> str:
|
def set_locale(self, locale: str) -> str:
|
||||||
self._default_locale = normalize_locale(locale)
|
self._default_locale = normalize_locale(locale)
|
||||||
@@ -175,23 +176,26 @@ class I18nManager:
|
|||||||
if normalized_locale in self._catalog_cache:
|
if normalized_locale in self._catalog_cache:
|
||||||
return self._catalog_cache[normalized_locale]
|
return self._catalog_cache[normalized_locale]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
catalog = load_locale_catalog(normalized_locale, self._locales_root)
|
catalog = load_locale_catalog(normalized_locale, self._locales_root)
|
||||||
except I18nError as exc:
|
except I18nError as exc:
|
||||||
self._log_once(
|
self._log_once(
|
||||||
("load_failed", normalized_locale, exc.__class__.__name__),
|
("load_failed", normalized_locale, exc.__class__.__name__),
|
||||||
logging.WARNING,
|
logging.WARNING,
|
||||||
"加载 locale '%s' 失败: %s",
|
"加载 locale '%s' 失败: %s",
|
||||||
normalized_locale,
|
normalized_locale,
|
||||||
exc,
|
exc,
|
||||||
)
|
)
|
||||||
catalog = {}
|
catalog = {}
|
||||||
|
|
||||||
|
with self._cache_lock:
|
||||||
|
if normalized_locale in self._catalog_cache:
|
||||||
|
return self._catalog_cache[normalized_locale]
|
||||||
self._catalog_cache[normalized_locale] = catalog
|
self._catalog_cache[normalized_locale] = catalog
|
||||||
return catalog
|
return catalog
|
||||||
|
|
||||||
def _log_once(self, cache_key: tuple[str, str, str], level: int, message: str, *args: object) -> None:
|
def _log_once(self, cache_key: tuple[str, str, str], level: int, message: str, *args: object) -> None:
|
||||||
with self._cache_lock:
|
with self._warning_lock:
|
||||||
if cache_key in self._warning_cache:
|
if cache_key in self._warning_cache:
|
||||||
return
|
return
|
||||||
self._warning_cache.add(cache_key)
|
self._warning_cache.add(cache_key)
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from string import Formatter
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -9,14 +8,13 @@ import re
|
|||||||
import threading
|
import threading
|
||||||
|
|
||||||
from .i18n import get_locale, t
|
from .i18n import get_locale, t
|
||||||
from .i18n.loaders import DEFAULT_LOCALE, normalize_locale
|
from .i18n.loaders import DEFAULT_LOCALE, extract_placeholders as extract_prompt_placeholders, normalize_locale
|
||||||
|
|
||||||
logger = logging.getLogger("maibot.prompt_i18n")
|
logger = logging.getLogger("maibot.prompt_i18n")
|
||||||
|
|
||||||
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
||||||
PROMPTS_ROOT = (PROJECT_ROOT / "prompts").resolve()
|
PROMPTS_ROOT = (PROJECT_ROOT / "prompts").resolve()
|
||||||
PROMPT_EXTENSIONS = (".prompt")
|
PROMPT_EXTENSIONS = (".prompt",)
|
||||||
FORMATTER = Formatter()
|
|
||||||
SAFE_SEGMENT_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$")
|
SAFE_SEGMENT_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$")
|
||||||
STRICT_ENV_KEYS = ("MAIBOT_PROMPT_I18N_STRICT", "MAIBOT_I18N_STRICT")
|
STRICT_ENV_KEYS = ("MAIBOT_PROMPT_I18N_STRICT", "MAIBOT_I18N_STRICT")
|
||||||
|
|
||||||
@@ -24,15 +22,6 @@ _prompt_cache: dict[Path, str] = {}
|
|||||||
_cache_lock = threading.RLock()
|
_cache_lock = threading.RLock()
|
||||||
|
|
||||||
|
|
||||||
def extract_prompt_placeholders(template: str) -> set[str]:
|
|
||||||
placeholders: set[str] = set()
|
|
||||||
for _, field_name, _, _ in FORMATTER.parse(template):
|
|
||||||
if not field_name:
|
|
||||||
continue
|
|
||||||
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
|
|
||||||
return placeholders
|
|
||||||
|
|
||||||
|
|
||||||
def get_prompts_root(prompts_root: Path | None = None) -> Path:
|
def get_prompts_root(prompts_root: Path | None = None) -> Path:
|
||||||
return (prompts_root or PROMPTS_ROOT).resolve()
|
return (prompts_root or PROMPTS_ROOT).resolve()
|
||||||
|
|
||||||
@@ -70,17 +59,11 @@ def is_strict_prompt_i18n_mode() -> bool:
|
|||||||
return any(os.getenv(env_key, "").strip().lower() in {"1", "true", "yes", "on"} for env_key in STRICT_ENV_KEYS)
|
return any(os.getenv(env_key, "").strip().lower() in {"1", "true", "yes", "on"} for env_key in STRICT_ENV_KEYS)
|
||||||
|
|
||||||
|
|
||||||
def _supported_prompt_files(directory: Path) -> list[Path]:
|
def _supported_prompt_files(directory: Path, recursive: bool = True) -> list[Path]:
|
||||||
|
search = directory.rglob if recursive else directory.glob
|
||||||
matched_files: list[Path] = []
|
matched_files: list[Path] = []
|
||||||
for suffix in PROMPT_EXTENSIONS:
|
for suffix in PROMPT_EXTENSIONS:
|
||||||
matched_files.extend(path for path in directory.rglob(f"*{suffix}") if path.is_file())
|
matched_files.extend(path for path in search(f"*{suffix}") if path.is_file())
|
||||||
return sorted(set(matched_files))
|
|
||||||
|
|
||||||
|
|
||||||
def _supported_prompt_files_non_recursive(directory: Path) -> list[Path]:
|
|
||||||
matched_files: list[Path] = []
|
|
||||||
for suffix in PROMPT_EXTENSIONS:
|
|
||||||
matched_files.extend(path for path in directory.glob(f"*{suffix}") if path.is_file())
|
|
||||||
return sorted(set(matched_files))
|
return sorted(set(matched_files))
|
||||||
|
|
||||||
|
|
||||||
@@ -104,20 +87,20 @@ def _scan_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Pat
|
|||||||
return prompt_paths
|
return prompt_paths
|
||||||
|
|
||||||
|
|
||||||
def _scan_legacy_prompt_directory(directory: Path) -> dict[str, Path]:
|
def _scan_legacy_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Path]:
|
||||||
prompt_paths: dict[str, Path] = {}
|
prompt_paths: dict[str, Path] = {}
|
||||||
if not directory.exists():
|
if not directory.exists():
|
||||||
return prompt_paths
|
return prompt_paths
|
||||||
|
|
||||||
for prompt_path in _supported_prompt_files_non_recursive(directory):
|
for prompt_path in _supported_prompt_files(directory, recursive=False):
|
||||||
prompt_name = prompt_path.stem
|
prompt_name = prompt_path.stem
|
||||||
if prompt_name in prompt_paths:
|
if prompt_name in prompt_paths:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
t(
|
t(
|
||||||
"prompt.duplicate_template_name",
|
"prompt.duplicate_template_name",
|
||||||
name=prompt_name,
|
name=prompt_name,
|
||||||
path_a=prompt_paths[prompt_name].relative_to(get_prompts_root(directory)),
|
path_a=prompt_paths[prompt_name].relative_to(prompts_root),
|
||||||
path_b=prompt_path.relative_to(get_prompts_root(directory)),
|
path_b=prompt_path.relative_to(prompts_root),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
prompt_paths[prompt_name] = prompt_path
|
prompt_paths[prompt_name] = prompt_path
|
||||||
@@ -128,7 +111,7 @@ def list_prompt_templates(locale: str | None = None, prompts_root: Path | None =
|
|||||||
resolved_prompts_root = get_prompts_root(prompts_root)
|
resolved_prompts_root = get_prompts_root(prompts_root)
|
||||||
requested_locale = normalize_locale(locale or get_locale())
|
requested_locale = normalize_locale(locale or get_locale())
|
||||||
|
|
||||||
prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root)
|
prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root, resolved_prompts_root)
|
||||||
prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / DEFAULT_LOCALE, resolved_prompts_root))
|
prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / DEFAULT_LOCALE, resolved_prompts_root))
|
||||||
|
|
||||||
if requested_locale != DEFAULT_LOCALE:
|
if requested_locale != DEFAULT_LOCALE:
|
||||||
@@ -176,10 +159,11 @@ def load_prompt(
|
|||||||
prompt_path = resolve_prompt_path(name=name, locale=locale, category=category, prompts_root=prompts_root)
|
prompt_path = resolve_prompt_path(name=name, locale=locale, category=category, prompts_root=prompts_root)
|
||||||
with _cache_lock:
|
with _cache_lock:
|
||||||
template = _prompt_cache.get(prompt_path)
|
template = _prompt_cache.get(prompt_path)
|
||||||
if template is None:
|
if template is None:
|
||||||
with open(prompt_path, "r", encoding="utf-8") as prompt_file:
|
template = prompt_path.read_text(encoding="utf-8")
|
||||||
template = prompt_file.read()
|
with _cache_lock:
|
||||||
_prompt_cache[prompt_path] = template
|
_prompt_cache.setdefault(prompt_path, template)
|
||||||
|
template = _prompt_cache[prompt_path]
|
||||||
|
|
||||||
if not kwargs:
|
if not kwargs:
|
||||||
return template
|
return template
|
||||||
|
|||||||
Reference in New Issue
Block a user