i18n: localize prompt templates with crowdin

This commit is contained in:
春河晴
2026-03-13 00:09:01 +09:00
parent 68f0ac3803
commit c9bfedddd2
46 changed files with 1156 additions and 17 deletions

211
src/common/prompt_i18n.py Normal file
View File

@@ -0,0 +1,211 @@
from __future__ import annotations
from pathlib import Path
from string import Formatter
import logging
import os
import re
import threading
from .i18n import get_locale, t
from .i18n.loaders import DEFAULT_LOCALE, normalize_locale
logger = logging.getLogger("maibot.prompt_i18n")
PROJECT_ROOT = Path(__file__).resolve().parents[2]
PROMPTS_ROOT = (PROJECT_ROOT / "prompts").resolve()
PROMPT_EXTENSIONS = (".prompt", ".txt")
FORMATTER = Formatter()
SAFE_SEGMENT_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$")
STRICT_ENV_KEYS = ("MAIBOT_PROMPT_I18N_STRICT", "MAIBOT_I18N_STRICT")
_prompt_cache: dict[Path, str] = {}
_cache_lock = threading.RLock()
def extract_prompt_placeholders(template: str) -> set[str]:
placeholders: set[str] = set()
for _, field_name, _, _ in FORMATTER.parse(template):
if not field_name:
continue
placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0])
return placeholders
def get_prompts_root(prompts_root: Path | None = None) -> Path:
return (prompts_root or PROMPTS_ROOT).resolve()
def normalize_prompt_name(name: str) -> str:
candidate_name = name.strip()
for suffix in PROMPT_EXTENSIONS:
if candidate_name.endswith(suffix):
candidate_name = candidate_name[: -len(suffix)]
break
if candidate_name in {".", ".."} or not candidate_name or not SAFE_SEGMENT_PATTERN.fullmatch(candidate_name):
raise ValueError(t("prompt.invalid_name", name=name))
return candidate_name
def normalize_prompt_category(category: str | None) -> str | None:
if category is None:
return None
category_parts = [part for part in category.strip().split("/") if part]
if not category_parts:
raise ValueError(t("prompt.invalid_category", category=category))
for part in category_parts:
if part in {".", ".."} or not SAFE_SEGMENT_PATTERN.fullmatch(part):
raise ValueError(t("prompt.invalid_category", category=category))
return "/".join(category_parts)
def is_strict_prompt_i18n_mode() -> bool:
if os.getenv("PYTEST_CURRENT_TEST"):
return True
return any(os.getenv(env_key, "").strip().lower() in {"1", "true", "yes", "on"} for env_key in STRICT_ENV_KEYS)
def _supported_prompt_files(directory: Path) -> list[Path]:
matched_files: list[Path] = []
for suffix in PROMPT_EXTENSIONS:
matched_files.extend(path for path in directory.rglob(f"*{suffix}") if path.is_file())
return sorted(set(matched_files))
def _supported_prompt_files_non_recursive(directory: Path) -> list[Path]:
matched_files: list[Path] = []
for suffix in PROMPT_EXTENSIONS:
matched_files.extend(path for path in directory.glob(f"*{suffix}") if path.is_file())
return sorted(set(matched_files))
def _scan_prompt_directory(directory: Path, prompts_root: Path) -> dict[str, Path]:
prompt_paths: dict[str, Path] = {}
if not directory.exists():
return prompt_paths
for prompt_path in _supported_prompt_files(directory):
prompt_name = prompt_path.stem
if prompt_name in prompt_paths:
raise ValueError(
t(
"prompt.duplicate_template_name",
name=prompt_name,
path_a=prompt_paths[prompt_name].relative_to(prompts_root),
path_b=prompt_path.relative_to(prompts_root),
)
)
prompt_paths[prompt_name] = prompt_path
return prompt_paths
def _scan_legacy_prompt_directory(directory: Path) -> dict[str, Path]:
prompt_paths: dict[str, Path] = {}
if not directory.exists():
return prompt_paths
for prompt_path in _supported_prompt_files_non_recursive(directory):
prompt_name = prompt_path.stem
if prompt_name in prompt_paths:
raise ValueError(
t(
"prompt.duplicate_template_name",
name=prompt_name,
path_a=prompt_paths[prompt_name].relative_to(get_prompts_root(directory)),
path_b=prompt_path.relative_to(get_prompts_root(directory)),
)
)
prompt_paths[prompt_name] = prompt_path
return prompt_paths
def list_prompt_templates(locale: str | None = None, prompts_root: Path | None = None) -> dict[str, Path]:
resolved_prompts_root = get_prompts_root(prompts_root)
requested_locale = normalize_locale(locale or get_locale())
prompt_paths = _scan_legacy_prompt_directory(resolved_prompts_root)
prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / DEFAULT_LOCALE, resolved_prompts_root))
if requested_locale != DEFAULT_LOCALE:
prompt_paths.update(_scan_prompt_directory(resolved_prompts_root / requested_locale, resolved_prompts_root))
return prompt_paths
def resolve_prompt_path(name: str, locale: str | None = None, category: str | None = None, prompts_root: Path | None = None) -> Path:
resolved_prompts_root = get_prompts_root(prompts_root)
normalized_name = normalize_prompt_name(name)
normalized_category = normalize_prompt_category(category)
requested_locale = normalize_locale(locale or get_locale())
locale_candidates: list[str | None] = [requested_locale]
if requested_locale != DEFAULT_LOCALE:
locale_candidates.append(DEFAULT_LOCALE)
locale_candidates.append(None)
if normalized_category is not None:
for locale_candidate in locale_candidates:
base_dir = resolved_prompts_root if locale_candidate is None else resolved_prompts_root / locale_candidate
for suffix in PROMPT_EXTENSIONS:
candidate_paths = [(base_dir / normalized_category / f"{normalized_name}{suffix}").resolve()]
# 允许带 category 的调用在旧版平铺目录或未迁移完的 locale 目录中继续工作。
candidate_paths.append((base_dir / f"{normalized_name}{suffix}").resolve())
for candidate_path in candidate_paths:
if candidate_path.is_file():
return candidate_path
else:
prompt_paths = list_prompt_templates(locale=requested_locale, prompts_root=resolved_prompts_root)
if normalized_name in prompt_paths:
return prompt_paths[normalized_name]
raise FileNotFoundError(t("prompt.template_not_found", locale=requested_locale, name=normalized_name))
def load_prompt(
name: str,
locale: str | None = None,
category: str | None = None,
prompts_root: Path | None = None,
**kwargs: object,
) -> str:
prompt_path = resolve_prompt_path(name=name, locale=locale, category=category, prompts_root=prompts_root)
with _cache_lock:
template = _prompt_cache.get(prompt_path)
if template is None:
with open(prompt_path, "r", encoding="utf-8") as prompt_file:
template = prompt_file.read()
_prompt_cache[prompt_path] = template
if not kwargs:
return template
try:
return template.format(**kwargs)
except KeyError as exc:
missing_placeholder = exc.args[0]
error = KeyError(
t(
"prompt.missing_placeholder",
name=normalize_prompt_name(name),
placeholder=missing_placeholder,
)
)
if is_strict_prompt_i18n_mode():
raise error from exc
logger.error("%s", error)
return template
except Exception as exc:
logger.error(t("prompt.format_failed", name=normalize_prompt_name(name), error=exc))
if is_strict_prompt_i18n_mode():
raise
return template
def clear_prompt_cache() -> None:
with _cache_lock:
_prompt_cache.clear()