feat: 增强国际化验证功能,添加对共享翻译字符串的支持,优化提示模板加载逻辑

This commit is contained in:
春河晴
2026-03-13 01:42:22 +09:00
parent 55eb911dd3
commit d418a8451b
6 changed files with 217 additions and 178 deletions

View File

@@ -18,9 +18,10 @@ from src.common.i18n.loaders import ( # noqa: E402
)
from src.common.i18n.loaders import extract_placeholders # noqa: E402
from src.common.prompt_i18n import ( # noqa: E402
PROMPT_EXTENSIONS,
discover_prompt_locales,
extract_prompt_placeholders,
get_prompts_root,
iter_prompt_files,
)
HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]")
@@ -36,6 +37,18 @@ def iter_translation_strings(value: TranslationValue) -> list[str]:
return [value[category] for category in sorted(value.keys())]
def iter_shared_translation_strings(
source_value: TranslationValue, target_value: TranslationValue
) -> list[tuple[str, str]]:
if isinstance(source_value, str) or isinstance(target_value, str):
if isinstance(source_value, str) and isinstance(target_value, str):
return [(source_value, target_value)]
return []
shared_categories = sorted(set(source_value.keys()) & set(target_value.keys()))
return [(source_value[category], target_value[category]) for category in shared_categories]
def locale_requires_latin_only_validation(locale: str) -> bool:
normalized_locale = locale.lower()
return normalized_locale == "en" or normalized_locale.startswith("en-")
@@ -48,14 +61,15 @@ def validate_locale_content(
locale: str,
errors: list[str],
) -> None:
source_texts = iter_translation_strings(source_value)
target_texts = iter_translation_strings(target_value)
if any(
source_text == target_text and contains_han_characters(source_text)
for source_text, target_text in zip(source_texts, target_texts, strict=False)
for source_text, target_text in iter_shared_translation_strings(source_value, target_value)
):
errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案(仓库级校验策略),请提供目标语言翻译")
errors.append(
f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案(仓库级校验策略),请提供目标语言翻译"
)
if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts):
errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交")
@@ -121,12 +135,9 @@ def validate_json_locales(locales_root: Path | None = None) -> list[str]:
continue
locale_keys = set(catalog.keys())
missing_keys = sorted(source_keys - locale_keys)
extra_keys = sorted(locale_keys - source_keys)
for key in missing_keys:
for key in sorted(source_keys - locale_keys):
errors.append(f"[{locale}] 缺少 key: {key}")
for key in extra_keys:
for key in sorted(locale_keys - source_keys):
errors.append(f"[{locale}] 存在多余 key: {key}")
for key in sorted(source_keys & locale_keys):
@@ -139,25 +150,13 @@ def validate_json_locales(locales_root: Path | None = None) -> list[str]:
return errors
def discover_prompt_locales(prompts_root: Path | None = None) -> list[str]:
resolved_prompts_root = get_prompts_root(prompts_root)
if not resolved_prompts_root.exists():
return []
locale_names = [path.name for path in resolved_prompts_root.iterdir() if path.is_dir()]
return sorted(locale_names)
def iter_prompt_files(locale_dir: Path) -> list[Path]:
prompt_files: list[Path] = []
for extension in PROMPT_EXTENSIONS:
prompt_files.extend(path for path in locale_dir.rglob(f"*{extension}") if path.is_file())
return sorted(set(prompt_files))
def build_prompt_catalog(locale_dir: Path) -> dict[Path, Path]:
return {path.relative_to(locale_dir): path for path in iter_prompt_files(locale_dir)}
def validate_prompt_templates(prompts_root: Path | None = None) -> tuple[list[str], list[str]]:
resolved_prompts_root = get_prompts_root(prompts_root)
prompt_locales = discover_prompt_locales(resolved_prompts_root)
prompt_locales = set(discover_prompt_locales(resolved_prompts_root))
known_locales = [locale for locale in discover_locales(get_locales_root()) if locale != DEFAULT_LOCALE]
errors: list[str] = []
warnings: list[str] = []
@@ -167,7 +166,8 @@ def validate_prompt_templates(prompts_root: Path | None = None) -> tuple[list[st
return errors, warnings
source_dir = resolved_prompts_root / DEFAULT_LOCALE
source_files = {path.relative_to(source_dir): path for path in iter_prompt_files(source_dir)}
source_files = build_prompt_catalog(source_dir)
source_relative_paths = set(source_files.keys())
for locale in known_locales:
locale_dir = resolved_prompts_root / locale
@@ -175,8 +175,7 @@ def validate_prompt_templates(prompts_root: Path | None = None) -> tuple[list[st
warnings.append(f"[prompt:{locale}] 缺少 locale 目录,运行时将回退到 {DEFAULT_LOCALE}")
continue
locale_files = {path.relative_to(locale_dir): path for path in iter_prompt_files(locale_dir)}
source_relative_paths = set(source_files.keys())
locale_files = build_prompt_catalog(locale_dir)
locale_relative_paths = set(locale_files.keys())
for relative_path in sorted(source_relative_paths - locale_relative_paths):