feat: 更新 Crowdin 工作流以支持 dashboard WebUI 的本地化文件,添加相关验证逻辑

This commit is contained in:
春河晴
2026-03-13 20:09:21 +09:00
parent 5f5ff4ce8e
commit 5da82c4e24
8 changed files with 283 additions and 15 deletions

View File

@@ -1,7 +1,9 @@
from __future__ import annotations
from pathlib import Path
from typing import Callable
import json
import re
import sys
@@ -9,12 +11,19 @@ PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from src.common.i18n.exceptions import ( # noqa: E402
DuplicateTranslationKeyError,
InvalidTranslationFileError,
LocaleNotFoundError,
)
from src.common.i18n.loaders import ( # noqa: E402
DEFAULT_LOCALE,
PLURAL_CATEGORIES,
TranslationValue,
discover_locales,
get_locales_root,
load_locale_catalog,
validate_translation_value,
)
from src.common.i18n.loaders import extract_placeholders # noqa: E402
from src.common.prompt_i18n import ( # noqa: E402
@@ -25,12 +34,22 @@ from src.common.prompt_i18n import ( # noqa: E402
)
# Matches a single Han character from the CJK Unified Ideographs Extension A
# (U+3400-U+4DBF), CJK Unified Ideographs (U+4E00-U+9FFF) or CJK Compatibility
# Ideographs (U+F900-U+FAFF) blocks.
HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]")

# Matches an i18next interpolation such as ``{{name}}``, ``{{ user.name }}``,
# ``{{ count, number }}`` or the unescaped form ``{{- name}}``. Group 1 is the
# variable expression; an optional ``, format`` suffix is skipped.
# The optional leading "-" is i18next's unescape prefix: without it,
# ``{{- name}}`` would not match at all and ``{{-name}}`` would be captured as
# "-name", so such placeholders would escape the source/target comparison.
I18NEXT_PLACEHOLDER_PATTERN = re.compile(r"\{\{-?\s*([^\s,}]+)(?:\s*,[^}]*)?\s*\}\}")

# Locale whose dashboard JSON file is treated as the translation source.
DASHBOARD_DEFAULT_LOCALE = "zh"
def contains_han_characters(text: str) -> bool:
    """Return True when *text* contains at least one character matched by HAN_CHARACTER_PATTERN."""
    # A regex match object is always truthy, so bool() mirrors an "is not None" check.
    return bool(HAN_CHARACTER_PATTERN.search(text))
def extract_i18next_placeholders(template: str) -> set[str]:
    """Collect the root variable names of all i18next placeholders in *template*.

    Each placeholder expression found by I18NEXT_PLACEHOLDER_PATTERN is reduced
    to the part before its first "." or "[", so ``user.name`` and ``items[0]``
    yield ``user`` and ``items`` respectively.
    """
    return {
        found.group(1).partition(".")[0].partition("[")[0]
        for found in I18NEXT_PLACEHOLDER_PATTERN.finditer(template)
    }
def iter_translation_strings(value: TranslationValue) -> list[str]:
if isinstance(value, str):
return [value]
@@ -60,7 +79,9 @@ def validate_locale_content(
target_value: TranslationValue,
locale: str,
errors: list[str],
locale_label: str | None = None,
) -> None:
resolved_locale_label = locale_label or locale
target_texts = iter_translation_strings(target_value)
if any(
@@ -68,11 +89,11 @@ def validate_locale_content(
for source_text, target_text in iter_shared_translation_strings(source_value, target_value)
):
errors.append(
f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案(仓库级校验策略),请提供目标语言翻译"
f"[{resolved_locale_label}] key '{key}' 直接保留了包含中文字符的 source 文案(仓库级校验策略),请提供目标语言翻译"
)
if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in target_texts):
errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交")
errors.append(f"[{resolved_locale_label}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交")
def validate_translation_pair(
@@ -81,32 +102,183 @@ def validate_translation_pair(
target_value: TranslationValue,
locale: str,
errors: list[str],
placeholder_extractor: Callable[[str], set[str]] = extract_placeholders,
locale_label: str | None = None,
) -> None:
resolved_locale_label = locale_label or locale
if isinstance(source_value, str):
if not isinstance(target_value, str):
errors.append(f"[{locale}] key '{key}' 与 source 的类型不一致source=string, target=plural")
errors.append(
f"[{resolved_locale_label}] key '{key}' 与 source 的类型不一致source=string, target=plural"
)
return
if extract_placeholders(source_value) != extract_placeholders(target_value):
errors.append(f"[{locale}] key '{key}' 的占位符集合与 source 不一致")
if placeholder_extractor(source_value) != placeholder_extractor(target_value):
errors.append(f"[{resolved_locale_label}] key '{key}' 的占位符集合与 source 不一致")
return
if not isinstance(target_value, dict):
errors.append(f"[{locale}] key '{key}' 与 source 的类型不一致source=plural, target=string")
errors.append(f"[{resolved_locale_label}] key '{key}' 与 source 的类型不一致source=plural, target=string")
return
source_categories = set(source_value.keys())
target_categories = set(target_value.keys())
if source_categories != target_categories:
errors.append(
f"[{locale}] key '{key}' 的 plural category 不一致:"
f"[{resolved_locale_label}] key '{key}' 的 plural category 不一致:"
f"source={sorted(source_categories)}, target={sorted(target_categories)}"
)
for category in sorted(source_categories & target_categories):
source_placeholders = extract_placeholders(source_value[category])
target_placeholders = extract_placeholders(target_value[category])
source_placeholders = placeholder_extractor(source_value[category])
target_placeholders = placeholder_extractor(target_value[category])
if source_placeholders != target_placeholders:
errors.append(f"[{locale}] key '{key}' 的 plural category '{category}' 占位符集合与 source 不一致")
errors.append(
f"[{resolved_locale_label}] key '{key}' 的 plural category '{category}' 占位符集合与 source 不一致"
)
def get_dashboard_locales_root(locales_root: Path | None = None) -> Path:
    """Return the resolved directory holding the dashboard locale JSON files.

    Args:
        locales_root: Explicit directory to use. When omitted, the directory
            ``dashboard/src/i18n/locales`` under PROJECT_ROOT is used.

    Returns:
        The chosen directory after ``Path.resolve()``.
    """
    chosen = (
        locales_root
        if locales_root is not None
        else PROJECT_ROOT / "dashboard" / "src" / "i18n" / "locales"
    )
    return chosen.resolve()
def discover_dashboard_locales(locales_root: Path | None = None) -> list[str]:
    """List the dashboard locale names found in the locales directory.

    A locale name is the stem of a regular ``*.json`` file located directly in
    the directory returned by ``get_dashboard_locales_root``.

    Returns:
        The locale names in sorted order, or an empty list when the directory
        does not exist.
    """
    base_dir = get_dashboard_locales_root(locales_root)
    if base_dir.exists():
        return sorted(entry.stem for entry in base_dir.glob("*.json") if entry.is_file())
    return []
def is_plural_translation_node(value: object) -> bool:
    """Tell whether *value* has the shape of a plural translation entry.

    A plural node is a non-empty dict whose every key is a string listed in
    PLURAL_CATEGORIES and whose every value is a string.
    """
    if not (isinstance(value, dict) and value):
        return False
    for category, text in value.items():
        # Check the key type first so non-string keys never reach the
        # membership test against PLURAL_CATEGORIES.
        if not isinstance(category, str):
            return False
        if category not in PLURAL_CATEGORIES:
            return False
        if not isinstance(text, str):
            return False
    return True
def flatten_dashboard_translation_mapping(
    value: dict[str, object],
    file_path: Path,
    translations: dict[str, TranslationValue],
    parent_keys: list[str] | None = None,
) -> None:
    """Flatten a nested dashboard translation object into dotted keys.

    Walks *value* recursively and stores every leaf in *translations* under the
    "."-joined path of its (whitespace-stripped) keys. A leaf is either a plain
    string or a plural node as recognised by ``is_plural_translation_node``;
    plural nodes are stored as the result of ``validate_translation_value``.
    Any other dict is treated as a namespace and descended into.

    Args:
        value: The JSON object (or nested object) to flatten.
        file_path: Source file, used in error messages and passed on to
            ``validate_translation_value``.
        translations: Output mapping, mutated in place.
        parent_keys: Key segments leading to *value*; empty/None at top level.

    Raises:
        InvalidTranslationFileError: On an empty object, a non-string or blank
            key, or a value that is neither a string nor an object.
        DuplicateTranslationKeyError: When a dotted key is already present in
            *translations*.
    """
    ancestors = parent_keys if parent_keys else []

    if not value:
        if not ancestors:
            raise InvalidTranslationFileError(f"{file_path} 顶层不能为空对象")
        dotted_parent = ".".join(ancestors)
        raise InvalidTranslationFileError(f"{file_path} 中的 key '{dotted_parent}' 不能为空对象")

    for raw_key, raw_value in value.items():
        if not isinstance(raw_key, str):
            raise InvalidTranslationFileError(f"{file_path} 中存在非字符串 key")

        segment = raw_key.strip()
        if not segment:
            raise InvalidTranslationFileError(f"{file_path} 中存在空字符串 key")

        path_parts = ancestors + [segment]
        dotted_key = ".".join(path_parts)

        is_text = isinstance(raw_value, str)
        if is_text or is_plural_translation_node(raw_value):
            # Leaf entry: the duplicate check happens before any validation.
            if dotted_key in translations:
                raise DuplicateTranslationKeyError(f"{file_path} 中存在重复 key: '{dotted_key}'")
            translations[dotted_key] = (
                raw_value if is_text else validate_translation_value(dotted_key, raw_value, file_path)
            )
        elif isinstance(raw_value, dict):
            # Namespace object: recurse with the extended key path.
            flatten_dashboard_translation_mapping(raw_value, file_path, translations, path_parts)
        else:
            raise InvalidTranslationFileError(f"{file_path} 中的 key '{dotted_key}' 必须是字符串或对象")
def load_dashboard_translation_file(file_path: Path) -> dict[str, TranslationValue]:
    """Read one dashboard locale JSON file and return its flattened translations.

    The file is read as UTF-8, parsed as JSON, and flattened with
    ``flatten_dashboard_translation_mapping``.

    Raises:
        InvalidTranslationFileError: When the content is not valid JSON or the
            top-level value is not a JSON object (flattening may raise it too).
    """
    # Reading stays outside the try block: only JSON syntax errors are
    # converted, I/O and decoding errors propagate unchanged.
    text = file_path.read_text(encoding="utf-8")
    try:
        payload = json.loads(text)
    except json.JSONDecodeError as exc:
        raise InvalidTranslationFileError(f"{file_path} 不是合法 JSON: {exc}") from exc

    if isinstance(payload, dict):
        flattened: dict[str, TranslationValue] = {}
        flatten_dashboard_translation_mapping(payload, file_path, flattened)
        return flattened
    raise InvalidTranslationFileError(f"{file_path} 顶层必须是 JSON object")
def load_dashboard_locale_catalog(
    locale: str,
    locales_root: Path | None = None,
) -> dict[str, TranslationValue]:
    """Load the flattened translation catalog of one dashboard locale.

    Args:
        locale: Locale name; the file ``<locale>.json`` is looked up.
        locales_root: Optional override of the dashboard locales directory.

    Raises:
        LocaleNotFoundError: When the locale file does not exist.
    """
    candidate = get_dashboard_locales_root(locales_root) / f"{locale}.json"
    if candidate.exists():
        return load_dashboard_translation_file(candidate)
    raise LocaleNotFoundError(f"未找到 locale 文件: {candidate}")
def validate_dashboard_json_locales(locales_root: Path | None = None) -> list[str]:
    """Validate all dashboard locale files against the default locale.

    Every discovered locale is loaded; load failures are reported as errors
    rather than raised. Each non-default locale is then compared with the
    DASHBOARD_DEFAULT_LOCALE catalog: missing keys, extra keys, and for shared
    keys the checks of ``validate_translation_pair`` (using i18next placeholder
    extraction) and ``validate_locale_content``.

    Args:
        locales_root: Optional override of the dashboard locales directory.

    Returns:
        Human-readable error messages; empty when nothing was found.
    """
    root = get_dashboard_locales_root(locales_root)
    available = discover_dashboard_locales(root)
    if DASHBOARD_DEFAULT_LOCALE not in available:
        return [f"[dashboard] 缺少默认 locale 文件: {DASHBOARD_DEFAULT_LOCALE}.json"]

    problems: list[str] = []
    loaded: dict[str, dict[str, TranslationValue]] = {}
    for name in available:
        try:
            loaded[name] = load_dashboard_locale_catalog(name, root)
        except Exception as exc:
            # Any load failure is turned into a reported error so the
            # remaining locales are still checked.
            problems.append(f"[dashboard:{name}] 加载失败: {exc}")

    # Without a usable source catalog there is nothing to compare against.
    if DASHBOARD_DEFAULT_LOCALE not in loaded:
        return problems
    reference = loaded[DASHBOARD_DEFAULT_LOCALE]
    reference_keys = set(reference)

    for name, catalog in loaded.items():
        if name == DASHBOARD_DEFAULT_LOCALE:
            continue

        label = f"dashboard:{name}"
        own_keys = set(catalog)

        problems.extend(f"[{label}] 缺少 key: {key}" for key in sorted(reference_keys - own_keys))
        problems.extend(f"[{label}] 存在多余 key: {key}" for key in sorted(own_keys - reference_keys))

        for key in sorted(reference_keys & own_keys):
            expected, actual = reference[key], catalog[key]
            validate_translation_pair(
                key,
                expected,
                actual,
                name,
                problems,
                placeholder_extractor=extract_i18next_placeholders,
                locale_label=label,
            )
            # Content checks only run when both sides have the same shape
            # (both plain strings or both non-strings).
            same_shape = isinstance(expected, str) == isinstance(actual, str)
            if same_shape:
                validate_locale_content(key, expected, actual, name, problems, locale_label=label)

    return problems
def validate_json_locales(locales_root: Path | None = None) -> list[str]:
@@ -219,6 +391,7 @@ def _print_warnings(warnings: list[str]) -> None:
def main() -> int:
errors = validate_json_locales()
errors.extend(validate_dashboard_json_locales())
prompt_errors, prompt_warnings = validate_prompt_templates()
errors.extend(prompt_errors)