ci: tighten crowdin and i18n PR checks

This commit is contained in:
春河晴
2026-03-13 00:35:11 +09:00
parent 6bac2b9331
commit 4b7ee3923c
6 changed files with 132 additions and 15 deletions

View File

@@ -2,6 +2,10 @@
name: PR Precheck
on: [pull_request]
permissions:
contents: read
issues: write
jobs:
conflict-check:
runs-on: ubuntu-24.04
@@ -11,19 +15,37 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Check Conflicts
id: check-conflicts
env:
BASE_REF: ${{ github.event.pull_request.base.ref }}
run: |
git fetch origin main
$conflicts = git diff --name-only --diff-filter=U origin/main...HEAD
if ($conflicts) {
echo "conflict=true" >> $env:GITHUB_OUTPUT
Write-Host "Conflicts detected in files: $conflicts"
} else {
echo "conflict=false" >> $env:GITHUB_OUTPUT
Write-Host "No conflicts detected"
}
shell: pwsh
set -euo pipefail
git fetch origin "$BASE_REF":"refs/remotes/origin/$BASE_REF" --depth=1
git config user.email "github-actions[bot]@users.noreply.github.com"
git config user.name "github-actions[bot]"
if git merge --no-commit --no-ff "origin/$BASE_REF" > /tmp/precheck-merge.log 2>&1; then
echo "conflict=false" >> "$GITHUB_OUTPUT"
echo "No conflicts detected against origin/$BASE_REF"
git merge --abort > /dev/null 2>&1 || true
exit 0
fi
if git diff --name-only --diff-filter=U | grep -q .; then
echo "conflict=true" >> "$GITHUB_OUTPUT"
echo "Conflicts detected against origin/$BASE_REF:"
git diff --name-only --diff-filter=U
else
echo "conflict=false" >> "$GITHUB_OUTPUT"
echo "Merge check returned non-zero without unmerged files against origin/$BASE_REF"
cat /tmp/precheck-merge.log
fi
git merge --abort > /dev/null 2>&1 || true
shell: bash
labeler:
runs-on: ubuntu-24.04
needs: conflict-check

View File

@@ -1,5 +1,15 @@
name: Ruff PR Check
on: [ pull_request ]
on:
pull_request:
paths:
- "*.py"
- "**/*.py"
- "pyproject.toml"
- "ruff.toml"
- ".ruff.toml"
- "setup.cfg"
- "tox.ini"
- ".pre-commit-config.yaml"
jobs:
ruff:
runs-on: ubuntu-24.04
@@ -18,4 +28,3 @@ jobs:
- name: Run Ruff Format Check
run: ruff format --check --diff
shell: pwsh

View File

@@ -85,6 +85,11 @@ Prompt 加载规则:
3. 在代码中用 `t()` / `tn()` 替换硬编码字符串。
4. 运行 `python scripts/i18n_validate.py` 校验结构。
对于非 `zh-CN` 的目标 locale:
- 不要手工把中文 source 文案直接复制进目标语言文件后提交。
- 英文 locale 文件中不应保留中文字符;这类残留会被校验脚本拦截。
## 校验脚本
运行:
@@ -101,6 +106,7 @@ python scripts/i18n_validate.py
- 各语言 key 集合是否与 `zh-CN` 对齐
- 占位符集合是否一致
- plural 结构是否一致
- 非 `zh-CN` locale 是否直接保留了包含中文字符的 source 文案
- prompt 模板已存在时,其占位符集合必须与 `prompts/zh-CN/` 对齐
对于 prompt 模板:

View File

@@ -1,6 +1,6 @@
{
"startup.agreement_confirm_prompt": "Type \"confirmed\" or the Chinese word \"同意\", or set environment variables \"EULA_AGREE={eula_hash}\" and \"PRIVACY_AGREE={privacy_hash}\" to continue",
"startup.agreement_confirm_retry": "Please type \"confirmed\" or \"同意\" to continue",
"startup.agreement_confirm_prompt": "Type \"confirmed\", or set environment variables \"EULA_AGREE={eula_hash}\" and \"PRIVACY_AGREE={privacy_hash}\" to continue",
"startup.agreement_confirm_retry": "Please type \"confirmed\" to continue",
"startup.agreement_reconfirm": "The EULA or Privacy Policy has been updated. Please review and confirm again before continuing.",
"startup.agreement_updated": "Updated the {agreement_name} confirmation file {file_hash}",
"startup.brain_external_world_failed": "Failed to start the brain and external world: {error}",

View File

@@ -0,0 +1,39 @@
from __future__ import annotations
from importlib.util import module_from_spec, spec_from_file_location
from pathlib import Path
import json
# Path to the production validator; the tests load it straight from disk
# because scripts/ is not an importable package.
SCRIPT_PATH = Path(__file__).resolve().parents[2] / "scripts" / "i18n_validate.py"
MODULE_SPEC = spec_from_file_location("i18n_validate_script", SCRIPT_PATH)
# Fail fast at collection time if the script was moved or cannot be loaded.
assert MODULE_SPEC is not None
assert MODULE_SPEC.loader is not None
# Execute the script as a throwaway module so its functions are callable here.
I18N_VALIDATE = module_from_spec(MODULE_SPEC)
MODULE_SPEC.loader.exec_module(I18N_VALIDATE)
def write_locale_file(locales_root: Path, locale: str, file_name: str, payload: dict[str, object]) -> None:
    """Write *payload* as pretty-printed UTF-8 JSON to ``locales_root/locale/file_name``.

    Parent directories are created on demand so tests can build a locale tree
    from a bare temporary path.
    """
    target_dir = locales_root / locale
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    (target_dir / file_name).write_text(serialized, encoding="utf-8")
def test_validate_json_locales_rejects_han_characters_in_english_locale(tmp_path: Path) -> None:
    """An English locale file that still contains Han characters must be flagged."""
    locales_root = tmp_path / "locales"
    source_payload = {"consent.prompt": "输入\"同意\"继续"}
    english_payload = {"consent.prompt": "Type \"confirmed\" or \"同意\" to continue"}
    write_locale_file(locales_root, "zh-CN", "core.json", source_payload)
    write_locale_file(locales_root, "en-US", "core.json", english_payload)
    errors = I18N_VALIDATE.validate_json_locales(locales_root)
    matching = [error for error in errors if "consent.prompt" in error and "仍包含中文字符" in error]
    assert matching
def test_validate_json_locales_rejects_untranslated_han_source_in_other_target_locales(tmp_path: Path) -> None:
    """A non-English target locale that copies the Chinese source verbatim must be flagged."""
    locales_root = tmp_path / "locales"
    untranslated = {"greeting": "你好,世界"}
    write_locale_file(locales_root, "zh-CN", "core.json", untranslated)
    write_locale_file(locales_root, "ja", "core.json", untranslated)
    errors = I18N_VALIDATE.validate_json_locales(locales_root)
    matching = [error for error in errors if "greeting" in error and "直接保留了包含中文字符的 source 文案" in error]
    assert matching

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
from pathlib import Path
from string import Formatter
import re
import sys
PROJECT_ROOT = Path(__file__).resolve().parents[1]
@@ -23,6 +24,7 @@ from src.common.prompt_i18n import ( # noqa: E402
)
FORMATTER = Formatter()
HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]")
def extract_placeholders(template: str) -> set[str]:
@@ -34,6 +36,41 @@ def extract_placeholders(template: str) -> set[str]:
return placeholders
def contains_han_characters(text: str) -> bool:
    """Return True when *text* contains at least one BMP Han (CJK) character."""
    return bool(HAN_CHARACTER_PATTERN.search(text))
def iter_translation_strings(value: TranslationValue) -> list[str]:
    """Flatten a translation value into a list of strings.

    A plain string yields a single-element list; a plural mapping yields its
    texts ordered by plural-category name so comparisons are deterministic.
    """
    if not isinstance(value, str):
        return [value[category] for category in sorted(value)]
    return [value]
def locale_requires_latin_only_validation(locale: str) -> bool:
    """Return True for English locales ("en" or any "en-*" variant).

    These locales must not contain Han characters; matching is
    case-insensitive.
    """
    lowered = locale.lower()
    return lowered.startswith("en-") or lowered == "en"
def validate_locale_content(
    key: str,
    source_value: TranslationValue,
    target_value: TranslationValue,
    locale: str,
    errors: list[str],
) -> None:
    """Append content-level errors for one translated key, in place.

    Two checks run against the flattened source/target texts:
    1. A target text byte-identical to a Han-containing source text is
       treated as an untranslated copy of the Chinese source.
    2. English locales must not contain Han characters at all.
    """
    source_texts = iter_translation_strings(source_value)
    target_texts = iter_translation_strings(target_value)
    untranslated_copy = False
    # strict=False: plural-category count mismatches are reported by the
    # structural checks, so unpaired texts are simply skipped here.
    for source_text, target_text in zip(source_texts, target_texts, strict=False):
        if source_text == target_text and contains_han_characters(source_text):
            untranslated_copy = True
            break
    if untranslated_copy:
        errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案,请通过 Crowdin 提供目标语言翻译")
    if locale_requires_latin_only_validation(locale):
        if any(contains_han_characters(text) for text in target_texts):
            errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交")
def validate_translation_pair(
key: str,
source_value: TranslationValue,
@@ -103,7 +140,11 @@ def validate_json_locales(locales_root: Path | None = None) -> list[str]:
errors.append(f"[{locale}] 存在多余 key: {key}")
for key in sorted(source_keys & locale_keys):
validate_translation_pair(key, source_catalog[key], catalog[key], locale, errors)
source_value = source_catalog[key]
target_value = catalog[key]
validate_translation_pair(key, source_value, target_value, locale, errors)
if isinstance(source_value, str) == isinstance(target_value, str):
validate_locale_content(key, source_value, target_value, locale, errors)
return errors