diff --git a/.github/workflows/precheck.yml b/.github/workflows/precheck.yml index f678cb17..bf6f9529 100644 --- a/.github/workflows/precheck.yml +++ b/.github/workflows/precheck.yml @@ -2,6 +2,10 @@ name: PR Precheck on: [pull_request] +permissions: + contents: read + issues: write + jobs: conflict-check: runs-on: ubuntu-24.04 @@ -11,19 +15,38 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + ref: ${{ github.event.pull_request.head.sha }} - name: Check Conflicts id: check-conflicts + env: + BASE_REF: ${{ github.event.pull_request.base.ref }} run: | - git fetch origin main - $conflicts = git diff --name-only --diff-filter=U origin/main...HEAD - if ($conflicts) { - echo "conflict=true" >> $env:GITHUB_OUTPUT - Write-Host "Conflicts detected in files: $conflicts" - } else { - echo "conflict=false" >> $env:GITHUB_OUTPUT - Write-Host "No conflicts detected" - } - shell: pwsh + set -euo pipefail + + # Fetch the base with full history; a shallow (--depth=1) fetch hides the merge base and makes git refuse the merge instead of reporting conflicts. + git fetch origin "$BASE_REF":"refs/remotes/origin/$BASE_REF" + git config user.email "github-actions[bot]@users.noreply.github.com" + git config user.name "github-actions[bot]" + + if git merge --no-commit --no-ff "origin/$BASE_REF" > /tmp/precheck-merge.log 2>&1; then + echo "conflict=false" >> "$GITHUB_OUTPUT" + echo "No conflicts detected against origin/$BASE_REF" + git merge --abort > /dev/null 2>&1 || true + exit 0 + fi + + if git diff --name-only --diff-filter=U | grep -q .; then + echo "conflict=true" >> "$GITHUB_OUTPUT" + echo "Conflicts detected against origin/$BASE_REF:" + git diff --name-only --diff-filter=U + else + echo "conflict=false" >> "$GITHUB_OUTPUT" + echo "Merge check returned non-zero without unmerged files against origin/$BASE_REF" + cat /tmp/precheck-merge.log + fi + + git merge --abort > /dev/null 2>&1 || true + shell: bash labeler: runs-on: ubuntu-24.04 needs: conflict-check diff --git a/.github/workflows/ruff-pr.yml b/.github/workflows/ruff-pr.yml index 1b6f795b..1176eb0c 100644 --- a/.github/workflows/ruff-pr.yml +++ 
b/.github/workflows/ruff-pr.yml @@ -1,5 +1,15 @@ name: Ruff PR Check -on: [ pull_request ] +on: + pull_request: + paths: + - "*.py" + - "**/*.py" + - "pyproject.toml" + - "ruff.toml" + - ".ruff.toml" + - "setup.cfg" + - "tox.ini" + - ".pre-commit-config.yaml" jobs: ruff: runs-on: ubuntu-24.04 @@ -18,4 +28,3 @@ jobs: - name: Run Ruff Format Check run: ruff format --check --diff shell: pwsh - diff --git a/docs/i18n.md b/docs/i18n.md index 2dbddbb3..b7760358 100644 --- a/docs/i18n.md +++ b/docs/i18n.md @@ -85,6 +85,11 @@ Prompt 加载规则: 3. 在代码中用 `t()` 或 `tn()` 替换硬编码字符串。 4. 运行 `python scripts/i18n_validate.py` 校验结构。 +对于非 `zh-CN` 的目标 locale: + +- 不要手工把中文 source 文案直接复制进目标语言文件后提交。 +- 英文 locale 文件中不应保留中文字符;这类残留会被校验脚本拦截。 + ## 校验脚本 运行: @@ -101,6 +106,7 @@ python scripts/i18n_validate.py - 各语言 key 集合是否与 `zh-CN` 对齐 - 占位符集合是否一致 - plural 结构是否一致 +- 非 `zh-CN` locale 是否直接保留了包含中文字符的 source 文案 - prompt 模板已存在时,其占位符集合必须与 `prompts/zh-CN/` 对齐 对于 prompt 模板: diff --git a/locales/en-US/startup.json b/locales/en-US/startup.json index a9971eff..e313c6e1 100644 --- a/locales/en-US/startup.json +++ b/locales/en-US/startup.json @@ -1,6 +1,6 @@ { - "startup.agreement_confirm_prompt": "Type \"confirmed\" or the Chinese word \"同意\", or set environment variables \"EULA_AGREE={eula_hash}\" and \"PRIVACY_AGREE={privacy_hash}\" to continue", - "startup.agreement_confirm_retry": "Please type \"confirmed\" or \"同意\" to continue", + "startup.agreement_confirm_prompt": "Type \"confirmed\", or set environment variables \"EULA_AGREE={eula_hash}\" and \"PRIVACY_AGREE={privacy_hash}\" to continue", + "startup.agreement_confirm_retry": "Please type \"confirmed\" to continue", "startup.agreement_reconfirm": "The EULA or Privacy Policy has been updated. 
Please review and confirm again before continuing.", "startup.agreement_updated": "Updated the {agreement_name} confirmation file {file_hash}", "startup.brain_external_world_failed": "Failed to start the brain and external world: {error}", diff --git a/pytests/i18n_test/test_i18n_validate.py b/pytests/i18n_test/test_i18n_validate.py new file mode 100644 index 00000000..5ce8ea78 --- /dev/null +++ b/pytests/i18n_test/test_i18n_validate.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from importlib.util import module_from_spec, spec_from_file_location +from pathlib import Path + +import json + +SCRIPT_PATH = Path(__file__).resolve().parents[2] / "scripts" / "i18n_validate.py" +MODULE_SPEC = spec_from_file_location("i18n_validate_script", SCRIPT_PATH) +assert MODULE_SPEC is not None +assert MODULE_SPEC.loader is not None +I18N_VALIDATE = module_from_spec(MODULE_SPEC) +MODULE_SPEC.loader.exec_module(I18N_VALIDATE) + + +def write_locale_file(locales_root: Path, locale: str, file_name: str, payload: dict[str, object]) -> None: + locale_dir = locales_root / locale + locale_dir.mkdir(parents=True, exist_ok=True) + (locale_dir / file_name).write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") + + +def test_validate_json_locales_rejects_han_characters_in_english_locale(tmp_path: Path) -> None: + locales_root = tmp_path / "locales" + write_locale_file(locales_root, "zh-CN", "core.json", {"consent.prompt": "输入\"同意\"继续"}) + write_locale_file(locales_root, "en-US", "core.json", {"consent.prompt": "Type \"confirmed\" or \"同意\" to continue"}) + + errors = I18N_VALIDATE.validate_json_locales(locales_root) + + assert any("consent.prompt" in error and "仍包含中文字符" in error for error in errors) + + +def test_validate_json_locales_rejects_untranslated_han_source_in_other_target_locales(tmp_path: Path) -> None: + locales_root = tmp_path / "locales" + write_locale_file(locales_root, "zh-CN", "core.json", {"greeting": "你好,世界"}) + 
write_locale_file(locales_root, "ja", "core.json", {"greeting": "你好,世界"}) + + errors = I18N_VALIDATE.validate_json_locales(locales_root) + + assert any("greeting" in error and "直接保留了包含中文字符的 source 文案" in error for error in errors) diff --git a/scripts/i18n_validate.py b/scripts/i18n_validate.py index d2c654d3..9b3a4313 100644 --- a/scripts/i18n_validate.py +++ b/scripts/i18n_validate.py @@ -3,6 +3,7 @@ from __future__ import annotations from pathlib import Path from string import Formatter +import re import sys PROJECT_ROOT = Path(__file__).resolve().parents[1] @@ -23,6 +24,7 @@ from src.common.prompt_i18n import ( # noqa: E402 ) FORMATTER = Formatter() +HAN_CHARACTER_PATTERN = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]") def extract_placeholders(template: str) -> set[str]: @@ -34,6 +36,41 @@ def extract_placeholders(template: str) -> set[str]: return placeholders +def contains_han_characters(text: str) -> bool: + return HAN_CHARACTER_PATTERN.search(text) is not None + + +def iter_translation_strings(value: TranslationValue) -> list[str]: + if isinstance(value, str): + return [value] + return [value[category] for category in sorted(value.keys())] + + +def locale_requires_latin_only_validation(locale: str) -> bool: + normalized_locale = locale.lower() + return normalized_locale == "en" or normalized_locale.startswith("en-") + + +def validate_locale_content( + key: str, + source_value: TranslationValue, + target_value: TranslationValue, + locale: str, + errors: list[str], +) -> None: + source_texts = iter_translation_strings(source_value) + target_texts = iter_translation_strings(target_value) + + if any( + source_text == target_text and contains_han_characters(source_text) + for source_text, target_text in zip(source_texts, target_texts, strict=False) + ): + errors.append(f"[{locale}] key '{key}' 直接保留了包含中文字符的 source 文案,请通过 Crowdin 提供目标语言翻译") + + if locale_requires_latin_only_validation(locale) and any(contains_han_characters(text) for text in 
target_texts): + errors.append(f"[{locale}] key '{key}' 仍包含中文字符,请移除源语言残留后再提交") + + def validate_translation_pair( key: str, source_value: TranslationValue, @@ -103,7 +140,11 @@ def validate_json_locales(locales_root: Path | None = None) -> list[str]: errors.append(f"[{locale}] 存在多余 key: {key}") for key in sorted(source_keys & locale_keys): - validate_translation_pair(key, source_catalog[key], catalog[key], locale, errors) + source_value = source_catalog[key] + target_value = catalog[key] + validate_translation_pair(key, source_value, target_value, locale, errors) + if isinstance(source_value, str) == isinstance(target_value, str): + validate_locale_content(key, source_value, target_value, locale, errors) return errors