diff --git a/.github/workflows/crowdin-sync.yml b/.github/workflows/crowdin-sync.yml new file mode 100644 index 00000000..2c17a99c --- /dev/null +++ b/.github/workflows/crowdin-sync.yml @@ -0,0 +1,39 @@ +name: Crowdin Sync + +on: + workflow_dispatch: + push: + branches: + - main + - r-dev + paths: + - "crowdin.yml" + - "locales/zh-CN/*.json" + +# The sync job pushes a localization branch and opens a pull request, so the workflow token needs write access. +permissions: + contents: write + pull-requests: write + +jobs: + sync: + runs-on: ubuntu-24.04 + + steps: + - uses: actions/checkout@v4 + - name: Sync translations with Crowdin + uses: crowdin/github-action@v2 + with: + config: crowdin.yml + upload_sources: true + upload_translations: false + download_translations: true + create_pull_request: true + pull_request_title: "chore(i18n): sync Crowdin translations" + pull_request_body: "Automated translation sync from Crowdin." + commit_message: "chore(i18n): sync Crowdin translations" + env: + # Required by crowdin/github-action to push the downloaded translations and create the pull request. + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CROWDIN_PROJECT_ID: ${{ secrets.CROWDIN_PROJECT_ID }} + CROWDIN_PERSONAL_TOKEN: ${{ secrets.CROWDIN_PERSONAL_TOKEN }} diff --git a/.github/workflows/i18n-validate.yml b/.github/workflows/i18n-validate.yml new file mode 100644 index 00000000..cc64dd16 --- /dev/null +++ b/.github/workflows/i18n-validate.yml @@ -0,0 +1,28 @@ +name: i18n Validate + +on: + pull_request: + paths: + - "locales/**/*.json" + - "scripts/i18n_validate.py" + - "src/common/i18n/**/*.py" + push: + branches: + - main + - r-dev + paths: + - "locales/**/*.json" + - "scripts/i18n_validate.py" + - "src/common/i18n/**/*.py" + +jobs: + validate: + runs-on: ubuntu-24.04 + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Validate locale files + run: python scripts/i18n_validate.py diff --git a/crowdin.yml b/crowdin.yml new file mode 100644 index 00000000..2a10839a --- /dev/null +++ b/crowdin.yml @@ -0,0 +1,12 @@ +project_id_env: CROWDIN_PROJECT_ID +api_token_env: CROWDIN_PERSONAL_TOKEN +base_path: . 
+preserve_hierarchy: true + +files: + - source: /locales/zh-CN/*.json + translation: /locales/%locale%/%original_file_name% + languages_mapping: + locale: + en-US: en-US + ja: ja diff --git a/docs/i18n.md b/docs/i18n.md new file mode 100644 index 00000000..41157d3b --- /dev/null +++ b/docs/i18n.md @@ -0,0 +1,102 @@ +# i18n Guide + +MaiBot 现在使用 `JSON + Crowdin + Babel` 的国际化方案,不依赖 gettext 的 `.po/.mo` 运行时。 + +## 目录结构 + +翻译文件位于 `locales/<locale>/*.json`,当前默认语言是 `zh-CN`。 + +建议按模块拆分文件: + +- `core.json` +- `startup.json` +- `config.json` + +## 在代码中使用 + +统一从 [src/common/i18n/__init__.py](../src/common/i18n/__init__.py) 导入: + +```python +from src.common.i18n import t, tn + +logger.info(t("startup.launching_script", script_file=script_file)) +logger.info(tn("core.tasks_cancelled", count)) +``` + +可用能力: + +- `t(key, locale=None, **kwargs)`:普通翻译 +- `tn(key, count, locale=None, **kwargs)`:plural 翻译 +- `set_locale(locale)` / `get_locale()`:设置或读取当前默认 locale +- `format_datetime_localized(...)` +- `format_number_localized(...)` +- `format_decimal_localized(...)` + +## locale 优先级 + +运行时按以下顺序决定 locale: + +1. 显式传入 `locale` +2. 当前上下文中的 locale 覆盖(如使用 `use_locale(...)`) +3. 环境变量 `MAIBOT_LOCALE` +4. 默认值 `zh-CN` + +## key 规范 + +- 使用稳定的点分 key,例如 `startup.env_created` +- 全部小写 +- 不要把中文原文直接当 key + +## 新增翻译的步骤 + +1. 先在 `locales/zh-CN/*.json` 添加 source 文案。 +2. 在 `locales/en-US/*.json` 中补上同名 key。 +3. 在代码中用 `t()` 或 `tn()` 替换硬编码字符串。 +4. 
运行 `python scripts/i18n_validate.py` 校验结构。 + +## 校验脚本 + +运行: + +```bash +python scripts/i18n_validate.py +``` + +校验内容包括: + +- JSON 语法是否合法 +- 是否存在重复 key +- 是否存在空字符串 key +- 各语言 key 集合是否与 `zh-CN` 对齐 +- 占位符集合是否一致 +- plural 结构是否一致 + +## 候选扫描 + +如果你想继续做下一批迁移,可以运行: + +```bash +python scripts/i18n_extract_candidates.py +``` + +这个脚本会扫描仓库中的 Python 文件,输出仍然包含中文字符串常量的位置,方便人工挑选下一批适合迁移到 i18n 的文案。 + +## Crowdin + +项目根目录的 [crowdin.yml](../crowdin.yml) 使用 `locales/zh-CN/*.json` 作为 source。 + +GitHub Actions 中的 [crowdin-sync.yml](../.github/workflows/crowdin-sync.yml) 会负责和 Crowdin 同步。 + +## 当前迁移范围 + +这一批已经覆盖: + +- `bot.py` 启动、重启、退出与协议确认提示 +- `src/config` 中第一批配置加载、热重载、校验异常提示 +- `src/main.py` 的主要启动链路提示 + +暂不建议立即迁移: + +- 大段 prompt 模板 +- 内部协议字段 +- debug-only 文案 diff --git a/locales/en-US/config.json b/locales/en-US/config.json new file mode 100644 index 00000000..b2975f4b --- /dev/null +++ b/locales/en-US/config.json @@ -0,0 +1,39 @@ +{ + "config.added_count": "Added config items: {count}", + "config.added_item": "Added config entry: {attribute}", + "config.api_base_url_empty": "The API base URL cannot be empty. Please configure a valid base URL.", + "config.api_key_empty": "The API key cannot be empty. Please configure a valid API key.", + "config.api_provider_name_duplicate": "Duplicate API provider names were found. Please check the config file.", + "config.api_provider_name_empty": "The API provider name cannot be empty. Please configure a valid name.", + "config.api_providers_empty": "The API provider list cannot be empty. Please configure at least one valid API provider.", + "config.change_summary_header": "-------- Config File Change Summary --------", + "config.current_version": "Current MaiCore version: {version}", + "config.file_change_detected": "Detected config file changes and triggered hot reload", + "config.file_updated": "The {file_name} config file has been updated. Old: {old_version} -> New: {new_version}. 
Please review the new file to avoid losing important settings.", + "config.file_watcher_started": "Config file watcher started", + "config.file_watcher_stop_stats": "Config file watcher stop stats: batches={batches}, changes={changes}, ok={ok}, failed={failed}, timeout={timeout}, cooldown_skip={cooldown_skip}, restart={restart}", + "config.global_not_initialized": "global_config is not initialized", + "config.hot_reload_completed": "Config hot reload completed", + "config.invalid_inner_version": "The config file inner.version field has an invalid type", + "config.legacy_migrated": "A legacy config structure was detected and automatically migrated: {reason}. Please review and save the regenerated config file later.", + "config.loaded": "Fresh and delicious!", + "config.loading": "Tasting the config files...", + "config.missing_inner_version": "The config file is missing inner version information", + "config.model_api_provider_empty": "The API provider cannot be empty. Please configure a valid API provider.", + "config.model_api_provider_missing": "Model '{model_name}' references a missing api_provider '{api_provider}'", + "config.model_identifier_empty": "Model '{model_name}' must not have an empty model_identifier", + "config.model_identifier_empty_generic": "The model identifier cannot be empty. Please configure a valid model identifier.", + "config.model_name_duplicate": "Duplicate model names were found. Please check the config file.", + "config.model_name_empty": "The model name cannot be empty. Please configure a valid model name.", + "config.model_not_initialized": "model_config is not initialized", + "config.models_empty": "The model list cannot be empty. 
Please configure at least one valid model.", + "config.parse_failed": "Failed to parse config file {file_name}", + "config.reload_callback_failed": "A config reload callback failed: {error}", + "config.reload_failed": "Config reload failed: {error}", + "config.reload_skipped_too_frequent": "Config file changes happened too frequently, so this reload was skipped", + "config.reload_timeout": "Config hot reload timed out (>{timeout_seconds}s)", + "config.removed_count": "Removed config items: {count}", + "config.removed_item": "Removed config entry: {attribute}", + "config.version_update_detected": "A config version update was detected. Hot reload only refreshed in-memory data.", + "config.write_unsupported_type": "Config writing only supports ConfigBase subclasses" +} diff --git a/locales/en-US/core.json b/locales/en-US/core.json new file mode 100644 index 00000000..ef93ac20 --- /dev/null +++ b/locales/en-US/core.json @@ -0,0 +1,7 @@ +{ + "core.operation_failed": "Operation failed: {error}", + "core.tasks_cancelled": { + "one": "Cancelled {count} task", + "other": "Cancelled {count} tasks" + } +} diff --git a/locales/en-US/startup.json b/locales/en-US/startup.json new file mode 100644 index 00000000..a9971eff --- /dev/null +++ b/locales/en-US/startup.json @@ -0,0 +1,58 @@ +{ + "startup.agreement_confirm_prompt": "Type \"confirmed\" or the Chinese word \"同意\", or set environment variables \"EULA_AGREE={eula_hash}\" and \"PRIVACY_AGREE={privacy_hash}\" to continue", + "startup.agreement_confirm_retry": "Please type \"confirmed\" or \"同意\" to continue", + "startup.agreement_reconfirm": "The EULA or Privacy Policy has been updated. 
Please review and confirm again before continuing.", + "startup.agreement_updated": "Updated the {agreement_name} confirmation file {file_hash}", + "startup.brain_external_world_failed": "Failed to start the brain and external world: {error}", + "startup.chat_manager_initialized": "Chat manager initialized successfully", + "startup.child_process_force_kill": "The child process did not respond and was forcefully terminated...", + "startup.compiling_shaders": "Compiling shaders: 1/114514", + "startup.dev_branch_warning": "Warning: the Dev branch is in an unstable development state. Plugins and the WebUI may not work properly.", + "startup.easter_egg": "Many years later, facing the AI execution squad, Zhang San would recall that afternoon in 2023 when he discussed artificial intelligence at a meeting.", + "startup.emoji_manager_initialized": "Emoji manager initialized successfully", + "startup.env_auto_create_failed": "Failed to auto-create .env: {error}", + "startup.env_created": "No .env file was found. One has been created from template/template.env", + "startup.env_file_missing": "The .env file does not exist. Please create it and configure the required environment variables.", + "startup.env_template_missing": "No .env file was found, and template/template.env is also missing", + "startup.eula_name": "EULA", + "startup.eula_privacy_checked": "Finished checking the EULA and Privacy Policy", + "startup.event_loop_closed": "[Main] Event loop closed", + "startup.file_not_found": "{file_type} file does not exist", + "startup.graceful_shutdown_error": "An error occurred during graceful shutdown: {error}", + "startup.initialization_completed_banner": "\n--------------------------------\nAll systems have been initialized. 
{nickname} is now awake.\n--------------------------------\nIf you want to customize {nickname}, please visit: https://docs.mai-mai.org/manual/usage/\nIf you run into issues, please check the docs: https://docs.mai-mai.org/\n--------------------------------\nIf you want to write or learn about plugins, see the developer docs: https://docs.mai-mai.org/develop/\n--------------------------------\nIf you need model usage or MaiBot statistics, open the maibot_statistics.html file in the project root.\n", + "startup.initialization_completed_cycles": "Initialization completed after {init_time} neural firing cycles", + "startup.interrupt_received": "Interrupt signal received. Starting graceful shutdown...", + "startup.launching_script": "Starting {script_file}...", + "startup.logging_shutdown_error": "Failed to shut down logging: {error}", + "startup.main_error": "The main program encountered an exception: {error}", + "startup.opensource_free_notice": " This project is fully free and open source under the GPL-3.0 license", + "startup.opensource_group": " Official group chat: ", + "startup.opensource_group_value": "1006149251", + "startup.opensource_repo": " Official repository: ", + "startup.opensource_repo_value": "https://github.com/MaiM-with-u/MaiBot", + "startup.opensource_resale_warning": " Reselling this software or hiding its open source nature violates the license!", + "startup.opensource_scamming_notice": " If someone sold you this software, you were scammed!", + "startup.opensource_title": " ★ MaiBot - Open Source AI Chat Bot ★", + "startup.opensource_docs": " Official docs: ", + "startup.opensource_docs_value": "https://docs.mai-mai.org", + "startup.prepare_exit": "[Main] Preparing to exit...", + "startup.privacy_name": "Privacy Policy", + "startup.program_exited": "Program exited (exit code {return_code})", + "startup.remaining_tasks_cancelling": { + "one": "Cancelling {count} remaining task...", + "other": "Cancelling {count} remaining tasks..." 
+ }, + "startup.remaining_tasks_cancelled": "All remaining tasks were cancelled successfully", + "startup.remaining_tasks_cancel_error": "An error occurred while waiting for tasks to cancel: {error}", + "startup.remaining_tasks_cancel_timeout": "Timed out while waiting for task cancellation. Continuing shutdown.", + "startup.restart_requested": "Restart requested (exit code {exit_code}). Restarting...", + "startup.restart_signal_received": "Restart signal received. Exiting and requesting restart...", + "startup.schedule_cancelled": "Scheduled tasks were cancelled", + "startup.shutdown_completed": "MaiBot shut down gracefully", + "startup.shutdown_failed": "MaiBot failed to shut down: {error}", + "startup.shutdown_started": "Starting graceful shutdown for MaiBot...", + "startup.waking_up": "Waking up {nickname}......", + "startup.webui_disabled": "WebUI is disabled", + "startup.webui_server_init_failed": "Failed to initialize the WebUI server: {error}", + "startup.worker_dir_set": "Working directory set to: {script_dir}" +} diff --git a/locales/zh-CN/config.json b/locales/zh-CN/config.json new file mode 100644 index 00000000..503c0210 --- /dev/null +++ b/locales/zh-CN/config.json @@ -0,0 +1,39 @@ +{ + "config.added_count": "新增配置数量: {count}", + "config.added_item": "配置文件中新增配置项: {attribute}", + "config.api_base_url_empty": "API基础URL不能为空,请在配置中设置有效的基础URL。", + "config.api_key_empty": "API密钥不能为空,请在配置中设置有效的API密钥。", + "config.api_provider_name_duplicate": "API提供商名称存在重复,请检查配置文件。", + "config.api_provider_name_empty": "API提供商名称不能为空,请在配置中设置有效的名称。", + "config.api_providers_empty": "API提供商列表不能为空,请在配置中设置有效的API提供商列表。", + "config.change_summary_header": "-------- 配置文件变更信息 --------", + "config.current_version": "MaiCore 当前版本: {version}", + "config.file_change_detected": "检测到配置文件变更,触发热重载", + "config.file_updated": "{file_name} 配置文件已经更新。Old: {old_version} -> New: {new_version},建议检查新配置文件中的内容,以免丢失重要信息", + "config.file_watcher_started": "配置文件监视器已启动", + "config.file_watcher_stop_stats": 
"配置文件监视器停止统计: batches={batches}, changes={changes}, ok={ok}, failed={failed}, timeout={timeout}, cooldown_skip={cooldown_skip}, restart={restart}", + "config.global_not_initialized": "global_config 未初始化", + "config.hot_reload_completed": "配置热重载完成", + "config.invalid_inner_version": "配置文件 inner.version 类型错误", + "config.legacy_migrated": "检测到旧版配置结构,已尝试自动修复: {reason}。建议稍后检查并保存生成的新配置文件。", + "config.loaded": "非常的新鲜,非常的美味!", + "config.loading": "正在品鉴配置文件...", + "config.missing_inner_version": "配置文件缺少 inner 版本信息", + "config.model_api_provider_empty": "API提供商不能为空,请在配置中设置有效的API提供商。", + "config.model_api_provider_missing": "模型 '{model_name}' 的 api_provider '{api_provider}' 不存在", + "config.model_identifier_empty": "模型 '{model_name}' 的 model_identifier 不能为空", + "config.model_identifier_empty_generic": "模型标识符不能为空,请在配置中设置有效的模型标识符。", + "config.model_name_duplicate": "模型名称存在重复,请检查配置文件。", + "config.model_name_empty": "模型名称不能为空,请在配置中设置有效的模型名称。", + "config.model_not_initialized": "model_config 未初始化", + "config.models_empty": "模型列表不能为空,请在配置中设置有效的模型列表。", + "config.parse_failed": "配置文件 {file_name} 解析失败", + "config.reload_callback_failed": "配置重载回调执行失败: {error}", + "config.reload_failed": "配置重载失败: {error}", + "config.reload_skipped_too_frequent": "文件变更触发过于频繁,已跳过本次重载", + "config.reload_timeout": "配置热重载超时(>{timeout_seconds}s)", + "config.removed_count": "移除配置数量: {count}", + "config.removed_item": "移除配置项: {attribute}", + "config.version_update_detected": "检测到配置版本更新,热重载仅更新内存数据", + "config.write_unsupported_type": "配置写入只支持 ConfigBase 子类" +} diff --git a/locales/zh-CN/core.json b/locales/zh-CN/core.json new file mode 100644 index 00000000..4ca25347 --- /dev/null +++ b/locales/zh-CN/core.json @@ -0,0 +1,7 @@ +{ + "core.operation_failed": "操作失败: {error}", + "core.tasks_cancelled": { + "one": "已取消 {count} 个任务", + "other": "已取消 {count} 个任务" + } +} diff --git a/locales/zh-CN/startup.json b/locales/zh-CN/startup.json new file mode 100644 index 00000000..715afd79 --- /dev/null +++ 
b/locales/zh-CN/startup.json @@ -0,0 +1,58 @@ +{ + "startup.agreement_confirm_prompt": "输入\"同意\"或\"confirmed\",或设置环境变量\"EULA_AGREE={eula_hash}\"和\"PRIVACY_AGREE={privacy_hash}\"继续运行", + "startup.agreement_confirm_retry": "请输入\"同意\"或\"confirmed\"以继续运行", + "startup.agreement_reconfirm": "EULA 或隐私条款内容已更新,请在阅读后重新确认,继续运行视为同意更新后的以上两款协议", + "startup.agreement_updated": "更新 {agreement_name} 确认文件 {file_hash}", + "startup.brain_external_world_failed": "启动大脑和外部世界失败: {error}", + "startup.chat_manager_initialized": "聊天管理器初始化成功", + "startup.child_process_force_kill": "子进程未响应,强制关闭...", + "startup.compiling_shaders": "正在编译着色器:1/114514", + "startup.dev_branch_warning": "警告:Dev 进入不稳定开发状态,任何插件与 WebUI 均可能无法正常工作!", + "startup.easter_egg": "多年以后,面对AI行刑队,张三将会回想起他2023年在会议上讨论人工智能的那个下午", + "startup.emoji_manager_initialized": "表情包管理器初始化成功", + "startup.env_auto_create_failed": "自动创建 .env 失败: {error}", + "startup.env_created": "未找到 .env,已从 template/template.env 自动创建", + "startup.env_file_missing": ".env 文件不存在,请创建并配置所需的环境变量", + "startup.env_template_missing": "未找到 .env 文件,也未找到模板 template/template.env", + "startup.eula_name": "EULA", + "startup.eula_privacy_checked": "检查 EULA 和隐私条款完成", + "startup.event_loop_closed": "[主程序] 事件循环已关闭", + "startup.file_not_found": "{file_type} 文件不存在", + "startup.graceful_shutdown_error": "优雅关闭时发生错误: {error}", + "startup.initialization_completed_banner": "\n--------------------------------\n全部系统初始化完成,{nickname} 已成功唤醒\n--------------------------------\n如果想要自定义 {nickname} 的功能,请查阅:https://docs.mai-mai.org/manual/usage/\n或者遇到了问题,请访问我们的文档:https://docs.mai-mai.org/\n--------------------------------\n如果你想要编写或了解插件相关内容,请访问开发文档 https://docs.mai-mai.org/develop/\n--------------------------------\n如果你需要查阅模型的消耗以及麦麦的统计数据,请访问根目录的 maibot_statistics.html 文件\n", + "startup.initialization_completed_cycles": "初始化完成,神经元放电 {init_time} 次", + "startup.interrupt_received": "收到中断信号,正在优雅关闭...", + "startup.launching_script": "正在启动 {script_file}...", + "startup.logging_shutdown_error": 
"关闭日志系统时出错: {error}", + "startup.main_error": "主程序发生异常: {error}", + "startup.opensource_free_notice": " 本项目是完全免费的开源软件,基于 GPL-3.0 协议发布", + "startup.opensource_group": " 官方群聊: ", + "startup.opensource_group_value": "1006149251", + "startup.opensource_repo": " 官方仓库: ", + "startup.opensource_repo_value": "https://github.com/MaiM-with-u/MaiBot", + "startup.opensource_resale_warning": " 将本软件作为「商品」倒卖、隐瞒开源性质均违反协议!", + "startup.opensource_scamming_notice": " 如果有人向你「出售本软件」,你被骗了!", + "startup.opensource_title": " ★ MaiBot - 开源 AI 聊天机器人 ★", + "startup.opensource_docs": " 官方文档: ", + "startup.opensource_docs_value": "https://docs.mai-mai.org", + "startup.prepare_exit": "[主程序] 准备退出...", + "startup.privacy_name": "隐私条款", + "startup.program_exited": "程序已退出(退出码 {return_code})", + "startup.remaining_tasks_cancelling": { + "one": "正在取消 {count} 个剩余任务...", + "other": "正在取消 {count} 个剩余任务..." + }, + "startup.remaining_tasks_cancelled": "所有剩余任务已成功取消", + "startup.remaining_tasks_cancel_error": "等待任务取消时发生异常: {error}", + "startup.remaining_tasks_cancel_timeout": "等待任务取消超时,强制继续关闭", + "startup.restart_requested": "检测到重启请求(退出码 {exit_code}),正在重启...", + "startup.restart_signal_received": "收到重启信号,准备退出并请求重启...", + "startup.schedule_cancelled": "调度任务已取消", + "startup.shutdown_completed": "麦麦优雅关闭完成", + "startup.shutdown_failed": "麦麦关闭失败: {error}", + "startup.shutdown_started": "正在优雅关闭麦麦...", + "startup.waking_up": "正在唤醒 {nickname}......", + "startup.webui_disabled": "WebUI 已禁用", + "startup.webui_server_init_failed": "初始化 WebUI 服务器失败: {error}", + "startup.worker_dir_set": "已设置工作目录为: {script_dir}" +} diff --git a/pytests/i18n_test/test_i18n.py b/pytests/i18n_test/test_i18n.py new file mode 100644 index 00000000..9c31f842 --- /dev/null +++ b/pytests/i18n_test/test_i18n.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from pathlib import Path + +import json + +import pytest + +from src.common.i18n.manager import I18nManager +from src.common.i18n.loaders import DuplicateTranslationKeyError, 
load_locale_catalog + + +def write_locale_file(locales_root: Path, locale: str, file_name: str, payload: dict[str, object]) -> None: + locale_dir = locales_root / locale + locale_dir.mkdir(parents=True, exist_ok=True) + file_path = locale_dir / file_name + file_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") + + +def test_t_falls_back_to_default_locale(tmp_path: Path) -> None: + locales_root = tmp_path / "locales" + write_locale_file(locales_root, "zh-CN", "core.json", {"greeting": "你好,{name}"}) + write_locale_file(locales_root, "en-US", "core.json", {}) + + manager = I18nManager(locales_root=locales_root) + + assert manager.t("greeting", locale="en-US", name="Mai") == "你好,Mai" + + +def test_t_returns_key_when_missing_everywhere(tmp_path: Path) -> None: + locales_root = tmp_path / "locales" + write_locale_file(locales_root, "zh-CN", "core.json", {}) + write_locale_file(locales_root, "en-US", "core.json", {}) + + manager = I18nManager(locales_root=locales_root) + + assert manager.t("missing.key", locale="en-US") == "missing.key" + + +def test_tn_uses_plural_rules(tmp_path: Path) -> None: + locales_root = tmp_path / "locales" + write_locale_file( + locales_root, + "en-US", + "core.json", + { + "tasks.cancelled": { + "one": "Cancelled {count} task", + "other": "Cancelled {count} tasks", + } + }, + ) + + manager = I18nManager(default_locale="en-US", locales_root=locales_root) + + assert manager.tn("tasks.cancelled", 1) == "Cancelled 1 task" + assert manager.tn("tasks.cancelled", 2) == "Cancelled 2 tasks" + + +def test_load_locale_catalog_rejects_duplicate_keys(tmp_path: Path) -> None: + locales_root = tmp_path / "locales" + write_locale_file(locales_root, "zh-CN", "a.json", {"duplicate.key": "A"}) + write_locale_file(locales_root, "zh-CN", "b.json", {"duplicate.key": "B"}) + + with pytest.raises(DuplicateTranslationKeyError): + load_locale_catalog("zh-CN", locales_root) diff --git a/scripts/i18n_extract_candidates.py 
b/scripts/i18n_extract_candidates.py new file mode 100644 index 00000000..855b5429 --- /dev/null +++ b/scripts/i18n_extract_candidates.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +from pathlib import Path + +import ast +import re + +PROJECT_ROOT = Path(__file__).resolve().parents[1] +DEFAULT_EXCLUDE_PARTS = { + ".git", + ".venv", + "dashboard", + "docs", + "docs-src", + "locales", +} +HAN_PATTERN = re.compile(r"[\u4e00-\u9fff]") + + +def should_skip(path: Path) -> bool: + return any(part in DEFAULT_EXCLUDE_PARTS for part in path.parts) + + +def iter_python_files(root: Path) -> list[Path]: + return sorted( + path + for path in root.rglob("*.py") + if path.is_file() and not should_skip(path.relative_to(root)) + ) + + +class CandidateExtractor(ast.NodeVisitor): + def __init__(self) -> None: + self._docstring_nodes: set[ast.AST] = set() + self.candidates: list[tuple[int, str]] = [] + + def visit_Module(self, node: ast.Module) -> None: + self._mark_docstring_node(node) + self.generic_visit(node) + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + self._mark_docstring_node(node) + self.generic_visit(node) + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + self._mark_docstring_node(node) + self.generic_visit(node) + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._mark_docstring_node(node) + self.generic_visit(node) + + def visit_Constant(self, node: ast.Constant) -> None: + if node in self._docstring_nodes: + return + if isinstance(node.value, str) and HAN_PATTERN.search(node.value): + self.candidates.append((node.lineno, node.value.strip())) + self.generic_visit(node) + + def _mark_docstring_node(self, node: ast.Module | ast.ClassDef | ast.AsyncFunctionDef | ast.FunctionDef) -> None: + if not node.body: + return + first_stmt = node.body[0] + if isinstance(first_stmt, ast.Expr) and isinstance(first_stmt.value, ast.Constant): + if isinstance(first_stmt.value.value, str): + 
self._docstring_nodes.add(first_stmt.value) + + +def extract_candidates(file_path: Path) -> list[tuple[int, str]]: + source = file_path.read_text(encoding="utf-8") + tree = ast.parse(source, filename=str(file_path)) + extractor = CandidateExtractor() + extractor.visit(tree) + return extractor.candidates + + +def main() -> int: + for file_path in iter_python_files(PROJECT_ROOT): + for lineno, text in extract_candidates(file_path): + print(f"{file_path.relative_to(PROJECT_ROOT)}:{lineno}: {text}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/i18n_validate.py b/scripts/i18n_validate.py new file mode 100644 index 00000000..632ce12c --- /dev/null +++ b/scripts/i18n_validate.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +from pathlib import Path +from string import Formatter + +import sys + +PROJECT_ROOT = Path(__file__).resolve().parents[1] +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +from src.common.i18n.loaders import ( # noqa: E402 + DEFAULT_LOCALE, + TranslationValue, + discover_locales, + get_locales_root, + load_locale_catalog, +) + +FORMATTER = Formatter() + + +def extract_placeholders(template: str) -> set[str]: + placeholders: set[str] = set() + for _, field_name, _, _ in FORMATTER.parse(template): + if not field_name: + continue + placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0]) + return placeholders + + +def validate_translation_pair( + key: str, + source_value: TranslationValue, + target_value: TranslationValue, + locale: str, + errors: list[str], +) -> None: + if isinstance(source_value, str): + if not isinstance(target_value, str): + errors.append(f"[{locale}] key '{key}' 与 source 的类型不一致:source=string, target=plural") + return + if extract_placeholders(source_value) != extract_placeholders(target_value): + errors.append(f"[{locale}] key '{key}' 的占位符集合与 source 不一致") + return + + if not isinstance(target_value, dict): + 
errors.append(f"[{locale}] key '{key}' 与 source 的类型不一致:source=plural, target=string") + return + + source_categories = set(source_value.keys()) + target_categories = set(target_value.keys()) + if source_categories != target_categories: + errors.append( + f"[{locale}] key '{key}' 的 plural category 不一致:" + f"source={sorted(source_categories)}, target={sorted(target_categories)}" + ) + + for category in sorted(source_categories & target_categories): + source_placeholders = extract_placeholders(source_value[category]) + target_placeholders = extract_placeholders(target_value[category]) + if source_placeholders != target_placeholders: + errors.append(f"[{locale}] key '{key}' 的 plural category '{category}' 占位符集合与 source 不一致") + + +def validate_locales(locales_root: Path | None = None) -> list[str]: + resolved_locales_root = get_locales_root(locales_root) + locales = discover_locales(resolved_locales_root) + errors: list[str] = [] + + if DEFAULT_LOCALE not in locales: + errors.append(f"缺少默认 locale 目录: {DEFAULT_LOCALE}") + return errors + + catalogs: dict[str, dict[str, TranslationValue]] = {} + for locale in locales: + try: + catalogs[locale] = load_locale_catalog(locale, resolved_locales_root) + except Exception as exc: + errors.append(f"[{locale}] 加载失败: {exc}") + + source_catalog = catalogs.get(DEFAULT_LOCALE) + if source_catalog is None: + return errors + + source_keys = set(source_catalog.keys()) + for locale, catalog in catalogs.items(): + if locale == DEFAULT_LOCALE: + continue + + locale_keys = set(catalog.keys()) + missing_keys = sorted(source_keys - locale_keys) + extra_keys = sorted(locale_keys - source_keys) + + for key in missing_keys: + errors.append(f"[{locale}] 缺少 key: {key}") + for key in extra_keys: + errors.append(f"[{locale}] 存在多余 key: {key}") + + for key in sorted(source_keys & locale_keys): + validate_translation_pair(key, source_catalog[key], catalog[key], locale, errors) + + return errors + + +def main() -> int: + errors = validate_locales() + if 
errors: + print("i18n validation failed:") + for error in errors: + print(f" - {error}") + return 1 + + print("i18n validation passed.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/common/i18n/__init__.py b/src/common/i18n/__init__.py new file mode 100644 index 00000000..c909f628 --- /dev/null +++ b/src/common/i18n/__init__.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +from collections.abc import Iterator +from datetime import date, datetime +from decimal import Decimal + +from .loaders import DEFAULT_LOCALE + + +def _get_manager(): + from .manager import I18nManager + + manager = getattr(_get_manager, "_manager", None) + if manager is None: + manager = I18nManager() + _get_manager._manager = manager + return manager + + +def set_locale(locale: str) -> str: + return _get_manager().set_locale(locale) + + +def get_locale() -> str: + return _get_manager().get_locale() + + +def reload_translations(locale: str | None = None) -> None: + _get_manager().reload(locale) + + +def t(key: str, locale: str | None = None, **kwargs: object) -> str: + return _get_manager().t(key, locale=locale, **kwargs) + + +def tn(key: str, count: int | float, locale: str | None = None, **kwargs: object) -> str: + return _get_manager().tn(key, count=count, locale=locale, **kwargs) + + +def use_locale(locale: str) -> Iterator[None]: + return _get_manager().use_locale(locale) + + +def format_datetime_localized(value: datetime | date, locale: str | None = None, format: str = "medium") -> str: + from .formatting import format_datetime_localized as _format_datetime_localized + + return _format_datetime_localized(value, locale=locale or get_locale(), format=format) + + +def format_number_localized(value: int | float | Decimal, locale: str | None = None) -> str: + from .formatting import format_number_localized as _format_number_localized + + return _format_number_localized(value, locale=locale or get_locale()) + + +def 
format_decimal_localized(value: int | float | Decimal, locale: str | None = None) -> str: + from .formatting import format_decimal_localized as _format_decimal_localized + + return _format_decimal_localized(value, locale=locale or get_locale()) + + +__all__ = [ + "DEFAULT_LOCALE", + "format_datetime_localized", + "format_decimal_localized", + "format_number_localized", + "get_locale", + "reload_translations", + "set_locale", + "t", + "tn", + "use_locale", +] diff --git a/src/common/i18n/exceptions.py b/src/common/i18n/exceptions.py new file mode 100644 index 00000000..9d2d924a --- /dev/null +++ b/src/common/i18n/exceptions.py @@ -0,0 +1,18 @@ +class I18nError(Exception): + """国际化基础异常。""" + + +class InvalidLocaleError(I18nError): + """Locale 格式非法。""" + + +class LocaleNotFoundError(I18nError): + """未找到指定 locale 的翻译目录。""" + + +class InvalidTranslationFileError(I18nError): + """翻译文件结构非法。""" + + +class DuplicateTranslationKeyError(I18nError): + """同一 locale 下存在重复的翻译 key。""" diff --git a/src/common/i18n/formatting.py b/src/common/i18n/formatting.py new file mode 100644 index 00000000..71114f0c --- /dev/null +++ b/src/common/i18n/formatting.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from datetime import date, datetime, time +from decimal import Decimal +from string import Formatter + +from babel import Locale +from babel.dates import format_datetime as babel_format_datetime +from babel.numbers import format_decimal as babel_format_decimal + +from .loaders import DEFAULT_LOCALE, to_babel_locale + +FORMATTER = Formatter() + + +def extract_placeholders(template: str) -> set[str]: + placeholders: set[str] = set() + for _, field_name, _, _ in FORMATTER.parse(template): + if not field_name: + continue + placeholders.add(field_name.split(".", maxsplit=1)[0].split("[", maxsplit=1)[0]) + return placeholders + + +def format_template(template: str, **kwargs: object) -> str: + return template.format(**kwargs) + + +def select_plural_category(locale: str, count: int | 
def get_locales_root(locales_root: Path | None = None) -> Path:
    """Return the resolved directory that holds the per-locale folders.

    Args:
        locales_root: Explicit directory to use. When omitted, the ``locales``
            directory under the project root is used.

    Returns:
        The chosen directory as a resolved path.
    """
    chosen_root = get_project_root() / "locales" if locales_root is None else locales_root
    return chosen_root.resolve()
def load_translation_file(file_path: Path) -> dict[str, TranslationValue]:
    """Load and validate a single locale JSON file.

    Args:
        file_path: Path of a UTF-8 encoded JSON file whose top level is an
            object mapping translation keys to a string or a plural object.

    Returns:
        A mapping from translation key to its validated value.

    Raises:
        InvalidTranslationFileError: If the file is not valid UTF-8, is not
            valid JSON, its top level is not an object, a key is blank, or a
            value is rejected by ``validate_translation_value``.
        DuplicateTranslationKeyError: If a key is repeated inside one JSON
            object of the file.
    """

    def _build_object(pairs: list[tuple[str, object]]) -> dict[str, object]:
        # json.loads keeps only the last value of a repeated key, which would
        # silently drop a translation; reject the file instead.
        built: dict[str, object] = {}
        for pair_key, pair_value in pairs:
            if pair_key in built:
                raise DuplicateTranslationKeyError(f"{file_path} 中存在重复 key: '{pair_key}'")
            built[pair_key] = pair_value
        return built

    try:
        raw_text = file_path.read_text(encoding="utf-8")
    except UnicodeDecodeError as exc:
        # Report undecodable bytes through the package's own exception type
        # rather than leaking a bare UnicodeDecodeError to callers.
        raise InvalidTranslationFileError(f"{file_path} 不是合法的 UTF-8 文本: {exc}") from exc

    try:
        raw_payload = json.loads(raw_text, object_pairs_hook=_build_object)
    except json.JSONDecodeError as exc:
        raise InvalidTranslationFileError(f"{file_path} 不是合法 JSON: {exc}") from exc

    if not isinstance(raw_payload, dict):
        raise InvalidTranslationFileError(f"{file_path} 顶层必须是 JSON object")

    translations: dict[str, TranslationValue] = {}
    # Keys of a decoded JSON object are always strings, so only blank keys
    # need to be rejected here.
    for raw_key, raw_value in raw_payload.items():
        if not raw_key.strip():
            raise InvalidTranslationFileError(f"{file_path} 中存在空字符串 key")
        translations[raw_key] = validate_translation_value(raw_key, raw_value, file_path)
    return translations
def t(self, key: str, locale: str | None = None, **kwargs: object) -> str:
    """Translate ``key`` and interpolate ``kwargs`` into the template.

    Args:
        key: Translation key to look up.
        locale: Locale to translate into; ``None`` lets the lookup decide
            which locale applies.
        **kwargs: Values substituted into the template's placeholders.

    Returns:
        The formatted translation. The key itself is returned when no
        translation is found, or when the entry is a plural object that has
        no ``other`` form to use as the non-counted text.
    """
    translation_value, translation_locale = self._get_translation_value(key, locale)
    if translation_value is None:
        return key

    if isinstance(translation_value, dict):
        # A plural entry used without a count falls back to its "other" form.
        template = translation_value.get("other")
        if template is None:
            # De-duplicate the warning on the locale the entry was actually
            # resolved from, not on whatever the current default happens to be.
            self._log_once(
                ("plural_missing_other", translation_locale, key),
                logging.WARNING,
                "翻译 key '%s' 缺少 other plural category,已回退到 key 本身",
                key,
            )
            return key
    else:
        template = translation_value
    return self._format_translation(key, template, kwargs)
def _format_translation(self, key: str, template: str, kwargs: dict[str, object]) -> str:
    """Render ``template`` with ``kwargs``, never raising to the caller.

    Args:
        key: Translation key, used in the error log and as a last-resort
            return value.
        template: Format string to render.
        kwargs: Values passed to the template as keyword arguments.

    Returns:
        The rendered text. If rendering raises, the error is logged and the
        unformatted template is returned, or ``key`` when the template is
        empty.
    """
    try:
        rendered = format_template(template, **kwargs)
    except Exception as exc:
        logger.error("翻译 key '%s' 格式化失败: %s", key, exc)
        # An empty template has nothing to show, so fall back to the key.
        return template if template else key
    return rendered
def _get_catalog(self, locale: str) -> dict[str, TranslationValue]:
    """Return the translation catalog for ``locale``, loading it on first use.

    The lookup, load and cache update all happen while holding
    ``self._cache_lock``. A locale whose files cannot be loaded is logged
    once per failure type and cached as an empty catalog, so lookups fall
    through to the caller's missing-key handling instead of raising.

    Args:
        locale: Locale name; it is normalized before being used as cache key.

    Returns:
        The cached or freshly loaded key-to-translation mapping.

    Raises:
        InvalidLocaleError: If ``locale`` is rejected by ``normalize_locale``.
    """
    normalized_locale = normalize_locale(locale)
    with self._cache_lock:
        cached_catalog = self._catalog_cache.get(normalized_locale)
        if cached_catalog is not None:
            return cached_catalog

        try:
            catalog = load_locale_catalog(normalized_locale, self._locales_root)
        except (I18nError, OSError, UnicodeDecodeError) as exc:
            # Reading locale files can also fail outside the I18nError
            # hierarchy (unreadable file, undecodable bytes); degrade those
            # the same way rather than crashing every translation call.
            self._log_once(
                ("load_failed", normalized_locale, exc.__class__.__name__),
                logging.WARNING,
                "加载 locale '%s' 失败: %s",
                normalized_locale,
                exc,
            )
            catalog = {}

        self._catalog_cache[normalized_locale] = catalog
        return catalog