feat：合并timing和plan展示，回复频率控制

2026-04-07 20:26:07 +08:00
parent 297b1bf5e3
commit f058bc3189
12 changed files with 409 additions and 1108 deletions
--- a/src/learners/expression_selector.py
+++ b/src/learners/expression_selector.py
@@ -1,587 +0,0 @@
-from typing import Any, Dict, List, Optional, Tuple
-
-import json
-import time
-
-from json_repair import repair_json
-
-from src.chat.utils.common_utils import TempMethodsExpression
-from src.common.database.database_model import Expression
-from src.common.logger import get_logger
-from src.common.utils.utils_session import SessionUtils
-from src.config.config import global_config
-from src.learners.learner_utils_old import weighted_sample
-from src.prompt.prompt_manager import prompt_manager
-from src.services.llm_service import LLMServiceClient
-
-logger = get_logger("expression_selector")
-
-
-class ExpressionSelector:
-    def __init__(self) -> None:
-        """初始化表达方式选择器。"""
-
-        self.llm_model = LLMServiceClient(
-            task_name="utils", request_type="expression.selector"
-        )
-
-    @staticmethod
-    def _get_runtime_manager() -> Any:
-        """获取插件运行时管理器。
-
-        Returns:
-            Any: 插件运行时管理器单例。
-        """
-
-        from src.plugin_runtime.integration import get_plugin_runtime_manager
-
-        return get_plugin_runtime_manager()
-
-    @staticmethod
-    def _coerce_int(value: Any, default: int) -> int:
-        """将任意值安全转换为整数。
-
-        Args:
-            value: 待转换的值。
-            default: 转换失败时的默认值。
-
-        Returns:
-            int: 转换后的整数结果。
-        """
-
-        try:
-            return int(value)
-        except (TypeError, ValueError):
-            return default
-
-    @staticmethod
-    def _normalize_selected_expressions(raw_expressions: Any) -> List[Dict[str, Any]]:
-        """从 Hook 载荷恢复表达方式选择结果。
-
-        Args:
-            raw_expressions: Hook 返回的表达方式列表。
-
-        Returns:
-            List[Dict[str, Any]]: 恢复后的表达方式列表。
-        """
-
-        if not isinstance(raw_expressions, list):
-            return []
-
-        normalized_expressions: List[Dict[str, Any]] = []
-        for raw_expression in raw_expressions:
-            if not isinstance(raw_expression, dict):
-                continue
-            expression_id = raw_expression.get("id")
-            situation = str(raw_expression.get("situation") or "").strip()
-            style = str(raw_expression.get("style") or "").strip()
-            source_id = str(raw_expression.get("source_id") or "").strip()
-            if not isinstance(expression_id, int) or not situation or not style or not source_id:
-                continue
-            normalized_expression = dict(raw_expression)
-            normalized_expression["id"] = expression_id
-            normalized_expression["situation"] = situation
-            normalized_expression["style"] = style
-            normalized_expression["source_id"] = source_id
-            normalized_expressions.append(normalized_expression)
-        return normalized_expressions
-
-    @staticmethod
-    def _normalize_selected_expression_ids(raw_ids: Any, expressions: List[Dict[str, Any]]) -> List[int]:
-        """规范化最终选中的表达方式 ID 列表。
-
-        Args:
-            raw_ids: Hook 返回的 ID 列表。
-            expressions: 当前最终表达方式列表。
-
-        Returns:
-            List[int]: 规范化后的 ID 列表。
-        """
-
-        if isinstance(raw_ids, list):
-            normalized_ids = [item for item in raw_ids if isinstance(item, int)]
-            if normalized_ids:
-                return normalized_ids
-        return [expression["id"] for expression in expressions if isinstance(expression.get("id"), int)]
-
-    def can_use_expression_for_chat(self, chat_id: str) -> bool:
-        """
-        检查指定聊天流是否允许使用表达
-
-        Args:
-            chat_id: 聊天流ID
-
-        Returns:
-            bool: 是否允许使用表达
-        """
-        try:
-            use_expression, _, _ = TempMethodsExpression.get_expression_config_for_chat(chat_id)
-            return use_expression
-        except Exception as e:
-            logger.error(f"检查表达使用权限失败: {e}")
-            return False
-
-    @staticmethod
-    def _parse_stream_config_to_chat_id(stream_config_str: str) -> Optional[str]:
-        """解析'platform:id:type'为chat_id，直接使用 ChatManager 提供的接口"""
-        try:
-            parts = stream_config_str.split(":")
-            if len(parts) != 3:
-                return None
-            platform = parts[0]
-            id_str = parts[1]
-            stream_type = parts[2]
-            is_group = stream_type == "group"
-            return SessionUtils.calculate_session_id(
-                platform, group_id=str(id_str) if is_group else None, user_id=None if is_group else str(id_str)
-            )
-        except Exception:
-            return None
-
-    def get_related_chat_ids(self, chat_id: str) -> List[str]:
-        """根据expression_groups配置，获取与当前chat_id相关的所有chat_id（包括自身）"""
-        groups = global_config.expression.expression_groups
-
-        # 检查是否存在全局共享组（包含"*"的组）
-        global_group_exists = any("*" in group for group in groups)
-
-        if global_group_exists:
-            # 如果存在全局共享组，则返回所有可用的chat_id
-            all_chat_ids = set()
-            for group in groups:
-                for stream_config_str in group:
-                    if chat_id_candidate := self._parse_stream_config_to_chat_id(stream_config_str):
-                        all_chat_ids.add(chat_id_candidate)
-            return list(all_chat_ids) if all_chat_ids else [chat_id]
-
-        # 否则使用现有的组逻辑
-        for group in groups:
-            group_chat_ids = []
-            for stream_config_str in group:
-                if chat_id_candidate := self._parse_stream_config_to_chat_id(stream_config_str):
-                    group_chat_ids.append(chat_id_candidate)
-            if chat_id in group_chat_ids:
-                return group_chat_ids
-        return [chat_id]
-
-    def _select_expressions_simple(self, chat_id: str, max_num: int) -> Tuple[List[Dict[str, Any]], List[int]]:
-        """
-        简单模式：只选择 count > 1 的项目，要求至少有10个才进行选择，随机选5个，不进行LLM选择
-
-        Args:
-            chat_id: 聊天流ID
-            max_num: 最大选择数量（此参数在此模式下不使用，固定选择5个）
-
-        Returns:
-            Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
-        """
-        try:
-            # 支持多chat_id合并抽选
-            related_chat_ids = self.get_related_chat_ids(chat_id)
-
-            # 查询所有相关chat_id的表达方式，排除 rejected=1 的，且只选择 count > 1 的
-            # 如果 expression_checked_only 为 True，则只选择 checked=True 且 rejected=False 的
-            base_conditions = (
-                (Expression.chat_id.in_(related_chat_ids)) & (~Expression.rejected) & (Expression.count > 1)
-            )
-            if global_config.expression.expression_checked_only:
-                base_conditions = base_conditions & (Expression.checked)
-            style_query = Expression.select().where(base_conditions)
-
-            style_exprs = [
-                {
-                    "id": expr.id,
-                    "situation": expr.situation,
-                    "style": expr.style,
-                    "last_active_time": expr.last_active_time,
-                    "source_id": expr.chat_id,
-                    "create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
-                    "count": expr.count if getattr(expr, "count", None) is not None else 1,
-                    "checked": expr.checked if getattr(expr, "checked", None) is not None else False,
-                }
-                for expr in style_query
-            ]
-
-            # 要求至少有一定数量的 count > 1 的表达方式才进行“完整简单模式”选择
-            min_required = 8
-            if len(style_exprs) < min_required:
-                # 高 count 样本不足：如果还有候选，就降级为随机选 3 个；如果一个都没有，则直接返回空
-                if not style_exprs:
-                    logger.info(f"聊天流 {chat_id} 没有满足 count > 1 且未被拒绝的表达方式，简单模式不进行选择")
-                    # 完全没有高 count 样本时，退化为全量随机抽样（不进入LLM流程）
-                    fallback_num = min(3, max_num) if max_num > 0 else 3
-                    if fallback_selected := self._random_expressions(chat_id, fallback_num):
-                        self.update_expressions_last_active_time(fallback_selected)
-                        selected_ids = [expr["id"] for expr in fallback_selected]
-                        logger.info(
-                            f"聊天流 {chat_id} 使用简单模式降级随机抽选 {len(fallback_selected)} 个表达（无 count>1 样本）"
-                        )
-                        return fallback_selected, selected_ids
-                    return [], []
-                logger.info(
-                    f"聊天流 {chat_id} count > 1 的表达方式不足 {min_required} 个（实际 {len(style_exprs)} 个），"
-                    f"简单模式降级为随机选择 3 个"
-                )
-                select_count = min(3, len(style_exprs))
-            else:
-                # 高 count 数量达标时，固定选择 5 个
-                select_count = 5
-            import random
-
-            selected_style = random.sample(style_exprs, select_count)
-
-            # 更新last_active_time
-            if selected_style:
-                self.update_expressions_last_active_time(selected_style)
-
-            selected_ids = [expr["id"] for expr in selected_style]
-            logger.debug(
-                f"think_level=0: 从 {len(style_exprs)} 个 count>1 的表达方式中随机选择了 {len(selected_style)} 个"
-            )
-            return selected_style, selected_ids
-
-        except Exception as e:
-            logger.error(f"简单模式选择表达方式失败: {e}")
-            return [], []
-
-    def _random_expressions(self, chat_id: str, total_num: int) -> List[Dict[str, Any]]:
-        """
-        随机选择表达方式
-
-        Args:
-            chat_id: 聊天室ID
-            total_num: 需要选择的数量
-
-        Returns:
-            List[Dict[str, Any]]: 随机选择的表达方式列表
-        """
-        try:
-            # 支持多chat_id合并抽选
-            related_chat_ids = self.get_related_chat_ids(chat_id)
-
-            # 优化：一次性查询所有相关chat_id的表达方式，排除 rejected=1 的表达
-            # 如果 expression_checked_only 为 True，则只选择 checked=True 且 rejected=False 的
-            base_conditions = (Expression.chat_id.in_(related_chat_ids)) & (~Expression.rejected)
-            if global_config.expression.expression_checked_only:
-                base_conditions = base_conditions & (Expression.checked)
-            style_query = Expression.select().where(base_conditions)
-
-            style_exprs = [
-                {
-                    "id": expr.id,
-                    "situation": expr.situation,
-                    "style": expr.style,
-                    "last_active_time": expr.last_active_time,
-                    "source_id": expr.chat_id,
-                    "create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
-                    "count": expr.count if getattr(expr, "count", None) is not None else 1,
-                    "checked": expr.checked if getattr(expr, "checked", None) is not None else False,
-                }
-                for expr in style_query
-            ]
-
-            # 随机抽样
-            return weighted_sample(style_exprs, total_num) if style_exprs else []
-
-        except Exception as e:
-            logger.error(f"随机选择表达方式失败: {e}")
-            return []
-
-    async def select_suitable_expressions(
-        self,
-        chat_id: str,
-        chat_info: str,
-        max_num: int = 10,
-        target_message: Optional[str] = None,
-        reply_reason: Optional[str] = None,
-        think_level: int = 1,
-    ) -> Tuple[List[Dict[str, Any]], List[int]]:
-        """选择适合的表达方式。
-
-        Args:
-            chat_id: 聊天流ID
-            chat_info: 聊天内容信息
-            max_num: 最大选择数量
-            target_message: 目标消息内容
-            reply_reason: planner给出的回复理由
-            think_level: 思考级别，0/1
-
-        Returns:
-            Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
-        """
-        # 检查是否允许在此聊天流中使用表达
-        if not self.can_use_expression_for_chat(chat_id):
-            logger.debug(f"聊天流 {chat_id} 不允许使用表达，返回空列表")
-            return [], []
-
-        before_select_result = await self._get_runtime_manager().invoke_hook(
-            "expression.select.before_select",
-            chat_id=chat_id,
-            chat_info=chat_info,
-            max_num=max_num,
-            target_message=target_message or "",
-            reply_reason=reply_reason or "",
-            think_level=think_level,
-        )
-        if before_select_result.aborted:
-            logger.info(f"聊天流 {chat_id} 的表达方式选择被 Hook 中止")
-            return [], []
-
-        before_select_kwargs = before_select_result.kwargs
-        chat_id = str(before_select_kwargs.get("chat_id", chat_id) or "").strip() or chat_id
-        chat_info = str(before_select_kwargs.get("chat_info", chat_info) or "")
-        max_num = max(self._coerce_int(before_select_kwargs.get("max_num"), max_num), 1)
-        raw_target_message = before_select_kwargs.get("target_message", target_message or "")
-        target_message = str(raw_target_message or "").strip() or None
-        raw_reply_reason = before_select_kwargs.get("reply_reason", reply_reason or "")
-        reply_reason = str(raw_reply_reason or "").strip() or None
-        think_level = self._coerce_int(before_select_kwargs.get("think_level"), think_level)
-
-        # 使用classic模式（随机选择+LLM选择）
-        logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式，think_level={think_level}")
-        selected_expressions, selected_ids = await self._select_expressions_classic(
-            chat_id, chat_info, max_num, target_message, reply_reason, think_level
-        )
-        after_selection_result = await self._get_runtime_manager().invoke_hook(
-            "expression.select.after_selection",
-            chat_id=chat_id,
-            chat_info=chat_info,
-            max_num=max_num,
-            target_message=target_message or "",
-            reply_reason=reply_reason or "",
-            think_level=think_level,
-            selected_expressions=[dict(item) for item in selected_expressions],
-            selected_expression_ids=list(selected_ids),
-        )
-        if after_selection_result.aborted:
-            logger.info(f"聊天流 {chat_id} 的表达方式选择结果被 Hook 中止")
-            return [], []
-
-        after_selection_kwargs = after_selection_result.kwargs
-        raw_selected_expressions = after_selection_kwargs.get("selected_expressions")
-        if raw_selected_expressions is not None:
-            selected_expressions = self._normalize_selected_expressions(raw_selected_expressions)
-        selected_ids = self._normalize_selected_expression_ids(
-            after_selection_kwargs.get("selected_expression_ids"),
-            selected_expressions,
-        )
-        if selected_expressions:
-            self.update_expressions_last_active_time(selected_expressions)
-        return selected_expressions, selected_ids
-
-    async def _select_expressions_classic(
-        self,
-        chat_id: str,
-        chat_info: str,
-        max_num: int = 10,
-        target_message: Optional[str] = None,
-        reply_reason: Optional[str] = None,
-        think_level: int = 1,
-    ) -> Tuple[List[Dict[str, Any]], List[int]]:
-        """
-        classic模式：随机选择+LLM选择
-
-        Args:
-            chat_id: 聊天流ID
-            chat_info: 聊天内容信息
-            max_num: 最大选择数量
-            target_message: 目标消息内容
-            reply_reason: planner给出的回复理由
-            think_level: 思考级别，0/1
-
-        Returns:
-            Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
-        """
-        try:
-            # think_level == 0: 只选择 count > 1 的项目，随机选10个，不进行LLM选择
-            if think_level == 0:
-                return self._select_expressions_simple(chat_id, max_num)
-
-            # think_level == 1: 先选高count，再从所有表达方式中随机抽样
-            # 1. 获取所有表达方式并分离 count > 1 和 count <= 1 的
-            related_chat_ids = self.get_related_chat_ids(chat_id)
-            # 如果 expression_checked_only 为 True，则只选择 checked=True 且 rejected=False 的
-            base_conditions = (Expression.chat_id.in_(related_chat_ids)) & (~Expression.rejected)
-            if global_config.expression.expression_checked_only:
-                base_conditions = base_conditions & (Expression.checked)
-            style_query = Expression.select().where(base_conditions)
-
-            all_style_exprs = [
-                {
-                    "id": expr.id,
-                    "situation": expr.situation,
-                    "style": expr.style,
-                    "last_active_time": expr.last_active_time,
-                    "source_id": expr.chat_id,
-                    "create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
-                    "count": expr.count if getattr(expr, "count", None) is not None else 1,
-                    "checked": expr.checked if getattr(expr, "checked", None) is not None else False,
-                }
-                for expr in style_query
-            ]
-
-            # 分离 count > 1 和 count <= 1 的表达方式
-            high_count_exprs = [expr for expr in all_style_exprs if (expr.get("count", 1) or 1) > 1]
-
-            # 根据 think_level 设置要求（仅支持 0/1，0 已在上方返回）
-            min_high_count = 10
-            min_total_count = 10
-            select_high_count = 5
-            select_random_count = 5
-
-            # 检查数量要求
-            # 对于高 count 表达：如果数量不足，不再直接停止，而是仅跳过“高 count 优先选择”
-            if len(high_count_exprs) < min_high_count:
-                logger.info(
-                    f"聊天流 {chat_id} count > 1 的表达方式不足 {min_high_count} 个（实际 {len(high_count_exprs)} 个），"
-                    f"将跳过高 count 优先选择，仅从全部表达中随机抽样"
-                )
-                high_count_valid = False
-            else:
-                high_count_valid = True
-
-            # 总量不足仍然直接返回，避免样本过少导致选择质量过低
-            if len(all_style_exprs) < min_total_count:
-                logger.info(
-                    f"聊天流 {chat_id} 总表达方式不足 {min_total_count} 个（实际 {len(all_style_exprs)} 个），不进行选择"
-                )
-                return [], []
-
-            # 先选取高count的表达方式（如果数量达标）
-            if high_count_valid:
-                selected_high = weighted_sample(high_count_exprs, min(len(high_count_exprs), select_high_count))
-            else:
-                selected_high = []
-
-            # 然后从所有表达方式中随机抽样（使用加权抽样）
-            remaining_num = select_random_count
-            selected_random = weighted_sample(all_style_exprs, min(len(all_style_exprs), remaining_num))
-
-            # 合并候选池（去重，避免重复）
-            candidate_exprs = selected_high.copy()
-            candidate_ids = {expr["id"] for expr in candidate_exprs}
-            for expr in selected_random:
-                if expr["id"] not in candidate_ids:
-                    candidate_exprs.append(expr)
-                    candidate_ids.add(expr["id"])
-
-            # 打乱顺序，避免高count的都在前面
-            import random
-
-            random.shuffle(candidate_exprs)
-
-            # 2. 构建所有表达方式的索引和情境列表
-            all_expressions: List[Dict[str, Any]] = []
-            all_situations: List[str] = []
-
-            # 添加style表达方式
-            for expr in candidate_exprs:
-                expr = expr.copy()
-                all_expressions.append(expr)
-                all_situations.append(f"{len(all_expressions)}.当 {expr['situation']} 时，使用 {expr['style']}")
-
-            if not all_expressions:
-                logger.warning("没有找到可用的表达方式")
-                return [], []
-
-            all_situations_str = "\n".join(all_situations)
-
-            if target_message:
-                target_message_str = f'，现在你想要对这条消息进行回复："{target_message}"'
-                target_message_extra_block = "4.考虑你要回复的目标消息"
-            else:
-                target_message_str = ""
-                target_message_extra_block = ""
-
-            chat_context = f"以下是正在进行的聊天内容：{chat_info}"
-
-            # 构建reply_reason块
-            if reply_reason:
-                reply_reason_block = f"你的回复理由是：{reply_reason}"
-                chat_context = ""
-            else:
-                reply_reason_block = ""
-
-            # 3. 构建prompt（只包含情境，不包含完整的表达方式）
-            prompt_template = prompt_manager.get_prompt("expression_select")
-            prompt_template.add_context("bot_name", global_config.bot.nickname)
-            prompt_template.add_context("chat_observe_info", chat_context)
-            prompt_template.add_context("all_situations", all_situations_str)
-            prompt_template.add_context("max_num", str(max_num))
-            prompt_template.add_context("target_message", target_message_str)
-            prompt_template.add_context("target_message_extra_block", target_message_extra_block)
-            prompt_template.add_context("reply_reason_block", reply_reason_block)
-            prompt = await prompt_manager.render_prompt(prompt_template)
-
-            # 4. 调用LLM
-            generation_result = await self.llm_model.generate_response(prompt=prompt)
-            content = generation_result.response
-            # print(prompt)
-            # print(content)
-
-            if not content:
-                logger.warning("LLM返回空结果")
-                return [], []
-
-            # 5. 解析结果
-            result = repair_json(content)
-            if isinstance(result, str):
-                result = json.loads(result)
-
-            if not isinstance(result, dict) or "selected_situations" not in result:
-                logger.error("LLM返回格式错误")
-                logger.info(f"LLM返回结果: \n{content}")
-                return [], []
-
-            selected_indices = result["selected_situations"]
-
-            # 根据索引获取完整的表达方式
-            valid_expressions: List[Dict[str, Any]] = []
-            selected_ids = []
-            for idx in selected_indices:
-                if isinstance(idx, int) and 1 <= idx <= len(all_expressions):
-                    expression = all_expressions[idx - 1]  # 索引从1开始
-                    selected_ids.append(expression["id"])
-                    valid_expressions.append(expression)
-
-            # 对选中的所有表达方式，更新last_active_time
-            if valid_expressions:
-                self.update_expressions_last_active_time(valid_expressions)
-
-            logger.debug(f"从{len(all_expressions)}个情境中选择了{len(valid_expressions)}个")
-            return valid_expressions, selected_ids
-
-        except Exception as e:
-            logger.error(f"classic模式处理表达方式选择时出错: {e}")
-            return [], []
-
-    def update_expressions_last_active_time(self, expressions_to_update: List[Dict[str, Any]]):
-        """对一批表达方式更新last_active_time"""
-        if not expressions_to_update:
-            return
-        updates_by_key = {}
-        for expr in expressions_to_update:
-            source_id: str = expr.get("source_id")  # type: ignore
-            situation: str = expr.get("situation")  # type: ignore
-            style: str = expr.get("style")  # type: ignore
-            if not source_id or not situation or not style:
-                logger.warning(f"表达方式缺少必要字段，无法更新: {expr}")
-                continue
-            key = (source_id, situation, style)
-            if key not in updates_by_key:
-                updates_by_key[key] = expr
-        for chat_id, situation, style in updates_by_key:
-            query = Expression.select().where(
-                (Expression.chat_id == chat_id) & (Expression.situation == situation) & (Expression.style == style)
-            )
-            if query.exists():
-                expr_obj = query.get()
-                expr_obj.last_active_time = time.time()
-                expr_obj.save()
-                logger.debug("表达方式激活: 更新last_active_time in db")
-
-
-try:
-    expression_selector = ExpressionSelector()
-except Exception as e:
-    logger.error(f"ExpressionSelector初始化失败: {e}")
--- a/src/learners/learner_utils.py
+++ b/src/learners/learner_utils.py
@@ -1,134 +0,0 @@
-from json_repair import repair_json
-from typing import List, Tuple
-
-import re
-import json
-
-from src.common.logger import get_logger
-
-logger = get_logger("learner_utils")
-
-
-def fix_chinese_quotes_in_json(text):
-    """使用状态机修复 JSON 字符串值中的中文引号"""
-    result = []
-    i = 0
-    in_string = False
-    escape_next = False
-
-    while i < len(text):
-        char = text[i]
-        if escape_next:
-            # 当前字符是转义字符后的字符，直接添加
-            result.append(char)
-            escape_next = False
-            i += 1
-            continue
-        if char == "\\":
-            # 转义字符
-            result.append(char)
-            escape_next = True
-            i += 1
-            continue
-        if char == '"' and not escape_next:
-            # 遇到英文引号，切换字符串状态
-            in_string = not in_string
-            result.append(char)
-            i += 1
-            continue
-        if in_string and char in ["“", "”"]:
-            result.append('\\"')
-        else:
-            result.append(char)
-        i += 1
-
-    return "".join(result)
-
-
-def parse_expression_response(response: str) -> Tuple[List[Tuple[str, str, str]], List[Tuple[str, str]]]:
-    """
-    解析 LLM 返回的表达风格总结和黑话 JSON，提取两个列表。
-
-    期望的 JSON 结构：
-    [
-        {"situation": "AAAAA", "style": "BBBBB", "source_id": "3"},  // 表达方式
-        {"content": "词条", "source_id": "12"},  // 黑话
-        ...
-    ]
-
-    Returns:
-        Tuple[List[Tuple[str, str, str]], List[Tuple[str, str]]]:
-            第一个列表是表达方式 (situation, style, source_id)
-            第二个列表是黑话 (content, source_id)
-    """
-    if not response:
-        return [], []
-
-    raw = response.strip()
-
-    if match := re.search(r"```json\s*(.*?)\s*```", raw, re.DOTALL):
-        raw = match[1].strip()
-    else:
-        # 去掉可能存在的通用 ``` 包裹
-        raw = re.sub(r"^```\s*", "", raw, flags=re.MULTILINE)
-        raw = re.sub(r"```\s*$", "", raw, flags=re.MULTILINE)
-        raw = raw.strip()
-
-    parsed = None
-    expressions: List[Tuple[str, str, str]] = []  # (situation, style, source_id)
-    jargon_entries: List[Tuple[str, str]] = []  # (content, source_id)
-
-    try:
-        # 优先尝试直接解析
-        if raw.startswith("[") and raw.endswith("]"):
-            parsed = json.loads(raw)
-        else:
-            repaired = repair_json(raw)
-            parsed = json.loads(repaired) if isinstance(repaired, str) else repaired
-    except Exception as parse_error:
-        # 如果解析失败，尝试修复中文引号问题
-        # 使用状态机方法，在 JSON 字符串值内部将中文引号替换为转义的英文引号
-        try:
-            fixed_raw = fix_chinese_quotes_in_json(raw)
-
-            # 再次尝试解析
-            if fixed_raw.startswith("[") and fixed_raw.endswith("]"):
-                parsed = json.loads(fixed_raw)
-            else:
-                repaired = repair_json(fixed_raw)
-                parsed = json.loads(repaired) if isinstance(repaired, str) else repaired
-        except Exception as fix_error:
-            logger.error(f"解析表达风格 JSON 失败，初始错误: {type(parse_error).__name__}: {str(parse_error)}")
-            logger.error(f"修复中文引号后仍失败，错误: {type(fix_error).__name__}: {str(fix_error)}")
-            logger.error(f"解析表达风格 JSON 失败，原始响应：{response}")
-            logger.error(f"处理后的 JSON 字符串（前500字符）：{raw[:500]}")
-            return [], []
-
-    if isinstance(parsed, dict):
-        parsed_list = [parsed]
-    elif isinstance(parsed, list):
-        parsed_list = parsed
-    else:
-        logger.error(f"表达风格解析结果类型异常: {type(parsed)}, 内容: {parsed}")
-        return [], []
-
-    for item in parsed_list:
-        if not isinstance(item, dict):
-            continue
-
-        # 检查是否是表达方式条目（有 situation 和 style）
-        situation = str(item.get("situation", "")).strip()
-        style = str(item.get("style", "")).strip()
-        source_id = str(item.get("source_id", "")).strip()
-
-        if situation and style and source_id:
-            # 表达方式条目
-            expressions.append((situation, style, source_id))
-        elif item.get("content"):
-            # 黑话条目（有 content 字段）
-            content = str(item.get("content", "")).strip()
-            source_id = str(item.get("source_id", "")).strip()
-            if content and source_id:
-                jargon_entries.append((content, source_id))
-
-    return expressions, jargon_entries