feat:表达方式更新，现在会训练朴素贝叶斯模型来预测使用什么表达

2025-10-11 02:03:03 +08:00
parent 400296ade1
commit 958d6e04ee
20 changed files with 2372 additions and 443 deletions
--- a/src/express/expression_learner.py
+++ b/src/express/expression_learner.py
@@ -0,0 +1,577 @@
+import time
+import random
+import json
+import os
+import re
+from datetime import datetime
+import jieba
+from typing import List, Dict, Optional, Any, Tuple
+import traceback
+import difflib
+from src.common.logger import get_logger
+from src.common.database.database_model import Expression
+from src.llm_models.utils_model import LLMRequest
+from src.config.config import model_config, global_config
+from src.chat.utils.chat_message_builder import (
+    get_raw_msg_by_timestamp_with_chat_inclusive,
+    build_anonymous_messages,
+    build_bare_messages,
+)
+from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
+from src.chat.message_receive.chat_stream import get_chat_manager
+from src.express.style_learner import style_learner_manager
+from json_repair import repair_json
+
+
+# MAX_EXPRESSION_COUNT = 300
+
+logger = get_logger("expressor")
+
+
+def calculate_similarity(text1: str, text2: str) -> float:
+    """
+    计算两个文本的相似度，返回0-1之间的值
+    使用SequenceMatcher计算相似度
+    """
+    return difflib.SequenceMatcher(None, text1, text2).ratio()
+
+
+def format_create_date(timestamp: float) -> str:
+    """
+    将时间戳格式化为可读的日期字符串
+    """
+    try:
+        return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")
+    except (ValueError, OSError):
+        return "未知时间"
+
+
+def init_prompt() -> None:
+    learn_style_prompt = """
+{chat_str}
+
+请从上面这段群聊中概括除了人名为"SELF"之外的人的语言风格
+1. 只考虑文字，不要考虑表情包和图片
+2. 不要涉及具体的人名，但是可以涉及具体名词
+3. 思考有没有特殊的梗，一并总结成语言风格
+4. 例子仅供参考，请严格根据群聊内容总结!!!
+注意：总结成如下格式的规律，总结的内容要详细，但具有概括性：
+例如：当"AAAAA"时，可以"BBBBB", AAAAA代表某个具体的场景，不超过20个字。BBBBB代表对应的语言风格，特定句式或表达方式，不超过20个字。
+
+例如：
+当"对某件事表示十分惊叹"时，使用"我嘞个xxxx"
+当"表示讽刺的赞同，不讲道理"时，使用"对对对"
+当"想说明某个具体的事实观点，但懒得明说，使用"懂的都懂"
+当"当涉及游戏相关时，夸赞，略带戏谑意味"时，使用"这么强！"
+
+请注意：不要总结你自己（SELF）的发言，尽量保证总结内容的逻辑性
+现在请你概括
+"""
+    Prompt(learn_style_prompt, "learn_style_prompt")
+
+    match_expression_context_prompt = """
+**聊天内容**
+{chat_str}
+
+**从聊天内容总结的表达方式pairs**
+{expression_pairs}
+
+请你为上面的每一条表达方式，找到该表达方式的原文句子，并输出匹配结果，expression_pair不能有重复，每个expression_pair仅输出一个最合适的context。
+如果找不到原句，就不输出该句的匹配结果。
+以json格式输出：
+格式如下：
+{{
+    "expression_pair": "表达方式pair的序号（数字）",
+    "context": "与表达方式对应的原文句子的原始内容，不要修改原文句子的内容",
+}}，
+{{
+    "expression_pair": "表达方式pair的序号（数字）",
+    "context": "与表达方式对应的原文句子的原始内容，不要修改原文句子的内容",
+}}，
+...
+
+现在请你输出匹配结果：
+"""
+    Prompt(match_expression_context_prompt, "match_expression_context_prompt")
+
+
+class ExpressionLearner:
+    def __init__(self, chat_id: str) -> None:
+        self.express_learn_model: LLMRequest = LLMRequest(
+            model_set=model_config.model_task_config.utils, request_type="expression.learner"
+        )
+        self.embedding_model: LLMRequest = LLMRequest(
+            model_set=model_config.model_task_config.embedding, request_type="expression.embedding"
+        )
+        self.chat_id = chat_id
+        self.chat_stream = get_chat_manager().get_stream(chat_id)
+        self.chat_name = get_chat_manager().get_stream_name(chat_id) or chat_id
+
+        # 维护每个chat的上次学习时间
+        self.last_learning_time: float = time.time()
+
+        # 学习参数
+        _, self.enable_learning, self.learning_intensity = global_config.expression.get_expression_config_for_chat(
+            self.chat_id
+        )
+        self.min_messages_for_learning = 15 / self.learning_intensity  # 触发学习所需的最少消息数
+        self.min_learning_interval = 150 / self.learning_intensity
+
+    def should_trigger_learning(self) -> bool:
+        """
+        检查是否应该触发学习
+
+        Args:
+            chat_id: 聊天流ID
+
+        Returns:
+            bool: 是否应该触发学习
+        """
+        # 检查是否允许学习
+        if not self.enable_learning:
+            return False
+
+        # 检查时间间隔
+        time_diff = time.time() - self.last_learning_time
+        if time_diff < self.min_learning_interval:
+            return False
+
+        # 检查消息数量（只检查指定聊天流的消息）
+        recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
+            chat_id=self.chat_id,
+            timestamp_start=self.last_learning_time,
+            timestamp_end=time.time(),
+        )
+
+        if not recent_messages or len(recent_messages) < self.min_messages_for_learning:
+            return False
+
+        return True
+
+    async def trigger_learning_for_chat(self) -> bool:
+        """
+        为指定聊天流触发学习
+
+        Args:
+            chat_id: 聊天流ID
+
+        Returns:
+            bool: 是否成功触发学习
+        """
+        if not self.should_trigger_learning():
+            return False
+
+        try:
+            logger.info(f"为聊天流 {self.chat_name} 触发表达学习")
+
+            # 学习语言风格
+            learnt_style = await self.learn_and_store(num=25)
+
+            # 更新学习时间
+            self.last_learning_time = time.time()
+
+            if learnt_style:
+                logger.info(f"聊天流 {self.chat_name} 表达学习完成")
+                return True
+            else:
+                logger.warning(f"聊天流 {self.chat_name} 表达学习未获得有效结果")
+                return False
+
+        except Exception as e:
+            logger.error(f"为聊天流 {self.chat_name} 触发学习失败: {e}")
+            traceback.print_exc()
+            return False
+
+
+
+    async def learn_and_store(self, num: int = 10) -> List[Tuple[str, str, str]]:
+        """
+        学习并存储表达方式
+        """
+        res = await self.learn_expression(num)
+
+        if res is None:
+            logger.info("没有学习到表达风格")
+            return []
+        learnt_expressions = res
+        learnt_expressions_str = ""
+        for (
+            _chat_id,
+            situation,
+            style,
+            _context,
+            _up_content,
+        ) in learnt_expressions:
+            learnt_expressions_str += f"{situation}->{style}\n"
+
+        logger.info(f"在 {self.chat_name} 学习到表达风格:\n{learnt_expressions_str}")
+
+        # 按chat_id分组
+        chat_dict: Dict[str, List[Dict[str, Any]]] = {}
+        for (
+            chat_id,
+            situation,
+            style,
+            context,
+            up_content,
+        ) in learnt_expressions:
+            if chat_id not in chat_dict:
+                chat_dict[chat_id] = []
+            chat_dict[chat_id].append(
+                {
+                    "situation": situation,
+                    "style": style,
+                    "context": context,
+                    "up_content": up_content,
+                }
+            )
+
+        current_time = time.time()
+
+        # 存储到数据库 Expression 表并训练 style_learner
+        trained_chat_ids = set()  # 记录已训练的聊天室
+        
+        for chat_id, expr_list in chat_dict.items():
+            for new_expr in expr_list:
+                # 查找是否已存在相似表达方式
+                query = Expression.select().where(
+                    (Expression.chat_id == chat_id)
+                    & (Expression.type == "style")
+                    & (Expression.situation == new_expr["situation"])
+                    & (Expression.style == new_expr["style"])
+                )
+                if query.exists():
+                    expr_obj = query.get()
+                    # 50%概率替换内容
+                    if random.random() < 0.5:
+                        expr_obj.situation = new_expr["situation"]
+                        expr_obj.style = new_expr["style"]
+                        expr_obj.context = new_expr["context"]
+                        expr_obj.up_content = new_expr["up_content"]
+                    expr_obj.last_active_time = current_time
+                    expr_obj.save()
+                else:
+                    Expression.create(
+                        situation=new_expr["situation"],
+                        style=new_expr["style"],
+                        last_active_time=current_time,
+                        chat_id=chat_id,
+                        type="style",
+                        create_date=current_time,  # 手动设置创建日期
+                        context=new_expr["context"],
+                        up_content=new_expr["up_content"],
+                    )
+                
+                # 训练 style_learner（up_content 和 style 必定存在）
+                try:
+                    # 获取 learner 实例
+                    learner = style_learner_manager.get_learner(chat_id)
+                    
+                    # 先添加风格和对应的 situation（如果存在）
+                    if new_expr.get("situation"):
+                        learner.add_style(new_expr["style"], new_expr["situation"])
+                    else:
+                        learner.add_style(new_expr["style"])
+                    
+                    # 学习映射关系
+                    success = style_learner_manager.learn_mapping(
+                        chat_id, 
+                        new_expr["up_content"], 
+                        new_expr["style"]
+                    )
+                    if success:
+                        logger.debug(f"StyleLearner学习成功: {chat_id} - {new_expr['up_content']} -> {new_expr['style']}" + 
+                                   (f" (situation: {new_expr['situation']})" if new_expr.get("situation") else ""))
+                        trained_chat_ids.add(chat_id)
+                    else:
+                        logger.warning(f"StyleLearner学习失败: {chat_id} - {new_expr['up_content']} -> {new_expr['style']}")
+                except Exception as e:
+                    logger.error(f"StyleLearner学习异常: {chat_id} - {e}")
+            
+            # 限制最大数量
+            # exprs = list(
+            #     Expression.select()
+            #     .where((Expression.chat_id == chat_id) & (Expression.type == "style"))
+            #     .order_by(Expression.last_active_time.asc())
+            # )
+            # if len(exprs) > MAX_EXPRESSION_COUNT:
+                # 删除最久未活跃的多余表达方式
+                # for expr in exprs[: len(exprs) - MAX_EXPRESSION_COUNT]:
+                    # expr.delete_instance()
+        
+        # 保存训练好的 style_learner 模型
+        if trained_chat_ids:
+            try:
+                logger.info(f"开始保存 {len(trained_chat_ids)} 个聊天室的 StyleLearner 模型...")
+                save_success = style_learner_manager.save_all_models()
+                
+                if save_success:
+                    logger.info(f"StyleLearner 模型保存成功，涉及聊天室: {list(trained_chat_ids)}")
+                else:
+                    logger.warning("StyleLearner 模型保存失败")
+                    
+            except Exception as e:
+                logger.error(f"StyleLearner 模型保存异常: {e}")
+        
+        return learnt_expressions
+
+    async def match_expression_context(
+        self, expression_pairs: List[Tuple[str, str]], random_msg_match_str: str
+    ) -> List[Tuple[str, str, str]]:
+        # 为expression_pairs逐个条目赋予编号，并构建成字符串
+        numbered_pairs = []
+        for i, (situation, style) in enumerate(expression_pairs, 1):
+            numbered_pairs.append(f'{i}. 当"{situation}"时，使用"{style}"')
+
+        expression_pairs_str = "\n".join(numbered_pairs)
+
+        prompt = "match_expression_context_prompt"
+        prompt = await global_prompt_manager.format_prompt(
+            prompt,
+            expression_pairs=expression_pairs_str,
+            chat_str=random_msg_match_str,
+        )
+
+        response, _ = await self.express_learn_model.generate_response_async(prompt, temperature=0.3)
+
+        # print(f"match_expression_context_prompt: {prompt}")
+        # print(f"{response}")
+
+        # 解析JSON响应
+        match_responses = []
+        try:
+            response = response.strip()
+            # 检查是否已经是标准JSON数组格式
+            if response.startswith("[") and response.endswith("]"):
+                match_responses = json.loads(response)
+            else:
+                # 尝试直接解析多个JSON对象
+                try:
+                    # 如果是多个JSON对象用逗号分隔，包装成数组
+                    if response.startswith("{") and not response.startswith("["):
+                        response = "[" + response + "]"
+                        match_responses = json.loads(response)
+                    else:
+                        # 使用repair_json处理响应
+                        repaired_content = repair_json(response)
+
+                        # 确保repaired_content是列表格式
+                        if isinstance(repaired_content, str):
+                            try:
+                                parsed_data = json.loads(repaired_content)
+                                if isinstance(parsed_data, dict):
+                                    # 如果是字典，包装成列表
+                                    match_responses = [parsed_data]
+                                elif isinstance(parsed_data, list):
+                                    match_responses = parsed_data
+                                else:
+                                    match_responses = []
+                            except json.JSONDecodeError:
+                                match_responses = []
+                        elif isinstance(repaired_content, dict):
+                            # 如果是字典，包装成列表
+                            match_responses = [repaired_content]
+                        elif isinstance(repaired_content, list):
+                            match_responses = repaired_content
+                        else:
+                            match_responses = []
+                except json.JSONDecodeError:
+                    # 如果还是失败，尝试repair_json
+                    repaired_content = repair_json(response)
+                    if isinstance(repaired_content, str):
+                        parsed_data = json.loads(repaired_content)
+                        match_responses = parsed_data if isinstance(parsed_data, list) else [parsed_data]
+                    else:
+                        match_responses = repaired_content if isinstance(repaired_content, list) else [repaired_content]
+
+        except (json.JSONDecodeError, Exception) as e:
+            logger.error(f"解析匹配响应JSON失败: {e}, 响应内容: \n{response}")
+            return []
+
+        matched_expressions = []
+        used_pair_indices = set()  # 用于跟踪已经使用的expression_pair索引
+        
+        print(f"match_responses: {match_responses}")
+
+        for match_response in match_responses:
+            try:
+                # 获取表达方式序号
+                pair_index = int(match_response["expression_pair"]) - 1  # 转换为0-based索引
+
+                # 检查索引是否有效且未被使用过
+                if 0 <= pair_index < len(expression_pairs) and pair_index not in used_pair_indices:
+                    situation, style = expression_pairs[pair_index]
+                    context = match_response["context"]
+                    matched_expressions.append((situation, style, context))
+                    used_pair_indices.add(pair_index)  # 标记该索引已使用
+                    logger.debug(f"成功匹配表达方式 {pair_index + 1}: {situation} -> {style}")
+                elif pair_index in used_pair_indices:
+                    logger.debug(f"跳过重复的表达方式 {pair_index + 1}")
+            except (ValueError, KeyError, IndexError) as e:
+                logger.error(f"解析匹配条目失败: {e}, 条目: {match_response}")
+                continue
+
+        return matched_expressions
+
+    async def learn_expression(
+        self, num: int = 10
+    ) -> Optional[List[Tuple[str, str, str, List[str], str]]]:
+        """从指定聊天流学习表达方式
+
+        Args:
+            num: 学习数量
+        """
+        type_str = "语言风格"
+        prompt = "learn_style_prompt"
+
+        current_time = time.time()
+
+        # 获取上次学习之后的消息
+        random_msg = get_raw_msg_by_timestamp_with_chat_inclusive(
+            chat_id=self.chat_id,
+            timestamp_start=self.last_learning_time,
+            timestamp_end=current_time,
+            limit=num,
+        )
+        # print(random_msg)
+        if not random_msg or random_msg == []:
+            return None
+        # 转化成str
+        _chat_id: str = random_msg[0].chat_id
+        # random_msg_str: str = build_readable_messages(random_msg, timestamp_mode="normal")
+        random_msg_str: str = await build_anonymous_messages(random_msg)
+        random_msg_match_str: str = await build_bare_messages(random_msg)
+
+        prompt: str = await global_prompt_manager.format_prompt(
+            prompt,
+            chat_str=random_msg_str,
+        )
+
+        # print(f"random_msg_str:{random_msg_str}")
+        # logger.info(f"学习{type_str}的prompt: {prompt}")
+
+        try:
+            response, _ = await self.express_learn_model.generate_response_async(prompt, temperature=0.3)
+        except Exception as e:
+            logger.error(f"学习{type_str}失败: {e}")
+            return None
+
+        # logger.debug(f"学习{type_str}的response: {response}")
+
+        expressions: List[Tuple[str, str]] = self.parse_expression_response(response)
+
+        matched_expressions: List[Tuple[str, str, str]] = await self.match_expression_context(
+            expressions, random_msg_match_str
+        )
+        
+        print(f"matched_expressions: {matched_expressions}")
+
+        # 为每条消息构建与 build_bare_messages 相同规则的精简文本列表，保留到原消息索引的映射
+        bare_lines: List[Tuple[int, str]] = []  # (original_index, bare_content)
+        pic_pattern = r"\[picid:[^\]]+\]"
+        reply_pattern = r"回复<[^:<>]+:[^:<>]+>"
+        at_pattern = r"@<[^:<>]+:[^:<>]+>"
+        for idx, msg in enumerate(random_msg):
+            content = msg.processed_plain_text or ""
+            content = re.sub(pic_pattern, "[图片]", content)
+            content = re.sub(reply_pattern, "回复[某人]", content)
+            content = re.sub(at_pattern, "@[某人]", content)
+            content = content.strip()
+            if content:
+                bare_lines.append((idx, content))
+
+        # 将 matched_expressions 结合上一句 up_content（若不存在上一句则跳过）
+        filtered_with_up: List[Tuple[str, str, str, str]] = []  # (situation, style, context, up_content)
+        for situation, style, context in matched_expressions:
+            # 在 bare_lines 中找到第一处相似度达到85%的行
+            pos = None
+            for i, (_, c) in enumerate(bare_lines):
+                similarity = calculate_similarity(c, context)
+                if similarity >= 0.85:  # 85%相似度阈值
+                    pos = i
+                    break
+            
+            if pos is None or pos == 0:
+                # 没有匹配到或没有上一句，跳过该表达
+                continue
+            prev_original_idx = bare_lines[pos - 1][0]
+            up_content = (random_msg[prev_original_idx].processed_plain_text or "").strip()
+            if not up_content:
+                continue
+            filtered_with_up.append((situation, style, context, up_content))
+
+        if not filtered_with_up:
+            return None
+
+        results: List[Tuple[str, str, str, str]] = []
+        for (situation, style, context, up_content) in filtered_with_up:
+            results.append((self.chat_id, situation, style, context, up_content))
+
+        return results
+
+
+    def parse_expression_response(self, response: str) -> List[Tuple[str, str, str]]:
+        """
+        解析LLM返回的表达风格总结，每一行提取"当"和"使用"之间的内容，存储为(situation, style)元组
+        """
+        expressions: List[Tuple[str, str, str]] = []
+        for line in response.splitlines():
+            line = line.strip()
+            if not line:
+                continue
+            # 查找"当"和下一个引号
+            idx_when = line.find('当"')
+            if idx_when == -1:
+                continue
+            idx_quote1 = idx_when + 1
+            idx_quote2 = line.find('"', idx_quote1 + 1)
+            if idx_quote2 == -1:
+                continue
+            situation = line[idx_quote1 + 1 : idx_quote2]
+            # 查找"使用"
+            idx_use = line.find('使用"', idx_quote2)
+            if idx_use == -1:
+                continue
+            idx_quote3 = idx_use + 2
+            idx_quote4 = line.find('"', idx_quote3 + 1)
+            if idx_quote4 == -1:
+                continue
+            style = line[idx_quote3 + 1 : idx_quote4]
+            expressions.append((situation, style))
+        return expressions
+
+
+init_prompt()
+
+
+class ExpressionLearnerManager:
+    def __init__(self):
+        self.expression_learners = {}
+
+        self._ensure_expression_directories()
+
+    def get_expression_learner(self, chat_id: str) -> ExpressionLearner:
+        if chat_id not in self.expression_learners:
+            self.expression_learners[chat_id] = ExpressionLearner(chat_id)
+        return self.expression_learners[chat_id]
+
+    def _ensure_expression_directories(self):
+        """
+        确保表达方式相关的目录结构存在
+        """
+        base_dir = os.path.join("data", "expression")
+        directories_to_create = [
+            base_dir,
+            os.path.join(base_dir, "learnt_style"),
+            os.path.join(base_dir, "learnt_grammar"),
+        ]
+
+        for directory in directories_to_create:
+            try:
+                os.makedirs(directory, exist_ok=True)
+                logger.debug(f"确保目录存在: {directory}")
+            except Exception as e:
+                logger.error(f"创建目录失败 {directory}: {e}")
+
+
+expression_learner_manager = ExpressionLearnerManager()
--- a/src/express/expression_selector.py
+++ b/src/express/expression_selector.py
@@ -0,0 +1,520 @@
+import json
+import time
+import random
+import hashlib
+
+from typing import List, Dict, Optional, Any, Tuple
+from json_repair import repair_json
+
+from src.llm_models.utils_model import LLMRequest
+from src.config.config import global_config, model_config
+from src.common.logger import get_logger
+from src.common.database.database_model import Expression
+from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
+from src.express.style_learner import style_learner_manager
+
+logger = get_logger("expression_selector")
+
+
+def init_prompt():
+    expression_evaluation_prompt = """
+以下是正在进行的聊天内容：
+{chat_observe_info}
+
+你的名字是{bot_name}{target_message}
+
+以下是可选的表达情境：
+{all_situations}
+
+请你分析聊天内容的语境、情绪、话题类型，从上述情境中选择最适合当前聊天情境的，最多{max_num}个情境。
+考虑因素包括：
+1. 聊天的情绪氛围（轻松、严肃、幽默等）
+2. 话题类型（日常、技术、游戏、情感等）
+3. 情境与当前语境的匹配度
+{target_message_extra_block}
+
+请以JSON格式输出，只需要输出选中的情境编号：
+例如：
+{{
+    "selected_situations": [2, 3, 5, 7, 19]
+}}
+
+请严格按照JSON格式输出，不要包含其他内容：
+"""
+    Prompt(expression_evaluation_prompt, "expression_evaluation_prompt")
+
+
+def weighted_sample(population: List[Dict], k: int) -> List[Dict]:
+    """随机抽样"""
+    if not population or k <= 0:
+        return []
+
+    if len(population) <= k:
+        return population.copy()
+
+    # 使用随机抽样
+    selected = []
+    population_copy = population.copy()
+
+    for _ in range(k):
+        if not population_copy:
+            break
+
+        # 随机选择一个元素
+        chosen_idx = random.randint(0, len(population_copy) - 1)
+        selected.append(population_copy.pop(chosen_idx))
+
+    return selected
+
+
+class ExpressionSelector:
+    def __init__(self):
+        self.llm_model = LLMRequest(
+            model_set=model_config.model_task_config.utils_small, request_type="expression.selector"
+        )
+
+    def can_use_expression_for_chat(self, chat_id: str) -> bool:
+        """
+        检查指定聊天流是否允许使用表达
+
+        Args:
+            chat_id: 聊天流ID
+
+        Returns:
+            bool: 是否允许使用表达
+        """
+        try:
+            use_expression, _, _ = global_config.expression.get_expression_config_for_chat(chat_id)
+            return use_expression
+        except Exception as e:
+            logger.error(f"检查表达使用权限失败: {e}")
+            return False
+
+    @staticmethod
+    def _parse_stream_config_to_chat_id(stream_config_str: str) -> Optional[str]:
+        """解析'platform:id:type'为chat_id（与get_stream_id一致）"""
+        try:
+            parts = stream_config_str.split(":")
+            if len(parts) != 3:
+                return None
+            platform = parts[0]
+            id_str = parts[1]
+            stream_type = parts[2]
+            is_group = stream_type == "group"
+            if is_group:
+                components = [platform, str(id_str)]
+            else:
+                components = [platform, str(id_str), "private"]
+            key = "_".join(components)
+            return hashlib.md5(key.encode()).hexdigest()
+        except Exception:
+            return None
+
+    def get_related_chat_ids(self, chat_id: str) -> List[str]:
+        """根据expression_groups配置，获取与当前chat_id相关的所有chat_id（包括自身）"""
+        groups = global_config.expression.expression_groups
+
+        # 检查是否存在全局共享组（包含"*"的组）
+        global_group_exists = any("*" in group for group in groups)
+
+        if global_group_exists:
+            # 如果存在全局共享组，则返回所有可用的chat_id
+            all_chat_ids = set()
+            for group in groups:
+                for stream_config_str in group:
+                    if chat_id_candidate := self._parse_stream_config_to_chat_id(stream_config_str):
+                        all_chat_ids.add(chat_id_candidate)
+            return list(all_chat_ids) if all_chat_ids else [chat_id]
+
+        # 否则使用现有的组逻辑
+        for group in groups:
+            group_chat_ids = []
+            for stream_config_str in group:
+                if chat_id_candidate := self._parse_stream_config_to_chat_id(stream_config_str):
+                    group_chat_ids.append(chat_id_candidate)
+            if chat_id in group_chat_ids:
+                return group_chat_ids
+        return [chat_id]
+
+    def get_model_predicted_expressions(self, chat_id: str, chat_info: str, total_num: int = 10) -> List[Dict[str, Any]]:
+        """
+        使用 style_learner 模型预测最合适的表达方式
+        
+        Args:
+            chat_id: 聊天室ID
+            chat_info: 聊天内容信息
+            total_num: 需要预测的数量
+            
+        Returns:
+            List[Dict[str, Any]]: 预测的表达方式列表
+        """
+        try:
+            # 支持多chat_id合并预测
+            related_chat_ids = self.get_related_chat_ids(chat_id)
+            
+            # 从聊天信息中提取关键内容作为预测输入
+            # 这里可以进一步优化，提取更合适的预测输入
+            prediction_input = self._extract_prediction_input(chat_info)
+            
+            predicted_expressions = []
+            
+            # 为每个相关的chat_id进行预测
+            for related_chat_id in related_chat_ids:
+                try:
+                    # 使用 style_learner 预测最合适的风格
+                    best_style, scores = style_learner_manager.predict_style(
+                        related_chat_id, prediction_input, top_k=total_num
+                    )
+                    
+                    if best_style and scores:
+                        # 获取预测风格的完整信息
+                        learner = style_learner_manager.get_learner(related_chat_id)
+                        style_id, situation = learner.get_style_info(best_style)
+                        
+                        if style_id and situation:
+                            # 从数据库查找对应的表达记录
+                            expr_query = Expression.select().where(
+                                (Expression.chat_id == related_chat_id) &
+                                (Expression.type == "style") &
+                                (Expression.situation == situation) &
+                                (Expression.style == best_style)
+                            )
+                            
+                            if expr_query.exists():
+                                expr = expr_query.get()
+                                predicted_expressions.append({
+                                    "id": expr.id,
+                                    "situation": expr.situation,
+                                    "style": expr.style,
+                                    "last_active_time": expr.last_active_time,
+                                    "source_id": expr.chat_id,
+                                    "type": "style",
+                                    "create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
+                                    "prediction_score": scores.get(best_style, 0.0),
+                                    "prediction_input": prediction_input
+                                })
+                                
+                except Exception as e:
+                    logger.warning(f"为聊天室 {related_chat_id} 预测表达方式失败: {e}")
+                    continue
+            
+            # 按预测分数排序，取前 total_num 个
+            predicted_expressions.sort(key=lambda x: x.get("prediction_score", 0.0), reverse=True)
+            selected_expressions = predicted_expressions[:total_num]
+            
+            logger.info(f"为聊天室 {chat_id} 预测到 {len(selected_expressions)} 个表达方式")
+            return selected_expressions
+            
+        except Exception as e:
+            logger.error(f"模型预测表达方式失败: {e}")
+            # 如果预测失败，回退到随机选择
+            return self._fallback_random_expressions(chat_id, total_num)
+    
+    def _extract_prediction_input(self, chat_info: str) -> str:
+        """
+        从聊天信息中提取用于预测的关键内容
+        
+        Args:
+            chat_info: 聊天内容信息
+            
+        Returns:
+            str: 提取的预测输入
+        """
+        try:
+            # 简单的提取策略：取最后几句话作为预测输入
+            lines = chat_info.strip().split('\n')
+            if not lines:
+                return ""
+            
+            # 取最后3行作为预测输入
+            recent_lines = lines[-3:]
+            prediction_input = ' '.join(recent_lines).strip()
+            
+            # 如果内容太长，截取前100个字符
+            if len(prediction_input) > 100:
+                prediction_input = prediction_input[:100]
+            
+            return prediction_input
+            
+        except Exception as e:
+            logger.warning(f"提取预测输入失败: {e}")
+            return ""
+    
+    def _fallback_random_expressions(self, chat_id: str, total_num: int) -> List[Dict[str, Any]]:
+        """
+        回退到随机选择表达方式
+        
+        Args:
+            chat_id: 聊天室ID
+            total_num: 需要选择的数量
+            
+        Returns:
+            List[Dict[str, Any]]: 随机选择的表达方式列表
+        """
+        try:
+            # 支持多chat_id合并抽选
+            related_chat_ids = self.get_related_chat_ids(chat_id)
+
+            # 优化：一次性查询所有相关chat_id的表达方式
+            style_query = Expression.select().where(
+                (Expression.chat_id.in_(related_chat_ids)) & (Expression.type == "style")
+            )
+
+            style_exprs = [
+                {
+                    "id": expr.id,
+                    "situation": expr.situation,
+                    "style": expr.style,
+                    "last_active_time": expr.last_active_time,
+                    "source_id": expr.chat_id,
+                    "type": "style",
+                    "create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
+                }
+                for expr in style_query
+            ]
+
+            # 随机抽样
+            if style_exprs:
+                selected_style = weighted_sample(style_exprs, total_num)
+            else:
+                selected_style = []
+            
+            logger.info(f"回退到随机选择，为聊天室 {chat_id} 选择了 {len(selected_style)} 个表达方式")
+            return selected_style
+            
+        except Exception as e:
+            logger.error(f"随机选择表达方式失败: {e}")
+            return []
+
+
+    async def select_suitable_expressions(
+        self,
+        chat_id: str,
+        chat_info: str,
+        max_num: int = 10,
+        target_message: Optional[str] = None,
+    ) -> Tuple[List[Dict[str, Any]], List[int]]:
+        """
+        根据配置模式选择适合的表达方式
+        
+        Args:
+            chat_id: 聊天流ID
+            chat_info: 聊天内容信息
+            max_num: 最大选择数量
+            target_message: 目标消息内容
+            
+        Returns:
+            Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
+        """
+        # 检查是否允许在此聊天流中使用表达
+        if not self.can_use_expression_for_chat(chat_id):
+            logger.debug(f"聊天流 {chat_id} 不允许使用表达，返回空列表")
+            return [], []
+
+        # 获取配置模式
+        expression_mode = global_config.expression.mode
+        
+        if expression_mode == "exp_model":
+            # exp_model模式：直接使用模型预测，不经过LLM
+            logger.debug(f"使用exp_model模式为聊天流 {chat_id} 选择表达方式")
+            return await self._select_expressions_model_only(chat_id, chat_info, max_num)
+        elif expression_mode == "classic":
+            # classic模式：随机选择+LLM选择
+            logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式")
+            return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message)
+        else:
+            logger.warning(f"未知的表达模式: {expression_mode}，回退到classic模式")
+            return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message)
+
+    async def _select_expressions_model_only(
+        self,
+        chat_id: str,
+        chat_info: str,
+        max_num: int = 10,
+    ) -> Tuple[List[Dict[str, Any]], List[int]]:
+        """
+        exp_model模式：直接使用模型预测，不经过LLM
+        
+        Args:
+            chat_id: 聊天流ID
+            chat_info: 聊天内容信息
+            max_num: 最大选择数量
+            
+        Returns:
+            Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
+        """
+        try:
+            # 使用模型预测最合适的表达方式
+            style_exprs = self.get_model_predicted_expressions(chat_id, chat_info, max_num * 2)
+            
+            # if len(style_exprs) < 5:
+                # logger.info(f"聊天流 {chat_id} 表达方式正在积累中")
+                # return [], []
+            
+            # 直接取前max_num个预测结果
+            selected_expressions = style_exprs[:max_num]
+            selected_ids = [expr["id"] for expr in selected_expressions]
+            
+            # 更新last_active_time
+            if selected_expressions:
+                self.update_expressions_last_active_time(selected_expressions)
+            
+            logger.info(f"exp_model模式为聊天流 {chat_id} 选择了 {len(selected_expressions)} 个表达方式")
+            return selected_expressions, selected_ids
+            
+        except Exception as e:
+            logger.error(f"exp_model模式选择表达方式失败: {e}")
+            return [], []
+
+    async def _select_expressions_classic(
+        self,
+        chat_id: str,
+        chat_info: str,
+        max_num: int = 10,
+        target_message: Optional[str] = None,
+    ) -> Tuple[List[Dict[str, Any]], List[int]]:
+        """
+        classic模式：随机选择+LLM选择
+        
+        Args:
+            chat_id: 聊天流ID
+            chat_info: 聊天内容信息
+            max_num: 最大选择数量
+            target_message: 目标消息内容
+            
+        Returns:
+            Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
+        """
+        try:
+            # 1. 使用模型预测最合适的表达方式
+            style_exprs = self.get_model_predicted_expressions(chat_id, chat_info, 20)
+
+            if len(style_exprs) < 10:
+                logger.info(f"聊天流 {chat_id} 表达方式正在积累中")
+                return [], []
+
+            # 2. 构建所有表达方式的索引和情境列表
+            all_expressions: List[Dict[str, Any]] = []
+            all_situations: List[str] = []
+
+            # 添加style表达方式
+            for expr in style_exprs:
+                expr = expr.copy()
+                all_expressions.append(expr)
+                all_situations.append(f"{len(all_expressions)}.当 {expr['situation']} 时，使用 {expr['style']}")
+
+            if not all_expressions:
+                logger.warning("没有找到可用的表达方式")
+                return [], []
+
+            all_situations_str = "\n".join(all_situations)
+
+            if target_message:
+                target_message_str = f"，现在你想要回复消息：{target_message}"
+                target_message_extra_block = "4.考虑你要回复的目标消息"
+            else:
+                target_message_str = ""
+                target_message_extra_block = ""
+
+            # 3. 构建prompt（只包含情境，不包含完整的表达方式）
+            prompt = (await global_prompt_manager.get_prompt_async("expression_evaluation_prompt")).format(
+                bot_name=global_config.bot.nickname,
+                chat_observe_info=chat_info,
+                all_situations=all_situations_str,
+                max_num=max_num,
+                target_message=target_message_str,
+                target_message_extra_block=target_message_extra_block,
+            )
+
+            # 4. 调用LLM
+            content, (reasoning_content, model_name, _) = await self.llm_model.generate_response_async(prompt=prompt)
+
+            if not content:
+                logger.warning("LLM返回空结果")
+                return [], []
+
+            # 5. 解析结果
+            result = repair_json(content)
+            if isinstance(result, str):
+                result = json.loads(result)
+
+            if not isinstance(result, dict) or "selected_situations" not in result:
+                logger.error("LLM返回格式错误")
+                logger.info(f"LLM返回结果: \n{content}")
+                return [], []
+
+            selected_indices = result["selected_situations"]
+
+            # 根据索引获取完整的表达方式
+            valid_expressions: List[Dict[str, Any]] = []
+            selected_ids = []
+            for idx in selected_indices:
+                if isinstance(idx, int) and 1 <= idx <= len(all_expressions):
+                    expression = all_expressions[idx - 1]  # 索引从1开始
+                    selected_ids.append(expression["id"])
+                    valid_expressions.append(expression)
+
+            # 对选中的所有表达方式，更新last_active_time
+            if valid_expressions:
+                self.update_expressions_last_active_time(valid_expressions)
+
+            logger.info(f"classic模式从{len(all_expressions)}个情境中选择了{len(valid_expressions)}个")
+            return valid_expressions, selected_ids
+
+        except Exception as e:
+            logger.error(f"classic模式处理表达方式选择时出错: {e}")
+            return [], []
+
+    async def select_suitable_expressions_llm(
+        self,
+        chat_id: str,
+        chat_info: str,
+        max_num: int = 10,
+        target_message: Optional[str] = None,
+    ) -> Tuple[List[Dict[str, Any]], List[int]]:
+        """
+        使用LLM选择适合的表达方式（保持向后兼容）
+        
+        注意：此方法已被 select_suitable_expressions 替代，建议使用新方法
+        """
+        logger.warning("select_suitable_expressions_llm 方法已过时，请使用 select_suitable_expressions")
+        return await self.select_suitable_expressions(chat_id, chat_info, max_num, target_message)
+
+    def update_expressions_last_active_time(self, expressions_to_update: List[Dict[str, Any]]):
+        """对一批表达方式更新last_active_time"""
+        if not expressions_to_update:
+            return
+        updates_by_key = {}
+        for expr in expressions_to_update:
+            source_id: str = expr.get("source_id")  # type: ignore
+            expr_type: str = expr.get("type", "style")
+            situation: str = expr.get("situation")  # type: ignore
+            style: str = expr.get("style")  # type: ignore
+            if not source_id or not situation or not style:
+                logger.warning(f"表达方式缺少必要字段，无法更新: {expr}")
+                continue
+            key = (source_id, expr_type, situation, style)
+            if key not in updates_by_key:
+                updates_by_key[key] = expr
+        for chat_id, expr_type, situation, style in updates_by_key:
+            query = Expression.select().where(
+                (Expression.chat_id == chat_id)
+                & (Expression.type == expr_type)
+                & (Expression.situation == situation)
+                & (Expression.style == style)
+            )
+            if query.exists():
+                expr_obj = query.get()
+                expr_obj.last_active_time = time.time()
+                expr_obj.save()
+                logger.debug(
+                    "表达方式激活: 更新last_active_time in db"
+                )
+
+
+init_prompt()
+
+try:
+    expression_selector = ExpressionSelector()
+except Exception as e:
+    logger.error(f"ExpressionSelector初始化失败: {e}")
--- a/src/express/expressor_model/model.py
+++ b/src/express/expressor_model/model.py
@@ -0,0 +1,131 @@
+from typing import Dict, Optional, Tuple, List
+from collections import Counter, defaultdict
+import pickle
+import os
+
+from .tokenizer import Tokenizer
+from .online_nb import OnlineNaiveBayes
+
+class ExpressorModel:
+    """
+    直接使用朴素贝叶斯精排（可在线学习）
+    支持存储situation字段，不参与计算，仅与style对应
+    """
+
+    def __init__(self,
+                 alpha: float = 0.5,
+                 beta: float = 0.5,
+                 gamma: float = 1.0,
+                 vocab_size: int = 200000,
+                 use_jieba: bool = True):
+        self.tokenizer = Tokenizer(stopwords=set(), use_jieba=use_jieba)
+        self.nb = OnlineNaiveBayes(alpha=alpha, beta=beta, gamma=gamma, vocab_size=vocab_size)
+        self._candidates: Dict[str, str] = {}  # cid -> text (style)
+        self._situations: Dict[str, str] = {}  # cid -> situation (不参与计算)
+
+    def add_candidate(self, cid: str, text: str, situation: str = None):
+        """添加候选文本和对应的situation"""
+        self._candidates[cid] = text
+        if situation is not None:
+            self._situations[cid] = situation
+        
+        # 确保在nb模型中初始化该候选的计数
+        if cid not in self.nb.cls_counts:
+            self.nb.cls_counts[cid] = 0.0
+        if cid not in self.nb.token_counts:
+            self.nb.token_counts[cid] = defaultdict(float)
+
+    def add_candidates_bulk(self, items: List[Tuple[str, str]], situations: List[str] = None):
+        """批量添加候选文本和对应的situations"""
+        for i, (cid, text) in enumerate(items):
+            situation = situations[i] if situations and i < len(situations) else None
+            self.add_candidate(cid, text, situation)
+
+    def predict(self, text: str, k: int = None) -> Tuple[Optional[str], Dict[str, float]]:
+        """直接对所有候选进行朴素贝叶斯评分"""
+        toks = self.tokenizer.tokenize(text)
+        if not toks:
+            return None, {}
+        
+        if not self._candidates:
+            return None, {}
+
+        # 对所有候选进行评分
+        tf = Counter(toks)
+        all_cids = list(self._candidates.keys())
+        scores = self.nb.score_batch(tf, all_cids)
+
+        # 取最高分
+        if not scores:
+            return None, {}
+        best = max(scores.items(), key=lambda x: x[1])[0]
+        return best, scores
+
+    def update_positive(self, text: str, cid: str):
+        """更新正反馈学习"""
+        toks = self.tokenizer.tokenize(text)
+        if not toks:
+            return
+        tf = Counter(toks)
+        self.nb.update_positive(tf, cid)
+
+    def decay(self, factor: float):
+        self.nb.decay(factor=factor)
+    
+    def get_situation(self, cid: str) -> Optional[str]:
+        """获取候选对应的situation"""
+        return self._situations.get(cid)
+    
+    def get_style(self, cid: str) -> Optional[str]:
+        """获取候选对应的style"""
+        return self._candidates.get(cid)
+    
+    def get_candidate_info(self, cid: str) -> Tuple[Optional[str], Optional[str]]:
+        """获取候选的style和situation信息"""
+        return self._candidates.get(cid), self._situations.get(cid)
+    
+    def get_all_candidates(self) -> Dict[str, Tuple[str, Optional[str]]]:
+        """获取所有候选的style和situation信息"""
+        return {cid: (style, self._situations.get(cid)) 
+                for cid, style in self._candidates.items()}
+
+    def save(self, path: str):
+        """保存模型"""
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        with open(path, "wb") as f:
+            pickle.dump({
+                "candidates": self._candidates,
+                "situations": self._situations,
+                "nb": {
+                    "cls_counts": dict(self.nb.cls_counts),
+                    "token_counts": {cid: dict(tc) for cid, tc in self.nb.token_counts.items()},
+                    "alpha": self.nb.alpha,
+                    "beta": self.nb.beta,
+                    "gamma": self.nb.gamma,
+                    "V": self.nb.V,
+                }
+            }, f)
+
+    def load(self, path: str):
+        """加载模型"""
+        with open(path, "rb") as f:
+            obj = pickle.load(f)
+        # 还原候选文本
+        self._candidates = obj["candidates"]
+        # 还原situations（兼容旧版本）
+        self._situations = obj.get("situations", {})
+        # 还原朴素贝叶斯模型
+        self.nb.cls_counts = obj["nb"]["cls_counts"]
+        self.nb.token_counts = defaultdict_dict(obj["nb"]["token_counts"])
+        self.nb.alpha = obj["nb"]["alpha"]
+        self.nb.beta = obj["nb"]["beta"]
+        self.nb.gamma = obj["nb"]["gamma"]
+        self.nb.V = obj["nb"]["V"]
+        self.nb._logZ.clear()
+
+def defaultdict_dict(d: Dict[str, Dict[str, float]]):
+    from collections import defaultdict
+    outer = defaultdict(lambda: defaultdict(float))
+    for k, inner in d.items():
+        outer[k].update(inner)
+    return outer
--- a/src/express/expressor_model/online_nb.py
+++ b/src/express/expressor_model/online_nb.py
@@ -0,0 +1,60 @@
+import math
+from typing import Dict, List
+from collections import defaultdict, Counter
+
+class OnlineNaiveBayes:
+    def __init__(self, alpha: float = 0.5, beta: float = 0.5, gamma: float = 1.0, vocab_size: int = 200000):
+        self.alpha = alpha
+        self.beta = beta
+        self.gamma = gamma
+        self.V = vocab_size
+
+        self.cls_counts: Dict[str, float] = defaultdict(float)                 # cid -> total token count
+        self.token_counts: Dict[str, Dict[str, float]] = defaultdict(lambda: defaultdict(float))  # cid -> term -> count
+        self._logZ: Dict[str, float] = {}                                      # cache log(∑counts + Vα)
+
+    def _invalidate(self, cid: str):
+        if cid in self._logZ:
+            del self._logZ[cid]
+
+    def _logZ_c(self, cid: str) -> float:
+        if cid not in self._logZ:
+            Z = self.cls_counts[cid] + self.V * self.alpha
+            self._logZ[cid] = math.log(max(Z, 1e-12))
+        return self._logZ[cid]
+
+    def score_batch(self, tf: Counter, cids: List[str]) -> Dict[str, float]:
+        total_cls = sum(self.cls_counts.values())
+        n_cls = max(1, len(self.cls_counts))
+        denom_prior = math.log(total_cls + self.beta * n_cls)
+
+        out: Dict[str, float] = {}
+        for cid in cids:
+            prior = math.log(self.cls_counts[cid] + self.beta) - denom_prior
+            s = prior
+            logZ = self._logZ_c(cid)
+            tc = self.token_counts[cid]
+            for term, qtf in tf.items():
+                num = tc.get(term, 0.0) + self.alpha
+                s += qtf * (math.log(num) - logZ)
+            out[cid] = s
+        return out
+
+    def update_positive(self, tf: Counter, cid: str):
+        inc = 0.0
+        tc = self.token_counts[cid]
+        for term, c in tf.items():
+            tc[term] += float(c)
+            inc += float(c)
+        self.cls_counts[cid] += inc
+        self._invalidate(cid)
+
+    def decay(self, factor: float = None):
+        g = self.gamma if factor is None else factor
+        if g >= 1.0:
+            return
+        for cid in list(self.cls_counts.keys()):
+            self.cls_counts[cid] *= g
+            for term in list(self.token_counts[cid].keys()):
+                self.token_counts[cid][term] *= g
+            self._invalidate(cid)
--- a/src/express/expressor_model/tokenizer.py
+++ b/src/express/expressor_model/tokenizer.py
@@ -0,0 +1,28 @@
+import re
+from typing import List, Optional, Set
+
+try:
+    import jieba
+    _HAS_JIEBA = True
+except Exception:
+    _HAS_JIEBA = False
+
+_WORD_RE = re.compile(r"[A-Za-z0-9_]+")
+
+def simple_en_tokenize(text: str) -> List[str]:
+    return _WORD_RE.findall(text.lower())
+
+class Tokenizer:
+    def __init__(self, stopwords: Optional[Set[str]] = None, use_jieba: bool = True):
+        self.stopwords = stopwords or set()
+        self.use_jieba = use_jieba and _HAS_JIEBA
+
+    def tokenize(self, text: str) -> List[str]:
+        text = (text or "").strip()
+        if not text:
+            return []
+        if self.use_jieba:
+            toks = [t.strip().lower() for t in jieba.cut(text) if t.strip()]
+        else:
+            toks = simple_en_tokenize(text)
+        return [t for t in toks if t not in self.stopwords]
--- a/src/express/style_learner.py
+++ b/src/express/style_learner.py
@@ -0,0 +1,628 @@
+"""
+多聊天室表达风格学习系统
+支持为每个chat_id维护独立的表达模型，学习从up_content到style的映射
+"""
+
+import os
+import pickle
+import traceback
+from typing import Dict, List, Optional, Tuple
+from collections import defaultdict
+import asyncio
+
+from src.common.logger import get_logger
+from .expressor_model.model import ExpressorModel
+
+logger = get_logger("style_learner")
+
+
+class StyleLearner:
+    """
+    单个聊天室的表达风格学习器
+    学习从up_content到style的映射关系
+    支持动态管理风格集合（最多2000个）
+    """
+    
+    def __init__(self, chat_id: str, model_config: Optional[Dict] = None):
+        self.chat_id = chat_id
+        self.model_config = model_config or {
+            "alpha": 0.5,
+            "beta": 0.5, 
+            "gamma": 0.99,  # 衰减因子，支持遗忘
+            "vocab_size": 200000,
+            "use_jieba": True
+        }
+        
+        # 初始化表达模型
+        self.expressor = ExpressorModel(**self.model_config)
+        
+        # 动态风格管理
+        self.max_styles = 2000  # 每个chat_id最多2000个风格
+        self.style_to_id: Dict[str, str] = {}  # style文本 -> style_id
+        self.id_to_style: Dict[str, str] = {}  # style_id -> style文本
+        self.id_to_situation: Dict[str, str] = {}  # style_id -> situation文本
+        self.next_style_id = 0  # 下一个可用的style_id
+        
+        # 学习统计
+        self.learning_stats = {
+            "total_samples": 0,
+            "style_counts": defaultdict(int),
+            "last_update": None,
+            "style_usage_frequency": defaultdict(int)  # 风格使用频率
+        }
+    
+    def add_style(self, style: str, situation: str = None) -> bool:
+        """
+        动态添加一个新的风格
+        
+        Args:
+            style: 风格文本
+            situation: 对应的situation文本（可选）
+            
+        Returns:
+            bool: 添加是否成功
+        """
+        try:
+            # 检查是否已存在
+            if style in self.style_to_id:
+                logger.debug(f"[{self.chat_id}] 风格 '{style}' 已存在")
+                return True
+            
+            # 检查是否超过最大限制
+            if len(self.style_to_id) >= self.max_styles:
+                logger.warning(f"[{self.chat_id}] 已达到最大风格数量限制 ({self.max_styles})")
+                return False
+            
+            # 生成新的style_id
+            style_id = f"style_{self.next_style_id}"
+            self.next_style_id += 1
+            
+            # 添加到映射
+            self.style_to_id[style] = style_id
+            self.id_to_style[style_id] = style
+            if situation:
+                self.id_to_situation[style_id] = situation
+            
+            # 添加到expressor模型
+            self.expressor.add_candidate(style_id, style, situation)
+            
+            logger.info(f"[{self.chat_id}] 已添加风格: '{style}' (ID: {style_id})" + 
+                       (f", situation: '{situation}'" if situation else ""))
+            return True
+            
+        except Exception as e:
+            logger.error(f"[{self.chat_id}] 添加风格失败: {e}")
+            return False
+    
+    def remove_style(self, style: str) -> bool:
+        """
+        删除一个风格
+        
+        Args:
+            style: 要删除的风格文本
+            
+        Returns:
+            bool: 删除是否成功
+        """
+        try:
+            if style not in self.style_to_id:
+                logger.warning(f"[{self.chat_id}] 风格 '{style}' 不存在")
+                return False
+            
+            style_id = self.style_to_id[style]
+            
+            # 从映射中删除
+            del self.style_to_id[style]
+            del self.id_to_style[style_id]
+            if style_id in self.id_to_situation:
+                del self.id_to_situation[style_id]
+            
+            # 从expressor模型中删除（通过重新构建）
+            self._rebuild_expressor()
+            
+            logger.info(f"[{self.chat_id}] 已删除风格: '{style}' (ID: {style_id})")
+            return True
+            
+        except Exception as e:
+            logger.error(f"[{self.chat_id}] 删除风格失败: {e}")
+            return False
+    
+    def update_style(self, old_style: str, new_style: str) -> bool:
+        """
+        更新一个风格
+        
+        Args:
+            old_style: 原风格文本
+            new_style: 新风格文本
+            
+        Returns:
+            bool: 更新是否成功
+        """
+        try:
+            if old_style not in self.style_to_id:
+                logger.warning(f"[{self.chat_id}] 原风格 '{old_style}' 不存在")
+                return False
+            
+            if new_style in self.style_to_id and new_style != old_style:
+                logger.warning(f"[{self.chat_id}] 新风格 '{new_style}' 已存在")
+                return False
+            
+            style_id = self.style_to_id[old_style]
+            
+            # 更新映射
+            del self.style_to_id[old_style]
+            self.style_to_id[new_style] = style_id
+            self.id_to_style[style_id] = new_style
+            
+            # 更新expressor模型（保留原有的situation）
+            situation = self.id_to_situation.get(style_id)
+            self.expressor.add_candidate(style_id, new_style, situation)
+            
+            logger.info(f"[{self.chat_id}] 已更新风格: '{old_style}' -> '{new_style}'")
+            return True
+            
+        except Exception as e:
+            logger.error(f"[{self.chat_id}] 更新风格失败: {e}")
+            return False
+    
+    def add_styles_batch(self, styles: List[str], situations: List[str] = None) -> int:
+        """
+        批量添加风格
+        
+        Args:
+            styles: 风格文本列表
+            situations: 对应的situation文本列表（可选）
+            
+        Returns:
+            int: 成功添加的数量
+        """
+        success_count = 0
+        for i, style in enumerate(styles):
+            situation = situations[i] if situations and i < len(situations) else None
+            if self.add_style(style, situation):
+                success_count += 1
+        
+        logger.info(f"[{self.chat_id}] 批量添加风格: {success_count}/{len(styles)} 成功")
+        return success_count
+    
+    def get_all_styles(self) -> List[str]:
+        """获取所有已注册的风格"""
+        return list(self.style_to_id.keys())
+    
+    def get_style_count(self) -> int:
+        """获取当前风格数量"""
+        return len(self.style_to_id)
+    
+    def get_situation(self, style: str) -> Optional[str]:
+        """
+        获取风格对应的situation
+        
+        Args:
+            style: 风格文本
+            
+        Returns:
+            Optional[str]: 对应的situation，如果不存在则返回None
+        """
+        if style not in self.style_to_id:
+            return None
+        
+        style_id = self.style_to_id[style]
+        return self.id_to_situation.get(style_id)
+    
+    def get_style_info(self, style: str) -> Tuple[Optional[str], Optional[str]]:
+        """
+        获取风格的完整信息
+        
+        Args:
+            style: 风格文本
+            
+        Returns:
+            Tuple[Optional[str], Optional[str]]: (style_id, situation)
+        """
+        if style not in self.style_to_id:
+            return None, None
+        
+        style_id = self.style_to_id[style]
+        situation = self.id_to_situation.get(style_id)
+        return style_id, situation
+    
+    def get_all_style_info(self) -> Dict[str, Tuple[str, Optional[str]]]:
+        """
+        获取所有风格的完整信息
+        
+        Returns:
+            Dict[str, Tuple[str, Optional[str]]]: {style: (style_id, situation)}
+        """
+        result = {}
+        for style, style_id in self.style_to_id.items():
+            situation = self.id_to_situation.get(style_id)
+            result[style] = (style_id, situation)
+        return result
+    
+    def _rebuild_expressor(self):
+        """重新构建expressor模型（删除风格后使用）"""
+        try:
+            # 重新创建expressor
+            self.expressor = ExpressorModel(**self.model_config)
+            
+            # 重新添加所有风格和situation
+            for style_id, style_text in self.id_to_style.items():
+                situation = self.id_to_situation.get(style_id)
+                self.expressor.add_candidate(style_id, style_text, situation)
+            
+            logger.debug(f"[{self.chat_id}] 已重新构建expressor模型")
+            
+        except Exception as e:
+            logger.error(f"[{self.chat_id}] 重新构建expressor失败: {e}")
+    
+    def learn_mapping(self, up_content: str, style: str) -> bool:
+        """
+        学习一个up_content到style的映射
+        如果style不存在，会自动添加
+        
+        Args:
+            up_content: 输入内容
+            style: 对应的style文本
+            
+        Returns:
+            bool: 学习是否成功
+        """
+        try:
+            # 如果style不存在，先添加它
+            if style not in self.style_to_id:
+                if not self.add_style(style):
+                    logger.warning(f"[{self.chat_id}] 无法添加风格 '{style}'，学习失败")
+                    return False
+            
+            # 获取style_id
+            style_id = self.style_to_id[style]
+            
+            # 使用正反馈学习
+            self.expressor.update_positive(up_content, style_id)
+            
+            # 更新统计
+            self.learning_stats["total_samples"] += 1
+            self.learning_stats["style_counts"][style_id] += 1
+            self.learning_stats["style_usage_frequency"][style] += 1
+            self.learning_stats["last_update"] = asyncio.get_event_loop().time()
+            
+            logger.debug(f"[{self.chat_id}] 学习映射: '{up_content}' -> '{style}'")
+            return True
+            
+        except Exception as e:
+            logger.error(f"[{self.chat_id}] 学习映射失败: {e}")
+            traceback.print_exc()
+            return False
+    
+    def predict_style(self, up_content: str, top_k: int = 5) -> Tuple[Optional[str], Dict[str, float]]:
+        """
+        根据up_content预测最合适的style
+        
+        Args:
+            up_content: 输入内容
+            top_k: 返回前k个候选
+            
+        Returns:
+            Tuple[最佳style文本, 所有候选的分数]
+        """
+        try:
+            best_style_id, scores = self.expressor.predict(up_content, k=top_k)
+            
+            if best_style_id is None:
+                return None, {}
+            
+            # 将style_id转换为style文本
+            best_style = self.id_to_style.get(best_style_id)
+            
+            # 转换所有分数
+            style_scores = {}
+            for sid, score in scores.items():
+                style_text = self.id_to_style.get(sid)
+                if style_text:
+                    style_scores[style_text] = score
+            
+            return best_style, style_scores
+            
+        except Exception as e:
+            logger.error(f"[{self.chat_id}] 预测style失败: {e}")
+            traceback.print_exc()
+            return None, {}
+    
+    def decay_learning(self, factor: Optional[float] = None) -> None:
+        """
+        对学习到的知识进行衰减（遗忘）
+        
+        Args:
+            factor: 衰减因子，None则使用配置中的gamma
+        """
+        self.expressor.decay(factor)
+        logger.debug(f"[{self.chat_id}] 执行知识衰减")
+    
+    def get_stats(self) -> Dict:
+        """获取学习统计信息"""
+        return {
+            "chat_id": self.chat_id,
+            "total_samples": self.learning_stats["total_samples"],
+            "style_count": len(self.style_to_id),
+            "max_styles": self.max_styles,
+            "style_counts": dict(self.learning_stats["style_counts"]),
+            "style_usage_frequency": dict(self.learning_stats["style_usage_frequency"]),
+            "last_update": self.learning_stats["last_update"],
+            "all_styles": list(self.style_to_id.keys())
+        }
+    
+    def save(self, base_path: str) -> bool:
+        """
+        保存模型到文件
+        
+        Args:
+            base_path: 基础路径，实际文件为 {base_path}/{chat_id}_style_model.pkl
+        """
+        try:
+            os.makedirs(base_path, exist_ok=True)
+            file_path = os.path.join(base_path, f"{self.chat_id}_style_model.pkl")
+            
+            # 保存模型和统计信息
+            save_data = {
+                "model_config": self.model_config,
+                "style_to_id": self.style_to_id,
+                "id_to_style": self.id_to_style,
+                "id_to_situation": self.id_to_situation,
+                "next_style_id": self.next_style_id,
+                "max_styles": self.max_styles,
+                "learning_stats": self.learning_stats
+            }
+            
+            # 先保存expressor模型
+            expressor_path = os.path.join(base_path, f"{self.chat_id}_expressor.pkl")
+            self.expressor.save(expressor_path)
+            
+            # 保存其他数据
+            with open(file_path, "wb") as f:
+                pickle.dump(save_data, f)
+            
+            logger.info(f"[{self.chat_id}] 模型已保存到 {file_path}")
+            return True
+            
+        except Exception as e:
+            logger.error(f"[{self.chat_id}] 保存模型失败: {e}")
+            return False
+    
+    def load(self, base_path: str) -> bool:
+        """
+        从文件加载模型
+        
+        Args:
+            base_path: 基础路径
+        """
+        try:
+            file_path = os.path.join(base_path, f"{self.chat_id}_style_model.pkl")
+            expressor_path = os.path.join(base_path, f"{self.chat_id}_expressor.pkl")
+            
+            if not os.path.exists(file_path) or not os.path.exists(expressor_path):
+                logger.warning(f"[{self.chat_id}] 模型文件不存在，将使用默认配置")
+                return False
+            
+            # 加载其他数据
+            with open(file_path, "rb") as f:
+                save_data = pickle.load(f)
+            
+            # 恢复配置和状态
+            self.model_config = save_data["model_config"]
+            self.style_to_id = save_data["style_to_id"]
+            self.id_to_style = save_data["id_to_style"]
+            self.id_to_situation = save_data.get("id_to_situation", {})  # 兼容旧版本
+            self.next_style_id = save_data["next_style_id"]
+            self.max_styles = save_data.get("max_styles", 2000)
+            self.learning_stats = save_data["learning_stats"]
+            
+            # 重新创建expressor并加载
+            self.expressor = ExpressorModel(**self.model_config)
+            self.expressor.load(expressor_path)
+            
+            logger.info(f"[{self.chat_id}] 模型已从 {file_path} 加载")
+            return True
+            
+        except Exception as e:
+            logger.error(f"[{self.chat_id}] 加载模型失败: {e}")
+            return False
+
+
+class StyleLearnerManager:
+    """
+    多聊天室表达风格学习管理器
+    为每个chat_id维护独立的StyleLearner实例
+    每个chat_id可以动态管理自己的风格集合（最多2000个）
+    """
+    
+    def __init__(self, model_save_path: str = "data/style_models"):
+        self.model_save_path = model_save_path
+        self.learners: Dict[str, StyleLearner] = {}
+        
+        # 自动保存配置
+        self.auto_save_interval = 300  # 5分钟
+        self._auto_save_task: Optional[asyncio.Task] = None
+        
+        logger.info("StyleLearnerManager 已初始化")
+    
+    def get_learner(self, chat_id: str, model_config: Optional[Dict] = None) -> StyleLearner:
+        """
+        获取或创建指定chat_id的学习器
+        
+        Args:
+            chat_id: 聊天室ID
+            model_config: 模型配置，None则使用默认配置
+            
+        Returns:
+            StyleLearner实例
+        """
+        if chat_id not in self.learners:
+            # 创建新的学习器
+            learner = StyleLearner(chat_id, model_config)
+            
+            # 尝试加载已保存的模型
+            learner.load(self.model_save_path)
+            
+            self.learners[chat_id] = learner
+            logger.info(f"为 chat_id={chat_id} 创建新的StyleLearner")
+        
+        return self.learners[chat_id]
+    
+    def add_style(self, chat_id: str, style: str) -> bool:
+        """
+        为指定chat_id添加风格
+        
+        Args:
+            chat_id: 聊天室ID
+            style: 风格文本
+            
+        Returns:
+            bool: 添加是否成功
+        """
+        learner = self.get_learner(chat_id)
+        return learner.add_style(style)
+    
+    def remove_style(self, chat_id: str, style: str) -> bool:
+        """
+        为指定chat_id删除风格
+        
+        Args:
+            chat_id: 聊天室ID
+            style: 风格文本
+            
+        Returns:
+            bool: 删除是否成功
+        """
+        learner = self.get_learner(chat_id)
+        return learner.remove_style(style)
+    
+    def update_style(self, chat_id: str, old_style: str, new_style: str) -> bool:
+        """
+        为指定chat_id更新风格
+        
+        Args:
+            chat_id: 聊天室ID
+            old_style: 原风格文本
+            new_style: 新风格文本
+            
+        Returns:
+            bool: 更新是否成功
+        """
+        learner = self.get_learner(chat_id)
+        return learner.update_style(old_style, new_style)
+    
+    def get_chat_styles(self, chat_id: str) -> List[str]:
+        """
+        获取指定chat_id的所有风格
+        
+        Args:
+            chat_id: 聊天室ID
+            
+        Returns:
+            List[str]: 风格列表
+        """
+        learner = self.get_learner(chat_id)
+        return learner.get_all_styles()
+    
+    def learn_mapping(self, chat_id: str, up_content: str, style: str) -> bool:
+        """
+        学习一个映射关系
+        
+        Args:
+            chat_id: 聊天室ID
+            up_content: 输入内容
+            style: 对应的style
+            
+        Returns:
+            bool: 学习是否成功
+        """
+        learner = self.get_learner(chat_id)
+        return learner.learn_mapping(up_content, style)
+    
+    def predict_style(self, chat_id: str, up_content: str, top_k: int = 5) -> Tuple[Optional[str], Dict[str, float]]:
+        """
+        预测最合适的style
+        
+        Args:
+            chat_id: 聊天室ID
+            up_content: 输入内容
+            top_k: 返回前k个候选
+            
+        Returns:
+            Tuple[最佳style, 所有候选分数]
+        """
+        learner = self.get_learner(chat_id)
+        return learner.predict_style(up_content, top_k)
+    
+    def decay_all_learners(self, factor: Optional[float] = None) -> None:
+        """
+        对所有学习器执行衰减
+        
+        Args:
+            factor: 衰减因子
+        """
+        for learner in self.learners.values():
+            learner.decay_learning(factor)
+        logger.info("已对所有学习器执行衰减")
+    
+    def get_all_stats(self) -> Dict[str, Dict]:
+        """获取所有学习器的统计信息"""
+        return {chat_id: learner.get_stats() for chat_id, learner in self.learners.items()}
+    
+    def save_all_models(self) -> bool:
+        """保存所有模型"""
+        success_count = 0
+        for learner in self.learners.values():
+            if learner.save(self.model_save_path):
+                success_count += 1
+        
+        logger.info(f"已保存 {success_count}/{len(self.learners)} 个模型")
+        return success_count == len(self.learners)
+    
+    def load_all_models(self) -> int:
+        """加载所有已保存的模型"""
+        if not os.path.exists(self.model_save_path):
+            return 0
+        
+        loaded_count = 0
+        for filename in os.listdir(self.model_save_path):
+            if filename.endswith("_style_model.pkl"):
+                chat_id = filename.replace("_style_model.pkl", "")
+                learner = StyleLearner(chat_id)
+                if learner.load(self.model_save_path):
+                    self.learners[chat_id] = learner
+                    loaded_count += 1
+        
+        logger.info(f"已加载 {loaded_count} 个模型")
+        return loaded_count
+    
+    async def start_auto_save(self) -> None:
+        """启动自动保存任务"""
+        if self._auto_save_task is None or self._auto_save_task.done():
+            self._auto_save_task = asyncio.create_task(self._auto_save_loop())
+            logger.info("已启动自动保存任务")
+    
+    async def stop_auto_save(self) -> None:
+        """停止自动保存任务"""
+        if self._auto_save_task and not self._auto_save_task.done():
+            self._auto_save_task.cancel()
+            try:
+                await self._auto_save_task
+            except asyncio.CancelledError:
+                pass
+            logger.info("已停止自动保存任务")
+    
+    async def _auto_save_loop(self) -> None:
+        """自动保存循环"""
+        while True:
+            try:
+                await asyncio.sleep(self.auto_save_interval)
+                self.save_all_models()
+            except asyncio.CancelledError:
+                break
+            except Exception as e:
+                logger.error(f"自动保存失败: {e}")
+
+
+# 全局管理器实例
+style_learner_manager = StyleLearnerManager()