secret 神秘小测验加强版

2025-03-23 00:01:26 +08:00
parent 20a06df9e5
commit 4b6a315b8e
5 changed files with 704 additions and 3 deletions
--- a/src/plugins/personality/can_i_recog_u.py
+++ b/src/plugins/personality/can_i_recog_u.py
@@ -0,0 +1,351 @@
+"""
+基于聊天记录的人格特征分析系统
+"""
+
+from typing import Dict, List
+import json
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+import sys
+import random
+from collections import defaultdict
+import matplotlib.pyplot as plt
+import numpy as np
+from datetime import datetime
+import matplotlib.font_manager as fm
+
+# Directory holding this file, and the directory three levels above it
+# (the repository root for src/plugins/personality/).
+current_dir = Path(__file__).resolve().parent
+project_root = current_dir.parent.parent.parent
+# Production environment file looked up at the repository root.
+env_path = project_root / ".env.prod"
+
+# Append the same root (computed via os.path) to sys.path so the absolute
+# `src.` imports that follow can be resolved.
+root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
+sys.path.append(root_path)
+
+from src.plugins.personality.scene import get_scene_by_factor, PERSONALITY_SCENES  # noqa: E402
+from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS  # noqa: E402
+from src.plugins.personality.offline_llm import LLMModel  # noqa: E402
+from src.plugins.personality.who_r_u import MessageAnalyzer  # noqa: E402
+
+# Load environment variables from the env file when it exists; otherwise
+# report that it is missing and carry on with the defaults.
+if not env_path.exists():
+    print(f"未找到环境变量文件: {env_path}")
+    print("将使用默认配置")
+else:
+    print(f"从 {env_path} 加载环境变量")
+    load_dotenv(env_path)
+
+class ChatBasedPersonalityEvaluator:
+    """Rate a user's Big Five personality traits from sampled chat history.
+
+    Chat excerpts come from ``MessageAnalyzer``; each excerpt is scored by the
+    LLM on every trait (1-6, or 0 when the trait cannot be judged). Per-trait
+    averages, the raw samples and a trend plot are written under ``results/``.
+    """
+
+    def __init__(self):
+        # The keys of this dict define the traits evaluated by every method.
+        self.personality_traits = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
+        self.scenarios = []
+        self.message_analyzer = MessageAnalyzer()
+        self.llm = LLMModel()
+        # Raw score of every evaluated sample, per trait, in evaluation order.
+        self.trait_scores_history = defaultdict(list)
+
+        # For every trait pick up to three random scenes and pair each of them
+        # with a randomly chosen secondary trait.
+        for trait in PERSONALITY_SCENES:
+            scenes = get_scene_by_factor(trait)
+            if not scenes:
+                continue
+            scene_keys = list(scenes.keys())
+            selected_scenes = random.sample(scene_keys, min(3, len(scene_keys)))
+            # Identical for every scene of this trait, so build it only once.
+            other_traits = [t for t in PERSONALITY_SCENES if t != trait]
+
+            for scene_key in selected_scenes:
+                scene = scenes[scene_key]
+                secondary_trait = random.choice(other_traits)
+                self.scenarios.append({
+                    "场景": scene["scenario"],
+                    "评估维度": [trait, secondary_trait],
+                    "场景编号": scene_key
+                })
+
+    def analyze_chat_context(self, messages: List[Dict]) -> str:
+        """
+        Render a list of message dicts as "nickname: text" lines.
+
+        Args:
+            messages (List[Dict]): messages carrying an optional ``user_info``
+                dict (with ``user_nickname``) and a ``processed_plain_text``
+                or ``detailed_plain_text`` entry.
+
+        Returns:
+            str: one newline-terminated line per message that has text;
+                messages without text are skipped.
+        """
+        lines = []
+        for msg in messages:
+            # `user_info` may be present but None; treat that like a missing key
+            # instead of failing on None.get().
+            user_info = msg.get('user_info') or {}
+            nickname = user_info.get('user_nickname', '未知用户')
+            content = msg.get('processed_plain_text', msg.get('detailed_plain_text', ''))
+            if content:
+                lines.append(f"{nickname}: {content}\n")
+        return "".join(lines)
+
+    @staticmethod
+    def _normalize_score(value) -> float:
+        """Convert one raw LLM score to a number in [0, 6]; unusable values become 0."""
+        try:
+            score = float(value)
+        except (TypeError, ValueError):
+            return 0
+        if score != score:  # NaN never compares equal to itself
+            return 0
+        return max(0, min(6, score))
+
+    @staticmethod
+    def _cumulative_averages(scores):
+        """Return (positions, averages): the running mean of the valid (> 0) scores.
+
+        ``positions`` holds the 1-based sample index of every point. Samples
+        before the first valid score produce no point; a later invalid sample
+        repeats the current running mean.
+        """
+        positions, averages = [], []
+        total = 0
+        valid_count = 0
+        for position, score in enumerate(scores, start=1):
+            if score > 0:
+                total += score
+                valid_count += 1
+            elif valid_count == 0:
+                continue  # nothing to carry forward yet
+            positions.append(position)
+            averages.append(total / valid_count)
+        return positions, averages
+
+    def evaluate_chat_response(
+        self, user_nickname: str, chat_context: str, dimensions: List[str] = None) -> Dict[str, float]:
+        """
+        Ask the LLM to score one chat excerpt on every personality trait.
+
+        Args:
+            user_nickname (str): nickname of the user being assessed.
+            chat_context (str): chat excerpt shown to the LLM.
+            dimensions (List[str]): accepted for interface compatibility only;
+                it is ignored and all traits are always evaluated.
+
+        Returns:
+            Dict[str, float]: exactly one entry per trait, clamped to [0, 6].
+                A trait that is missing from the reply or not numeric gets 0;
+                if the reply cannot be parsed at all, every trait gets 0.
+        """
+        # Always evaluate every trait, whatever the caller passed.
+        dimensions = list(self.personality_traits.keys())
+
+        dimension_descriptions = []
+        for dim in dimensions:
+            desc = FACTOR_DESCRIPTIONS.get(dim, "")
+            if desc:
+                dimension_descriptions.append(f"- {dim}：{desc}")
+
+        dimensions_text = "\n".join(dimension_descriptions)
+
+        prompt = f"""请根据以下聊天记录，评估"{user_nickname}"在大五人格模型中的维度得分（1-6分）。
+
+聊天记录：
+{chat_context}
+
+需要评估的维度说明：
+{dimensions_text}
+
+请按照以下格式输出评估结果，注意，你的评价对象是"{user_nickname}"（仅输出JSON格式）：
+{{
+    "开放性": 分数,
+    "严谨性": 分数,
+    "外向性": 分数,
+    "宜人性": 分数,
+    "神经质": 分数
+}}
+
+评分标准：
+1 = 非常不符合该维度特征
+2 = 比较不符合该维度特征
+3 = 有点不符合该维度特征
+4 = 有点符合该维度特征
+5 = 比较符合该维度特征
+6 = 非常符合该维度特征
+
+如果你觉得某个维度没有相关信息或者无法判断，请输出0分
+
+请根据聊天记录的内容和语气，结合维度说明进行评分。如果维度可以评分，确保分数在1-6之间。如果没有体现，请输出0分"""
+
+        try:
+            ai_response, _ = self.llm.generate_response(prompt)
+            # The reply may wrap the JSON object in extra text; keep only the
+            # span from the first "{" to the last "}".
+            start_idx = ai_response.find("{")
+            end_idx = ai_response.rfind("}") + 1
+            if start_idx == -1 or end_idx == 0:
+                print("AI响应格式不正确，使用默认评分")
+                return {dim: 0 for dim in dimensions}
+
+            raw_scores = json.loads(ai_response[start_idx:end_idx])
+            # Keep only the known traits so that stray keys cannot leak into
+            # the totals and every sample yields the same set of traits.
+            return {dim: self._normalize_score(raw_scores.get(dim, 0)) for dim in dimensions}
+        except Exception as e:
+            print(f"评估过程出错：{str(e)}")
+            return {dim: 0 for dim in dimensions}
+
+    def evaluate_user_personality(self, qq_id: str, num_samples: int = 10, context_length: int = 5) -> Dict:
+        """
+        Evaluate a user's personality traits from their chat history.
+
+        Args:
+            qq_id (str): QQ number of the user.
+            num_samples (int): number of chat excerpts to analyse.
+            context_length (int): context length of every excerpt.
+
+        Returns:
+            Dict: the evaluation result, or ``{"error": ...}`` when no messages
+                were found for the user. On success the result is also saved
+                to ``results/personality_result_<qq_id>.json`` and a trend
+                plot is written to ``results/plots/``.
+        """
+        # Fetch random messages of the user together with their context.
+        chat_contexts, user_nickname = self.message_analyzer.get_user_random_contexts(
+            qq_id, num_messages=num_samples, context_length=context_length)
+        if not chat_contexts:
+            return {"error": f"没有找到QQ号 {qq_id} 的消息记录"}
+
+        final_scores = defaultdict(float)
+        dimension_counts = defaultdict(int)
+        chat_samples = []
+
+        # Start from a clean history for this run.
+        self.trait_scores_history.clear()
+
+        for chat_context in chat_contexts:
+            scores = self.evaluate_chat_response(user_nickname, chat_context)
+
+            chat_samples.append({
+                "聊天内容": chat_context,
+                "评估维度": list(self.personality_traits.keys()),
+                "评分": scores
+            })
+
+            # Walk the known traits (not the returned keys) so every trait's
+            # history gets exactly one entry per sample.
+            for dimension in self.personality_traits:
+                score = scores.get(dimension, 0)
+                if score > 0:  # only scores above 0 count as valid
+                    final_scores[dimension] += score
+                    dimension_counts[dimension] += 1
+                self.trait_scores_history[dimension].append(score)
+
+        # Average over the valid scores only; 0 means "no valid score".
+        average_scores = {}
+        for dimension in self.personality_traits:
+            if dimension_counts[dimension] > 0:
+                average_scores[dimension] = round(final_scores[dimension] / dimension_counts[dimension], 2)
+            else:
+                average_scores[dimension] = 0
+
+        result = {
+            "用户QQ": qq_id,
+            "用户昵称": user_nickname,
+            "样本数量": len(chat_samples),
+            "人格特征评分": average_scores,
+            "维度评估次数": dict(dimension_counts),
+            "详细样本": chat_samples,
+            "特质得分历史": dict(self.trait_scores_history)
+        }
+
+        # Persist the result before plotting so a plotting failure cannot
+        # discard the scores that were already collected.
+        os.makedirs("results", exist_ok=True)
+        result_file = f"results/personality_result_{qq_id}.json"
+        with open(result_file, "w", encoding="utf-8") as f:
+            json.dump(result, f, ensure_ascii=False, indent=2)
+
+        self._generate_trend_plot(qq_id, user_nickname)
+
+        return result
+
+    def _generate_trend_plot(self, qq_id: str, user_nickname: str):
+        """
+        Plot the cumulative average score of every trait and save it as a PNG.
+
+        The x axis is the 1-based sample index, so traits whose first samples
+        were invalid start further to the right. The image is written to
+        ``results/plots/personality_trend_<qq_id>_<timestamp>.png``.
+        """
+        # Collect installed fonts whose name suggests Chinese glyph support.
+        font_markers = ('简', 'SC', '黑', '宋', '微软')
+        chinese_fonts = []
+        for font in fm.fontManager.ttflist:
+            try:
+                if any(marker in font.name for marker in font_markers):
+                    chinese_fonts.append(font.name)
+            except Exception:
+                continue
+
+        if chinese_fonts:
+            plt.rcParams['font.sans-serif'] = chinese_fonts + ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS']
+        else:
+            # No Chinese font found: fall back to a pinyin nickname, or to a
+            # fixed English placeholder when pypinyin is not installed.
+            try:
+                from pypinyin import lazy_pinyin
+                user_nickname = ''.join(lazy_pinyin(user_nickname))
+            except ImportError:
+                user_nickname = "User"
+
+        plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
+
+        colors = {
+            "开放性": "#FF9999",
+            "严谨性": "#66B2FF",
+            "外向性": "#99FF99",
+            "宜人性": "#FFCC99",
+            "神经质": "#FF99CC"
+        }
+
+        # Running mean per trait; traits without any valid score are left out.
+        cumulative_averages = {}
+        for trait, scores in self.trait_scores_history.items():
+            positions, averages = self._cumulative_averages(scores)
+            if averages:
+                cumulative_averages[trait] = (positions, averages)
+
+        fig = plt.figure(figsize=(12, 6))
+        try:
+            plt.style.use('bmh')
+
+            for trait, (positions, averages) in cumulative_averages.items():
+                color = colors.get(trait)
+                plt.plot(positions, averages, 'o-', label=trait, color=color, linewidth=2, markersize=8)
+
+                # A straight-line fit needs at least two points.
+                if len(positions) > 1:
+                    trend_line = np.poly1d(np.polyfit(positions, averages, 1))
+                    plt.plot(positions, trend_line(positions), '--', color=color, alpha=0.5)
+
+            plt.title(f"{user_nickname} 的人格特质累计平均分变化趋势", fontsize=14, pad=20)
+            plt.xlabel("评估次数", fontsize=12)
+            plt.ylabel("累计平均分", fontsize=12)
+            plt.grid(True, linestyle='--', alpha=0.7)
+            plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+            plt.ylim(0, 7)
+            plt.tight_layout()
+
+            os.makedirs("results/plots", exist_ok=True)
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            plot_file = f"results/plots/personality_trend_{qq_id}_{timestamp}.png"
+            plt.savefig(plot_file, dpi=300, bbox_inches='tight')
+        finally:
+            # Release the figure even when plotting or saving fails.
+            plt.close(fig)
+
+def analyze_user_personality(qq_id: str, num_samples: int = 10, context_length: int = 5) -> str:
+    """
+    Run a personality evaluation for one user and format it as text.
+
+    Args:
+        qq_id (str): QQ number of the user.
+        num_samples (int): number of chat excerpts to analyse.
+        context_length (int): context length of every excerpt.
+
+    Returns:
+        str: the formatted report, or the error message when the evaluation
+            returned an error.
+    """
+    report = ChatBasedPersonalityEvaluator().evaluate_user_personality(qq_id, num_samples, context_length)
+    if "error" in report:
+        return report["error"]
+
+    eval_counts = report["维度评估次数"]
+    score_history = report["特质得分历史"]
+
+    # Header, then one line per trait (plus an optional trend line).
+    parts = [
+        f"QQ号 {qq_id} ({report['用户昵称']}) 的人格特征分析结果：\n",
+        "=" * 50 + "\n\n",
+        "人格特征评分：\n",
+    ]
+
+    for trait, score in report["人格特征评分"].items():
+        times = eval_counts.get(trait, 0)
+        if score != 0:
+            parts.append(f"{trait}: {score}/6 (评估次数: {times})\n")
+        else:
+            parts.append(f"{trait}: 数据不足，无法判断 (评估次数: {times})\n")
+
+        # The trend is the slope of a straight-line fit through the valid
+        # (non-zero) scores; it needs at least two of them.
+        valid_scores = [s for s in score_history.get(trait, ()) if s != 0]
+        if len(valid_scores) < 2:
+            continue
+        slope = np.polyfit(range(len(valid_scores)), valid_scores, 1)[0]
+        if abs(slope) < 0.1:
+            trend_desc = "保持稳定"
+        else:
+            trend_desc = "呈上升趋势" if slope > 0 else "呈下降趋势"
+        parts.append(f"    变化趋势: {trend_desc} (斜率: {slope:.2f})\n")
+
+    parts.append(f"\n分析样本数量：{report['样本数量']}\n")
+    parts.append(f"结果已保存至：results/personality_result_{qq_id}.json\n")
+    parts.append("变化趋势图已保存至：results/plots/目录\n")
+
+    return "".join(parts)
+
+if __name__ == "__main__":
+    # Manual test run. The QQ number may be given as the first command-line
+    # argument; without one the built-in sample account is analysed.
+    test_qq = sys.argv[1] if len(sys.argv) > 1 else "1026294844"
+    print(analyze_user_personality(test_qq, num_samples=30, context_length=30))