Files
mai-bot/src/plugins/personality/can_i_recog_u.py
2025-03-23 00:01:26 +08:00

352 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
基于聊天记录的人格特征分析系统
"""
from typing import Dict, List
import json
import os
from pathlib import Path
from dotenv import load_dotenv
import sys
import random
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import matplotlib.font_manager as fm
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent.parent
env_path = project_root / ".env.prod"
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
sys.path.append(root_path)
from src.plugins.personality.scene import get_scene_by_factor, PERSONALITY_SCENES # noqa: E402
from src.plugins.personality.questionnaire import FACTOR_DESCRIPTIONS # noqa: E402
from src.plugins.personality.offline_llm import LLMModel # noqa: E402
from src.plugins.personality.who_r_u import MessageAnalyzer # noqa: E402
# 加载环境变量
if env_path.exists():
print(f"{env_path} 加载环境变量")
load_dotenv(env_path)
else:
print(f"未找到环境变量文件: {env_path}")
print("将使用默认配置")
class ChatBasedPersonalityEvaluator:
def __init__(self):
self.personality_traits = {"开放性": 0, "严谨性": 0, "外向性": 0, "宜人性": 0, "神经质": 0}
self.scenarios = []
self.message_analyzer = MessageAnalyzer()
self.llm = LLMModel()
self.trait_scores_history = defaultdict(list) # 记录每个特质的得分历史
# 为每个人格特质获取对应的场景
for trait in PERSONALITY_SCENES:
scenes = get_scene_by_factor(trait)
if not scenes:
continue
scene_keys = list(scenes.keys())
selected_scenes = random.sample(scene_keys, min(3, len(scene_keys)))
for scene_key in selected_scenes:
scene = scenes[scene_key]
other_traits = [t for t in PERSONALITY_SCENES if t != trait]
secondary_trait = random.choice(other_traits)
self.scenarios.append({
"场景": scene["scenario"],
"评估维度": [trait, secondary_trait],
"场景编号": scene_key
})
def analyze_chat_context(self, messages: List[Dict]) -> str:
"""
分析一组消息的上下文,生成场景描述
"""
context = ""
for msg in messages:
nickname = msg.get('user_info', {}).get('user_nickname', '未知用户')
content = msg.get('processed_plain_text', msg.get('detailed_plain_text', ''))
if content:
context += f"{nickname}: {content}\n"
return context
def evaluate_chat_response(
self, user_nickname: str, chat_context: str, dimensions: List[str] = None) -> Dict[str, float]:
"""
评估聊天内容在各个人格维度上的得分
"""
# 使用所有维度进行评估
dimensions = list(self.personality_traits.keys())
dimension_descriptions = []
for dim in dimensions:
desc = FACTOR_DESCRIPTIONS.get(dim, "")
if desc:
dimension_descriptions.append(f"- {dim}{desc}")
dimensions_text = "\n".join(dimension_descriptions)
prompt = f"""请根据以下聊天记录,评估"{user_nickname}"在大五人格模型中的维度得分1-6分
聊天记录:
{chat_context}
需要评估的维度说明:
{dimensions_text}
请按照以下格式输出评估结果,注意,你的评价对象是"{user_nickname}"仅输出JSON格式
{{
"开放性": 分数,
"严谨性": 分数,
"外向性": 分数,
"宜人性": 分数,
"神经质": 分数
}}
评分标准:
1 = 非常不符合该维度特征
2 = 比较不符合该维度特征
3 = 有点不符合该维度特征
4 = 有点符合该维度特征
5 = 比较符合该维度特征
6 = 非常符合该维度特征
如果你觉得某个维度没有相关信息或者无法判断请输出0分
请根据聊天记录的内容和语气结合维度说明进行评分。如果维度可以评分确保分数在1-6之间。如果没有体现请输出0分"""
try:
ai_response, _ = self.llm.generate_response(prompt)
start_idx = ai_response.find("{")
end_idx = ai_response.rfind("}") + 1
if start_idx != -1 and end_idx != 0:
json_str = ai_response[start_idx:end_idx]
scores = json.loads(json_str)
return {k: max(0, min(6, float(v))) for k, v in scores.items()}
else:
print("AI响应格式不正确使用默认评分")
return {dim: 0 for dim in dimensions}
except Exception as e:
print(f"评估过程出错:{str(e)}")
return {dim: 0 for dim in dimensions}
def evaluate_user_personality(self, qq_id: str, num_samples: int = 10, context_length: int = 5) -> Dict:
"""
基于用户的聊天记录评估人格特征
Args:
qq_id (str): 用户QQ号
num_samples (int): 要分析的聊天片段数量
context_length (int): 每个聊天片段的上下文长度
Returns:
Dict: 评估结果
"""
# 获取用户的随机消息及其上下文
chat_contexts, user_nickname = self.message_analyzer.get_user_random_contexts(
qq_id, num_messages=num_samples, context_length=context_length)
if not chat_contexts:
return {"error": f"没有找到QQ号 {qq_id} 的消息记录"}
# 初始化评分
final_scores = defaultdict(float)
dimension_counts = defaultdict(int)
chat_samples = []
# 清空历史记录
self.trait_scores_history.clear()
# 分析每个聊天上下文
for chat_context in chat_contexts:
# 评估这段聊天内容的所有维度
scores = self.evaluate_chat_response(user_nickname, chat_context)
# 记录样本
chat_samples.append({
"聊天内容": chat_context,
"评估维度": list(self.personality_traits.keys()),
"评分": scores
})
# 更新总分和历史记录
for dimension, score in scores.items():
if score > 0: # 只统计大于0的有效分数
final_scores[dimension] += score
dimension_counts[dimension] += 1
self.trait_scores_history[dimension].append(score)
# 计算平均分
average_scores = {}
for dimension in self.personality_traits:
if dimension_counts[dimension] > 0:
average_scores[dimension] = round(final_scores[dimension] / dimension_counts[dimension], 2)
else:
average_scores[dimension] = 0 # 如果没有有效分数返回0
# 生成趋势图
self._generate_trend_plot(qq_id, user_nickname)
result = {
"用户QQ": qq_id,
"用户昵称": user_nickname,
"样本数量": len(chat_samples),
"人格特征评分": average_scores,
"维度评估次数": dict(dimension_counts),
"详细样本": chat_samples,
"特质得分历史": {k: v for k, v in self.trait_scores_history.items()}
}
# 保存结果
os.makedirs("results", exist_ok=True)
result_file = f"results/personality_result_{qq_id}.json"
with open(result_file, "w", encoding="utf-8") as f:
json.dump(result, f, ensure_ascii=False, indent=2)
return result
def _generate_trend_plot(self, qq_id: str, user_nickname: str):
"""
生成人格特质累计平均分变化趋势图
"""
# 查找系统中可用的中文字体
chinese_fonts = []
for f in fm.fontManager.ttflist:
try:
if '' in f.name or 'SC' in f.name or '' in f.name or '' in f.name or '微软' in f.name:
chinese_fonts.append(f.name)
except Exception:
continue
if chinese_fonts:
plt.rcParams['font.sans-serif'] = chinese_fonts + ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS']
else:
# 如果没有找到中文字体,使用默认字体,并将中文昵称转换为拼音或英文
try:
from pypinyin import lazy_pinyin
user_nickname = ''.join(lazy_pinyin(user_nickname))
except ImportError:
user_nickname = "User" # 如果无法转换为拼音,使用默认英文
plt.rcParams['axes.unicode_minus'] = False # 解决负号显示问题
plt.figure(figsize=(12, 6))
plt.style.use('bmh') # 使用内置的bmh样式它有类似seaborn的美观效果
colors = {
"开放性": "#FF9999",
"严谨性": "#66B2FF",
"外向性": "#99FF99",
"宜人性": "#FFCC99",
"神经质": "#FF99CC"
}
# 计算每个维度在每个时间点的累计平均分
cumulative_averages = {}
for trait, scores in self.trait_scores_history.items():
if not scores:
continue
averages = []
total = 0
valid_count = 0
for score in scores:
if score > 0: # 只计算大于0的有效分数
total += score
valid_count += 1
if valid_count > 0:
averages.append(total / valid_count)
else:
# 如果当前分数无效,使用前一个有效的平均分
if averages:
averages.append(averages[-1])
else:
continue # 跳过无效分数
if averages: # 只有在有有效分数的情况下才添加到累计平均中
cumulative_averages[trait] = averages
# 绘制每个维度的累计平均分变化趋势
for trait, averages in cumulative_averages.items():
x = range(1, len(averages) + 1)
plt.plot(x, averages, 'o-', label=trait, color=colors.get(trait), linewidth=2, markersize=8)
# 添加趋势线
z = np.polyfit(x, averages, 1)
p = np.poly1d(z)
plt.plot(x, p(x), '--', color=colors.get(trait), alpha=0.5)
plt.title(f"{user_nickname} 的人格特质累计平均分变化趋势", fontsize=14, pad=20)
plt.xlabel("评估次数", fontsize=12)
plt.ylabel("累计平均分", fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.ylim(0, 7)
plt.tight_layout()
# 保存图表
os.makedirs("results/plots", exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
plot_file = f"results/plots/personality_trend_{qq_id}_{timestamp}.png"
plt.savefig(plot_file, dpi=300, bbox_inches='tight')
plt.close()
def analyze_user_personality(qq_id: str, num_samples: int = 10, context_length: int = 5) -> str:
"""
分析用户人格特征的便捷函数
Args:
qq_id (str): 用户QQ号
num_samples (int): 要分析的聊天片段数量
context_length (int): 每个聊天片段的上下文长度
Returns:
str: 格式化的分析结果
"""
evaluator = ChatBasedPersonalityEvaluator()
result = evaluator.evaluate_user_personality(qq_id, num_samples, context_length)
if "error" in result:
return result["error"]
# 格式化输出
output = f"QQ号 {qq_id} ({result['用户昵称']}) 的人格特征分析结果:\n"
output += "=" * 50 + "\n\n"
output += "人格特征评分:\n"
for trait, score in result["人格特征评分"].items():
if score == 0:
output += f"{trait}: 数据不足,无法判断 (评估次数: {result['维度评估次数'].get(trait, 0)})\n"
else:
output += f"{trait}: {score}/6 (评估次数: {result['维度评估次数'].get(trait, 0)})\n"
# 添加变化趋势描述
if trait in result["特质得分历史"] and len(result["特质得分历史"][trait]) > 1:
scores = [s for s in result["特质得分历史"][trait] if s != 0] # 过滤掉无效分数
if len(scores) > 1: # 确保有足够的有效分数计算趋势
trend = np.polyfit(range(len(scores)), scores, 1)[0]
if abs(trend) < 0.1:
trend_desc = "保持稳定"
elif trend > 0:
trend_desc = "呈上升趋势"
else:
trend_desc = "呈下降趋势"
output += f" 变化趋势: {trend_desc} (斜率: {trend:.2f})\n"
output += f"\n分析样本数量:{result['样本数量']}\n"
output += f"结果已保存至results/personality_result_{qq_id}.json\n"
output += "变化趋势图已保存至results/plots/目录\n"
return output
if __name__ == "__main__":
# 测试代码
# test_qq = "" # 替换为要测试的QQ号
# print(analyze_user_personality(test_qq, num_samples=30, context_length=20))
# test_qq = ""
# print(analyze_user_personality(test_qq, num_samples=30, context_length=20))
test_qq = "1026294844"
print(analyze_user_personality(test_qq, num_samples=30, context_length=30))