feat:可以对不同chat自定义一段额外prompt
This commit is contained in:
@@ -940,13 +940,13 @@ class EmojiManager:
|
||||
image_base64 = get_image_manager().transform_gif(image_base64) # type: ignore
|
||||
if not image_base64:
|
||||
raise RuntimeError("GIF表情包转换失败")
|
||||
prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,简短描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析"
|
||||
prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,简短描述一下表情包表达的情感和内容,从互联网梗,meme的角度去分析,精简回答"
|
||||
description, _ = await self.vlm.generate_response_for_image(
|
||||
prompt, image_base64, "jpg", temperature=0.5
|
||||
)
|
||||
else:
|
||||
prompt = (
|
||||
"这是一个表情包,请详细描述一下表情包所表达的情感和内容,简短描述细节,从互联网梗,meme的角度去分析"
|
||||
"这是一个表情包,请详细描述一下表情包所表达的情感和内容,简短描述细节,从互联网梗,meme的角度去分析,精简回答"
|
||||
)
|
||||
description, _ = await self.vlm.generate_response_for_image(
|
||||
prompt, image_base64, image_format, temperature=0.5
|
||||
|
||||
@@ -17,12 +17,12 @@ from src.chat.planner_actions.planner import ActionPlanner
|
||||
from src.chat.planner_actions.action_modifier import ActionModifier
|
||||
from src.chat.planner_actions.action_manager import ActionManager
|
||||
from src.chat.heart_flow.hfc_utils import CycleDetail
|
||||
from src.chat.heart_flow.hfc_utils import send_typing, stop_typing
|
||||
from src.express.expression_learner import expression_learner_manager
|
||||
from src.chat.frequency_control.frequency_control import frequency_control_manager
|
||||
from src.memory_system.question_maker import QuestionMaker
|
||||
from src.memory_system.questions import global_conflict_tracker
|
||||
from src.memory_system.curious import check_and_make_question
|
||||
from src.jargon import extract_and_store_jargon
|
||||
from src.person_info.person_info import Person
|
||||
from src.plugin_system.base.component_types import EventType, ActionInfo
|
||||
from src.plugin_system.core import events_manager
|
||||
@@ -336,7 +336,9 @@ class HeartFChatting:
|
||||
asyncio.create_task(frequency_control_manager.get_or_create_frequency_control(self.stream_id).trigger_frequency_adjust())
|
||||
|
||||
# 添加curious检测任务 - 检测聊天记录中的矛盾、冲突或需要提问的内容
|
||||
asyncio.create_task(check_and_make_question(self.stream_id, recent_messages_list))
|
||||
asyncio.create_task(check_and_make_question(self.stream_id))
|
||||
# 添加jargon提取任务 - 提取聊天中的黑话/俚语并入库(内部自行取消息并带冷却)
|
||||
asyncio.create_task(extract_and_store_jargon(self.stream_id))
|
||||
|
||||
|
||||
cycle_timers, thinking_id = self.start_cycle()
|
||||
|
||||
@@ -20,6 +20,8 @@ logger = get_logger("database_model")
|
||||
|
||||
# 定义一个基础模型是一个好习惯,所有其他模型都应继承自它。
|
||||
# 这允许您在一个地方为所有模型指定数据库。
|
||||
|
||||
|
||||
class BaseModel(Model):
|
||||
class Meta:
|
||||
# 将下面的 'db' 替换为您实际的数据库实例变量名。
|
||||
@@ -343,30 +345,45 @@ class MemoryConflict(BaseModel):
|
||||
|
||||
class Meta:
|
||||
table_name = "memory_conflicts"
|
||||
|
||||
class Jargon(BaseModel):
    """
    Model used to store slang / jargon terms.
    """

    # The jargon term itself (required).
    content = TextField()
    # Optional source text associated with the term.
    raw_content = TextField(null=True)
    # Optional category code for the term.
    type = TextField(null=True)
    # Optional; not populated by any code visible alongside this model.
    translation = TextField(null=True)
    # Optional; not populated by any code visible alongside this model.
    meaning = TextField(null=True)
    # Chat the term belongs to; indexed.
    chat_id = TextField(index=True)
    # Defaults to False, i.e. the row is scoped to chat_id unless set otherwise.
    is_global = BooleanField(default=False)
    # Counter, defaults to 0.
    count = IntegerField(default=0)

    class Meta:
        table_name = "jargon"
|
||||
|
||||
|
||||
# Single list of every model class defined in this module. It is passed to
# db.create_tables() and iterated by the table-existence / field-constraint
# checks, so a new model only has to be added here.
MODELS = [
    ChatStreams,
    LLMUsage,
    Emoji,
    Messages,
    Images,
    ImageDescriptions,
    OnlineTime,
    PersonInfo,
    Expression,
    ActionRecords,
    MemoryChest,
    MemoryConflict,
    Jargon,
]
|
||||
|
||||
def create_tables():
    """
    Create the database tables for every model listed in MODELS.

    MODELS is the only source of truth for the table list: the previous
    hard-coded copy of the list duplicated it (and lacked Jargon), so it is
    no longer passed in a separate create_tables() call.
    """
    # The connection is opened for the duration of the call and closed afterwards.
    with db:
        db.create_tables(MODELS)
|
||||
|
||||
|
||||
def initialize_database(sync_constraints=False):
|
||||
@@ -379,24 +396,9 @@ def initialize_database(sync_constraints=False):
|
||||
如果为 True,会检查并修复字段的 NULL 约束不一致问题。
|
||||
"""
|
||||
|
||||
models = [
|
||||
ChatStreams,
|
||||
LLMUsage,
|
||||
Emoji,
|
||||
Messages,
|
||||
Images,
|
||||
ImageDescriptions,
|
||||
OnlineTime,
|
||||
PersonInfo,
|
||||
Expression,
|
||||
ActionRecords, # 添加 ActionRecords 到初始化列表
|
||||
MemoryChest,
|
||||
MemoryConflict,
|
||||
]
|
||||
|
||||
try:
|
||||
with db: # 管理 table_exists 检查的连接
|
||||
for model in models:
|
||||
for model in MODELS:
|
||||
table_name = model._meta.table_name
|
||||
if not db.table_exists(model):
|
||||
logger.warning(f"表 '{table_name}' 未找到,正在创建...")
|
||||
@@ -476,24 +478,9 @@ def sync_field_constraints():
|
||||
如果发现不一致,会自动修复字段约束。
|
||||
"""
|
||||
|
||||
models = [
|
||||
ChatStreams,
|
||||
LLMUsage,
|
||||
Emoji,
|
||||
Messages,
|
||||
Images,
|
||||
ImageDescriptions,
|
||||
OnlineTime,
|
||||
PersonInfo,
|
||||
Expression,
|
||||
ActionRecords,
|
||||
MemoryChest,
|
||||
MemoryConflict,
|
||||
]
|
||||
|
||||
try:
|
||||
with db:
|
||||
for model in models:
|
||||
for model in MODELS:
|
||||
table_name = model._meta.table_name
|
||||
if not db.table_exists(model):
|
||||
logger.warning(f"表 '{table_name}' 不存在,跳过约束检查")
|
||||
@@ -660,26 +647,11 @@ def check_field_constraints():
|
||||
用于在修复前预览需要修复的内容。
|
||||
"""
|
||||
|
||||
models = [
|
||||
ChatStreams,
|
||||
LLMUsage,
|
||||
Emoji,
|
||||
Messages,
|
||||
Images,
|
||||
ImageDescriptions,
|
||||
OnlineTime,
|
||||
PersonInfo,
|
||||
Expression,
|
||||
ActionRecords,
|
||||
MemoryChest,
|
||||
MemoryConflict,
|
||||
]
|
||||
|
||||
inconsistencies = {}
|
||||
|
||||
try:
|
||||
with db:
|
||||
for model in models:
|
||||
for model in MODELS:
|
||||
table_name = model._meta.table_name
|
||||
if not db.table_exists(model):
|
||||
continue
|
||||
|
||||
7
src/jargon/__init__.py
Normal file
7
src/jargon/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from .jargon_miner import extract_and_store_jargon
|
||||
|
||||
__all__ = [
|
||||
"extract_and_store_jargon",
|
||||
]
|
||||
|
||||
|
||||
230
src/jargon/jargon_miner.py
Normal file
230
src/jargon/jargon_miner.py
Normal file
@@ -0,0 +1,230 @@
|
||||
import time
|
||||
import json
|
||||
from typing import List
|
||||
from json_repair import repair_json
|
||||
|
||||
from src.common.logger import get_logger
|
||||
from src.common.database.database_model import Jargon
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.config.config import model_config
|
||||
from src.chat.message_receive.chat_stream import get_chat_manager
|
||||
from src.chat.utils.chat_message_builder import (
|
||||
build_anonymous_messages,
|
||||
get_raw_msg_by_timestamp_with_chat_inclusive,
|
||||
)
|
||||
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
|
||||
|
||||
|
||||
logger = get_logger("jargon")
|
||||
|
||||
|
||||
def _init_prompt() -> None:
    """
    Build the jargon-extraction prompt template and hand it to Prompt under
    the name "extract_jargon_prompt".

    The template has one placeholder, {chat_str}; the doubled braces around
    the JSON examples are escaped literal braces.
    NOTE(review): presumably Prompt(...) registers the template with the
    global prompt manager so it can later be formatted by name - confirm.
    """
    prompt_str = """
**聊天内容**
{chat_str}

请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。
- 必须为对话中真实出现过的短词或短语
- 必须是你无法理解含义的词语,或者出现频率较高的词语
- 必须是这几种类别之一:英文或中文缩写、中文拼音短语、字母数字混合、意义不明但频繁的词汇
- 排除:人名、@、明显的表情/图片占位、纯标点、常规功能词(如的、了、呢、啊等)
- 每个词条长度建议 2-8 个字符(不强制),尽量短小
- 合并重复项,去重

分类规则:
- p(拼音缩写):由字母或字母和汉字构成的,疑似拼音简写词,例如:nb、yyds、xswl
- c(中文缩写):中文词语的缩写,用几个汉字概括一个词汇或含义,例如:社死、内卷
- e(英文缩写):英文词语的缩写,用英文字母概括一个词汇或含义,例如:CPU、GPU、API

以 JSON 数组输出,元素为对象(严格按以下结构):
[
{{"content": "词条", "raw_content": "包含该词条的完整句子", "type": "p"}},
{{"content": "词条2", "raw_content": "包含该词条的完整句子", "type": "c"}}
]

现在请输出:
"""
    Prompt(prompt_str, "extract_jargon_prompt")


# Executed at import time so the template exists before any miner runs.
_init_prompt()
|
||||
|
||||
|
||||
class JargonMiner:
    """
    Extracts candidate jargon terms from one chat's recent messages with an
    LLM and stores them in the Jargon table.

    A run is attempted only when the cooldown (min_learning_interval seconds)
    has elapsed and at least min_messages_for_learning messages arrived since
    last_learning_time.
    """

    # Category codes the extraction prompt allows; anything else becomes the default.
    _VALID_TYPES = ("p", "c", "e")
    _DEFAULT_TYPE = "p"

    def __init__(self, chat_id: str) -> None:
        """
        Args:
            chat_id: Identifier of the chat whose messages are mined.
        """
        self.chat_id = chat_id
        # Start of the next learning window.
        self.last_learning_time: float = time.time()
        # Rate limiting; tune as needed.
        self.min_messages_for_learning: int = 20
        self.min_learning_interval: float = 30

        self.llm = LLMRequest(
            model_set=model_config.model_task_config.utils,
            request_type="jargon.extract",
        )

    def should_trigger(self) -> bool:
        """
        Return True when the cooldown has elapsed and enough messages have
        accumulated since last_learning_time.
        """
        # Cooldown check.
        if time.time() - self.last_learning_time < self.min_learning_interval:
            return False

        # Are there enough recent messages?
        recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
            chat_id=self.chat_id,
            timestamp_start=self.last_learning_time,
            timestamp_end=time.time(),
        )
        return bool(recent_messages and len(recent_messages) >= self.min_messages_for_learning)

    @staticmethod
    def _as_text(value) -> str:
        """Return value as a stripped string; None (JSON null) becomes ""."""
        # str(None) would yield the literal text "None", which must never be
        # stored as a term.
        if value is None:
            return ""
        return str(value).strip()

    @classmethod
    def _parse_entries(cls, response: str) -> List[dict]:
        """
        Turn the raw LLM response into normalized entry dicts.

        Args:
            response: Text returned by the LLM, expected to be a JSON array
                of objects with "content", "raw_content" and "type".

        Returns:
            One dict per usable item with keys "content", "raw_content" and
            "type". Items that are not objects or have empty content are
            dropped; an unknown type falls back to the default type.

        Raises:
            Exception: whatever the JSON repair / decode step raises when the
                response cannot be recovered.
        """
        resp = response.strip()
        try:
            parsed = json.loads(resp)
        except json.JSONDecodeError:
            # Not valid JSON as-is (code fences, trailing commas, ...): let
            # json_repair try, even when the text looks like a list.
            repaired = repair_json(resp)
            parsed = json.loads(repaired) if isinstance(repaired, str) else repaired

        # A single object is treated as a one-element list.
        if isinstance(parsed, dict):
            parsed = [parsed]
        if not isinstance(parsed, list):
            return []

        entries: List[dict] = []
        for item in parsed:
            if not isinstance(item, dict):
                continue
            content = cls._as_text(item.get("content"))
            if not content:
                continue
            type_str = cls._as_text(item.get("type")).lower()
            if type_str not in cls._VALID_TYPES:
                type_str = cls._DEFAULT_TYPE
            entries.append(
                {
                    "content": content,
                    "raw_content": cls._as_text(item.get("raw_content")),
                    "type": type_str,
                }
            )
        return entries

    @staticmethod
    def _dedupe(entries: List[dict]) -> List[dict]:
        """Drop entries whose content was already seen, keeping the first occurrence in order."""
        unique = {}
        for entry in entries:
            unique.setdefault(entry["content"], entry)
        return list(unique.values())

    def _store_entries(self, entries: List[dict]) -> "tuple[int, int]":
        """
        Insert new terms and bump the counter of terms already stored for this chat.

        A failure on one entry is logged and does not stop the remaining ones.

        Returns:
            (saved, updated): number of rows created and rows updated.
        """
        saved = 0
        updated = 0
        for entry in entries:
            content = entry["content"]
            raw_content = entry["raw_content"]
            type_str = entry["type"]
            try:
                # Rows are keyed by chat_id + content.
                obj = Jargon.get_or_none((Jargon.chat_id == self.chat_id) & (Jargon.content == content))
                if obj is None:
                    Jargon.create(
                        content=content,
                        raw_content=raw_content,
                        type=type_str,
                        chat_id=self.chat_id,
                        is_global=False,
                        count=1,
                    )
                    saved += 1
                    continue

                try:
                    obj.count = (obj.count or 0) + 1
                except TypeError:
                    # Stored counter is not numeric; restart it.
                    obj.count = 1
                # Only fill raw_content / type when the stored row has none.
                if raw_content and not obj.raw_content:
                    obj.raw_content = raw_content
                if type_str and not obj.type:
                    obj.type = type_str
                obj.save()
                updated += 1
            except Exception as e:
                logger.error(f"保存jargon失败: chat_id={self.chat_id}, content={content}, err={e}")
        return saved, updated

    async def run_once(self) -> None:
        """
        Run one extraction pass if the trigger conditions hold.

        The learning window is claimed (last_learning_time advanced) before
        the first await, so an overlapping task or a pass that yields no
        terms cannot send the same window to the LLM again. The trade-off is
        that messages of a failed pass are not retried. Every error is logged
        and swallowed; nothing is raised to the caller.
        """
        try:
            if not self.should_trigger():
                return

            chat_stream = get_chat_manager().get_stream(self.chat_id)
            if not chat_stream:
                return

            # Fetch the messages inside the learning window.
            window_end = time.time()
            messages = get_raw_msg_by_timestamp_with_chat_inclusive(
                chat_id=self.chat_id,
                timestamp_start=self.last_learning_time,
                timestamp_end=window_end,
                limit=20,
            )
            if not messages:
                return

            # No await has happened since should_trigger(), so this claim
            # cannot interleave with another task.
            self.last_learning_time = window_end

            chat_str: str = await build_anonymous_messages(messages)
            if not chat_str.strip():
                return

            prompt: str = await global_prompt_manager.format_prompt(
                "extract_jargon_prompt",
                chat_str=chat_str,
            )

            response, _ = await self.llm.generate_response_async(prompt, temperature=0.2)
            if not response:
                return

            logger.info(f"jargon提取提示词: {prompt}")
            logger.info(f"jargon提取结果: {response}")

            try:
                entries = self._parse_entries(response)
            except Exception as e:
                logger.error(f"解析jargon JSON失败: {e}; 原始: {response}")
                return

            if not entries:
                return

            saved, updated = self._store_entries(self._dedupe(entries))
            if saved or updated:
                logger.info(f"jargon写入: 新增 {saved} 条,更新 {updated} 条,chat_id={self.chat_id}")
        except Exception as e:
            logger.error(f"JargonMiner 运行失败: {e}")
|
||||
|
||||
|
||||
class JargonMinerManager:
    """Keeps one JargonMiner per chat id, creating it the first time it is requested."""

    def __init__(self) -> None:
        # chat id -> miner created for that chat
        self._miners: dict[str, JargonMiner] = {}

    def get_miner(self, chat_id: str) -> JargonMiner:
        """
        Return the miner for chat_id, building and caching it on first use.

        Args:
            chat_id: Identifier of the chat.

        Returns:
            The JargonMiner instance cached for this chat.
        """
        miner = self._miners.get(chat_id)
        if miner is None:
            miner = JargonMiner(chat_id)
            self._miners[chat_id] = miner
        return miner
|
||||
|
||||
|
||||
# Module-level manager instance, created once at import time.
miner_manager = JargonMinerManager()
|
||||
|
||||
|
||||
async def extract_and_store_jargon(chat_id: str) -> None:
    """
    Run one jargon-mining pass for the given chat.

    Looks up (or creates) the chat's miner through the module-level manager
    and awaits a single run_once() call on it.

    Args:
        chat_id: Identifier of the chat to mine.
    """
    await miner_manager.get_miner(chat_id).run_once()
|
||||
|
||||
|
||||
@@ -444,7 +444,7 @@ def _default_normal_response_parser(
|
||||
choice0 = resp.choices[0]
|
||||
reason = getattr(choice0, "finish_reason", None)
|
||||
if reason and reason == "length":
|
||||
print(resp)
|
||||
# print(resp)
|
||||
_model_name = resp.model
|
||||
# 统一日志格式
|
||||
logger.info(
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import time
|
||||
import asyncio
|
||||
from typing import List, Optional, Tuple
|
||||
from typing import List, Optional
|
||||
from src.common.logger import get_logger
|
||||
from src.chat.utils.chat_message_builder import (
|
||||
get_raw_msg_before_timestamp_with_chat,
|
||||
get_raw_msg_by_timestamp_with_chat_inclusive,
|
||||
build_readable_messages_with_id,
|
||||
)
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
@@ -25,7 +24,21 @@ class CuriousDetector:
|
||||
model_set=model_config.model_task_config.utils,
|
||||
request_type="curious_detector",
|
||||
)
|
||||
# 触发控制
|
||||
self.last_detection_time: float = time.time()
|
||||
self.min_interval_seconds: float = 60.0
|
||||
self.min_messages: int = 20
|
||||
|
||||
def should_trigger(self) -> bool:
    """
    Decide whether a detection pass should run now.

    Returns:
        False while fewer than min_interval_seconds have passed since
        last_detection_time; otherwise True only when at least min_messages
        messages were fetched for the span from last_detection_time to now.
    """
    elapsed = time.time() - self.last_detection_time
    if elapsed < self.min_interval_seconds:
        return False

    window = get_raw_msg_by_timestamp_with_chat_inclusive(
        chat_id=self.chat_id,
        timestamp_start=self.last_detection_time,
        timestamp_end=time.time(),
    )
    # A missing or empty result never triggers.
    if not window:
        return False
    return len(window) >= self.min_messages
|
||||
|
||||
async def detect_questions(self, recent_messages: List) -> Optional[str]:
|
||||
"""
|
||||
检测最近消息中是否有需要提问的内容
|
||||
@@ -91,6 +104,9 @@ class CuriousDetector:
|
||||
|
||||
result_text, _ = await self.llm_request.generate_response_async(prompt, temperature=0.3)
|
||||
|
||||
logger.info(f"好奇心检测提示词: {prompt}")
|
||||
logger.info(f"好奇心检测结果: {result_text}")
|
||||
|
||||
if not result_text:
|
||||
return None
|
||||
|
||||
@@ -154,7 +170,20 @@ class CuriousDetector:
|
||||
return False
|
||||
|
||||
|
||||
async def check_and_make_question(chat_id: str, recent_messages: List) -> bool:
|
||||
class CuriousManager:
    """Keeps one CuriousDetector per chat id, creating it the first time it is requested."""

    def __init__(self) -> None:
        # chat id -> detector created for that chat
        self._detectors: dict[str, CuriousDetector] = {}

    def get_detector(self, chat_id: str) -> CuriousDetector:
        """
        Return the detector for chat_id, building and caching it on first use.

        Args:
            chat_id: Identifier of the chat.

        Returns:
            The CuriousDetector instance cached for this chat.
        """
        detector = self._detectors.get(chat_id)
        if detector is None:
            detector = CuriousDetector(chat_id)
            self._detectors[chat_id] = detector
        return detector
|
||||
|
||||
|
||||
# Module-level manager instance, created once at import time.
curious_manager = CuriousManager()
|
||||
|
||||
|
||||
async def check_and_make_question(chat_id: str) -> bool:
|
||||
"""
|
||||
检查聊天记录并生成问题(如果检测到需要提问的内容)
|
||||
|
||||
@@ -166,8 +195,20 @@ async def check_and_make_question(chat_id: str, recent_messages: List) -> bool:
|
||||
bool: 是否检测到并记录了问题
|
||||
"""
|
||||
try:
|
||||
detector = CuriousDetector(chat_id)
|
||||
|
||||
detector = curious_manager.get_detector(chat_id)
|
||||
if not detector.should_trigger():
|
||||
return False
|
||||
|
||||
# 拉取窗口内消息
|
||||
recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
|
||||
chat_id=chat_id,
|
||||
timestamp_start=detector.last_detection_time,
|
||||
timestamp_end=time.time(),
|
||||
limit=80,
|
||||
)
|
||||
if not recent_messages:
|
||||
return False
|
||||
|
||||
# 检测是否需要提问
|
||||
question = await detector.detect_questions(recent_messages)
|
||||
|
||||
@@ -176,6 +217,7 @@ async def check_and_make_question(chat_id: str, recent_messages: List) -> bool:
|
||||
success = await detector.make_question_from_detection(question)
|
||||
if success:
|
||||
logger.info(f"成功检测并记录问题: {question}")
|
||||
detector.last_detection_time = time.time()
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user