feat:可以对不同chat自定义一段额外prompt

This commit is contained in:
SengokuCola
2025-11-05 00:35:16 +08:00
parent a4d43e1aee
commit 03e06c282c
7 changed files with 329 additions and 76 deletions

View File

@@ -940,13 +940,13 @@ class EmojiManager:
image_base64 = get_image_manager().transform_gif(image_base64) # type: ignore
if not image_base64:
raise RuntimeError("GIF表情包转换失败")
prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,简短描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析"
prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,简短描述一下表情包表达的情感和内容,从互联网梗,meme的角度去分析,精简回答"
description, _ = await self.vlm.generate_response_for_image(
prompt, image_base64, "jpg", temperature=0.5
)
else:
prompt = (
"这是一个表情包,请详细描述一下表情包所表达的情感和内容,简短描述细节,从互联网梗,meme的角度去分析"
"这是一个表情包,请详细描述一下表情包所表达的情感和内容,简短描述细节,从互联网梗,meme的角度去分析,精简回答"
)
description, _ = await self.vlm.generate_response_for_image(
prompt, image_base64, image_format, temperature=0.5

View File

@@ -17,12 +17,12 @@ from src.chat.planner_actions.planner import ActionPlanner
from src.chat.planner_actions.action_modifier import ActionModifier
from src.chat.planner_actions.action_manager import ActionManager
from src.chat.heart_flow.hfc_utils import CycleDetail
from src.chat.heart_flow.hfc_utils import send_typing, stop_typing
from src.express.expression_learner import expression_learner_manager
from src.chat.frequency_control.frequency_control import frequency_control_manager
from src.memory_system.question_maker import QuestionMaker
from src.memory_system.questions import global_conflict_tracker
from src.memory_system.curious import check_and_make_question
from src.jargon import extract_and_store_jargon
from src.person_info.person_info import Person
from src.plugin_system.base.component_types import EventType, ActionInfo
from src.plugin_system.core import events_manager
@@ -336,7 +336,9 @@ class HeartFChatting:
asyncio.create_task(frequency_control_manager.get_or_create_frequency_control(self.stream_id).trigger_frequency_adjust())
# 添加curious检测任务 - 检测聊天记录中的矛盾、冲突或需要提问的内容
asyncio.create_task(check_and_make_question(self.stream_id, recent_messages_list))
asyncio.create_task(check_and_make_question(self.stream_id))
# 添加jargon提取任务 - 提取聊天中的黑话/俚语并入库(内部自行取消息并带冷却)
asyncio.create_task(extract_and_store_jargon(self.stream_id))
cycle_timers, thinking_id = self.start_cycle()

View File

@@ -20,6 +20,8 @@ logger = get_logger("database_model")
# 定义一个基础模型是一个好习惯,所有其他模型都应继承自它。
# 这允许您在一个地方为所有模型指定数据库。
class BaseModel(Model):
class Meta:
# 将下面的 'db' 替换为您实际的数据库实例变量名。
@@ -343,30 +345,45 @@ class MemoryConflict(BaseModel):
class Meta:
table_name = "memory_conflicts"
class Jargon(BaseModel):
    """
    Model for storing slang / jargon terms.

    One row holds a single term scoped to a chat via ``chat_id``; it is stored
    in the ``jargon`` table.
    """

    # The jargon term itself.
    content = TextField()
    # Optional source text for the term; presumably the sentence it was seen in
    # -- verify against the code that writes this table.
    raw_content = TextField(null=True)
    # Optional category code for the term (column is literally named "type").
    type = TextField(null=True)
    # Optional translation of the term. NOTE(review): no writer is visible
    # here -- confirm how and when this is populated.
    translation = TextField(null=True)
    # Optional explanation of the term. NOTE(review): same caveat as above.
    meaning = TextField(null=True)
    # Identifier of the chat the term belongs to; indexed for lookups.
    chat_id = TextField(index=True)
    # Flag marking a term as global; defaults to False.
    is_global = BooleanField(default=False)
    # Counter for the term; defaults to 0.
    count = IntegerField(default=0)

    class Meta:
        table_name = "jargon"
# Every model class managed by this module. The list is handed to
# db.create_tables() and iterated by the table-existence / constraint checks
# below, so a new model only has to be added here once.
MODELS = [
    ChatStreams,
    LLMUsage,
    Emoji,
    Messages,
    Images,
    ImageDescriptions,
    OnlineTime,
    PersonInfo,
    Expression,
    ActionRecords,
    MemoryChest,
    MemoryConflict,
    Jargon,
]
def create_tables():
    """
    Create the database tables for every model listed in ``MODELS``.

    ``MODELS`` is the single registry of model classes for this module, so the
    table set is not repeated here; the previous inline list was a strict
    subset of ``MODELS`` and only caused a redundant second creation pass.
    """
    # The connection is opened and released by the ``with db`` context.
    with db:
        db.create_tables(MODELS)
def initialize_database(sync_constraints=False):
@@ -379,24 +396,9 @@ def initialize_database(sync_constraints=False):
如果为 True会检查并修复字段的 NULL 约束不一致问题。
"""
models = [
ChatStreams,
LLMUsage,
Emoji,
Messages,
Images,
ImageDescriptions,
OnlineTime,
PersonInfo,
Expression,
ActionRecords, # 添加 ActionRecords 到初始化列表
MemoryChest,
MemoryConflict,
]
try:
with db: # 管理 table_exists 检查的连接
for model in models:
for model in MODELS:
table_name = model._meta.table_name
if not db.table_exists(model):
logger.warning(f"'{table_name}' 未找到,正在创建...")
@@ -476,24 +478,9 @@ def sync_field_constraints():
如果发现不一致,会自动修复字段约束。
"""
models = [
ChatStreams,
LLMUsage,
Emoji,
Messages,
Images,
ImageDescriptions,
OnlineTime,
PersonInfo,
Expression,
ActionRecords,
MemoryChest,
MemoryConflict,
]
try:
with db:
for model in models:
for model in MODELS:
table_name = model._meta.table_name
if not db.table_exists(model):
logger.warning(f"'{table_name}' 不存在,跳过约束检查")
@@ -660,26 +647,11 @@ def check_field_constraints():
用于在修复前预览需要修复的内容。
"""
models = [
ChatStreams,
LLMUsage,
Emoji,
Messages,
Images,
ImageDescriptions,
OnlineTime,
PersonInfo,
Expression,
ActionRecords,
MemoryChest,
MemoryConflict,
]
inconsistencies = {}
try:
with db:
for model in models:
for model in MODELS:
table_name = model._meta.table_name
if not db.table_exists(model):
continue

7
src/jargon/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
from .jargon_miner import extract_and_store_jargon
__all__ = [
"extract_and_store_jargon",
]

230
src/jargon/jargon_miner.py Normal file
View File

@@ -0,0 +1,230 @@
import time
import json
from typing import List
from json_repair import repair_json
from src.common.logger import get_logger
from src.common.database.database_model import Jargon
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config
from src.chat.message_receive.chat_stream import get_chat_manager
from src.chat.utils.chat_message_builder import (
build_anonymous_messages,
get_raw_msg_by_timestamp_with_chat_inclusive,
)
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
logger = get_logger("jargon")
def _init_prompt() -> None:
    """Build the jargon-extraction template and hand it to ``Prompt``.

    The template has a single ``{chat_str}`` placeholder and is created under
    the name ``extract_jargon_prompt``. Literal JSON braces in the template are
    doubled so that they survive formatting.
    NOTE(review): presumably ``Prompt(...)`` registers the template with the
    global prompt manager as a side effect -- confirm in prompt_builder.
    """
    extraction_template = """
**聊天内容**
{chat_str}
请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。
- 必须为对话中真实出现过的短词或短语
- 必须是你无法理解含义的词语,或者出现频率较高的词语
- 必须是这几种类别之一:英文或中文缩写、中文拼音短语、字母数字混合、意义不明但频繁的词汇
- 排除:人名、@、明显的表情/图片占位、纯标点、常规功能词(如的、了、呢、啊等)
- 每个词条长度建议 2-8 个字符(不强制),尽量短小
- 合并重复项,去重
分类规则:
- p拼音缩写由字母或字母和汉字构成的疑似拼音简写词例如nb、yyds、xswl
- c中文缩写中文词语的缩写用几个汉字概括一个词汇或含义例如社死、内卷
- e英文缩写英文词语的缩写用英文字母概括一个词汇或含义例如CPU、GPU、API
以 JSON 数组输出,元素为对象(严格按以下结构):
[
{{"content": "词条", "raw_content": "包含该词条的完整句子", "type": "p"}},
{{"content": "词条2", "raw_content": "包含该词条的完整句子", "type": "c"}}
]
现在请输出:
"""
    Prompt(extraction_template, "extract_jargon_prompt")
_init_prompt()
class JargonMiner:
    """Extract candidate jargon from one chat's recent messages and store it.

    Each instance is bound to a single ``chat_id``. ``run_once`` is meant to be
    called repeatedly (e.g. once per chat cycle); it throttles itself through
    ``should_trigger`` and processes each time window at most once.
    """

    # Category codes the extraction prompt asks the model to use.
    _VALID_TYPES = frozenset({"p", "c", "e"})
    # Fallback category when the model returns anything else.
    _DEFAULT_TYPE = "p"
    # Upper bound on the number of messages sent to the model per run.
    _WINDOW_MESSAGE_LIMIT = 20

    def __init__(self, chat_id: str) -> None:
        """Bind the miner to ``chat_id`` and start the first learning window now."""
        self.chat_id = chat_id
        # Doubles as the cooldown reference and the start of the next window.
        self.last_learning_time: float = time.time()

        # Throttling knobs; adjust as needed.
        self.min_messages_for_learning: int = 20
        self.min_learning_interval: float = 30

        self.llm = LLMRequest(
            model_set=model_config.model_task_config.utils,
            request_type="jargon.extract",
        )

    def should_trigger(self) -> bool:
        """Return True when the cooldown has elapsed and enough messages arrived.

        The message count is taken over the window that starts at
        ``last_learning_time`` and ends now.
        """
        now = time.time()
        # Cooldown check first: it is cheap and avoids the message query.
        if now - self.last_learning_time < self.min_learning_interval:
            return False

        recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
            chat_id=self.chat_id,
            timestamp_start=self.last_learning_time,
            timestamp_end=now,
        )
        return bool(recent_messages and len(recent_messages) >= self.min_messages_for_learning)

    @staticmethod
    def _clean(value) -> str:
        """Return ``value`` as a stripped string, mapping ``None`` to ``""``.

        A plain ``str(None)`` would yield the literal text "None", which would
        then be stored as if it were a real term.
        """
        return "" if value is None else str(value).strip()

    @classmethod
    def _parse_entries(cls, response: str) -> List[dict]:
        """Turn the raw model response into a de-duplicated list of entries.

        Args:
            response: Text returned by the model, expected to be a JSON array
                of objects with ``content``, ``raw_content`` and ``type`` keys.

        Returns:
            A list of ``{"content", "raw_content", "type"}`` dicts, unique by
            ``content`` (first occurrence wins). Items that are not objects or
            have no content are dropped; unknown types fall back to the
            default category. An empty list is returned when the payload is
            neither an array nor an object.

        Raises:
            Exception: whatever ``json.loads`` / ``repair_json`` raise when the
                text cannot be recovered; the caller logs and aborts the run.
        """
        text = response.strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Strict parsing failed (code fences, trailing commas, ...): let
            # json_repair try, even when the text already looks bracketed.
            repaired = repair_json(text)
            parsed = json.loads(repaired) if isinstance(repaired, str) else repaired

        if isinstance(parsed, dict):
            parsed = [parsed]
        if not isinstance(parsed, list):
            return []

        # Dicts keep insertion order, so this de-duplicates while preserving
        # the order in which terms first appeared.
        unique: dict[str, dict] = {}
        for item in parsed:
            if not isinstance(item, dict):
                continue
            content = cls._clean(item.get("content"))
            if not content or content in unique:
                continue
            type_str = cls._clean(item.get("type")).lower()
            if type_str not in cls._VALID_TYPES:
                type_str = cls._DEFAULT_TYPE
            unique[content] = {
                "content": content,
                "raw_content": cls._clean(item.get("raw_content")),
                "type": type_str,
            }
        return list(unique.values())

    def _store_entries(self, entries: List[dict]) -> "tuple[int, int]":
        """Insert new terms and bump the counter of known ones.

        Rows are matched on ``chat_id`` + ``content``. A failure on one entry
        is logged and does not stop the remaining entries.

        Returns:
            ``(saved, updated)``: number of rows created and rows updated.
        """
        saved = 0
        updated = 0
        for entry in entries:
            content = entry["content"]
            raw_content = entry["raw_content"]
            type_str = entry["type"]
            try:
                obj = Jargon.get_or_none(
                    (Jargon.chat_id == self.chat_id) & (Jargon.content == content)
                )
                if obj is None:
                    Jargon.create(
                        content=content,
                        raw_content=raw_content,
                        type=type_str,
                        chat_id=self.chat_id,
                        is_global=False,
                        count=1,
                    )
                    saved += 1
                    continue

                obj.count = (obj.count or 0) + 1
                # Only fill in fields that are still empty; never overwrite.
                if raw_content and not obj.raw_content:
                    obj.raw_content = raw_content
                if type_str and not obj.type:
                    obj.type = type_str
                obj.save()
                updated += 1
            except Exception as e:
                logger.error(f"保存jargon失败: chat_id={self.chat_id}, content={content}, err={e}")
        return saved, updated

    async def run_once(self) -> None:
        """Run one extraction pass if the trigger conditions are met.

        The learning window is claimed (``last_learning_time`` advanced)
        before the first ``await``. This keeps overlapping tasks from
        processing the same messages twice, and it makes the cooldown apply
        even when the model finds nothing. The trade-off is that a window
        whose extraction fails is dropped rather than retried.

        Never raises: all errors are logged, since this runs as a
        fire-and-forget background task.
        """
        try:
            if not self.should_trigger():
                return

            if not get_chat_manager().get_stream(self.chat_id):
                return

            # Claim the window synchronously, before any await.
            window_start = self.last_learning_time
            window_end = time.time()
            self.last_learning_time = window_end

            messages = get_raw_msg_by_timestamp_with_chat_inclusive(
                chat_id=self.chat_id,
                timestamp_start=window_start,
                timestamp_end=window_end,
                limit=self._WINDOW_MESSAGE_LIMIT,
            )
            if not messages:
                return

            chat_str: str = await build_anonymous_messages(messages)
            if not chat_str.strip():
                return

            prompt: str = await global_prompt_manager.format_prompt(
                "extract_jargon_prompt",
                chat_str=chat_str,
            )

            response, _ = await self.llm.generate_response_async(prompt, temperature=0.2)
            if not response:
                return

            logger.info(f"jargon提取提示词: {prompt}")
            logger.info(f"jargon提取结果: {response}")

            try:
                entries = self._parse_entries(response)
            except Exception as e:
                # Broad on purpose: json_repair's failure modes are not
                # documented here, and a bad payload must not kill the task.
                logger.error(f"解析jargon JSON失败: {e}; 原始: {response}")
                return

            if not entries:
                return

            saved, updated = self._store_entries(entries)
            if saved or updated:
                logger.info(f"jargon写入: 新增 {saved} 条,更新 {updated} 条,chat_id={self.chat_id}")
        except Exception as e:
            logger.error(f"JargonMiner 运行失败: {e}")
class JargonMinerManager:
    """Keep one ``JargonMiner`` per chat and hand it out on request."""

    def __init__(self) -> None:
        # chat_id -> miner, filled on first request for that chat.
        self._miners: dict[str, JargonMiner] = {}

    def get_miner(self, chat_id: str) -> JargonMiner:
        """Return the miner for ``chat_id``, creating it on first use."""
        try:
            return self._miners[chat_id]
        except KeyError:
            created = JargonMiner(chat_id)
            self._miners[chat_id] = created
            return created
miner_manager = JargonMinerManager()
async def extract_and_store_jargon(chat_id: str) -> None:
    """Run one jargon-extraction pass for ``chat_id``.

    Looks up the chat's miner in the module-level ``miner_manager`` and awaits
    its ``run_once``.
    """
    await miner_manager.get_miner(chat_id).run_once()

View File

@@ -444,7 +444,7 @@ def _default_normal_response_parser(
choice0 = resp.choices[0]
reason = getattr(choice0, "finish_reason", None)
if reason and reason == "length":
print(resp)
# print(resp)
_model_name = resp.model
# 统一日志格式
logger.info(

View File

@@ -1,9 +1,8 @@
import time
import asyncio
from typing import List, Optional, Tuple
from typing import List, Optional
from src.common.logger import get_logger
from src.chat.utils.chat_message_builder import (
get_raw_msg_before_timestamp_with_chat,
get_raw_msg_by_timestamp_with_chat_inclusive,
build_readable_messages_with_id,
)
from src.llm_models.utils_model import LLMRequest
@@ -25,7 +24,21 @@ class CuriousDetector:
model_set=model_config.model_task_config.utils,
request_type="curious_detector",
)
# 触发控制
self.last_detection_time: float = time.time()
self.min_interval_seconds: float = 60.0
self.min_messages: int = 20
def should_trigger(self) -> bool:
    """Tell whether a new detection pass may run now.

    Returns False while fewer than ``min_interval_seconds`` have passed since
    ``last_detection_time``. Otherwise the messages recorded for this chat
    since then are fetched, and the result is True only when there are at
    least ``min_messages`` of them.
    """
    elapsed = time.time() - self.last_detection_time
    if elapsed < self.min_interval_seconds:
        return False

    window_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
        chat_id=self.chat_id,
        timestamp_start=self.last_detection_time,
        timestamp_end=time.time(),
    )
    if not window_messages:
        return False
    return len(window_messages) >= self.min_messages
async def detect_questions(self, recent_messages: List) -> Optional[str]:
"""
检测最近消息中是否有需要提问的内容
@@ -91,6 +104,9 @@ class CuriousDetector:
result_text, _ = await self.llm_request.generate_response_async(prompt, temperature=0.3)
logger.info(f"好奇心检测提示词: {prompt}")
logger.info(f"好奇心检测结果: {result_text}")
if not result_text:
return None
@@ -154,7 +170,20 @@ class CuriousDetector:
return False
async def check_and_make_question(chat_id: str, recent_messages: List) -> bool:
class CuriousManager:
    """Keep one ``CuriousDetector`` per chat so its trigger state persists."""

    def __init__(self) -> None:
        # chat_id -> detector, filled on first request for that chat.
        self._detectors: dict[str, CuriousDetector] = {}

    def get_detector(self, chat_id: str) -> CuriousDetector:
        """Return the detector for ``chat_id``, creating it on first use."""
        try:
            return self._detectors[chat_id]
        except KeyError:
            created = CuriousDetector(chat_id)
            self._detectors[chat_id] = created
            return created
curious_manager = CuriousManager()
async def check_and_make_question(chat_id: str) -> bool:
"""
检查聊天记录并生成问题(如果检测到需要提问的内容)
@@ -166,8 +195,20 @@ async def check_and_make_question(chat_id: str, recent_messages: List) -> bool:
bool: 是否检测到并记录了问题
"""
try:
detector = CuriousDetector(chat_id)
detector = curious_manager.get_detector(chat_id)
if not detector.should_trigger():
return False
# 拉取窗口内消息
recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
chat_id=chat_id,
timestamp_start=detector.last_detection_time,
timestamp_end=time.time(),
limit=80,
)
if not recent_messages:
return False
# 检测是否需要提问
question = await detector.detect_questions(recent_messages)
@@ -176,6 +217,7 @@ async def check_and_make_question(chat_id: str, recent_messages: List) -> bool:
success = await detector.make_question_from_detection(question)
if success:
logger.info(f"成功检测并记录问题: {question}")
detector.last_detection_time = time.time()
return True
return False