Make all prompts independent (全部prompt独立)
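This commit replaces scattered `global_prompt_manager.format_prompt(...)` calls and module-local `Prompt(...)` registrations with per-call templates fetched from `src.prompt.prompt_manager.prompt_manager`. A minimal sketch of the new flow, using only the calls visible in this diff (`get_prompt`, `add_context`, `render_prompt`); the wrapper function and its arguments below are illustrative, not part of the commit:

```python
from src.prompt.prompt_manager import prompt_manager


async def render_learn_style_prompt(bot_name: str, chat_str: str) -> str:
    # Fetch the registered template by name instead of formatting it inline.
    template = prompt_manager.get_prompt("learn_style_prompt")
    # Bind each placeholder separately; values may also be callables,
    # as jargon_explainer does with lambdas further down in this diff.
    template.add_context("bot_name", bot_name)
    template.add_context("chat_str", chat_str)
    # Rendering is async and yields the final prompt string for the LLM call.
    return await prompt_manager.render_prompt(template)
```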
@@ -11,7 +11,7 @@ from src.config.config import model_config, global_config
from src.chat.utils.chat_message_builder import (
    build_anonymous_messages,
)
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
from src.prompt.prompt_manager import prompt_manager
from src.chat.message_receive.chat_stream import get_chat_manager
from src.bw_learner.learner_utils import (
    filter_message_content,
@@ -32,60 +32,6 @@ from src.bw_learner.expression_auto_check_task import (
logger = get_logger("expressor")


def init_prompt() -> None:
    learn_style_prompt = """{chat_str}
你的名字是{bot_name},现在请你完成两个提取任务
任务1:请从上面这段群聊中用户的语言风格和说话方式
1. 只考虑文字,不要考虑表情包和图片
2. 不要总结SELF的发言,因为这是你自己的发言,不要重复学习你自己的发言
3. 不要涉及具体的人名,也不要涉及具体名词
4. 思考有没有特殊的梗,一并总结成语言风格
5. 例子仅供参考,请严格根据群聊内容总结!!!
注意:总结成如下格式的规律,总结的内容要详细,但具有概括性:
例如:当"AAAAA"时,可以"BBBBB", AAAAA代表某个场景,不超过20个字。BBBBB代表对应的语言风格,特定句式或表达方式,不超过20个字。
表达方式在3-5个左右,不要超过10个


任务2:请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。
- 必须为对话中真实出现过的短词或短语
- 必须是你无法理解含义的词语,没有明确含义的词语,请不要选择有明确含义,或者含义清晰的词语
- 排除:人名、@、表情包/图片中的内容、纯标点、常规功能词(如的、了、呢、啊等)
- 每个词条长度建议 2-8 个字符(不强制),尽量短小
- 请你提取出可能的黑话,最多30个黑话,请尽量提取所有

黑话必须为以下几种类型:
- 由字母构成的,汉语拼音首字母的简写词,例如:nb、yyds、xswl
- 英文词语的缩写,用英文字母概括一个词汇或含义,例如:CPU、GPU、API
- 中文词语的缩写,用几个汉字概括一个词汇或含义,例如:社死、内卷

输出要求:
将表达方式,语言风格和黑话以 JSON 数组输出,每个元素为一个对象,结构如下(注意字段名):
注意请不要输出重复内容,请对表达方式和黑话进行去重。

[
{{"situation": "AAAAA", "style": "BBBBB", "source_id": "3"}},
{{"situation": "CCCC", "style": "DDDD", "source_id": "7"}}
{{"situation": "对某件事表示十分惊叹", "style": "使用 我嘞个xxxx", "source_id": "[消息编号]"}},
{{"situation": "表示讽刺的赞同,不讲道理", "style": "对对对", "source_id": "[消息编号]"}},
{{"situation": "当涉及游戏相关时,夸赞,略带戏谑意味", "style": "使用 这么强!", "source_id": "[消息编号]"}},
{{"content": "词条", "source_id": "12"}},
{{"content": "词条2", "source_id": "5"}}
]

其中:
表达方式条目:
- situation:表示“在什么情境下”的简短概括(不超过20个字)
- style:表示对应的语言风格或常用表达(不超过20个字)
- source_id:该表达方式对应的“来源行编号”,即上方聊天记录中方括号里的数字(例如 [3]),请只输出数字本身,不要包含方括号
黑话jargon条目:
- content:表示黑话的内容
- source_id:该黑话对应的“来源行编号”,即上方聊天记录中方括号里的数字(例如 [3]),请只输出数字本身,不要包含方括号

现在请你输出 JSON:
"""
    Prompt(learn_style_prompt, "learn_style_prompt")


class ExpressionLearner:
    def __init__(self, chat_id: str) -> None:
        self.express_learn_model: LLMRequest = LLMRequest(
@@ -105,7 +51,7 @@ class ExpressionLearner:
    async def learn_and_store(
        self,
        messages: List[Any],
    ) -> List[Tuple[str, str, str]]:
    ) -> Optional[List[Tuple[str, str, str]]]:
        """
        学习并存储表达方式

@@ -122,11 +68,11 @@ class ExpressionLearner:
        # 学习用(开启行编号,便于溯源)
        random_msg_str: str = await build_anonymous_messages(random_msg, show_ids=True)

        prompt: str = await global_prompt_manager.format_prompt(
            "learn_style_prompt",
            bot_name=global_config.bot.nickname,
            chat_str=random_msg_str,
        )
        prompt_template = prompt_manager.get_prompt("learn_style_prompt")
        prompt_template.add_context("bot_name", global_config.bot.nickname)
        prompt_template.add_context("chat_str", random_msg_str)

        prompt = await prompt_manager.render_prompt(prompt_template)

        # print(f"random_msg_str:{random_msg_str}")
        # logger.info(f"学习{type_str}的prompt: {prompt}")
@@ -186,14 +132,14 @@ class ExpressionLearner:

        # 展示学到的表达方式
        learnt_expressions_str = ""
        for (situation,style) in learnt_expressions:
        for situation, style in learnt_expressions:
            learnt_expressions_str += f"{situation}->{style}\n"
        logger.info(f"在 {self.chat_name} 学习到表达风格:\n{learnt_expressions_str}")

        current_time = time.time()

        # 存储到数据库 Expression 表
        for (situation,style) in learnt_expressions:
        for situation, style in learnt_expressions:
            await self._upsert_expression_record(
                situation=situation,
                style=style,
@@ -209,11 +155,11 @@ class ExpressionLearner:
    ) -> List[Tuple[str, str, str]]:
        """
        过滤表达方式,移除不符合条件的条目

        Args:
            expressions: 表达方式列表,每个元素是 (situation, style, source_id)
            messages: 原始消息列表,用于溯源和验证

        Returns:
            过滤后的表达方式列表,每个元素是 (situation, style, context)
        """
@@ -255,9 +201,7 @@ class ExpressionLearner:

            # 过滤掉包含 SELF 的内容(不学习)
            if "SELF" in (situation or "") or "SELF" in (style or "") or "SELF" in context:
                logger.info(
                    f"跳过包含 SELF 的表达方式: situation={situation}, style={style}, source_id={source_id}"
                )
                logger.info(f"跳过包含 SELF 的表达方式: situation={situation}, style={style}, source_id={source_id}")
                continue

            # 过滤掉 style 与机器人名称/昵称重复的表达
@@ -269,19 +213,20 @@ class ExpressionLearner:
                continue

            # 过滤掉包含 "表情:" 或 "表情:" 的内容
            if "表情:" in (situation or "") or "表情:" in (situation or "") or \
                "表情:" in (style or "") or "表情:" in (style or "") or \
                "表情:" in context or "表情:" in context:
                logger.info(
                    f"跳过包含表情标记的表达方式: situation={situation}, style={style}, source_id={source_id}"
                )
            if (
                "表情:" in (situation or "")
                or "表情:" in (situation or "")
                or "表情:" in (style or "")
                or "表情:" in (style or "")
                or "表情:" in context
                or "表情:" in context
            ):
                logger.info(f"跳过包含表情标记的表达方式: situation={situation}, style={style}, source_id={source_id}")
                continue

            # 过滤掉包含 "[图片" 的内容
            if "[图片" in (situation or "") or "[图片" in (style or "") or "[图片" in context:
                logger.info(
                    f"跳过包含图片标记的表达方式: situation={situation}, style={style}, source_id={source_id}"
                )
                logger.info(f"跳过包含图片标记的表达方式: situation={situation}, style={style}, source_id={source_id}")
                continue

            filtered_expressions.append((situation, style))
@@ -347,7 +292,7 @@ class ExpressionLearner:
        """
        更新现有 Expression 记录(situation 完全匹配或相似的情况)
        将新的 situation 添加到 content_list,不合并 style

        Args:
            use_llm_summary: 是否使用 LLM 进行总结,完全匹配时为 False,相似匹配时为 True
        """
@@ -383,26 +328,28 @@ class ExpressionLearner:
            return []
        return [str(item) for item in data if isinstance(item, str)] if isinstance(data, list) else []

    async def _find_similar_situation_expression(self, situation: str, similarity_threshold: float = 0.75) -> Tuple[Optional[Expression], float]:
    async def _find_similar_situation_expression(
        self, situation: str, similarity_threshold: float = 0.75
    ) -> Tuple[Optional[Expression], float]:
        """
        查找具有相似 situation 的 Expression 记录
        检查 content_list 中的每一项

        Args:
            situation: 要查找的 situation
            similarity_threshold: 相似度阈值,默认 0.75

        Returns:
            Tuple[Optional[Expression], float]:
            Tuple[Optional[Expression], float]:
                - 找到的最相似的 Expression 对象,如果没有找到则返回 None
                - 相似度值(如果找到匹配,范围在 similarity_threshold 到 1.0 之间)
        """
        # 查询同一 chat_id 的所有记录
        all_expressions = Expression.select().where(Expression.chat_id == self.chat_id)

        best_match = None
        best_similarity = 0.0

        for expr in all_expressions:
            # 检查 content_list 中的每一项
            content_list = self._parse_content_list(expr.content_list)
@@ -411,10 +358,12 @@ class ExpressionLearner:
                if similarity >= similarity_threshold and similarity > best_similarity:
                    best_similarity = similarity
                    best_match = expr

        if best_match:
            logger.debug(f"找到相似的 situation: 相似度={best_similarity:.3f}, 现有='{best_match.situation}', 新='{situation}'")

            logger.debug(
                f"找到相似的 situation: 相似度={best_similarity:.3f}, 现有='{best_match.situation}', 新='{situation}'"
            )

        return best_match, best_similarity

    async def _compose_situation_text(self, content_list: List[str], fallback: str = "") -> str:
@@ -442,8 +391,7 @@ class ExpressionLearner:
        if self.check_model is None:
            try:
                self.check_model = LLMRequest(
                    model_set=model_config.model_task_config.tool_use,
                    request_type="expression.check"
                    model_set=model_config.model_task_config.tool_use, request_type="expression.check"
                )
                logger.debug("检查用 LLM 实例初始化成功")
            except Exception as e:
@@ -452,7 +400,7 @@ class ExpressionLearner:
    async def _check_expression_immediately(self, expr_obj: Expression) -> None:
        """
        立即检查表达方式(在 count 增加后调用)

        Args:
            expr_obj: 要检查的表达方式对象
        """
@@ -469,10 +417,7 @@ class ExpressionLearner:
            return

        # 执行 LLM 评估
        suitable, reason, error = await single_expression_check(
            expr_obj.situation,
            expr_obj.style
        )
        suitable, reason, error = await single_expression_check(expr_obj.situation, expr_obj.style)

        # 更新数据库
        expr_obj.checked = True
@@ -497,48 +442,45 @@ class ExpressionLearner:
    def _check_cached_jargons_in_messages(self, messages: List[Any]) -> List[Tuple[str, str]]:
        """
        检查缓存中的 jargon 是否出现在 messages 中

        Args:
            messages: 消息列表

        Returns:
            List[Tuple[str, str]]: 匹配到的黑话条目列表,每个元素是 (content, source_id)
        """
        if not messages:
            return []

        # 获取 jargon_miner 实例
        jargon_miner = miner_manager.get_miner(self.chat_id)

        # 获取缓存中的所有 jargon
        cached_jargons = jargon_miner.get_cached_jargons()
        if not cached_jargons:
            return []

        matched_entries: List[Tuple[str, str]] = []

        # 遍历 messages,检查缓存中的 jargon 是否出现
        for i, msg in enumerate(messages):
            # 跳过机器人自己的消息
            if is_bot_message(msg):
                continue

            # 获取消息文本
            msg_text = (
                getattr(msg, "processed_plain_text", None) or
                ""
            ).strip()

            msg_text = (getattr(msg, "processed_plain_text", None) or "").strip()

            if not msg_text:
                continue

            # 检查每个缓存中的 jargon 是否出现在消息文本中
            for jargon in cached_jargons:
                if not jargon or not jargon.strip():
                    continue

                jargon_content = jargon.strip()

                # 使用正则匹配,考虑单词边界(类似 jargon_explainer 中的逻辑)
                pattern = re.escape(jargon_content)
                # 对于中文,使用更宽松的匹配;对于英文/数字,使用单词边界
@@ -548,12 +490,12 @@ class ExpressionLearner:
                else:
                    # 纯英文/数字,使用单词边界
                    search_pattern = r"\b" + pattern + r"\b"

                if re.search(search_pattern, msg_text, re.IGNORECASE):
                    # 找到匹配,构建条目(source_id 从 1 开始,因为 build_anonymous_messages 的编号从 1 开始)
                    source_id = str(i + 1)
                    matched_entries.append((jargon_content, source_id))

        return matched_entries

    async def _process_jargon_entries(self, jargon_entries: List[Tuple[str, str]], messages: List[Any]) -> None:
@@ -621,9 +563,6 @@ class ExpressionLearner:
        await jargon_miner.process_extracted_entries(entries)


init_prompt()


class ExpressionLearnerManager:
    def __init__(self):
        self.expression_learners = {}
@@ -8,7 +8,7 @@ from src.llm_models.utils_model import LLMRequest
from src.config.config import global_config, model_config
from src.common.logger import get_logger
from src.common.database.database_model import Expression
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
from src.prompt.prompt_manager import prompt_manager
from src.bw_learner.learner_utils import weighted_sample
from src.chat.message_receive.chat_stream import get_chat_manager
from src.chat.utils.common_utils import TempMethodsExpression
@@ -16,33 +16,6 @@ from src.chat.utils.common_utils import TempMethodsExpression
logger = get_logger("expression_selector")


def init_prompt():
    expression_evaluation_prompt = """{chat_observe_info}

你的名字是{bot_name}{target_message}
{reply_reason_block}

以下是可选的表达情境:
{all_situations}

请你分析聊天内容的语境、情绪、话题类型,从上述情境中选择最适合当前聊天情境的,最多{max_num}个情境。
考虑因素包括:
1.聊天的情绪氛围(轻松、严肃、幽默等)
2.话题类型(日常、技术、游戏、情感等)
3.情境与当前语境的匹配度
{target_message_extra_block}

请以JSON格式输出,只需要输出选中的情境编号:
例如:
{{
"selected_situations": [2, 3, 5, 7, 19]
}}

请严格按照JSON格式输出,不要包含其他内容:
"""
    Prompt(expression_evaluation_prompt, "expression_evaluation_prompt")


class ExpressionSelector:
    def __init__(self):
        self.llm_model = LLMRequest(
@@ -125,7 +98,9 @@ class ExpressionSelector:

        # 查询所有相关chat_id的表达方式,排除 rejected=1 的,且只选择 count > 1 的
        # 如果 expression_checked_only 为 True,则只选择 checked=True 且 rejected=False 的
        base_conditions = (Expression.chat_id.in_(related_chat_ids)) & (~Expression.rejected) & (Expression.count > 1)
        base_conditions = (
            (Expression.chat_id.in_(related_chat_ids)) & (~Expression.rejected) & (Expression.count > 1)
        )
        if global_config.expression.expression_checked_only:
            base_conditions = base_conditions & (Expression.checked)
        style_query = Expression.select().where(base_conditions)
@@ -149,9 +124,7 @@ class ExpressionSelector:
        if len(style_exprs) < min_required:
            # 高 count 样本不足:如果还有候选,就降级为随机选 3 个;如果一个都没有,则直接返回空
            if not style_exprs:
                logger.info(
                    f"聊天流 {chat_id} 没有满足 count > 1 且未被拒绝的表达方式,简单模式不进行选择"
                )
                logger.info(f"聊天流 {chat_id} 没有满足 count > 1 且未被拒绝的表达方式,简单模式不进行选择")
                # 完全没有高 count 样本时,退化为全量随机抽样(不进入LLM流程)
                fallback_num = min(3, max_num) if max_num > 0 else 3
                fallback_selected = self._random_expressions(chat_id, fallback_num)
@@ -405,15 +378,15 @@ class ExpressionSelector:
            reply_reason_block = ""

        # 3. 构建prompt(只包含情境,不包含完整的表达方式)
        prompt = (await global_prompt_manager.get_prompt_async("expression_evaluation_prompt")).format(
            bot_name=global_config.bot.nickname,
            chat_observe_info=chat_context,
            all_situations=all_situations_str,
            max_num=max_num,
            target_message=target_message_str,
            target_message_extra_block=target_message_extra_block,
            reply_reason_block=reply_reason_block,
        )
        prompt_template = prompt_manager.get_prompt("expression_evaluation_prompt")
        prompt_template.add_context("bot_name", global_config.bot.nickname)
        prompt_template.add_context("chat_observe_info", chat_context)
        prompt_template.add_context("all_situations", all_situations_str)
        prompt_template.add_context("max_num", str(max_num))
        prompt_template.add_context("target_message", target_message_str)
        prompt_template.add_context("target_message_extra_block", target_message_extra_block)
        prompt_template.add_context("reply_reason_block", reply_reason_block)
        prompt = await prompt_manager.render_prompt(prompt_template)

        # 4. 调用LLM
        content, (reasoning_content, model_name, _) = await self.llm_model.generate_response_async(prompt=prompt)
@@ -482,9 +455,6 @@ class ExpressionSelector:
        expr_obj.save()
        logger.debug("表达方式激活: 更新last_active_time in db")


init_prompt()

try:
    expression_selector = ExpressionSelector()
except Exception as e:
@@ -200,11 +200,6 @@ class JargonExplainer:
        explanations_text = "\n".join(jargon_explanations)

        # 使用LLM概括黑话解释
        # summarize_prompt = await global_prompt_manager.format_prompt(
        #     "jargon_explainer_summarize_prompt",
        #     chat_context=chat_context,
        #     jargon_explanations=explanations_text,
        # )
        prompt_of_summarize = prompt_manager.get_prompt("jargon_explainer_summarize_prompt")
        prompt_of_summarize.add_context("chat_context", lambda _: chat_context)
        prompt_of_summarize.add_context("jargon_explanations", lambda _: explanations_text)
@@ -11,7 +11,7 @@ from src.common.database.database_model import Jargon
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config, global_config
from src.chat.message_receive.chat_stream import get_chat_manager
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
from src.prompt.prompt_manager import prompt_manager
from src.bw_learner.learner_utils import (
    parse_chat_id_list,
    chat_id_list_contains,
@@ -45,100 +45,6 @@ def _is_single_char_jargon(content: str) -> bool:
    )


# def _init_prompt() -> None:
#     prompt_str = """
# **聊天内容,其中的{bot_name}的发言内容是你自己的发言,[msg_id] 是消息ID**
# {chat_str}

# 请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。
# - 必须为对话中真实出现过的短词或短语
# - 必须是你无法理解含义的词语,没有明确含义的词语,请不要选择有明确含义,或者含义清晰的词语
# - 排除:人名、@、表情包/图片中的内容、纯标点、常规功能词(如的、了、呢、啊等)
# - 每个词条长度建议 2-8 个字符(不强制),尽量短小

# 黑话必须为以下几种类型:
# - 由字母构成的,汉语拼音首字母的简写词,例如:nb、yyds、xswl
# - 英文词语的缩写,用英文字母概括一个词汇或含义,例如:CPU、GPU、API
# - 中文词语的缩写,用几个汉字概括一个词汇或含义,例如:社死、内卷

# 以 JSON 数组输出,元素为对象(严格按以下结构):
# 请你提取出可能的黑话,最多30个黑话,请尽量提取所有
# [
# {{"content": "词条", "msg_id": "m12"}}, // msg_id 必须与上方聊天中展示的ID完全一致
# {{"content": "词条2", "msg_id": "m15"}}
# ]

# 现在请输出:
# """
#     Prompt(prompt_str, "extract_jargon_prompt")


def _init_inference_prompts() -> None:
    """初始化含义推断相关的prompt"""
    # Prompt 1: 基于raw_content和content推断
    prompt1_str = """
**词条内容**
{content}
**词条出现的上下文。其中的{bot_name}的发言内容是你自己的发言**
{raw_content_list}
{previous_meaning_section}

请根据上下文,推断"{content}"这个词条的含义。
- 如果这是一个黑话、俚语或网络用语,请推断其含义
- 如果含义明确(常规词汇),也请说明
- {bot_name} 的发言内容可能包含错误,请不要参考其发言内容
- 如果上下文信息不足,无法推断含义,请设置 no_info 为 true
{previous_meaning_instruction}

以 JSON 格式输出:
{{
"meaning": "详细含义说明(包含使用场景、来源、具体解释等)",
"no_info": false
}}
注意:如果信息不足无法推断,请设置 "no_info": true,此时 meaning 可以为空字符串
"""
    Prompt(prompt1_str, "jargon_inference_with_context_prompt")

    # Prompt 2: 仅基于content推断
    prompt2_str = """
**词条内容**
{content}

请仅根据这个词条本身,推断其含义。
- 如果这是一个黑话、俚语或网络用语,请推断其含义
- 如果含义明确(常规词汇),也请说明

以 JSON 格式输出:
{{
"meaning": "详细含义说明(包含使用场景、来源、具体解释等)"
}}
"""
    Prompt(prompt2_str, "jargon_inference_content_only_prompt")

    # Prompt 3: 比较两个推断结果
    prompt3_str = """
**推断结果1(基于上下文)**
{inference1}

**推断结果2(仅基于词条)**
{inference2}

请比较这两个推断结果,判断它们是否相同或类似。
- 如果两个推断结果的"含义"相同或类似,说明这个词条不是黑话(含义明确)
- 如果两个推断结果有差异,说明这个词条可能是黑话(需要上下文才能理解)

以 JSON 格式输出:
{{
"is_similar": true/false,
"reason": "判断理由"
}}
"""
    Prompt(prompt3_str, "jargon_compare_inference_prompt")


_init_inference_prompts()
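The three prompts registered above implement a compare-two-inferences check: infer the term's meaning with chat context, infer it again from the term alone, and treat the term as ordinary vocabulary when both inferences agree. A sketch of that decision rule, with the three LLM calls abstracted as caller-supplied coroutines (the helper below and its parameter names are illustrative, not code from this commit):

```python
from typing import Awaitable, Callable, Dict, Optional


async def is_probable_jargon(
    content: str,
    context_text: str,
    infer_with_context: Callable[[str, str], Awaitable[Dict]],  # prompt 1: term + chat context
    infer_content_only: Callable[[str], Awaitable[Dict]],       # prompt 2: term alone
    judge_similar: Callable[[Dict, Dict], Awaitable[Dict]],     # prompt 3: compare the two
) -> Optional[bool]:
    """Decision rule distilled from the three prompts above."""
    inference1 = await infer_with_context(content, context_text)
    if inference1.get("no_info"):
        # Context too thin to decide; the miner simply returns without a verdict here.
        return None
    inference2 = await infer_content_only(content)
    verdict = await judge_similar(inference1, inference2)
    # If both inferences agree, the meaning is clear on its own, so it is not jargon.
    return not verdict.get("is_similar", False)
```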
def _should_infer_meaning(jargon_obj: Jargon) -> bool:
    """
    判断是否需要进行含义推断
@@ -282,22 +188,18 @@ class JargonMiner:
        previous_meaning_section = ""
        previous_meaning_instruction = ""
        if current_count in [24, 60, 100] and previous_meaning:
            previous_meaning_section = f"""
**上一次推断的含义(仅供参考)**
{previous_meaning}
"""
            previous_meaning_section = f"\n**上一次推断的含义(仅供参考)**\n{previous_meaning}"
            previous_meaning_instruction = (
                "- 请参考上一次推断的含义,结合新的上下文信息,给出更准确或更新的推断结果"
            )

        prompt1 = await global_prompt_manager.format_prompt(
            "jargon_inference_with_context_prompt",
            content=content,
            bot_name=global_config.bot.nickname,
            raw_content_list=raw_content_text,
            previous_meaning_section=previous_meaning_section,
            previous_meaning_instruction=previous_meaning_instruction,
        )
        prompt1_template = prompt_manager.get_prompt("jargon_inference_with_context_prompt")
        prompt1_template.add_context("bot_name", global_config.bot.nickname)
        prompt1_template.add_context("content", str(content))
        prompt1_template.add_context("raw_content_list", raw_content_text)
        prompt1_template.add_context("previous_meaning_section", previous_meaning_section)
        prompt1_template.add_context("previous_meaning_instruction", previous_meaning_instruction)
        prompt1 = await prompt_manager.render_prompt(prompt1_template)

        response1, _ = await self.llm_inference.generate_response_async(prompt1, temperature=0.3)
        if not response1:
@@ -331,10 +233,9 @@ class JargonMiner:
            return

        # 步骤2: 仅基于content推断
        prompt2 = await global_prompt_manager.format_prompt(
            "jargon_inference_content_only_prompt",
            content=content,
        )
        prompt2_template = prompt_manager.get_prompt("jargon_inference_content_only_prompt")
        prompt2_template.add_context("content", str(content))
        prompt2 = await prompt_manager.render_prompt(prompt2_template)

        response2, _ = await self.llm_inference.generate_response_async(prompt2, temperature=0.3)
        if not response2:
@@ -374,11 +275,10 @@ class JargonMiner:
        logger.debug(f"jargon {content} 推断1结果: {response1}")

        # 步骤3: 比较两个推断结果
        prompt3 = await global_prompt_manager.format_prompt(
            "jargon_compare_inference_prompt",
            inference1=json.dumps(inference1, ensure_ascii=False),
            inference2=json.dumps(inference2, ensure_ascii=False),
        )
        prompt3_template = prompt_manager.get_prompt("jargon_compare_inference_prompt")
        prompt3_template.add_context("inference1", json.dumps(inference1, ensure_ascii=False))
        prompt3_template.add_context("inference2", json.dumps(inference2, ensure_ascii=False))
        prompt3 = await prompt_manager.render_prompt(prompt3_template)

        if global_config.debug.show_jargon_prompt:
            logger.info(f"jargon {content} 比较提示词: {prompt3}")
@@ -449,9 +349,7 @@ class JargonMiner:
            traceback.print_exc()

    async def process_extracted_entries(
        self,
        entries: List[Dict[str, List[str]]],
        person_name_filter: Optional[Callable[[str], bool]] = None
        self, entries: List[Dict[str, List[str]]], person_name_filter: Optional[Callable[[str], bool]] = None
    ) -> None:
        """
        处理已提取的黑话条目(从 expression_learner 路由过来的)
@@ -468,14 +366,14 @@ class JargonMiner:
        merged_entries: OrderedDict[str, Dict[str, List[str]]] = OrderedDict()
        for entry in entries:
            content_key = entry["content"]

            # 检查是否包含人物名称
            # logger.info(f"process_extracted_entries 检查是否包含人物名称: {content_key}")
            # logger.info(f"person_name_filter: {person_name_filter}")
            if person_name_filter and person_name_filter(content_key):
                logger.info(f"process_extracted_entries 跳过包含人物名称的黑话: {content_key}")
                continue

            raw_list = entry.get("raw_content", []) or []
            if content_key in merged_entries:
                merged_entries[content_key]["raw_content"].extend(raw_list)
@@ -3,7 +3,7 @@ from typing import Optional, Dict, TYPE_CHECKING
from src.common.logger import get_logger
from src.common.database.database_model import Expression
from src.llm_models.utils_model import LLMRequest
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
from src.prompt.prompt_manager import prompt_manager
from src.config.config import model_config
from src.chat.message_receive.chat_stream import ChatStream
from src.chat.utils.chat_message_builder import (
@@ -30,37 +30,6 @@ class ReflectTracker:
        # LLM for judging response
        self.judge_model = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="reflect.tracker")

        self._init_prompts()

    def _init_prompts(self):
        judge_prompt = """
你是一个表达反思助手。Bot之前询问了表达方式是否合适。
你需要根据提供的上下文对话,判断是否对该表达方式做出了肯定或否定的评价。

**询问内容**
情景: {situation}
风格: {style}

**上下文对话**
{context_block}

**判断要求**
1. 判断对话中是否包含对上述询问的回答。
2. 如果是,判断是肯定(Approve)还是否定(Reject),或者是提供了修改意见。
3. 如果不是回答,或者是无关内容,请返回 "Ignore"。
4. 如果是否定并提供了修改意见,请提取修正后的情景和风格。

请输出JSON格式:
```json
{{
"judgment": "Approve" | "Reject" | "Ignore",
"corrected_situation": "...", // 如果有修改意见,提取修正后的情景,否则留空
"corrected_style": "..." // 如果有修改意见,提取修正后的风格,否则留空
}}
```
"""
        Prompt(judge_prompt, "reflect_judge_prompt")

    async def trigger_tracker(self) -> bool:
        """
        触发追踪检查
@@ -103,12 +72,11 @@ class ReflectTracker:

        # LLM Judge
        try:
            prompt = await global_prompt_manager.format_prompt(
                "reflect_judge_prompt",
                situation=self.expression.situation,
                style=self.expression.style,
                context_block=context_block,
            )
            prompt_template = prompt_manager.get_prompt("reflect_judge_prompt")
            prompt_template.add_context("situation", str(self.expression.situation))
            prompt_template.add_context("style", str(self.expression.style))
            prompt_template.add_context("context_block", context_block)
            prompt = await prompt_manager.render_prompt(prompt_template)

            logger.info(f"ReflectTracker LLM Prompt: {prompt}")

@@ -134,14 +102,14 @@ class ReflectTracker:
        if judgment == "Approve":
            self.expression.checked = True
            self.expression.rejected = False
            self.expression.modified_by = 'ai'  # 通过LLM判断也标记为ai
            self.expression.modified_by = "ai"  # 通过LLM判断也标记为ai
            self.expression.save()
            logger.info(f"Expression {self.expression.id} approved by operator.")
            return True

        elif judgment == "Reject":
            self.expression.checked = True
            self.expression.modified_by = 'ai'  # 通过LLM判断也标记为ai
            self.expression.modified_by = "ai"  # 通过LLM判断也标记为ai
            corrected_situation = json_obj.get("corrected_situation")
            corrected_style = json_obj.get("corrected_style")