Files
mai-bot/src/mais4u/mais4u_chat/s4u_stream_generator.py

207 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from typing import AsyncGenerator
from src.llm_models.utils_model import LLMRequest, RequestType
from src.llm_models.payload_content.message import MessageBuilder
from src.config.config import model_config
from src.chat.message_receive.message import MessageRecvS4U
from src.mais4u.mais4u_chat.s4u_prompt import prompt_builder
from src.common.logger import get_logger
import re
logger = get_logger("s4u_stream_generator")
class S4UStreamGenerator:
def __init__(self):
# 使用LLMRequest替代AsyncOpenAIClient
self.llm_request = LLMRequest(
model_set=model_config.model_task_config.replyer,
request_type="s4u_replyer"
)
self.current_model_name = "unknown model"
self.partial_response = ""
# 正则表达式用于按句子切分,同时处理各种标点和边缘情况
# 匹配常见的句子结束符,但会忽略引号内和数字中的标点
self.sentence_split_pattern = re.compile(
r'([^\s\w"\'([{]*["\'([{].*?["\'}\])][^\s\w"\'([{]*|' # 匹配被引号/括号包裹的内容
r'[^.。!?\n\r]+(?:[.。!?\n\r](?![\'"])|$))', # 匹配直到句子结束符
re.UNICODE | re.DOTALL,
)
self.chat_stream = None
async def build_last_internal_message(self, message: MessageRecvS4U, previous_reply_context: str = ""):
# person_id = PersonInfoManager.get_person_id(
# message.chat_stream.user_info.platform, message.chat_stream.user_info.user_id
# )
# person_info_manager = get_person_info_manager()
# person_name = await person_info_manager.get_value(person_id, "person_name")
# if message.chat_stream.user_info.user_nickname:
# if person_name:
# sender_name = f"[{message.chat_stream.user_info.user_nickname}]你叫ta{person_name}"
# else:
# sender_name = f"[{message.chat_stream.user_info.user_nickname}]"
# else:
# sender_name = f"用户({message.chat_stream.user_info.user_id})"
# 构建prompt
if previous_reply_context:
message_txt = f"""
你正在回复用户的消息,但中途被打断了。这是已有的对话上下文:
[你已经对上一条消息说的话]: {previous_reply_context}
---
[这是用户发来的新消息, 你需要结合上下文,对此进行回复]:
{message.processed_plain_text}
"""
return True, message_txt
else:
message_txt = message.processed_plain_text
return False, message_txt
async def generate_response(
self, message: MessageRecvS4U, previous_reply_context: str = ""
) -> AsyncGenerator[str, None]:
"""根据当前模型类型选择对应的生成函数"""
# 从global_config中获取模型概率值并选择模型
self.partial_response = ""
message_txt = message.processed_plain_text
if not message.is_internal:
interupted, message_txt_added = await self.build_last_internal_message(message, previous_reply_context)
if interupted:
message_txt = message_txt_added
message.chat_stream = self.chat_stream
prompt = await prompt_builder.build_prompt_normal(
message=message,
message_txt=message_txt,
)
logger.info(
f"{self.current_model_name}思考:{message_txt[:30] + '...' if len(message_txt) > 30 else message_txt}"
) # noqa: E501
# 使用LLMRequest进行流式生成
async for chunk in self._generate_response_with_llm_request(prompt):
yield chunk
async def _generate_response_with_llm_request(self, prompt: str) -> AsyncGenerator[str, None]:
"""使用LLMRequest进行流式响应生成"""
# 构建消息
message_builder = MessageBuilder()
message_builder.add_text_content(prompt)
messages = [message_builder.build()]
# 选择模型
model_info, api_provider, client = self.llm_request._select_model()
self.current_model_name = model_info.name
# 如果模型支持强制流式模式,使用真正的流式处理
if model_info.force_stream_mode:
# 简化流式处理直接使用LLMRequest的流式功能
try:
# 直接调用LLMRequest的流式处理
response = await self.llm_request._execute_request(
api_provider=api_provider,
client=client,
request_type=RequestType.RESPONSE,
model_info=model_info,
message_list=messages,
)
# 处理响应内容
content = response.content or ""
if content:
# 将内容按句子分割并输出
async for chunk in self._process_content_streaming(content):
yield chunk
except Exception as e:
logger.error(f"流式请求执行失败: {e}")
# 如果流式请求失败,回退到普通模式
response = await self.llm_request._execute_request(
api_provider=api_provider,
client=client,
request_type=RequestType.RESPONSE,
model_info=model_info,
message_list=messages,
)
content = response.content or ""
async for chunk in self._process_content_streaming(content):
yield chunk
else:
# 如果不支持流式,使用普通方式然后模拟流式输出
response = await self.llm_request._execute_request(
api_provider=api_provider,
client=client,
request_type=RequestType.RESPONSE,
model_info=model_info,
message_list=messages,
)
content = response.content or ""
async for chunk in self._process_content_streaming(content):
yield chunk
async def _process_buffer_streaming(self, buffer: str) -> AsyncGenerator[str, None]:
"""实时处理缓冲区内容,输出完整句子"""
# 使用正则表达式匹配完整句子
for match in self.sentence_split_pattern.finditer(buffer):
sentence = match.group(0).strip()
if sentence and match.end(0) <= len(buffer):
# 检查句子是否完整(以标点符号结尾)
if sentence.endswith(("", "", "", ".", "!", "?")):
if sentence not in [",", "", ".", "", "!", "", "?", ""]:
self.partial_response += sentence
yield sentence
async def _process_content_streaming(self, content: str) -> AsyncGenerator[str, None]:
"""处理内容进行流式输出(用于非流式模型的模拟流式输出)"""
buffer = content
punctuation_buffer = ""
# 使用正则表达式匹配句子
last_match_end = 0
for match in self.sentence_split_pattern.finditer(buffer):
sentence = match.group(0).strip()
if sentence:
# 检查是否只是一个标点符号
if sentence in [",", "", ".", "", "!", "", "?", ""]:
punctuation_buffer += sentence
else:
# 发送之前累积的标点和当前句子
to_yield = punctuation_buffer + sentence
if to_yield.endswith((",", "")):
to_yield = to_yield.rstrip(",")
self.partial_response += to_yield
yield to_yield
punctuation_buffer = "" # 清空标点符号缓冲区
last_match_end = match.end(0)
# 发送缓冲区中剩余的任何内容
remaining = buffer[last_match_end:].strip()
to_yield = (punctuation_buffer + remaining).strip()
if to_yield:
if to_yield.endswith(("", ",")):
to_yield = to_yield.rstrip(",")
if to_yield:
self.partial_response += to_yield
yield to_yield
async def _generate_response_with_model(
self,
prompt: str,
client,
model_name: str,
**kwargs,
) -> AsyncGenerator[str, None]:
"""保留原有方法签名以保持兼容性,但重定向到新的实现"""
async for chunk in self._generate_response_with_llm_request(prompt):
yield chunk