refactor: ♻️ 调整 Outbox 写入时序并移除 Kafka 首包同步投递逻辑 * 将 `outbox` 表写入逻辑后置到 LLM 请求之后,减少主链路阻塞 * 删除 Codex 生成的 Kafka 首包同步投递抽象逻辑,简化消息发送流程 * 优化 SSE 首字到达时间,整体降低约 1s 延迟 * 当前在请求 LLM 之前的流程全部为 Redis 操作,显著降低 IO 开销 docs: 📊 保留 SSE 链路性能打点逻辑 * 保留原有 SSE 全链路打点计时代码,便于后续性能排查与分析 * 当前默认注释,如需使用可手动启用进行性能调试
199 lines
4.9 KiB
Go
199 lines
4.9 KiB
Go
package agent
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"io"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/cloudwego/eino-ext/components/model/ark"
|
||
"github.com/cloudwego/eino/schema"
|
||
"github.com/google/uuid"
|
||
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
|
||
)
|
||
|
||
// StreamResponse 是 OpenAI/DeepSeek 兼容的流式 chunk 结构。
|
||
type StreamResponse struct {
|
||
ID string `json:"id"`
|
||
Object string `json:"object"`
|
||
Created int64 `json:"created"`
|
||
Model string `json:"model"`
|
||
Choices []StreamChoice `json:"choices"`
|
||
}
|
||
|
||
type StreamChoice struct {
|
||
Index int `json:"index"`
|
||
Delta StreamDelta `json:"delta"`
|
||
FinishReason *string `json:"finish_reason"`
|
||
}
|
||
|
||
// StreamDelta is the incremental part of a streamed choice.
//
// Every field is tagged omitempty, so an empty field is left out of the JSON
// output and a zero StreamDelta marshals to an empty object.
type StreamDelta struct {
	Role             string `json:"role,omitempty"`
	Content          string `json:"content,omitempty"`
	ReasoningContent string `json:"reasoning_content,omitempty"`
}
|
||
|
||
// ToOpenAIStream 将单个 Eino chunk 转为 OpenAI 兼容 JSON。
|
||
func ToOpenAIStream(chunk *schema.Message, requestID, modelName string, created int64, includeRole bool) (string, error) {
|
||
delta := StreamDelta{}
|
||
if includeRole {
|
||
delta.Role = "assistant"
|
||
}
|
||
if chunk != nil {
|
||
delta.Content = chunk.Content
|
||
delta.ReasoningContent = chunk.ReasoningContent
|
||
}
|
||
|
||
if delta.Role == "" && delta.Content == "" && delta.ReasoningContent == "" {
|
||
return "", nil
|
||
}
|
||
|
||
dto := StreamResponse{
|
||
ID: requestID,
|
||
Object: "chat.completion.chunk",
|
||
Created: created,
|
||
Model: modelName,
|
||
Choices: []StreamChoice{{
|
||
Index: 0,
|
||
Delta: delta,
|
||
FinishReason: nil,
|
||
}},
|
||
}
|
||
jsonBytes, err := json.Marshal(dto)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
return string(jsonBytes), nil
|
||
}
|
||
|
||
// ToOpenAIFinishStream 生成结束 chunk(finish_reason=stop)。
|
||
func ToOpenAIFinishStream(requestID, modelName string, created int64) (string, error) {
|
||
stop := "stop"
|
||
dto := StreamResponse{
|
||
ID: requestID,
|
||
Object: "chat.completion.chunk",
|
||
Created: created,
|
||
Model: modelName,
|
||
Choices: []StreamChoice{{
|
||
Index: 0,
|
||
Delta: StreamDelta{},
|
||
FinishReason: &stop,
|
||
}},
|
||
}
|
||
jsonBytes, err := json.Marshal(dto)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
return string(jsonBytes), nil
|
||
}
|
||
|
||
// StreamChat 负责模型流式输出,并在关键节点打点:
|
||
// 1) 流连接建立(llm.Stream 返回)
|
||
// 2) 首包到达(首字延迟)
|
||
// 3) 流式输出结束
|
||
func StreamChat(
|
||
ctx context.Context,
|
||
llm *ark.ChatModel,
|
||
modelName string,
|
||
userInput string,
|
||
ifThinking bool,
|
||
chatHistory []*schema.Message,
|
||
outChan chan<- string,
|
||
traceID string,
|
||
chatID string,
|
||
requestStart time.Time,
|
||
) (string, error) {
|
||
/*callStart := time.Now()*/
|
||
|
||
messages := make([]*schema.Message, 0)
|
||
messages = append(messages, schema.SystemMessage(SystemPrompt))
|
||
if len(chatHistory) > 0 {
|
||
messages = append(messages, chatHistory...)
|
||
}
|
||
messages = append(messages, schema.UserMessage(userInput))
|
||
|
||
var thinking *ark.Thinking
|
||
if ifThinking {
|
||
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeEnabled}
|
||
} else {
|
||
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}
|
||
}
|
||
|
||
/*connectStart := time.Now()*/
|
||
reader, err := llm.Stream(ctx, messages, ark.WithThinking(thinking))
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
defer reader.Close()
|
||
|
||
if strings.TrimSpace(modelName) == "" {
|
||
modelName = "smartflow-worker"
|
||
}
|
||
requestID := "chatcmpl-" + uuid.NewString()
|
||
created := time.Now().Unix()
|
||
firstChunk := true
|
||
chunkCount := 0
|
||
/*streamRecvStart := time.Now()
|
||
|
||
log.Printf("打点|流连接建立|trace_id=%s|chat_id=%s|request_id=%s|本步耗时_ms=%d|请求累计_ms=%d|history_len=%d",
|
||
traceID,
|
||
chatID,
|
||
requestID,
|
||
time.Since(connectStart).Milliseconds(),
|
||
time.Since(requestStart).Milliseconds(),
|
||
len(chatHistory),
|
||
)*/
|
||
|
||
var fullText strings.Builder
|
||
for {
|
||
chunk, err := reader.Recv()
|
||
if err == io.EOF {
|
||
break
|
||
}
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
|
||
fullText.WriteString(chunk.Content)
|
||
|
||
payload, err := ToOpenAIStream(chunk, requestID, modelName, created, firstChunk)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
if payload != "" {
|
||
outChan <- payload
|
||
chunkCount++
|
||
/*if firstChunk {
|
||
log.Printf("打点|首包到达|trace_id=%s|chat_id=%s|request_id=%s|本步耗时_ms=%d|请求累计_ms=%d",
|
||
traceID,
|
||
chatID,
|
||
requestID,
|
||
time.Since(streamRecvStart).Milliseconds(),
|
||
time.Since(requestStart).Milliseconds(),
|
||
)
|
||
firstChunk = false
|
||
}*/
|
||
}
|
||
}
|
||
|
||
finishChunk, err := ToOpenAIFinishStream(requestID, modelName, created)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
outChan <- finishChunk
|
||
outChan <- "[DONE]"
|
||
|
||
/*log.Printf("打点|流式输出结束|trace_id=%s|chat_id=%s|request_id=%s|chunks=%d|reply_chars=%d|本步耗时_ms=%d|请求累计_ms=%d",
|
||
traceID,
|
||
chatID,
|
||
requestID,
|
||
chunkCount,
|
||
len(fullText.String()),
|
||
time.Since(callStart).Milliseconds(),
|
||
time.Since(requestStart).Milliseconds(),
|
||
)*/
|
||
|
||
return fullText.String(), nil
|
||
}
|