Version: 0.5.0.dev.260310
refactor: ♻️ 调整 Outbox 写入时序并移除 Kafka 首包同步投递逻辑

* 将 `outbox` 表写入逻辑后置到 LLM 请求之后,减少主链路阻塞
* 删除 Codex 生成的 Kafka 首包同步投递抽象逻辑,简化消息发送流程
* 优化 SSE 首字到达时间,整体降低约 1s 延迟
* 当前在请求 LLM 之前的流程全部为 Redis 操作,显著降低 IO 开销

docs: 📊 保留 SSE 链路性能打点逻辑

* 保留原有 SSE 全链路打点计时代码,便于后续性能排查与分析
* 打点代码当前默认处于注释状态,如需使用可手动取消注释以进行性能调试
This commit is contained in:
@@ -13,7 +13,7 @@ import (
|
||||
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
|
||||
)
|
||||
|
||||
// StreamResponse 为 OpenAI/DeepSeek 兼容的流式 chunk 结构。
|
||||
// StreamResponse 是 OpenAI/DeepSeek 兼容的流式 chunk 结构。
|
||||
type StreamResponse struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
@@ -88,8 +88,24 @@ func ToOpenAIFinishStream(requestID, modelName string, created int64) (string, e
|
||||
return string(jsonBytes), nil
|
||||
}
|
||||
|
||||
func StreamChat(ctx context.Context, llm *ark.ChatModel, modelName string, userInput string, ifThinking bool, chatHistory []*schema.Message, outChan chan<- string) (string, error) {
|
||||
// 1) 组装提示消息
|
||||
// StreamChat 负责模型流式输出,并在关键节点打点:
|
||||
// 1) 流连接建立(llm.Stream 返回)
|
||||
// 2) 首包到达(首字延迟)
|
||||
// 3) 流式输出结束
|
||||
func StreamChat(
|
||||
ctx context.Context,
|
||||
llm *ark.ChatModel,
|
||||
modelName string,
|
||||
userInput string,
|
||||
ifThinking bool,
|
||||
chatHistory []*schema.Message,
|
||||
outChan chan<- string,
|
||||
traceID string,
|
||||
chatID string,
|
||||
requestStart time.Time,
|
||||
) (string, error) {
|
||||
/*callStart := time.Now()*/
|
||||
|
||||
messages := make([]*schema.Message, 0)
|
||||
messages = append(messages, schema.SystemMessage(SystemPrompt))
|
||||
if len(chatHistory) > 0 {
|
||||
@@ -97,13 +113,14 @@ func StreamChat(ctx context.Context, llm *ark.ChatModel, modelName string, userI
|
||||
}
|
||||
messages = append(messages, schema.UserMessage(userInput))
|
||||
|
||||
// 2) 发起流式请求
|
||||
var thinking *ark.Thinking
|
||||
if ifThinking {
|
||||
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeEnabled}
|
||||
} else {
|
||||
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}
|
||||
}
|
||||
|
||||
/*connectStart := time.Now()*/
|
||||
reader, err := llm.Stream(ctx, messages, ark.WithThinking(thinking))
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -116,8 +133,18 @@ func StreamChat(ctx context.Context, llm *ark.ChatModel, modelName string, userI
|
||||
requestID := "chatcmpl-" + uuid.NewString()
|
||||
created := time.Now().Unix()
|
||||
firstChunk := true
|
||||
chunkCount := 0
|
||||
/*streamRecvStart := time.Now()
|
||||
|
||||
log.Printf("打点|流连接建立|trace_id=%s|chat_id=%s|request_id=%s|本步耗时_ms=%d|请求累计_ms=%d|history_len=%d",
|
||||
traceID,
|
||||
chatID,
|
||||
requestID,
|
||||
time.Since(connectStart).Milliseconds(),
|
||||
time.Since(requestStart).Milliseconds(),
|
||||
len(chatHistory),
|
||||
)*/
|
||||
|
||||
// 3) 持续转发 chunk
|
||||
var fullText strings.Builder
|
||||
for {
|
||||
chunk, err := reader.Recv()
|
||||
@@ -136,11 +163,20 @@ func StreamChat(ctx context.Context, llm *ark.ChatModel, modelName string, userI
|
||||
}
|
||||
if payload != "" {
|
||||
outChan <- payload
|
||||
firstChunk = false
|
||||
chunkCount++
|
||||
/*if firstChunk {
|
||||
log.Printf("打点|首包到达|trace_id=%s|chat_id=%s|request_id=%s|本步耗时_ms=%d|请求累计_ms=%d",
|
||||
traceID,
|
||||
chatID,
|
||||
requestID,
|
||||
time.Since(streamRecvStart).Milliseconds(),
|
||||
time.Since(requestStart).Milliseconds(),
|
||||
)
|
||||
firstChunk = false
|
||||
}*/
|
||||
}
|
||||
}
|
||||
|
||||
// 4) 发送结束 chunk 和 [DONE]
|
||||
finishChunk, err := ToOpenAIFinishStream(requestID, modelName, created)
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -148,5 +184,15 @@ func StreamChat(ctx context.Context, llm *ark.ChatModel, modelName string, userI
|
||||
outChan <- finishChunk
|
||||
outChan <- "[DONE]"
|
||||
|
||||
/*log.Printf("打点|流式输出结束|trace_id=%s|chat_id=%s|request_id=%s|chunks=%d|reply_chars=%d|本步耗时_ms=%d|请求累计_ms=%d",
|
||||
traceID,
|
||||
chatID,
|
||||
requestID,
|
||||
chunkCount,
|
||||
len(fullText.String()),
|
||||
time.Since(callStart).Milliseconds(),
|
||||
time.Since(requestStart).Milliseconds(),
|
||||
)*/
|
||||
|
||||
return fullText.String(), nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user