Files
smartmate/backend/agent/graph.go
Losita 912a6d8cfe Version: 0.5.0.dev.260310
refactor: ♻️ 调整 Outbox 写入时序并移除 Kafka 首包同步投递逻辑

* 将 `outbox` 表写入逻辑后置到 LLM 请求之后,减少主链路阻塞
* 删除 Codex 生成的 Kafka 首包同步投递抽象逻辑,简化消息发送流程
* 优化 SSE 首字到达时间,整体降低约 1s 延迟
* 当前在请求 LLM 之前的流程全部为 Redis 操作,显著降低 IO 开销

docs: 📊 保留 SSE 链路性能打点逻辑

* 保留原有 SSE 全链路打点计时代码,便于后续性能排查与分析
* 当前默认注释,如需使用可手动启用进行性能调试
2026-03-10 23:10:09 +08:00

199 lines
4.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package agent
import (
"context"
"encoding/json"
"io"
"strings"
"time"
"github.com/cloudwego/eino-ext/components/model/ark"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
)
// StreamResponse 是 OpenAI/DeepSeek 兼容的流式 chunk 结构。
type StreamResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []StreamChoice `json:"choices"`
}
type StreamChoice struct {
Index int `json:"index"`
Delta StreamDelta `json:"delta"`
FinishReason *string `json:"finish_reason"`
}
type StreamDelta struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
}
// ToOpenAIStream 将单个 Eino chunk 转为 OpenAI 兼容 JSON。
func ToOpenAIStream(chunk *schema.Message, requestID, modelName string, created int64, includeRole bool) (string, error) {
delta := StreamDelta{}
if includeRole {
delta.Role = "assistant"
}
if chunk != nil {
delta.Content = chunk.Content
delta.ReasoningContent = chunk.ReasoningContent
}
if delta.Role == "" && delta.Content == "" && delta.ReasoningContent == "" {
return "", nil
}
dto := StreamResponse{
ID: requestID,
Object: "chat.completion.chunk",
Created: created,
Model: modelName,
Choices: []StreamChoice{{
Index: 0,
Delta: delta,
FinishReason: nil,
}},
}
jsonBytes, err := json.Marshal(dto)
if err != nil {
return "", err
}
return string(jsonBytes), nil
}
// ToOpenAIFinishStream 生成结束 chunkfinish_reason=stop
func ToOpenAIFinishStream(requestID, modelName string, created int64) (string, error) {
stop := "stop"
dto := StreamResponse{
ID: requestID,
Object: "chat.completion.chunk",
Created: created,
Model: modelName,
Choices: []StreamChoice{{
Index: 0,
Delta: StreamDelta{},
FinishReason: &stop,
}},
}
jsonBytes, err := json.Marshal(dto)
if err != nil {
return "", err
}
return string(jsonBytes), nil
}
// StreamChat 负责模型流式输出,并在关键节点打点:
// 1) 流连接建立llm.Stream 返回)
// 2) 首包到达(首字延迟)
// 3) 流式输出结束
func StreamChat(
ctx context.Context,
llm *ark.ChatModel,
modelName string,
userInput string,
ifThinking bool,
chatHistory []*schema.Message,
outChan chan<- string,
traceID string,
chatID string,
requestStart time.Time,
) (string, error) {
/*callStart := time.Now()*/
messages := make([]*schema.Message, 0)
messages = append(messages, schema.SystemMessage(SystemPrompt))
if len(chatHistory) > 0 {
messages = append(messages, chatHistory...)
}
messages = append(messages, schema.UserMessage(userInput))
var thinking *ark.Thinking
if ifThinking {
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeEnabled}
} else {
thinking = &arkModel.Thinking{Type: arkModel.ThinkingTypeDisabled}
}
/*connectStart := time.Now()*/
reader, err := llm.Stream(ctx, messages, ark.WithThinking(thinking))
if err != nil {
return "", err
}
defer reader.Close()
if strings.TrimSpace(modelName) == "" {
modelName = "smartflow-worker"
}
requestID := "chatcmpl-" + uuid.NewString()
created := time.Now().Unix()
firstChunk := true
chunkCount := 0
/*streamRecvStart := time.Now()
log.Printf("打点|流连接建立|trace_id=%s|chat_id=%s|request_id=%s|本步耗时_ms=%d|请求累计_ms=%d|history_len=%d",
traceID,
chatID,
requestID,
time.Since(connectStart).Milliseconds(),
time.Since(requestStart).Milliseconds(),
len(chatHistory),
)*/
var fullText strings.Builder
for {
chunk, err := reader.Recv()
if err == io.EOF {
break
}
if err != nil {
return "", err
}
fullText.WriteString(chunk.Content)
payload, err := ToOpenAIStream(chunk, requestID, modelName, created, firstChunk)
if err != nil {
return "", err
}
if payload != "" {
outChan <- payload
chunkCount++
/*if firstChunk {
log.Printf("打点|首包到达|trace_id=%s|chat_id=%s|request_id=%s|本步耗时_ms=%d|请求累计_ms=%d",
traceID,
chatID,
requestID,
time.Since(streamRecvStart).Milliseconds(),
time.Since(requestStart).Milliseconds(),
)
firstChunk = false
}*/
}
}
finishChunk, err := ToOpenAIFinishStream(requestID, modelName, created)
if err != nil {
return "", err
}
outChan <- finishChunk
outChan <- "[DONE]"
/*log.Printf("打点|流式输出结束|trace_id=%s|chat_id=%s|request_id=%s|chunks=%d|reply_chars=%d|本步耗时_ms=%d|请求累计_ms=%d",
traceID,
chatID,
requestID,
chunkCount,
len(fullText.String()),
time.Since(callStart).Milliseconds(),
time.Since(requestStart).Milliseconds(),
)*/
return fullText.String(), nil
}