Version: 0.9.65.dev.260503
后端: 1. 阶段 1.5/1.6 收口 llm-service / rag-service,统一模型出口与检索基础设施入口,清退 backend/infra/llm 与 backend/infra/rag 旧实现; 2. 同步更新相关调用链与微服务迁移计划文档
This commit is contained in:
73
backend/services/llm/ark.go
Normal file
73
backend/services/llm/ark.go
Normal file
@@ -0,0 +1,73 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/model/ark"
|
||||
einoModel "github.com/cloudwego/eino/components/model"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
|
||||
)
|
||||
|
||||
// ArkCallOptions holds the common parameters used when calling an
// ark.ChatModel directly (outside the unified Client facade).
type ArkCallOptions struct {
	Temperature float64      // sampling temperature; forwarded as float32
	MaxTokens   int          // output token cap; applied only when > 0
	Thinking    ThinkingMode // whether to enable the model's thinking mode
}
|
||||
|
||||
// CallArkText 调用 ark 模型并返回纯文本。
|
||||
func CallArkText(ctx context.Context, chatModel *ark.ChatModel, systemPrompt, userPrompt string, options ArkCallOptions) (string, error) {
|
||||
if chatModel == nil {
|
||||
return "", errors.New("ark model is nil")
|
||||
}
|
||||
|
||||
messages := []*schema.Message{
|
||||
schema.SystemMessage(systemPrompt),
|
||||
schema.UserMessage(userPrompt),
|
||||
}
|
||||
resp, err := chatModel.Generate(ctx, messages, buildArkOptions(options)...)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if resp == nil {
|
||||
return "", errors.New("模型返回为空")
|
||||
}
|
||||
|
||||
text := strings.TrimSpace(resp.Content)
|
||||
if text == "" {
|
||||
return "", errors.New("模型返回内容为空")
|
||||
}
|
||||
return text, nil
|
||||
}
|
||||
|
||||
// CallArkJSON 调用 ark 模型并直接解析 JSON。
|
||||
func CallArkJSON[T any](ctx context.Context, chatModel *ark.ChatModel, systemPrompt, userPrompt string, options ArkCallOptions) (*T, string, error) {
|
||||
raw, err := CallArkText(ctx, chatModel, systemPrompt, userPrompt, options)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
parsed, err := ParseJSONObject[T](raw)
|
||||
if err != nil {
|
||||
return nil, raw, err
|
||||
}
|
||||
return parsed, raw, nil
|
||||
}
|
||||
|
||||
func buildArkOptions(options ArkCallOptions) []einoModel.Option {
|
||||
thinkingType := arkModel.ThinkingTypeDisabled
|
||||
if options.Thinking == ThinkingModeEnabled {
|
||||
thinkingType = arkModel.ThinkingTypeEnabled
|
||||
}
|
||||
|
||||
opts := []einoModel.Option{
|
||||
ark.WithThinking(&arkModel.Thinking{Type: thinkingType}),
|
||||
einoModel.WithTemperature(float32(options.Temperature)),
|
||||
}
|
||||
if options.MaxTokens > 0 {
|
||||
opts = append(opts, einoModel.WithMaxTokens(options.MaxTokens))
|
||||
}
|
||||
return opts
|
||||
}
|
||||
111
backend/services/llm/ark_adapter.go
Normal file
111
backend/services/llm/ark_adapter.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/model/ark"
|
||||
einoModel "github.com/cloudwego/eino/components/model"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
|
||||
)
|
||||
|
||||
// WrapArkClient 将 ark.ChatModel 适配为统一 Client。
|
||||
// 1. generateText 走 Generate,供 GenerateJSON/GenerateText 使用。
|
||||
// 2. streamText 走 Stream,供需要流式输出的场景使用。
|
||||
// 3. 两条路径共用同一套参数转换逻辑。
|
||||
func WrapArkClient(arkChatModel *ark.ChatModel) *Client {
|
||||
if arkChatModel == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
generateFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) {
|
||||
arkOpts := buildArkStreamOptions(options)
|
||||
msg, err := arkChatModel.Generate(ctx, messages, arkOpts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if msg == nil {
|
||||
return nil, errors.New("ark model returned nil message")
|
||||
}
|
||||
|
||||
var usage *schema.TokenUsage
|
||||
finishReason := ""
|
||||
if msg.ResponseMeta != nil {
|
||||
usage = CloneUsage(msg.ResponseMeta.Usage)
|
||||
finishReason = msg.ResponseMeta.FinishReason
|
||||
}
|
||||
|
||||
return &TextResult{
|
||||
Text: msg.Content,
|
||||
Usage: usage,
|
||||
FinishReason: finishReason,
|
||||
}, nil
|
||||
}
|
||||
|
||||
streamFunc := func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) {
|
||||
arkOpts := buildArkStreamOptions(options)
|
||||
reader, err := arkChatModel.Stream(ctx, messages, arkOpts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &arkStreamReaderAdapter{reader: reader}, nil
|
||||
}
|
||||
|
||||
return NewClient(generateFunc, streamFunc)
|
||||
}
|
||||
|
||||
// buildArkStreamOptions 将统一的 GenerateOptions 转换为 ark 的流式调用参数。
|
||||
func buildArkStreamOptions(options GenerateOptions) []einoModel.Option {
|
||||
thinkingEnabled := options.Thinking == ThinkingModeEnabled
|
||||
|
||||
thinkingType := arkModel.ThinkingTypeDisabled
|
||||
if thinkingEnabled {
|
||||
thinkingType = arkModel.ThinkingTypeEnabled
|
||||
}
|
||||
opts := []einoModel.Option{
|
||||
ark.WithThinking(&arkModel.Thinking{Type: thinkingType}),
|
||||
}
|
||||
|
||||
if thinkingEnabled {
|
||||
opts = append(opts, einoModel.WithTemperature(1.0))
|
||||
} else if options.Temperature > 0 {
|
||||
opts = append(opts, einoModel.WithTemperature(float32(options.Temperature)))
|
||||
}
|
||||
|
||||
maxTokens := options.MaxTokens
|
||||
if thinkingEnabled {
|
||||
const minThinkingBudget = 16000
|
||||
if maxTokens < minThinkingBudget {
|
||||
maxTokens = minThinkingBudget
|
||||
}
|
||||
}
|
||||
if maxTokens > 0 {
|
||||
opts = append(opts, einoModel.WithMaxTokens(maxTokens))
|
||||
}
|
||||
|
||||
return opts
|
||||
}
|
||||
|
||||
// arkStreamReaderAdapter wraps ark's typed stream reader so it satisfies the
// unified StreamReader interface.
type arkStreamReaderAdapter struct {
	reader *schema.StreamReader[*schema.Message] // underlying ark stream; may be nil
}
|
||||
|
||||
// Recv 转发到底层 reader。
|
||||
func (r *arkStreamReaderAdapter) Recv() (*schema.Message, error) {
|
||||
if r == nil || r.reader == nil {
|
||||
return nil, io.EOF
|
||||
}
|
||||
return r.reader.Recv()
|
||||
}
|
||||
|
||||
// Close 适配 ark reader 的 Close 行为。
|
||||
func (r *arkStreamReaderAdapter) Close() error {
|
||||
if r == nil || r.reader == nil {
|
||||
return nil
|
||||
}
|
||||
r.reader.Close()
|
||||
return nil
|
||||
}
|
||||
330
backend/services/llm/ark_responses_client.go
Normal file
330
backend/services/llm/ark_responses_client.go
Normal file
@@ -0,0 +1,330 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
|
||||
"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model/responses"
|
||||
)
|
||||
|
||||
// ArkResponsesMessage describes one input message for a Responses call.
type ArkResponsesMessage struct {
	Role        string // "user", "system", "developer", or "assistant" (see parseMessageRole)
	Text        string // plain-text part; blank means no text content
	ImageURL    string // optional image URL; blank means no image content
	ImageDetail string // optional detail hint: "high", "low", or "auto" (see parseImageDetail)
}

// ArkResponsesOptions describes the parameters of one Responses call.
type ArkResponsesOptions struct {
	Model           string       // overrides the client's default model when non-blank
	Temperature     float64      // forwarded only when > 0
	MaxOutputTokens int          // forwarded only when > 0
	Thinking        ThinkingMode // default mode leaves the request's thinking unset
	TextFormat      string       // "text" or "json_object"; blank leaves the format unset
}

// ArkResponsesUsage is the normalized token usage of a Responses call.
type ArkResponsesUsage struct {
	InputTokens  int64
	OutputTokens int64
	TotalTokens  int64
}

// ArkResponsesResult is the unified output structure of a Responses call.
type ArkResponsesResult struct {
	Text             string             // concatenated text of all output messages
	Status           string             // trimmed SDK status string (e.g. "failed")
	IncompleteReason string             // set when the SDK reports incomplete details
	ErrorCode        string             // set when the SDK reports an error object
	ErrorMessage     string             // set when the SDK reports an error object
	Usage            *ArkResponsesUsage // nil when the SDK reports no usage
}

// ArkResponsesClient is the unified model gateway for the Ark SDK Responses API.
type ArkResponsesClient struct {
	model  string // default model name used when options.Model is blank
	client *arkruntime.Client
}
|
||||
|
||||
// NewArkResponsesClient 创建 Ark SDK Responses 客户端。
|
||||
// 1. model 为空时直接返回 nil,表示这条能力没有启用。
|
||||
// 2. baseURL 为空时使用 SDK 默认地址。
|
||||
// 3. 这里只负责本地构造,不做连通性探测。
|
||||
func NewArkResponsesClient(apiKey string, baseURL string, model string) *ArkResponsesClient {
|
||||
model = strings.TrimSpace(model)
|
||||
if model == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
options := make([]arkruntime.ConfigOption, 0, 1)
|
||||
if strings.TrimSpace(baseURL) != "" {
|
||||
options = append(options, arkruntime.WithBaseUrl(strings.TrimSpace(baseURL)))
|
||||
}
|
||||
|
||||
return &ArkResponsesClient{
|
||||
model: model,
|
||||
client: arkruntime.NewClientWithApiKey(strings.TrimSpace(apiKey), options...),
|
||||
}
|
||||
}
|
||||
|
||||
// GenerateText 执行一次非流式 Responses 调用并提取文本。
|
||||
func (c *ArkResponsesClient) GenerateText(ctx context.Context, messages []ArkResponsesMessage, options ArkResponsesOptions) (*ArkResponsesResult, error) {
|
||||
req, err := c.buildRequest(messages, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp, err := c.client.CreateResponses(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
result := buildArkResponsesResult(resp)
|
||||
if result.Status == "failed" {
|
||||
if result.ErrorMessage != "" {
|
||||
return result, fmt.Errorf("ark responses failed: %s", result.ErrorMessage)
|
||||
}
|
||||
return result, errors.New("ark responses failed")
|
||||
}
|
||||
|
||||
if strings.TrimSpace(result.Text) == "" {
|
||||
return result, FormatEmptyResponseError("ark_responses")
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// GenerateArkResponsesJSON 先调用 Responses,再解析成 JSON 结构体。
|
||||
func GenerateArkResponsesJSON[T any](ctx context.Context, client *ArkResponsesClient, messages []ArkResponsesMessage, options ArkResponsesOptions) (*T, *ArkResponsesResult, error) {
|
||||
if client == nil {
|
||||
return nil, nil, errors.New("ark responses client is not ready")
|
||||
}
|
||||
|
||||
result, err := client.GenerateText(ctx, messages, options)
|
||||
if err != nil {
|
||||
return nil, result, err
|
||||
}
|
||||
|
||||
parsed, err := ParseJSONObject[T](result.Text)
|
||||
if err != nil {
|
||||
return nil, result, err
|
||||
}
|
||||
return parsed, result, nil
|
||||
}
|
||||
|
||||
// buildRequest assembles a Responses API request from the unified message and
// option structs, after validating the client, the message list, and the
// effective model name.
func (c *ArkResponsesClient) buildRequest(messages []ArkResponsesMessage, options ArkResponsesOptions) (*responses.ResponsesRequest, error) {
	if c == nil || c.client == nil {
		return nil, errors.New("ark responses client is not ready")
	}
	if len(messages) == 0 {
		return nil, errors.New("ark responses messages is empty")
	}

	// The per-call model override falls back to the client's default model.
	modelName := strings.TrimSpace(options.Model)
	if modelName == "" {
		modelName = c.model
	}
	if modelName == "" {
		return nil, errors.New("ark responses model is empty")
	}

	inputItems := make([]*responses.InputItem, 0, len(messages))
	for idx := range messages {
		item, err := buildInputItem(messages[idx])
		if err != nil {
			return nil, fmt.Errorf("build ark responses message[%d] failed: %w", idx, err)
		}
		inputItems = append(inputItems, item)
	}

	request := &responses.ResponsesRequest{
		Model: modelName,
		Input: &responses.ResponsesInput{
			Union: &responses.ResponsesInput_ListValue{
				ListValue: &responses.InputItemList{ListValue: inputItems},
			},
		},
	}

	// Optional knobs are only set when explicitly positive.
	if options.Temperature > 0 {
		request.Temperature = float64Ptr(options.Temperature)
	}
	if options.MaxOutputTokens > 0 {
		request.MaxOutputTokens = int64Ptr(int64(options.MaxOutputTokens))
	}

	// ThinkingModeDefault intentionally leaves request.Thinking unset.
	switch options.Thinking {
	case ThinkingModeEnabled:
		thinkingType := responses.ThinkingType_enabled
		request.Thinking = &responses.ResponsesThinking{Type: &thinkingType}
	case ThinkingModeDisabled:
		thinkingType := responses.ThinkingType_disabled
		request.Thinking = &responses.ResponsesThinking{Type: &thinkingType}
	}

	// A blank or unknown TextFormat leaves request.Text unset (ok == false).
	if textType, ok := parseTextType(options.TextFormat); ok {
		request.Text = &responses.ResponsesText{
			Format: &responses.TextFormat{
				Type: textType,
			},
		}
	}

	return request, nil
}
|
||||
|
||||
// buildInputItem converts one ArkResponsesMessage into an SDK input item,
// combining the optional text part and optional image part under the parsed
// role. A message with neither text nor image is rejected.
func buildInputItem(message ArkResponsesMessage) (*responses.InputItem, error) {
	role, ok := parseMessageRole(message.Role)
	if !ok {
		return nil, fmt.Errorf("unsupported message role: %s", strings.TrimSpace(message.Role))
	}

	// At most two content parts: one text, one image.
	content := make([]*responses.ContentItem, 0, 2)
	if text := strings.TrimSpace(message.Text); text != "" {
		content = append(content, &responses.ContentItem{
			Union: &responses.ContentItem_Text{
				Text: &responses.ContentItemText{
					Type: responses.ContentItemType_input_text,
					Text: text,
				},
			},
		})
	}

	if imageURL := strings.TrimSpace(message.ImageURL); imageURL != "" {
		image := &responses.ContentItemImage{
			Type:     responses.ContentItemType_input_image,
			ImageUrl: stringPtr(imageURL),
		}
		// Detail is optional; an unrecognized value simply leaves it unset.
		if detail, ok := parseImageDetail(message.ImageDetail); ok {
			image.Detail = &detail
		}

		content = append(content, &responses.ContentItem{
			Union: &responses.ContentItem_Image{
				Image: image,
			},
		})
	}

	if len(content) == 0 {
		return nil, errors.New("message content is empty")
	}

	return &responses.InputItem{
		Union: &responses.InputItem_InputMessage{
			InputMessage: &responses.ItemInputMessage{
				Role:    role,
				Content: content,
			},
		},
	}, nil
}
|
||||
|
||||
func buildArkResponsesResult(resp *responses.ResponseObject) *ArkResponsesResult {
|
||||
if resp == nil {
|
||||
return &ArkResponsesResult{}
|
||||
}
|
||||
|
||||
result := &ArkResponsesResult{
|
||||
Text: extractArkResponsesText(resp),
|
||||
Status: strings.TrimSpace(resp.GetStatus().String()),
|
||||
}
|
||||
|
||||
if details := resp.GetIncompleteDetails(); details != nil {
|
||||
result.IncompleteReason = strings.TrimSpace(details.GetReason())
|
||||
}
|
||||
|
||||
if responseErr := resp.GetError(); responseErr != nil {
|
||||
result.ErrorCode = strings.TrimSpace(responseErr.GetCode())
|
||||
result.ErrorMessage = strings.TrimSpace(responseErr.GetMessage())
|
||||
}
|
||||
|
||||
if usage := resp.GetUsage(); usage != nil {
|
||||
result.Usage = &ArkResponsesUsage{
|
||||
InputTokens: usage.GetInputTokens(),
|
||||
OutputTokens: usage.GetOutputTokens(),
|
||||
TotalTokens: usage.GetTotalTokens(),
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func extractArkResponsesText(resp *responses.ResponseObject) string {
|
||||
if resp == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
textParts := make([]string, 0, 2)
|
||||
for _, outputItem := range resp.GetOutput() {
|
||||
outputMessage := outputItem.GetOutputMessage()
|
||||
if outputMessage == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, contentItem := range outputMessage.GetContent() {
|
||||
text := strings.TrimSpace(contentItem.GetText().GetText())
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
textParts = append(textParts, text)
|
||||
}
|
||||
}
|
||||
return strings.TrimSpace(strings.Join(textParts, "\n"))
|
||||
}
|
||||
|
||||
func parseMessageRole(raw string) (responses.MessageRole_Enum, bool) {
|
||||
switch strings.ToLower(strings.TrimSpace(raw)) {
|
||||
case "user":
|
||||
return responses.MessageRole_user, true
|
||||
case "system":
|
||||
return responses.MessageRole_system, true
|
||||
case "developer":
|
||||
return responses.MessageRole_developer, true
|
||||
case "assistant":
|
||||
return responses.MessageRole_assistant, true
|
||||
default:
|
||||
return responses.MessageRole_unspecified, false
|
||||
}
|
||||
}
|
||||
|
||||
func parseImageDetail(raw string) (responses.ContentItemImageDetail_Enum, bool) {
|
||||
switch strings.ToLower(strings.TrimSpace(raw)) {
|
||||
case "high":
|
||||
return responses.ContentItemImageDetail_high, true
|
||||
case "low":
|
||||
return responses.ContentItemImageDetail_low, true
|
||||
case "auto":
|
||||
return responses.ContentItemImageDetail_auto, true
|
||||
default:
|
||||
return responses.ContentItemImageDetail_auto, false
|
||||
}
|
||||
}
|
||||
|
||||
func parseTextType(raw string) (responses.TextType_Enum, bool) {
|
||||
switch strings.ToLower(strings.TrimSpace(raw)) {
|
||||
case "":
|
||||
return responses.TextType_unspecified, false
|
||||
case "text":
|
||||
return responses.TextType_text, true
|
||||
case "json_object":
|
||||
return responses.TextType_json_object, true
|
||||
default:
|
||||
return responses.TextType_unspecified, false
|
||||
}
|
||||
}
|
||||
|
||||
// stringPtr returns a pointer to value, for optional SDK string fields.
func stringPtr(value string) *string {
	return &value
}

// float64Ptr returns a pointer to value, for optional SDK float fields.
func float64Ptr(value float64) *float64 {
	return &value
}

// int64Ptr returns a pointer to value, for optional SDK integer fields.
func int64Ptr(value int64) *int64 {
	return &value
}
|
||||
174
backend/services/llm/client.go
Normal file
174
backend/services/llm/client.go
Normal file
@@ -0,0 +1,174 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
// ThinkingMode expresses the caller's expectation about thinking for one
// model invocation.
type ThinkingMode string

const (
	ThinkingModeDefault  ThinkingMode = "default"  // keep the provider's default behavior
	ThinkingModeEnabled  ThinkingMode = "enabled"  // explicitly turn thinking on
	ThinkingModeDisabled ThinkingMode = "disabled" // explicitly turn thinking off
)

// GenerateOptions gathers the common parameters shared by unified text calls.
type GenerateOptions struct {
	Temperature float64        // sampling temperature; the ark adapters forward it only when > 0
	MaxTokens   int            // output token cap; forwarded only when > 0
	Thinking    ThinkingMode   // thinking expectation for this call
	Metadata    map[string]any // free-form per-call metadata; not read by the ark adapters in this package
}

// TextResult holds the final outcome of one text generation plus its usage.
// 1. Text carries the model's plain-text reply.
// 2. Usage lets upper layers do unified accounting.
// 3. JSON parsing and business field mapping are out of scope here.
type TextResult struct {
	Text         string
	Usage        *schema.TokenUsage
	FinishReason string
}

// StreamReader abstracts a streaming reply that yields messages chunk by chunk.
type StreamReader interface {
	Recv() (*schema.Message, error)
	Close() error
}

// TextGenerateFunc is the unified text-generation function signature.
type TextGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error)

// StreamGenerateFunc is the unified streaming-generation function signature.
type StreamGenerateFunc func(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error)

// Client is the unified model client facade.
// 1. It only does minimal input validation and empty-response defense.
// 2. It does not assemble prompts or run business fallbacks.
// 3. Provider specifics are funneled in through adapters.
type Client struct {
	generateText TextGenerateFunc
	streamText   StreamGenerateFunc
}
|
||||
|
||||
// NewClient 创建统一模型客户端。
|
||||
func NewClient(generateText TextGenerateFunc, streamText StreamGenerateFunc) *Client {
|
||||
return &Client{
|
||||
generateText: generateText,
|
||||
streamText: streamText,
|
||||
}
|
||||
}
|
||||
|
||||
// GenerateText 执行一次统一文本生成。
|
||||
func (c *Client) GenerateText(ctx context.Context, messages []*schema.Message, options GenerateOptions) (*TextResult, error) {
|
||||
if c == nil || c.generateText == nil {
|
||||
return nil, errors.New("llm client is not ready")
|
||||
}
|
||||
if len(messages) == 0 {
|
||||
return nil, errors.New("llm messages is empty")
|
||||
}
|
||||
|
||||
result, err := c.generateText(ctx, messages, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if result == nil {
|
||||
return nil, errors.New("llm result is nil")
|
||||
}
|
||||
if strings.TrimSpace(result.Text) == "" {
|
||||
return nil, errors.New("llm returned empty text")
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// GenerateJSON 先走统一文本生成,再走统一 JSON 解析。
|
||||
func GenerateJSON[T any](ctx context.Context, client *Client, messages []*schema.Message, options GenerateOptions) (*T, *TextResult, error) {
|
||||
result, err := client.GenerateText(ctx, messages, options)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
parsed, err := ParseJSONObject[T](result.Text)
|
||||
if err != nil {
|
||||
return nil, result, err
|
||||
}
|
||||
return parsed, result, nil
|
||||
}
|
||||
|
||||
// Stream 打开统一流式调用入口。
|
||||
func (c *Client) Stream(ctx context.Context, messages []*schema.Message, options GenerateOptions) (StreamReader, error) {
|
||||
if c == nil || c.streamText == nil {
|
||||
return nil, errors.New("llm stream client is not ready")
|
||||
}
|
||||
if len(messages) == 0 {
|
||||
return nil, errors.New("llm messages is empty")
|
||||
}
|
||||
return c.streamText(ctx, messages, options)
|
||||
}
|
||||
|
||||
// BuildSystemUserMessages 构造最常见的 system + history + user 消息列表。
|
||||
func BuildSystemUserMessages(systemPrompt string, history []*schema.Message, userPrompt string) []*schema.Message {
|
||||
messages := make([]*schema.Message, 0, len(history)+2)
|
||||
if strings.TrimSpace(systemPrompt) != "" {
|
||||
messages = append(messages, schema.SystemMessage(systemPrompt))
|
||||
}
|
||||
if len(history) > 0 {
|
||||
messages = append(messages, history...)
|
||||
}
|
||||
if strings.TrimSpace(userPrompt) != "" {
|
||||
messages = append(messages, schema.UserMessage(userPrompt))
|
||||
}
|
||||
return messages
|
||||
}
|
||||
|
||||
// CloneUsage 深拷贝 token usage,避免后续累加时共享同一个指针。
|
||||
func CloneUsage(usage *schema.TokenUsage) *schema.TokenUsage {
|
||||
if usage == nil {
|
||||
return nil
|
||||
}
|
||||
copied := *usage
|
||||
return &copied
|
||||
}
|
||||
|
||||
// MergeUsage 合并两段 usage,取各字段更大的值作为累计结果。
|
||||
func MergeUsage(base *schema.TokenUsage, incoming *schema.TokenUsage) *schema.TokenUsage {
|
||||
if incoming == nil {
|
||||
return CloneUsage(base)
|
||||
}
|
||||
if base == nil {
|
||||
return CloneUsage(incoming)
|
||||
}
|
||||
|
||||
merged := *base
|
||||
if incoming.PromptTokens > merged.PromptTokens {
|
||||
merged.PromptTokens = incoming.PromptTokens
|
||||
}
|
||||
if incoming.CompletionTokens > merged.CompletionTokens {
|
||||
merged.CompletionTokens = incoming.CompletionTokens
|
||||
}
|
||||
if incoming.TotalTokens > merged.TotalTokens {
|
||||
merged.TotalTokens = incoming.TotalTokens
|
||||
}
|
||||
if incoming.PromptTokenDetails.CachedTokens > merged.PromptTokenDetails.CachedTokens {
|
||||
merged.PromptTokenDetails.CachedTokens = incoming.PromptTokenDetails.CachedTokens
|
||||
}
|
||||
if incoming.CompletionTokensDetails.ReasoningTokens > merged.CompletionTokensDetails.ReasoningTokens {
|
||||
merged.CompletionTokensDetails.ReasoningTokens = incoming.CompletionTokensDetails.ReasoningTokens
|
||||
}
|
||||
return &merged
|
||||
}
|
||||
|
||||
// FormatEmptyResponseError builds the shared error for an empty model result,
// tagged with the calling scene ("unknown" when blank).
func FormatEmptyResponseError(scene string) error {
	name := strings.TrimSpace(scene)
	if name == "" {
		name = "unknown"
	}
	return fmt.Errorf("模型在 %s 场景返回空结果", name)
}
|
||||
102
backend/services/llm/json.go
Normal file
102
backend/services/llm/json.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseJSONObject 解析模型返回内容中的 JSON 对象。
|
||||
// 1. 先剥离常见的 markdown 代码块包装。
|
||||
// 2. 再从混合文本里提取最外层 JSON 对象。
|
||||
// 3. 这里只负责结构解析,不负责字段合法性校验。
|
||||
func ParseJSONObject[T any](raw string) (*T, error) {
|
||||
clean := strings.TrimSpace(raw)
|
||||
if clean == "" {
|
||||
return nil, errors.New("模型返回为空,无法解析 JSON")
|
||||
}
|
||||
|
||||
objectText := ExtractJSONObject(clean)
|
||||
if objectText == "" {
|
||||
return nil, fmt.Errorf("模型返回中未找到 JSON 对象: %s", truncateForError(clean))
|
||||
}
|
||||
|
||||
var out T
|
||||
if err := json.Unmarshal([]byte(objectText), &out); err != nil {
|
||||
return nil, fmt.Errorf("JSON 解析失败: %w", err)
|
||||
}
|
||||
return &out, nil
|
||||
}
|
||||
|
||||
// ExtractJSONObject 从混合文本中提取第一个完整的 JSON 对象。
|
||||
func ExtractJSONObject(text string) string {
|
||||
clean := trimMarkdownCodeFence(strings.TrimSpace(text))
|
||||
if clean == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
start := strings.Index(clean, "{")
|
||||
if start < 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
depth := 0
|
||||
inString := false
|
||||
escaped := false
|
||||
for idx := start; idx < len(clean); idx++ {
|
||||
ch := clean[idx]
|
||||
|
||||
if escaped {
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
if ch == '\\' && inString {
|
||||
escaped = true
|
||||
continue
|
||||
}
|
||||
if ch == '"' {
|
||||
inString = !inString
|
||||
continue
|
||||
}
|
||||
if inString {
|
||||
continue
|
||||
}
|
||||
|
||||
switch ch {
|
||||
case '{':
|
||||
depth++
|
||||
case '}':
|
||||
depth--
|
||||
if depth == 0 {
|
||||
return clean[start : idx+1]
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// trimMarkdownCodeFence strips a leading markdown code fence (``` or
// ```lang) and its matching closing fence, returning the trimmed body.
// Non-fenced input is returned trimmed but otherwise untouched.
func trimMarkdownCodeFence(text string) string {
	trimmed := strings.TrimSpace(text)
	if !strings.HasPrefix(trimmed, "```") {
		return trimmed
	}

	// strings.Split always yields at least one element, so dropping the
	// opening fence via lines[1:] is safe without a length check (the
	// previous len(lines) == 0 branch was unreachable).
	lines := strings.Split(trimmed, "\n")
	body := lines[1:]
	if len(body) > 0 && strings.TrimSpace(body[len(body)-1]) == "```" {
		body = body[:len(body)-1]
	}
	return strings.TrimSpace(strings.Join(body, "\n"))
}
|
||||
|
||||
// truncateForError shortens text for inclusion in error messages.
// The cut point backs off to a rune boundary so multi-byte UTF-8 characters
// (e.g. Chinese model output) are never split mid-sequence, which would
// produce invalid UTF-8 in the error string.
func truncateForError(text string) string {
	const limit = 160
	if len(text) <= limit {
		return text
	}
	cut := limit
	// Walk back past UTF-8 continuation bytes (10xxxxxx) to a rune start.
	for cut > 0 && text[cut]&0xC0 == 0x80 {
		cut--
	}
	return text[:cut] + "..."
}
|
||||
109
backend/services/llm/service.go
Normal file
109
backend/services/llm/service.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/LoveLosita/smartflow/backend/inits"
|
||||
)
|
||||
|
||||
// Service only exposes pre-built model clients; prompt assembly and business
// orchestration live elsewhere.
type Service struct {
	liteClient                 *Client             // low-cost short-output model
	proClient                  *Client             // default complex-dialog model
	maxClient                  *Client             // deep-reasoning model
	courseImageResponsesClient *ArkResponsesClient // course-image parsing via the Responses API
}

// Options describes the startup-time dependencies handed over when
// initializing llm-service.
// 1. AIHub remains the in-process source of Ark ChatModels, but the service
//    layer only keeps the unified Clients built from them.
// 2. CourseImageResponsesClient may be injected up front, which lets tests
//    and special startup paths reuse an existing client.
// 3. Blank fields are not errors: the matching client stays nil and upper
//    layers keep their compatibility fallbacks.
type Options struct {
	AIHub                      *inits.AIHub
	APIKey                     string
	BaseURL                    string
	CourseVisionModel          string
	CourseImageResponsesClient *ArkResponsesClient
}

// AgentModelClients exposes, in one struct, the model assignment commonly
// used by the newAgent graph.
type AgentModelClients struct {
	Chat    *Client
	Plan    *Client
	Execute *Client
	Deliver *Client
	Summary *Client
}
|
||||
|
||||
// New 构造 llm-service。
|
||||
// 1. 不返回 error,是为了让上层继续按 nil 客户端做逐步降级。
|
||||
// 2. 只要 AIHub 已初始化,就把其中的 ChatModel 收敛成统一 Client。
|
||||
// 3. 课程图片解析客户端在这里统一构建,避免业务层直接依赖 Responses SDK。
|
||||
func New(opts Options) *Service {
|
||||
svc := &Service{}
|
||||
|
||||
if opts.AIHub != nil {
|
||||
svc.liteClient = WrapArkClient(opts.AIHub.Lite)
|
||||
svc.proClient = WrapArkClient(opts.AIHub.Pro)
|
||||
svc.maxClient = WrapArkClient(opts.AIHub.Max)
|
||||
}
|
||||
|
||||
if opts.CourseImageResponsesClient != nil {
|
||||
svc.courseImageResponsesClient = opts.CourseImageResponsesClient
|
||||
} else {
|
||||
apiKey := strings.TrimSpace(opts.APIKey)
|
||||
baseURL := strings.TrimSpace(opts.BaseURL)
|
||||
model := strings.TrimSpace(opts.CourseVisionModel)
|
||||
if apiKey != "" && model != "" {
|
||||
svc.courseImageResponsesClient = NewArkResponsesClient(apiKey, baseURL, model)
|
||||
}
|
||||
}
|
||||
|
||||
return svc
|
||||
}
|
||||
|
||||
// LiteClient returns the low-cost short-output model client (nil-safe).
func (s *Service) LiteClient() *Client {
	if s == nil {
		return nil
	}
	return s.liteClient
}

// ProClient returns the default complex-dialog model client (nil-safe).
func (s *Service) ProClient() *Client {
	if s == nil {
		return nil
	}
	return s.proClient
}

// MaxClient returns the deep-reasoning model client (nil-safe).
func (s *Service) MaxClient() *Client {
	if s == nil {
		return nil
	}
	return s.maxClient
}

// CourseImageResponsesClient returns the Responses client used for course
// image parsing (nil-safe).
func (s *Service) CourseImageResponsesClient() *ArkResponsesClient {
	if s == nil {
		return nil
	}
	return s.courseImageResponsesClient
}
|
||||
|
||||
// NewAgentModelClients 一次性返回 newAgent 图里常用的模型分配。
|
||||
func (s *Service) NewAgentModelClients() AgentModelClients {
|
||||
if s == nil {
|
||||
return AgentModelClients{}
|
||||
}
|
||||
return AgentModelClients{
|
||||
Chat: s.ProClient(),
|
||||
Plan: s.MaxClient(),
|
||||
Execute: s.MaxClient(),
|
||||
Deliver: s.ProClient(),
|
||||
Summary: s.LiteClient(),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user