Files
smartmate/backend/services/course/sv/course_parse_ark.go
Losita 3b6fca44a6 Version: 0.9.77.dev.260505
后端:
1.阶段 6 CP4/CP5 目录收口与共享边界纯化
- 将 backend 根目录收口为 services、client、gateway、cmd、shared 五个一级目录
- 收拢 bootstrap、inits、infra/kafka、infra/outbox、conv、respond、pkg、middleware,移除根目录旧实现与空目录
- 将 utils 下沉到 services/userauth/internal/auth,将 logic 下沉到 services/schedule/core/planning
- 将迁移期 runtime 桥接实现统一收拢到 services/runtime/{conv,dao,eventsvc,model},删除 shared/legacy 与未再被 import 的旧 service 实现
- 将 gateway/shared/respond 收口为 HTTP/Gin 错误写回适配,shared/respond 仅保留共享错误语义与状态映射
- 将 HTTP IdempotencyMiddleware 与 RateLimitMiddleware 收口到 gateway/middleware
- 将 GormCachePlugin 下沉到 shared/infra/gormcache,将共享 RateLimiter 下沉到 shared/infra/ratelimit,将 agent token budget 下沉到 services/agent/shared
- 删除 InitEino 兼容壳,收缩 cmd/internal/coreinit 仅保留旧组合壳残留域初始化语义
- 更新微服务迁移计划与桌面 checklist,补齐 CP4/CP5 当前切流点、目录终态与验证结果
- 完成 go test ./...、git diff --check 与最终真实 smoke;health、register/login、task/create+get、schedule/today、task-class/list、memory/items、agent chat/meta/timeline/context-stats 全部 200,SSE 合并结果为 CP5_OK 且 [DONE] 只有 1 个
2026-05-05 23:25:07 +08:00

229 lines
7.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package sv
import (
"context"
"encoding/base64"
"fmt"
"log"
"strings"
"time"
llmservice "github.com/LoveLosita/smartflow/backend/services/llm"
"github.com/LoveLosita/smartflow/backend/services/runtime/model"
)
// ParseCourseTableImage parses a course-timetable image through the Ark SDK
// Responses client and returns the normalized parse draft.
//
// Flow:
//  1. Return ErrCourseImageParserUnavailable when the receiver or its
//     Responses client is nil.
//  2. Normalize the request via normalizeCourseImageParseRequest.
//  3. Build the system/user messages and invoke the model with JSON output.
//  4. Normalize the returned draft via normalizeCourseImageParseResponse.
//
// Every stage logs its outcome with the "[COURSE_PARSE][SERVICE]" prefix.
// When the model call fails and the raw result looks truncated (see
// isCourseImageOutputTruncated), a dedicated truncation error is returned in
// place of the underlying error; otherwise the underlying error is returned
// unchanged.
func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.CourseImageParseRequest) (*model.CourseImageParseResponse, error) {
	if ss == nil || ss.courseImageResponsesClient == nil {
		// ss itself may be nil here, so only read the model name when it is safe.
		modelName := ""
		if ss != nil {
			modelName = ss.courseImageModel
		}
		log.Printf(
			"[COURSE_PARSE][SERVICE] parser unavailable model_name=%q filename=%q mime=%q bytes=%d",
			modelName,
			req.Filename,
			req.MIMEType,
			len(req.ImageBytes),
		)
		return nil, ErrCourseImageParserUnavailable
	}

	normalizedReq, err := normalizeCourseImageParseRequest(req, ss.courseImageConfig)
	if err != nil {
		log.Printf(
			"[COURSE_PARSE][SERVICE] request normalization failed filename=%q mime=%q bytes=%d err=%v",
			req.Filename,
			req.MIMEType,
			len(req.ImageBytes),
			err,
		)
		return nil, err
	}
	log.Printf(
		"[COURSE_PARSE][SERVICE] normalized request model_name=%q filename=%q mime=%q bytes=%d max_bytes=%d",
		ss.courseImageModel,
		normalizedReq.Filename,
		normalizedReq.MIMEType,
		len(normalizedReq.ImageBytes),
		ss.courseImageConfig.MaxImageBytes,
	)

	messages, base64Chars, promptChars := buildCourseImageParseResponsesMessages(normalizedReq)
	startAt := time.Now()
	log.Printf(
		"[COURSE_PARSE][SERVICE] model invoke start model_name=%q filename=%q mime=%q message_count=%d base64_chars=%d prompt_chars=%d payload_chars_estimate=%d thinking=%s temperature=%.2f max_output_tokens=%d text_format=%s",
		ss.courseImageModel,
		normalizedReq.Filename,
		normalizedReq.MIMEType,
		len(messages),
		base64Chars,
		promptChars,
		base64Chars+promptChars+len(strings.TrimSpace(courseImageParseSystemPrompt)),
		llmservice.ThinkingModeDisabled,
		courseImageParseTemperature,
		ss.courseImageConfig.MaxTokens,
		"json_object",
	)

	// 1. Timetable recognition produces a large output, so max_output_tokens is
	//    passed through explicitly to avoid truncation by a default value.
	// 2. text_format is pinned to json_object to lower the chance that
	//    explanatory text mixed into the output breaks parsing.
	// 3. thinking is explicitly disabled; stability of the course-import path
	//    takes priority.
	draft, rawResult, err := llmservice.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, llmservice.ArkResponsesOptions{
		Temperature:     courseImageParseTemperature,
		MaxOutputTokens: ss.courseImageConfig.MaxTokens,
		Thinking:        llmservice.ThinkingModeDisabled,
		TextFormat:      "json_object",
	})
	costMs := time.Since(startAt).Milliseconds()
	// rawResult may be nil on either path; the summary is zero-valued in that case.
	summary := summarizeCourseImageInvokeResult(rawResult)

	if err != nil {
		log.Printf(
			"[COURSE_PARSE][SERVICE] model invoke failed model_name=%q filename=%q mime=%q cost_ms=%d err=%v status=%q incomplete_reason=%q error_code=%q error_message=%q input_tokens=%d output_tokens=%d total_tokens=%d raw_chars=%d raw_full=\n%s",
			ss.courseImageModel,
			normalizedReq.Filename,
			normalizedReq.MIMEType,
			costMs,
			err,
			summary.status,
			summary.incompleteReason,
			summary.errorCode,
			summary.errorMessage,
			summary.inputTokens,
			summary.outputTokens,
			summary.totalTokens,
			len(summary.rawText),
			summary.rawText,
		)
		// A truncated output gets its own, more actionable error message; the
		// underlying err has already been logged above.
		if isCourseImageOutputTruncated(rawResult) {
			return nil, fmt.Errorf(
				"课程表识别输出疑似被 max_output_tokens 截断status=%s incomplete_reason=%s output_tokens=%d max_output_tokens=%d",
				summary.status,
				summary.incompleteReason,
				summary.outputTokens,
				ss.courseImageConfig.MaxTokens,
			)
		}
		return nil, err
	}

	log.Printf(
		"[COURSE_PARSE][SERVICE] model invoke success model_name=%q filename=%q mime=%q cost_ms=%d status=%q incomplete_reason=%q error_code=%q error_message=%q input_tokens=%d output_tokens=%d total_tokens=%d raw_chars=%d raw_full=\n%s",
		ss.courseImageModel,
		normalizedReq.Filename,
		normalizedReq.MIMEType,
		costMs,
		summary.status,
		summary.incompleteReason,
		summary.errorCode,
		summary.errorMessage,
		summary.inputTokens,
		summary.outputTokens,
		summary.totalTokens,
		len(summary.rawText),
		summary.rawText,
	)

	normalizedDraft, err := normalizeCourseImageParseResponse(draft)
	if err != nil {
		// NOTE(review): draft is dereferenced here right after its normalization
		// failed; if draft can be nil on this path this log call would panic —
		// confirm against normalizeCourseImageParseResponse.
		log.Printf(
			"[COURSE_PARSE][SERVICE] draft normalization failed model_name=%q filename=%q err=%v draft_status=%v row_count=%d",
			ss.courseImageModel,
			normalizedReq.Filename,
			err,
			draft.DraftStatus,
			len(draft.Rows),
		)
		return nil, err
	}
	log.Printf(
		"[COURSE_PARSE][SERVICE] draft normalization success model_name=%q filename=%q draft_status=%s rows=%d warnings=%d",
		ss.courseImageModel,
		normalizedReq.Filename,
		normalizedDraft.DraftStatus,
		len(normalizedDraft.Rows),
		len(normalizedDraft.Warnings),
	)
	return normalizedDraft, nil
}

// courseImageInvokeSummary is the flattened view of an Ark Responses result
// that ParseCourseTableImage prints in its invoke logs. All string fields are
// whitespace-trimmed; the zero value stands for "no result available".
type courseImageInvokeSummary struct {
	rawText          string
	status           string
	incompleteReason string
	errorCode        string
	errorMessage     string
	inputTokens      int64
	outputTokens     int64
	totalTokens      int64
}

// summarizeCourseImageInvokeResult copies the loggable fields out of rawResult.
// A nil rawResult yields the zero summary, and a nil Usage leaves the token
// counters at 0.
func summarizeCourseImageInvokeResult(rawResult *llmservice.ArkResponsesResult) courseImageInvokeSummary {
	var summary courseImageInvokeSummary
	if rawResult == nil {
		return summary
	}
	summary.rawText = strings.TrimSpace(rawResult.Text)
	summary.status = strings.TrimSpace(rawResult.Status)
	summary.incompleteReason = strings.TrimSpace(rawResult.IncompleteReason)
	summary.errorCode = strings.TrimSpace(rawResult.ErrorCode)
	summary.errorMessage = strings.TrimSpace(rawResult.ErrorMessage)
	if usage := rawResult.Usage; usage != nil {
		summary.inputTokens = usage.InputTokens
		summary.outputTokens = usage.OutputTokens
		summary.totalTokens = usage.TotalTokens
	}
	return summary
}
// buildCourseImageParseResponsesMessages assembles the two-message prompt sent
// to the Ark Responses client: a system message carrying the trimmed
// courseImageParseSystemPrompt, and a user message carrying the trimmed user
// prompt (rendered from courseImageParseUserPromptTemplate with the request's
// filename and MIME type) plus the image embedded as a base64 data URL at
// "high" detail.
//
// It returns the messages, the byte length of the base64-encoded image, and
// the byte length of the trimmed user prompt.
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]llmservice.ArkResponsesMessage, int, int) {
	encodedImage := base64.StdEncoding.EncodeToString(req.ImageBytes)
	// Trim once and reuse the result for both the message text and the length.
	trimmedUserPrompt := strings.TrimSpace(
		fmt.Sprintf(courseImageParseUserPromptTemplate, req.Filename, req.MIMEType),
	)

	systemMessage := llmservice.ArkResponsesMessage{
		Role: "system",
		Text: strings.TrimSpace(courseImageParseSystemPrompt),
	}
	userMessage := llmservice.ArkResponsesMessage{
		Role:        "user",
		Text:        trimmedUserPrompt,
		ImageURL:    fmt.Sprintf("data:%s;base64,%s", req.MIMEType, encodedImage),
		ImageDetail: "high",
	}

	return []llmservice.ArkResponsesMessage{systemMessage, userMessage},
		len(encodedImage),
		len(trimmedUserPrompt)
}
// isCourseImageOutputTruncated reports whether rawResult looks like a model
// output that was cut short.
//
// It returns true when the lower-cased, trimmed IncompleteReason contains
// "max_output_tokens", "max_tokens" or "length", or when no reason is given at
// all and Status equals "incomplete" (case-insensitive). A nil rawResult is
// never considered truncated.
func isCourseImageOutputTruncated(rawResult *llmservice.ArkResponsesResult) bool {
	if rawResult == nil {
		return false
	}

	reason := strings.ToLower(strings.TrimSpace(rawResult.IncompleteReason))
	for _, marker := range []string{"max_output_tokens", "max_tokens", "length"} {
		if strings.Contains(reason, marker) {
			return true
		}
	}

	// A non-empty reason that matched none of the markers is a different kind
	// of incompleteness, not a truncation.
	if reason != "" {
		return false
	}
	return strings.EqualFold(strings.TrimSpace(rawResult.Status), "incomplete")
}