后端: 1. 新增课表图片识别接口,支持上传截图后返回“可编辑草稿”(success / partial / reject),并补齐大图、空图、格式不支持、识别能力未配置等错误分支。 2. 课表识别服务接入多模态 Responses 链路,完善图片请求归一化与安全校验(大小、MIME、内容探测),并对识别结果做结构化清洗、强/弱约束校验、告警去重与默认文案兜底。 3. 新增 Ark Responses 统一客户端抽象,支持文本+图片输入、JSON对象输出、usage统计透传与不完整输出识别;同时补齐模型返回 finish_reason 透传,便于定位截断问题。 4. 启动阶段增加课表识图模型与参数注入(模型名、最大图片字节、最大输出token),并将配置示例收敛为“仅保留当前代码实际读取项”。 前端: 5. 课表中心新增“导入课表”完整闭环:上传图片识别、草稿编辑校对、正式导入落库;并新增对应 API 与类型定义。 6. 导入弹窗支持识别中止、全局告警与行级告警展示、低置信度提示、行内编辑、手动新增、删除、拖拽排序、本地校验与提交前二次确认。 7. 正式导入前将草稿按“课程名+地点+是否允许嵌入”聚合为导入结构,并统一携带幂等键请求头,降低重复提交风险。 8. 周课表画板修复跨节次事件遮挡导致的网格错位问题,改进“完全遮挡/部分遮挡”渲染判定与 grid 行定位。 9. 助手流式区域优化“思考中”指示逻辑与样式,避免已有正文时仍展示回答中占位;同时补充全局组件视觉统一(弹窗/按钮)样式。 仓库: 10. 新增课表图片识别前端对接说明文档,补充主动优化能力 PRD 讨论稿,并在协作规范中新增“实现 Eino 新能力前需先查官方文档”的约束。
225 lines
7.0 KiB
Go
225 lines
7.0 KiB
Go
package service
|
||
|
||
import (
|
||
"context"
|
||
"encoding/base64"
|
||
"fmt"
|
||
"log"
|
||
"strings"
|
||
"time"
|
||
|
||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||
"github.com/LoveLosita/smartflow/backend/model"
|
||
)
|
||
|
||
// ParseCourseTableImage 使用 Ark SDK Responses 解析课程表图片。
|
||
func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.CourseImageParseRequest) (*model.CourseImageParseResponse, error) {
|
||
if ss == nil || ss.courseImageResponsesClient == nil {
|
||
log.Printf(
|
||
"[COURSE_PARSE][SERVICE] parser unavailable model_name=%q filename=%q mime=%q bytes=%d",
|
||
ss.courseImageModel,
|
||
req.Filename,
|
||
req.MIMEType,
|
||
len(req.ImageBytes),
|
||
)
|
||
return nil, ErrCourseImageParserUnavailable
|
||
}
|
||
|
||
normalizedReq, err := normalizeCourseImageParseRequest(req, ss.courseImageConfig)
|
||
if err != nil {
|
||
log.Printf(
|
||
"[COURSE_PARSE][SERVICE] request normalization failed filename=%q mime=%q bytes=%d err=%v",
|
||
req.Filename,
|
||
req.MIMEType,
|
||
len(req.ImageBytes),
|
||
err,
|
||
)
|
||
return nil, err
|
||
}
|
||
|
||
log.Printf(
|
||
"[COURSE_PARSE][SERVICE] normalized request model_name=%q filename=%q mime=%q bytes=%d max_bytes=%d",
|
||
ss.courseImageModel,
|
||
normalizedReq.Filename,
|
||
normalizedReq.MIMEType,
|
||
len(normalizedReq.ImageBytes),
|
||
ss.courseImageConfig.MaxImageBytes,
|
||
)
|
||
|
||
messages, base64Chars, promptChars := buildCourseImageParseResponsesMessages(normalizedReq)
|
||
startAt := time.Now()
|
||
log.Printf(
|
||
"[COURSE_PARSE][SERVICE] model invoke start model_name=%q filename=%q mime=%q message_count=%d base64_chars=%d prompt_chars=%d payload_chars_estimate=%d thinking=%s temperature=%.2f max_output_tokens=%d text_format=%s",
|
||
ss.courseImageModel,
|
||
normalizedReq.Filename,
|
||
normalizedReq.MIMEType,
|
||
len(messages),
|
||
base64Chars,
|
||
promptChars,
|
||
base64Chars+promptChars+len(strings.TrimSpace(courseImageParseSystemPrompt)),
|
||
infrallm.ThinkingModeDisabled,
|
||
courseImageParseTemperature,
|
||
ss.courseImageConfig.MaxTokens,
|
||
"json_object",
|
||
)
|
||
|
||
// 1. 课程表图片识别输出体量大,显式透传 max_output_tokens,避免被默认值截断。
|
||
// 2. text_format 固定为 json_object,降低输出混入解释文本导致解析失败的概率。
|
||
// 3. thinking 显式关闭,优先保证课程导入链路稳定性。
|
||
draft, rawResult, err := infrallm.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, infrallm.ArkResponsesOptions{
|
||
Temperature: courseImageParseTemperature,
|
||
MaxOutputTokens: ss.courseImageConfig.MaxTokens,
|
||
Thinking: infrallm.ThinkingModeDisabled,
|
||
TextFormat: "json_object",
|
||
})
|
||
if err != nil {
|
||
rawText := ""
|
||
rawChars := 0
|
||
status := ""
|
||
incompleteReason := ""
|
||
errorCode := ""
|
||
errorMessage := ""
|
||
inputTokens := int64(0)
|
||
outputTokens := int64(0)
|
||
totalTokens := int64(0)
|
||
if rawResult != nil {
|
||
rawText = strings.TrimSpace(rawResult.Text)
|
||
rawChars = len(rawText)
|
||
status = strings.TrimSpace(rawResult.Status)
|
||
incompleteReason = strings.TrimSpace(rawResult.IncompleteReason)
|
||
errorCode = strings.TrimSpace(rawResult.ErrorCode)
|
||
errorMessage = strings.TrimSpace(rawResult.ErrorMessage)
|
||
if rawResult.Usage != nil {
|
||
inputTokens = rawResult.Usage.InputTokens
|
||
outputTokens = rawResult.Usage.OutputTokens
|
||
totalTokens = rawResult.Usage.TotalTokens
|
||
}
|
||
}
|
||
log.Printf(
|
||
"[COURSE_PARSE][SERVICE] model invoke failed model_name=%q filename=%q mime=%q cost_ms=%d err=%v status=%q incomplete_reason=%q error_code=%q error_message=%q input_tokens=%d output_tokens=%d total_tokens=%d raw_chars=%d raw_full=\n%s",
|
||
ss.courseImageModel,
|
||
normalizedReq.Filename,
|
||
normalizedReq.MIMEType,
|
||
time.Since(startAt).Milliseconds(),
|
||
err,
|
||
status,
|
||
incompleteReason,
|
||
errorCode,
|
||
errorMessage,
|
||
inputTokens,
|
||
outputTokens,
|
||
totalTokens,
|
||
rawChars,
|
||
rawText,
|
||
)
|
||
if isCourseImageOutputTruncated(rawResult) {
|
||
return nil, fmt.Errorf(
|
||
"课程表识别输出疑似被 max_output_tokens 截断:status=%s incomplete_reason=%s output_tokens=%d max_output_tokens=%d",
|
||
status,
|
||
incompleteReason,
|
||
outputTokens,
|
||
ss.courseImageConfig.MaxTokens,
|
||
)
|
||
}
|
||
return nil, err
|
||
}
|
||
|
||
rawText := ""
|
||
rawChars := 0
|
||
status := ""
|
||
incompleteReason := ""
|
||
errorCode := ""
|
||
errorMessage := ""
|
||
inputTokens := int64(0)
|
||
outputTokens := int64(0)
|
||
totalTokens := int64(0)
|
||
if rawResult != nil {
|
||
rawText = strings.TrimSpace(rawResult.Text)
|
||
rawChars = len(rawText)
|
||
status = strings.TrimSpace(rawResult.Status)
|
||
incompleteReason = strings.TrimSpace(rawResult.IncompleteReason)
|
||
errorCode = strings.TrimSpace(rawResult.ErrorCode)
|
||
errorMessage = strings.TrimSpace(rawResult.ErrorMessage)
|
||
if rawResult.Usage != nil {
|
||
inputTokens = rawResult.Usage.InputTokens
|
||
outputTokens = rawResult.Usage.OutputTokens
|
||
totalTokens = rawResult.Usage.TotalTokens
|
||
}
|
||
}
|
||
log.Printf(
|
||
"[COURSE_PARSE][SERVICE] model invoke success model_name=%q filename=%q mime=%q cost_ms=%d status=%q incomplete_reason=%q error_code=%q error_message=%q input_tokens=%d output_tokens=%d total_tokens=%d raw_chars=%d raw_full=\n%s",
|
||
ss.courseImageModel,
|
||
normalizedReq.Filename,
|
||
normalizedReq.MIMEType,
|
||
time.Since(startAt).Milliseconds(),
|
||
status,
|
||
incompleteReason,
|
||
errorCode,
|
||
errorMessage,
|
||
inputTokens,
|
||
outputTokens,
|
||
totalTokens,
|
||
rawChars,
|
||
rawText,
|
||
)
|
||
|
||
normalizedDraft, err := normalizeCourseImageParseResponse(draft)
|
||
if err != nil {
|
||
log.Printf(
|
||
"[COURSE_PARSE][SERVICE] draft normalization failed model_name=%q filename=%q err=%v draft_status=%v row_count=%d",
|
||
ss.courseImageModel,
|
||
normalizedReq.Filename,
|
||
err,
|
||
draft.DraftStatus,
|
||
len(draft.Rows),
|
||
)
|
||
return nil, err
|
||
}
|
||
|
||
log.Printf(
|
||
"[COURSE_PARSE][SERVICE] draft normalization success model_name=%q filename=%q draft_status=%s rows=%d warnings=%d",
|
||
ss.courseImageModel,
|
||
normalizedReq.Filename,
|
||
normalizedDraft.DraftStatus,
|
||
len(normalizedDraft.Rows),
|
||
len(normalizedDraft.Warnings),
|
||
)
|
||
|
||
return normalizedDraft, nil
|
||
}
|
||
|
||
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]infrallm.ArkResponsesMessage, int, int) {
|
||
userPrompt := fmt.Sprintf(courseImageParseUserPromptTemplate, req.Filename, req.MIMEType)
|
||
base64Data := base64.StdEncoding.EncodeToString(req.ImageBytes)
|
||
imageDataURL := fmt.Sprintf("data:%s;base64,%s", req.MIMEType, base64Data)
|
||
|
||
messages := []infrallm.ArkResponsesMessage{
|
||
{
|
||
Role: "system",
|
||
Text: strings.TrimSpace(courseImageParseSystemPrompt),
|
||
},
|
||
{
|
||
Role: "user",
|
||
Text: strings.TrimSpace(userPrompt),
|
||
ImageURL: imageDataURL,
|
||
ImageDetail: "high",
|
||
},
|
||
}
|
||
return messages, len(base64Data), len(strings.TrimSpace(userPrompt))
|
||
}
|
||
|
||
func isCourseImageOutputTruncated(rawResult *infrallm.ArkResponsesResult) bool {
|
||
if rawResult == nil {
|
||
return false
|
||
}
|
||
|
||
reason := strings.ToLower(strings.TrimSpace(rawResult.IncompleteReason))
|
||
if strings.Contains(reason, "max_output_tokens") ||
|
||
strings.Contains(reason, "max_tokens") ||
|
||
strings.Contains(reason, "length") {
|
||
return true
|
||
}
|
||
|
||
return strings.EqualFold(strings.TrimSpace(rawResult.Status), "incomplete") && reason == ""
|
||
}
|