Version: 0.9.42.dev.260424
后端: 1. 新增课表图片识别接口,支持上传截图后返回“可编辑草稿”(success / partial / reject),并补齐大图、空图、格式不支持、识别能力未配置等错误分支。 2. 课表识别服务接入多模态 Responses 链路,完善图片请求归一化与安全校验(大小、MIME、内容探测),并对识别结果做结构化清洗、强/弱约束校验、告警去重与默认文案兜底。 3. 新增 Ark Responses 统一客户端抽象,支持文本+图片输入、JSON对象输出、usage统计透传与不完整输出识别;同时补齐模型返回 finish_reason 透传,便于定位截断问题。 4. 启动阶段增加课表识图模型与参数注入(模型名、最大图片字节、最大输出token),并将配置示例收敛为“仅保留当前代码实际读取项”。 前端: 5. 课表中心新增“导入课表”完整闭环:上传图片识别、草稿编辑校对、正式导入落库;并新增对应 API 与类型定义。 6. 导入弹窗支持识别中止、全局告警与行级告警展示、低置信度提示、行内编辑、手动新增、删除、拖拽排序、本地校验与提交前二次确认。 7. 正式导入前将草稿按“课程名+地点+是否允许嵌入”聚合为导入结构,并统一携带幂等键请求头,降低重复提交风险。 8. 周课表画板修复跨节次事件遮挡导致的网格错位问题,改进“完全遮挡/部分遮挡”渲染判定与 grid 行定位。 9. 助手流式区域优化“思考中”指示逻辑与样式,避免已有正文时仍展示回答中占位;同时补充全局组件视觉统一(弹窗/按钮)样式。 仓库: 10. 新增课表图片识别前端对接说明文档,补充主动优化能力 PRD 讨论稿,并在协作规范中新增“实现 Eino 新能力前需先查官方文档”的约束。
This commit is contained in:
224
backend/service/course_parse_ark.go
Normal file
224
backend/service/course_parse_ark.go
Normal file
@@ -0,0 +1,224 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
infrallm "github.com/LoveLosita/smartflow/backend/infra/llm"
|
||||
"github.com/LoveLosita/smartflow/backend/model"
|
||||
)
|
||||
|
||||
// ParseCourseTableImage 使用 Ark SDK Responses 解析课程表图片。
|
||||
func (ss *CourseService) ParseCourseTableImage(ctx context.Context, req model.CourseImageParseRequest) (*model.CourseImageParseResponse, error) {
|
||||
if ss == nil || ss.courseImageResponsesClient == nil {
|
||||
log.Printf(
|
||||
"[COURSE_PARSE][SERVICE] parser unavailable model_name=%q filename=%q mime=%q bytes=%d",
|
||||
ss.courseImageModel,
|
||||
req.Filename,
|
||||
req.MIMEType,
|
||||
len(req.ImageBytes),
|
||||
)
|
||||
return nil, ErrCourseImageParserUnavailable
|
||||
}
|
||||
|
||||
normalizedReq, err := normalizeCourseImageParseRequest(req, ss.courseImageConfig)
|
||||
if err != nil {
|
||||
log.Printf(
|
||||
"[COURSE_PARSE][SERVICE] request normalization failed filename=%q mime=%q bytes=%d err=%v",
|
||||
req.Filename,
|
||||
req.MIMEType,
|
||||
len(req.ImageBytes),
|
||||
err,
|
||||
)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Printf(
|
||||
"[COURSE_PARSE][SERVICE] normalized request model_name=%q filename=%q mime=%q bytes=%d max_bytes=%d",
|
||||
ss.courseImageModel,
|
||||
normalizedReq.Filename,
|
||||
normalizedReq.MIMEType,
|
||||
len(normalizedReq.ImageBytes),
|
||||
ss.courseImageConfig.MaxImageBytes,
|
||||
)
|
||||
|
||||
messages, base64Chars, promptChars := buildCourseImageParseResponsesMessages(normalizedReq)
|
||||
startAt := time.Now()
|
||||
log.Printf(
|
||||
"[COURSE_PARSE][SERVICE] model invoke start model_name=%q filename=%q mime=%q message_count=%d base64_chars=%d prompt_chars=%d payload_chars_estimate=%d thinking=%s temperature=%.2f max_output_tokens=%d text_format=%s",
|
||||
ss.courseImageModel,
|
||||
normalizedReq.Filename,
|
||||
normalizedReq.MIMEType,
|
||||
len(messages),
|
||||
base64Chars,
|
||||
promptChars,
|
||||
base64Chars+promptChars+len(strings.TrimSpace(courseImageParseSystemPrompt)),
|
||||
infrallm.ThinkingModeDisabled,
|
||||
courseImageParseTemperature,
|
||||
ss.courseImageConfig.MaxTokens,
|
||||
"json_object",
|
||||
)
|
||||
|
||||
// 1. 课程表图片识别输出体量大,显式透传 max_output_tokens,避免被默认值截断。
|
||||
// 2. text_format 固定为 json_object,降低输出混入解释文本导致解析失败的概率。
|
||||
// 3. thinking 显式关闭,优先保证课程导入链路稳定性。
|
||||
draft, rawResult, err := infrallm.GenerateArkResponsesJSON[model.CourseImageParseResponse](ctx, ss.courseImageResponsesClient, messages, infrallm.ArkResponsesOptions{
|
||||
Temperature: courseImageParseTemperature,
|
||||
MaxOutputTokens: ss.courseImageConfig.MaxTokens,
|
||||
Thinking: infrallm.ThinkingModeDisabled,
|
||||
TextFormat: "json_object",
|
||||
})
|
||||
if err != nil {
|
||||
rawText := ""
|
||||
rawChars := 0
|
||||
status := ""
|
||||
incompleteReason := ""
|
||||
errorCode := ""
|
||||
errorMessage := ""
|
||||
inputTokens := int64(0)
|
||||
outputTokens := int64(0)
|
||||
totalTokens := int64(0)
|
||||
if rawResult != nil {
|
||||
rawText = strings.TrimSpace(rawResult.Text)
|
||||
rawChars = len(rawText)
|
||||
status = strings.TrimSpace(rawResult.Status)
|
||||
incompleteReason = strings.TrimSpace(rawResult.IncompleteReason)
|
||||
errorCode = strings.TrimSpace(rawResult.ErrorCode)
|
||||
errorMessage = strings.TrimSpace(rawResult.ErrorMessage)
|
||||
if rawResult.Usage != nil {
|
||||
inputTokens = rawResult.Usage.InputTokens
|
||||
outputTokens = rawResult.Usage.OutputTokens
|
||||
totalTokens = rawResult.Usage.TotalTokens
|
||||
}
|
||||
}
|
||||
log.Printf(
|
||||
"[COURSE_PARSE][SERVICE] model invoke failed model_name=%q filename=%q mime=%q cost_ms=%d err=%v status=%q incomplete_reason=%q error_code=%q error_message=%q input_tokens=%d output_tokens=%d total_tokens=%d raw_chars=%d raw_full=\n%s",
|
||||
ss.courseImageModel,
|
||||
normalizedReq.Filename,
|
||||
normalizedReq.MIMEType,
|
||||
time.Since(startAt).Milliseconds(),
|
||||
err,
|
||||
status,
|
||||
incompleteReason,
|
||||
errorCode,
|
||||
errorMessage,
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
totalTokens,
|
||||
rawChars,
|
||||
rawText,
|
||||
)
|
||||
if isCourseImageOutputTruncated(rawResult) {
|
||||
return nil, fmt.Errorf(
|
||||
"课程表识别输出疑似被 max_output_tokens 截断:status=%s incomplete_reason=%s output_tokens=%d max_output_tokens=%d",
|
||||
status,
|
||||
incompleteReason,
|
||||
outputTokens,
|
||||
ss.courseImageConfig.MaxTokens,
|
||||
)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rawText := ""
|
||||
rawChars := 0
|
||||
status := ""
|
||||
incompleteReason := ""
|
||||
errorCode := ""
|
||||
errorMessage := ""
|
||||
inputTokens := int64(0)
|
||||
outputTokens := int64(0)
|
||||
totalTokens := int64(0)
|
||||
if rawResult != nil {
|
||||
rawText = strings.TrimSpace(rawResult.Text)
|
||||
rawChars = len(rawText)
|
||||
status = strings.TrimSpace(rawResult.Status)
|
||||
incompleteReason = strings.TrimSpace(rawResult.IncompleteReason)
|
||||
errorCode = strings.TrimSpace(rawResult.ErrorCode)
|
||||
errorMessage = strings.TrimSpace(rawResult.ErrorMessage)
|
||||
if rawResult.Usage != nil {
|
||||
inputTokens = rawResult.Usage.InputTokens
|
||||
outputTokens = rawResult.Usage.OutputTokens
|
||||
totalTokens = rawResult.Usage.TotalTokens
|
||||
}
|
||||
}
|
||||
log.Printf(
|
||||
"[COURSE_PARSE][SERVICE] model invoke success model_name=%q filename=%q mime=%q cost_ms=%d status=%q incomplete_reason=%q error_code=%q error_message=%q input_tokens=%d output_tokens=%d total_tokens=%d raw_chars=%d raw_full=\n%s",
|
||||
ss.courseImageModel,
|
||||
normalizedReq.Filename,
|
||||
normalizedReq.MIMEType,
|
||||
time.Since(startAt).Milliseconds(),
|
||||
status,
|
||||
incompleteReason,
|
||||
errorCode,
|
||||
errorMessage,
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
totalTokens,
|
||||
rawChars,
|
||||
rawText,
|
||||
)
|
||||
|
||||
normalizedDraft, err := normalizeCourseImageParseResponse(draft)
|
||||
if err != nil {
|
||||
log.Printf(
|
||||
"[COURSE_PARSE][SERVICE] draft normalization failed model_name=%q filename=%q err=%v draft_status=%v row_count=%d",
|
||||
ss.courseImageModel,
|
||||
normalizedReq.Filename,
|
||||
err,
|
||||
draft.DraftStatus,
|
||||
len(draft.Rows),
|
||||
)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Printf(
|
||||
"[COURSE_PARSE][SERVICE] draft normalization success model_name=%q filename=%q draft_status=%s rows=%d warnings=%d",
|
||||
ss.courseImageModel,
|
||||
normalizedReq.Filename,
|
||||
normalizedDraft.DraftStatus,
|
||||
len(normalizedDraft.Rows),
|
||||
len(normalizedDraft.Warnings),
|
||||
)
|
||||
|
||||
return normalizedDraft, nil
|
||||
}
|
||||
|
||||
func buildCourseImageParseResponsesMessages(req *model.CourseImageParseRequest) ([]infrallm.ArkResponsesMessage, int, int) {
|
||||
userPrompt := fmt.Sprintf(courseImageParseUserPromptTemplate, req.Filename, req.MIMEType)
|
||||
base64Data := base64.StdEncoding.EncodeToString(req.ImageBytes)
|
||||
imageDataURL := fmt.Sprintf("data:%s;base64,%s", req.MIMEType, base64Data)
|
||||
|
||||
messages := []infrallm.ArkResponsesMessage{
|
||||
{
|
||||
Role: "system",
|
||||
Text: strings.TrimSpace(courseImageParseSystemPrompt),
|
||||
},
|
||||
{
|
||||
Role: "user",
|
||||
Text: strings.TrimSpace(userPrompt),
|
||||
ImageURL: imageDataURL,
|
||||
ImageDetail: "high",
|
||||
},
|
||||
}
|
||||
return messages, len(base64Data), len(strings.TrimSpace(userPrompt))
|
||||
}
|
||||
|
||||
func isCourseImageOutputTruncated(rawResult *infrallm.ArkResponsesResult) bool {
|
||||
if rawResult == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
reason := strings.ToLower(strings.TrimSpace(rawResult.IncompleteReason))
|
||||
if strings.Contains(reason, "max_output_tokens") ||
|
||||
strings.Contains(reason, "max_tokens") ||
|
||||
strings.Contains(reason, "length") {
|
||||
return true
|
||||
}
|
||||
|
||||
return strings.EqualFold(strings.TrimSpace(rawResult.Status), "incomplete") && reason == ""
|
||||
}
|
||||
Reference in New Issue
Block a user