Version: 0.9.13.dev.260410
后端: 1. Memory Day1 链路打通(chat_history -> outbox -> memory_jobs) - 更新 service/events/chat_history_persist.go:聊天消息落库同事务追加 memory.extract.requested 事件(仅 user 消息,失败回滚后由 outbox 重试) - 新建 service/events/memory_extract_requested.go:消费 memory.extract.requested 并幂等入队 memory_jobs,补齐 payload 校验、文本截断与 idempotency key - 更新 cmd/start.go:注册 RegisterMemoryExtractRequestedHandler 2. Memory 模块骨架落地(先跑通状态机,再接入真实抽取) - 新建 memory/model、repo、service、orchestrator、worker、utils 目录与 Day1 mock 抽取执行链 - 新建 model/memory.go:补齐 memory_items / memory_jobs / memory_audit_logs / memory_user_settings 与事件 payload 模型 - 更新 inits/mysql.go:接入 4 张 memory 相关表 AutoMigrate 3. RAG 复用基础设施预埋(依赖可替换) - 新建 infra/rag:core pipeline + chunk/embed/retrieve/rerank/store/corpus/config 分层实现 - 默认接入 MockEmbedder + InMemoryStore,预留 Milvus / Eino 适配实现 - 新增 infra/rag/RAG复用接口实施计划.md 4. 本地依赖与交接文档同步 - 更新 docker-compose.yml:新增 etcd / minio / milvus / attu 服务与数据卷 - 删除 newAgent/HANDOFF_工具研究与运行态重置.md、newAgent/阶段3_上下文瘦身设计.md - 新增 newAgent/HANDOFF_WebSearch两阶段实施计划.md、memory/HANDOFF-RAG复用后续实施计划.md、memory/README.md 前端:无 仓库:无
This commit is contained in:
104
backend/memory/utils/extract_json.go
Normal file
104
backend/memory/utils/extract_json.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// fencedJSONPattern matches a Markdown code fence, optionally tagged "json",
// and captures its payload: text that starts with '[' or '{' and ends with
// ']' or '}'. The (?s) flag lets '.' span newlines. The capture is greedy, so
// when the text contains several fences it can extend to the last closing
// bracket that is followed by a fence; callers must validate the capture.
var fencedJSONPattern = regexp.MustCompile("(?s)```(?:json)?\\s*([\\[{].*[\\]}])\\s*```")
|
||||
|
||||
// ExtractJSON 从模型输出中提取 JSON 文本(兼容代码块包裹)。
|
||||
//
|
||||
// 步骤:
|
||||
// 1. 先判断整段文本是否本身就是合法 JSON;
|
||||
// 2. 再尝试匹配 ```json ... ``` 代码块;
|
||||
// 3. 最后做一次“首个 JSON 对象/数组”扫描提取。
|
||||
func ExtractJSON(raw string) (string, error) {
|
||||
trimmed := strings.TrimSpace(raw)
|
||||
if trimmed == "" {
|
||||
return "", errors.New("empty model output")
|
||||
}
|
||||
|
||||
// 1. 直接 JSON 命中时,避免做额外启发式扫描。
|
||||
if json.Valid([]byte(trimmed)) {
|
||||
return trimmed, nil
|
||||
}
|
||||
|
||||
// 2. 兼容 markdown 代码块包裹 JSON。
|
||||
matches := fencedJSONPattern.FindStringSubmatch(trimmed)
|
||||
if len(matches) > 1 {
|
||||
candidate := strings.TrimSpace(matches[1])
|
||||
if json.Valid([]byte(candidate)) {
|
||||
return candidate, nil
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 兜底扫描首个完整 JSON 片段,尽量提升容错能力。
|
||||
if candidate, ok := findFirstJSONSegment(trimmed); ok {
|
||||
return candidate, nil
|
||||
}
|
||||
return "", errors.New("json not found in model output")
|
||||
}
|
||||
|
||||
// findFirstJSONSegment returns the first top-level bracket group in raw that
// is valid JSON, together with true. It returns ("", false) when there is none.
//
// Bracket groups are visited left to right. A group that closes but is not
// valid JSON (for example "[note]" or "{placeholder}" in surrounding prose)
// is skipped and the search resumes right after it, so a JSON value that
// follows such text is still found. If a group never closes, the search
// stops: everything after its opening bracket is nested inside it, and
// returning an inner fragment of a truncated document would silently hide
// the truncation from the caller.
//
// Every byte of raw is examined at most once by the balance scan, so the
// scan is linear in len(raw) (json.Valid adds one pass per closed group).
func findFirstJSONSegment(raw string) (string, bool) {
	for offset := 0; offset < len(raw); {
		rel := strings.IndexAny(raw[offset:], "{[")
		if rel < 0 {
			break
		}
		start := offset + rel

		end, ok := balancedSegmentEnd(raw, start)
		if !ok {
			// Unterminated group: do not salvage nested fragments.
			break
		}
		if candidate := raw[start:end]; json.Valid([]byte(candidate)) {
			return candidate, true
		}
		// Closed but not JSON: continue after the whole group.
		offset = end
	}
	return "", false
}

// balancedSegmentEnd returns the index just past the bracket that closes the
// '{' or '[' located at raw[start], and true. It returns (0, false) when the
// bracket is never closed.
//
// Only brackets of the same kind as raw[start] are counted; brackets inside
// double-quoted strings are ignored, and a backslash inside a string escapes
// the next byte. Scanning bytes rather than runes is safe because all the
// characters of interest are ASCII, and ASCII bytes never appear inside a
// multi-byte UTF-8 sequence.
func balancedSegmentEnd(raw string, start int) (int, bool) {
	open := raw[start]
	closer := byte('}')
	if open == '[' {
		closer = ']'
	}

	depth := 0
	inString, escaped := false, false
	for i := start; i < len(raw); i++ {
		ch := raw[i]
		switch {
		case escaped:
			// The byte after a backslash is consumed verbatim.
			escaped = false
		case inString:
			if ch == '\\' {
				escaped = true
			} else if ch == '"' {
				inString = false
			}
		case ch == '"':
			inString = true
		case ch == open:
			depth++
		case ch == closer:
			depth--
			if depth == 0 {
				return i + 1, true
			}
		}
	}
	return 0, false
}
|
||||
102
backend/memory/utils/normalize_facts.go
Normal file
102
backend/memory/utils/normalize_facts.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
memorymodel "github.com/LoveLosita/smartflow/backend/memory/model"
|
||||
)
|
||||
|
||||
// Length limits applied by NormalizeFacts. Both are enforced through
// truncateByRune, so they count runes rather than bytes.
const (
	maxTitleLength   = 64   // upper bound for a fact title
	maxContentLength = 1000 // upper bound for fact content
)
|
||||
|
||||
// NormalizeFacts 对候选事实做标准化与过滤。
|
||||
//
|
||||
// 步骤:
|
||||
// 1. 标准化 memory_type 与文本字段,丢弃空值和非法类型;
|
||||
// 2. 对超长内容截断,避免脏数据污染后续链路;
|
||||
// 3. 基于“类型+标准化内容”做去重,避免同一轮重复写入。
|
||||
func NormalizeFacts(candidates []memorymodel.FactCandidate) []memorymodel.NormalizedFact {
|
||||
if len(candidates) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
result := make([]memorymodel.NormalizedFact, 0, len(candidates))
|
||||
seen := make(map[string]struct{}, len(candidates))
|
||||
for _, candidate := range candidates {
|
||||
memoryType := memorymodel.NormalizeMemoryType(candidate.MemoryType)
|
||||
if memoryType == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
content := normalizeWhitespace(candidate.Content)
|
||||
if content == "" {
|
||||
continue
|
||||
}
|
||||
content = truncateByRune(content, maxContentLength)
|
||||
|
||||
title := normalizeWhitespace(candidate.Title)
|
||||
if title == "" {
|
||||
title = truncateByRune(content, maxTitleLength)
|
||||
}
|
||||
title = truncateByRune(title, maxTitleLength)
|
||||
|
||||
confidence := clamp01(candidate.Confidence)
|
||||
if confidence == 0 {
|
||||
confidence = 0.6
|
||||
}
|
||||
|
||||
normalizedContent := strings.ToLower(content)
|
||||
contentHash := hashContent(memoryType, normalizedContent)
|
||||
dedupKey := fmt.Sprintf("%s:%s", memoryType, contentHash)
|
||||
if _, exists := seen[dedupKey]; exists {
|
||||
continue
|
||||
}
|
||||
seen[dedupKey] = struct{}{}
|
||||
|
||||
result = append(result, memorymodel.NormalizedFact{
|
||||
MemoryType: memoryType,
|
||||
Title: title,
|
||||
Content: content,
|
||||
NormalizedContent: normalizedContent,
|
||||
ContentHash: contentHash,
|
||||
Confidence: confidence,
|
||||
IsExplicit: candidate.IsExplicit,
|
||||
})
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// normalizeWhitespace strips leading and trailing whitespace from raw and
// collapses every internal run of whitespace into a single ASCII space.
func normalizeWhitespace(raw string) string {
	// strings.Fields already ignores leading and trailing whitespace.
	words := strings.Fields(raw)
	return strings.Join(words, " ")
}
|
||||
|
||||
// truncateByRune returns raw limited to at most max runes. A non-positive max
// yields the empty string.
//
// The result is always raw itself or a prefix slice of raw, so no copy of the
// input is allocated and the bytes are never re-encoded: invalid UTF-8 bytes
// (each counted as one rune) pass through unchanged, the same as when no
// truncation is needed.
func truncateByRune(raw string, max int) string {
	if max <= 0 {
		return ""
	}
	// Every rune occupies at least one byte, so a string of at most max bytes
	// cannot contain more than max runes.
	if len(raw) <= max {
		return raw
	}

	count := 0
	for i := range raw {
		// i is the byte offset of rune number count; cut right before the
		// first rune that would exceed the limit.
		if count == max {
			return raw[:i]
		}
		count++
	}
	return raw
}
|
||||
|
||||
// clamp01 limits v to the closed interval [0, 1]: values below 0 become 0,
// values above 1 become 1, and anything else is returned unchanged.
func clamp01(v float64) float64 {
	switch {
	case v < 0:
		return 0
	case v > 1:
		return 1
	default:
		return v
	}
}
|
||||
|
||||
// hashContent returns the lowercase hex SHA-256 digest of
// memoryType + "::" + normalizedContent.
func hashContent(memoryType, normalizedContent string) string {
	hasher := sha256.New()
	// hash.Hash.Write is documented to never return an error.
	hasher.Write([]byte(memoryType))
	hasher.Write([]byte("::"))
	hasher.Write([]byte(normalizedContent))
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||
Reference in New Issue
Block a user