feat:优化表情包注册,迁移数据库v3

This commit is contained in:
SengokuCola
2026-04-05 20:12:21 +08:00
parent 80be746be0
commit 526fc9b763
16 changed files with 926 additions and 534 deletions

View File

@@ -3,7 +3,7 @@ from pathlib import Path
from typing import ContextManager, Generator, TYPE_CHECKING
from rich.traceback import install
from sqlalchemy import event, text
from sqlalchemy import event
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker
from sqlmodel import SQLModel, Session, create_engine
@@ -63,41 +63,6 @@ _migration_bootstrapper = create_database_migration_bootstrapper(engine)
_db_initialized = False
def _migrate_action_records_to_tool_records() -> None:
"""将旧的 ``action_records`` 历史数据迁移到 ``tool_records``。"""
migration_sql = text(
"""
INSERT INTO tool_records (
tool_id,
timestamp,
session_id,
tool_name,
tool_reasoning,
tool_data,
tool_builtin_prompt,
tool_display_prompt
)
SELECT
action_id,
timestamp,
session_id,
action_name,
action_reasoning,
action_data,
action_builtin_prompt,
action_display_prompt
FROM action_records
WHERE NOT EXISTS (
SELECT 1
FROM tool_records
WHERE tool_records.tool_id = action_records.action_id
)
"""
)
with engine.begin() as connection:
connection.execute(migration_sql)
def initialize_database() -> None:
"""初始化数据库连接、结构与启动期迁移。
@@ -105,8 +70,7 @@ def initialize_database() -> None:
1. 确保数据库目录存在;
2. 加载 SQLModel 模型定义;
3. 执行已注册的启动期迁移;
4. 兜底执行 ``create_all`` 确保当前模型定义已建表
5. 执行项目现有的轻量数据补迁移逻辑。
4. 兜底执行 ``create_all`` 确保当前模型定义已建表
"""
global _db_initialized
if _db_initialized:
@@ -120,7 +84,6 @@ def initialize_database() -> None:
f" 当前版本={migration_state.resolved_version.version},目标版本={migration_state.target_version}"
)
SQLModel.metadata.create_all(engine)
_migrate_action_records_to_tool_records()
_migration_bootstrapper.finalize_database(migration_state)
_db_initialized = True

View File

@@ -94,7 +94,6 @@ class Images(SQLModel, table=True):
full_path: str = Field(max_length=1024) # 文件的完整路径 (包括文件名)
image_type: ImageType = Field(sa_column=Column(SQLEnum(ImageType)), default=ImageType.EMOJI)
"""图片类型,例如 'emoji''image'"""
emotion: Optional[str] = Field(default=None, nullable=True) # 表情包的情感标签,逗号分隔
query_count: int = Field(default=0) # 被查询次数
is_registered: bool = Field(default=False) # 是否已经注册
@@ -113,27 +112,6 @@ class Images(SQLModel, table=True):
vlm_processed: bool = Field(default=False) # 是否已经过VLM处理
class ActionRecord(SQLModel, table=True):
"""存储动作记录"""
__tablename__ = "action_records" # type: ignore
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
# 元信息
action_id: str = Field(index=True, max_length=255) # 动作ID
timestamp: datetime = Field(default_factory=datetime.now, sa_column=Column(DateTime, index=True)) # 记录时间戳
session_id: str = Field(index=True, max_length=255) # 对应的 ChatSession session_id
# 调用信息
action_name: str = Field(index=True, max_length=255) # 动作名称
action_reasoning: Optional[str] = Field(default=None) # 动作推理过程
action_data: Optional[str] = Field(default=None) # 动作数据JSON格式存储
action_builtin_prompt: Optional[str] = Field(default=None) # 内置动作提示
action_display_prompt: Optional[str] = Field(default=None) # 最终输入到Prompt的内容
class ToolRecord(SQLModel, table=True):
"""存储工具调用记录"""
@@ -281,28 +259,6 @@ class ChatHistory(SQLModel, table=True):
summary: str # 概括:对这段话的平文本概括
class ThinkingQuestion(SQLModel, table=True):
"""存储思考型问题的模型"""
__tablename__ = "thinking_questions" # type: ignore
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
# 问答对
question: str # 问题内容
context: Optional[str] = Field(default=None, nullable=True) # 上下文
found_answer: bool = Field(default=False) # 是否找到答案
answer: Optional[str] = Field(default=None, nullable=True) # 问题答案
thinking_steps: Optional[str] = Field(default=None, nullable=True) # 思考步骤JSON格式存储
created_timestamp: datetime = Field(
default_factory=datetime.now, sa_column=Column(DateTime, index=True)
) # 创建时间
updated_timestamp: datetime = Field(
default_factory=datetime.now, sa_column=Column(DateTime, index=True)
) # 最后更新时间
class BinaryData(SQLModel, table=True):
"""存储二进制数据的模型"""

View File

@@ -5,6 +5,7 @@ from .builtin import (
EMPTY_SCHEMA_VERSION,
LATEST_SCHEMA_VERSION,
LEGACY_V1_SCHEMA_VERSION,
V2_SCHEMA_VERSION,
build_default_migration_registry,
build_default_schema_version_resolver,
)
@@ -61,6 +62,7 @@ __all__ = [
"EMPTY_SCHEMA_VERSION",
"LATEST_SCHEMA_VERSION",
"LEGACY_V1_SCHEMA_VERSION",
"V2_SCHEMA_VERSION",
"MigrationExecutionContext",
"MigrationPlan",
"MigrationPlanner",

View File

@@ -6,12 +6,14 @@ from .legacy_v1_to_v2 import migrate_legacy_v1_to_v2
from .models import DatabaseSchemaSnapshot, MigrationStep
from .registry import MigrationRegistry
from .resolver import BaseSchemaVersionDetector, SchemaVersionResolver
from .version_store import SQLiteUserVersionStore
from .schema import SQLiteSchemaInspector
from .v2_to_v3 import migrate_v2_to_v3
from .version_store import SQLiteUserVersionStore
EMPTY_SCHEMA_VERSION = 0
LEGACY_V1_SCHEMA_VERSION = 1
LATEST_SCHEMA_VERSION = 2
V2_SCHEMA_VERSION = 2
LATEST_SCHEMA_VERSION = 3
_LEGACY_V1_EXCLUSIVE_TABLES = (
"chat_streams",
@@ -24,6 +26,13 @@ _LEGACY_V1_EXCLUSIVE_TABLES = (
"messages",
"thinking_back",
)
_COMMON_MARKER_TABLES = (
"mai_messages",
"chat_sessions",
"expressions",
"jargons",
"tool_records",
)
class LatestSchemaVersionDetector(BaseSchemaVersionDetector):
@@ -36,6 +45,7 @@ class LatestSchemaVersionDetector(BaseSchemaVersionDetector):
Returns:
str: 当前探测器名称。
"""
return "latest_schema_detector"
def detect_version(self, snapshot: DatabaseSchemaSnapshot) -> Optional[int]:
@@ -47,18 +57,16 @@ class LatestSchemaVersionDetector(BaseSchemaVersionDetector):
Returns:
Optional[int]: 若识别为最新结构则返回最新版本号,否则返回 ``None``。
"""
if any(snapshot.has_table(table_name) for table_name in _LEGACY_V1_EXCLUSIVE_TABLES):
return None
latest_marker_tables = (
"mai_messages",
"chat_sessions",
"expressions",
"jargons",
"thinking_questions",
"tool_records",
)
if not all(snapshot.has_table(table_name) for table_name in latest_marker_tables):
if not all(snapshot.has_table(table_name) for table_name in _COMMON_MARKER_TABLES):
return None
if snapshot.has_table("action_records"):
return None
if snapshot.has_table("thinking_questions"):
return None
if snapshot.has_column("images", "emotion"):
return None
if not snapshot.has_column("images", "image_hash"):
return None
@@ -66,13 +74,53 @@ class LatestSchemaVersionDetector(BaseSchemaVersionDetector):
return None
if not snapshot.has_column("images", "image_type"):
return None
if not snapshot.has_column("chat_history", "session_id"):
return None
if not snapshot.has_column("person_info", "user_nickname"):
return None
return LATEST_SCHEMA_VERSION
class V2SchemaVersionDetector(BaseSchemaVersionDetector):
"""v2 schema 结构探测器。"""
@property
def name(self) -> str:
"""返回探测器名称。
Returns:
str: 当前探测器名称。
"""
return "v2_schema_detector"
def detect_version(self, snapshot: DatabaseSchemaSnapshot) -> Optional[int]:
"""检测数据库是否为 v2 结构。
Args:
snapshot: 当前数据库结构快照。
Returns:
Optional[int]: 若识别为 v2 结构则返回 ``2``,否则返回 ``None``。
"""
if any(snapshot.has_table(table_name) for table_name in _LEGACY_V1_EXCLUSIVE_TABLES):
return None
if not all(snapshot.has_table(table_name) for table_name in _COMMON_MARKER_TABLES):
return None
if not snapshot.has_table("action_records"):
return None
if not snapshot.has_table("thinking_questions"):
return None
if not snapshot.has_column("images", "emotion"):
return None
if not snapshot.has_column("action_records", "session_id"):
return None
if not snapshot.has_column("chat_history", "session_id"):
return None
if not snapshot.has_column("person_info", "user_nickname"):
return None
return LATEST_SCHEMA_VERSION
return V2_SCHEMA_VERSION
class LegacyV1SchemaDetector(BaseSchemaVersionDetector):
@@ -85,6 +133,7 @@ class LegacyV1SchemaDetector(BaseSchemaVersionDetector):
Returns:
str: 当前探测器名称。
"""
return "legacy_v1_schema_detector"
def detect_version(self, snapshot: DatabaseSchemaSnapshot) -> Optional[int]:
@@ -96,6 +145,7 @@ class LegacyV1SchemaDetector(BaseSchemaVersionDetector):
Returns:
Optional[int]: 若识别为旧版结构则返回 ``1``,否则返回 ``None``。
"""
if any(snapshot.has_table(table_name) for table_name in _LEGACY_V1_EXCLUSIVE_TABLES):
return LEGACY_V1_SCHEMA_VERSION
@@ -121,8 +171,10 @@ def build_default_schema_version_detectors() -> List[BaseSchemaVersionDetector]:
Returns:
List[BaseSchemaVersionDetector]: 按优先级排序的探测器列表。
"""
return [
LatestSchemaVersionDetector(),
V2SchemaVersionDetector(),
LegacyV1SchemaDetector(),
]
@@ -133,6 +185,7 @@ def build_default_schema_version_resolver() -> SchemaVersionResolver:
Returns:
SchemaVersionResolver: 配置完成的 schema 版本解析器。
"""
return SchemaVersionResolver(
version_store=SQLiteUserVersionStore(),
schema_inspector=SQLiteSchemaInspector(),
@@ -146,14 +199,22 @@ def build_default_migration_registry() -> MigrationRegistry:
Returns:
MigrationRegistry: 含默认迁移步骤的注册表实例。
"""
return MigrationRegistry(
steps=[
MigrationStep(
version_from=LEGACY_V1_SCHEMA_VERSION,
version_to=LATEST_SCHEMA_VERSION,
name="legacy_v1_to_latest_v2",
description="将旧版 0.x 数据库整体迁移到当前最新 schema。",
version_to=V2_SCHEMA_VERSION,
name="legacy_v1_to_v2",
description="将旧版 0.x 数据库迁移到 v2 schema。",
handler=migrate_legacy_v1_to_v2,
)
),
MigrationStep(
version_from=V2_SCHEMA_VERSION,
version_to=LATEST_SCHEMA_VERSION,
name="v2_to_v3",
description="移除废弃表,并将 emoji 标签统一收敛到 description 字段。",
handler=migrate_v2_to_v3,
),
]
)

View File

@@ -0,0 +1,298 @@
"""冻结的 v2 schema 快照。
该模块只用于 ``legacy_v1_to_v2`` 迁移,避免迁移过程依赖当前运行时代码中的
最新 SQLModel 定义,导致历史迁移随着后续 schema 演进而失真。
"""
from sqlalchemy.engine import Connection
# Frozen v2 table definitions: one ``CREATE TABLE IF NOT EXISTS`` statement per
# table, executed in this order by ``create_frozen_v2_schema``.
# The snapshot still contains the v2-only ``action_records`` and
# ``thinking_questions`` tables and the ``images.emotion`` column; the
# v2 -> v3 migration step is what removes them.
# The statements are deliberately literal SQL so that this snapshot does not
# follow later changes to the runtime model definitions.
_V2_TABLE_STATEMENTS = (
"""
CREATE TABLE IF NOT EXISTS action_records (
id INTEGER NOT NULL,
action_id VARCHAR(255) NOT NULL,
timestamp DATETIME,
session_id VARCHAR(255) NOT NULL,
action_name VARCHAR(255) NOT NULL,
action_reasoning VARCHAR,
action_data VARCHAR,
action_builtin_prompt VARCHAR,
action_display_prompt VARCHAR,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS binary_data (
id INTEGER NOT NULL,
data_hash VARCHAR(255) NOT NULL,
full_path VARCHAR(1024) NOT NULL,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS chat_history (
id INTEGER NOT NULL,
session_id VARCHAR(255) NOT NULL,
start_timestamp DATETIME,
end_timestamp DATETIME,
query_count INTEGER NOT NULL,
query_forget_count INTEGER NOT NULL,
original_messages VARCHAR NOT NULL,
participants VARCHAR NOT NULL,
theme VARCHAR NOT NULL,
keywords VARCHAR NOT NULL,
summary VARCHAR NOT NULL,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS chat_sessions (
id INTEGER NOT NULL,
session_id VARCHAR(255) NOT NULL,
created_timestamp DATETIME,
last_active_timestamp DATETIME,
user_id VARCHAR(255),
group_id VARCHAR(255),
platform VARCHAR(100) NOT NULL,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS command_records (
id INTEGER NOT NULL,
timestamp DATETIME,
session_id VARCHAR(255) NOT NULL,
command_name VARCHAR(255) NOT NULL,
command_data VARCHAR,
command_result VARCHAR,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS expressions (
id INTEGER NOT NULL,
situation VARCHAR(255) NOT NULL,
style VARCHAR(255) NOT NULL,
content_list VARCHAR NOT NULL,
count INTEGER NOT NULL,
last_active_time DATETIME,
create_time DATETIME,
session_id VARCHAR(255),
checked BOOLEAN NOT NULL,
rejected BOOLEAN NOT NULL,
modified_by VARCHAR(4),
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS images (
id INTEGER NOT NULL,
image_hash VARCHAR(255) NOT NULL,
description VARCHAR NOT NULL,
full_path VARCHAR(1024) NOT NULL,
image_type VARCHAR(5),
emotion VARCHAR,
query_count INTEGER NOT NULL,
is_registered BOOLEAN NOT NULL,
is_banned BOOLEAN NOT NULL,
no_file_flag BOOLEAN NOT NULL,
record_time DATETIME,
register_time DATETIME,
last_used_time DATETIME,
vlm_processed BOOLEAN NOT NULL,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS jargons (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
content VARCHAR(255) NOT NULL,
raw_content TEXT,
meaning TEXT NOT NULL,
session_id_dict TEXT NOT NULL,
count INTEGER NOT NULL,
is_jargon BOOLEAN,
is_complete BOOLEAN NOT NULL,
is_global BOOLEAN NOT NULL,
last_inference_count INTEGER NOT NULL,
inference_with_context TEXT,
inference_with_content_only TEXT
)
""",
"""
CREATE TABLE IF NOT EXISTS llm_usage (
id INTEGER NOT NULL,
model_name VARCHAR(255) NOT NULL,
model_assign_name VARCHAR(255),
model_api_provider_name VARCHAR(255) NOT NULL,
endpoint VARCHAR(255),
user_type VARCHAR(6),
request_type VARCHAR(50) NOT NULL,
time_cost FLOAT,
timestamp DATETIME,
prompt_tokens INTEGER NOT NULL,
completion_tokens INTEGER NOT NULL,
total_tokens INTEGER NOT NULL,
cost FLOAT NOT NULL,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS mai_knowledge (
id INTEGER NOT NULL,
knowledge_id VARCHAR(255) NOT NULL,
category_id VARCHAR(32) NOT NULL,
content VARCHAR NOT NULL,
normalized_content VARCHAR NOT NULL,
metadata_json VARCHAR,
created_at DATETIME,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS mai_messages (
id INTEGER NOT NULL,
message_id VARCHAR(255) NOT NULL,
timestamp DATETIME,
platform VARCHAR(100) NOT NULL,
user_id VARCHAR(255) NOT NULL,
user_nickname VARCHAR(255) NOT NULL,
user_cardname VARCHAR(255),
group_id VARCHAR(255),
group_name VARCHAR(255),
is_mentioned BOOLEAN NOT NULL,
is_at BOOLEAN NOT NULL,
session_id VARCHAR(255) NOT NULL,
reply_to VARCHAR(255),
is_emoji BOOLEAN NOT NULL,
is_picture BOOLEAN NOT NULL,
is_command BOOLEAN NOT NULL,
is_notify BOOLEAN NOT NULL,
raw_content BLOB,
processed_plain_text VARCHAR,
display_message VARCHAR,
additional_config VARCHAR,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS online_time (
id INTEGER NOT NULL,
timestamp DATETIME,
duration_minutes INTEGER NOT NULL,
start_timestamp DATETIME,
end_timestamp DATETIME,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS person_info (
id INTEGER NOT NULL,
is_known BOOLEAN NOT NULL,
person_id VARCHAR(255) NOT NULL,
person_name VARCHAR(255),
name_reason VARCHAR,
platform VARCHAR(100) NOT NULL,
user_id VARCHAR(255) NOT NULL,
user_nickname VARCHAR(255) NOT NULL,
group_cardname VARCHAR,
memory_points VARCHAR,
know_counts INTEGER NOT NULL,
first_known_time DATETIME,
last_known_time DATETIME,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS thinking_questions (
id INTEGER NOT NULL,
question VARCHAR NOT NULL,
context VARCHAR,
found_answer BOOLEAN NOT NULL,
answer VARCHAR,
thinking_steps VARCHAR,
created_timestamp DATETIME,
updated_timestamp DATETIME,
PRIMARY KEY (id)
)
""",
"""
CREATE TABLE IF NOT EXISTS tool_records (
id INTEGER NOT NULL,
tool_id VARCHAR(255) NOT NULL,
timestamp DATETIME,
session_id VARCHAR(255) NOT NULL,
tool_name VARCHAR(255) NOT NULL,
tool_reasoning VARCHAR,
tool_data VARCHAR,
tool_builtin_prompt VARCHAR,
tool_display_prompt VARCHAR,
PRIMARY KEY (id)
)
""",
)
# Frozen v2 index definitions, executed by ``create_frozen_v2_schema`` after all
# table statements have run. Every statement uses ``IF NOT EXISTS``, so an index
# that is already present is left untouched. Only ``chat_sessions.session_id``
# and ``person_info.person_id`` are unique indexes; all others are plain.
_V2_INDEX_STATEMENTS = (
"CREATE INDEX IF NOT EXISTS ix_action_records_action_id ON action_records (action_id)",
"CREATE INDEX IF NOT EXISTS ix_action_records_action_name ON action_records (action_name)",
"CREATE INDEX IF NOT EXISTS ix_action_records_session_id ON action_records (session_id)",
"CREATE INDEX IF NOT EXISTS ix_action_records_timestamp ON action_records (timestamp)",
"CREATE INDEX IF NOT EXISTS ix_binary_data_data_hash ON binary_data (data_hash)",
"CREATE INDEX IF NOT EXISTS ix_chat_history_end_timestamp ON chat_history (end_timestamp)",
"CREATE INDEX IF NOT EXISTS ix_chat_history_session_id ON chat_history (session_id)",
"CREATE INDEX IF NOT EXISTS ix_chat_history_start_timestamp ON chat_history (start_timestamp)",
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_created_timestamp ON chat_sessions (created_timestamp)",
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_group_id ON chat_sessions (group_id)",
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_last_active_timestamp ON chat_sessions (last_active_timestamp)",
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_platform ON chat_sessions (platform)",
"CREATE UNIQUE INDEX IF NOT EXISTS ix_chat_sessions_session_id ON chat_sessions (session_id)",
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_user_id ON chat_sessions (user_id)",
"CREATE INDEX IF NOT EXISTS ix_command_records_command_name ON command_records (command_name)",
"CREATE INDEX IF NOT EXISTS ix_command_records_session_id ON command_records (session_id)",
"CREATE INDEX IF NOT EXISTS ix_command_records_timestamp ON command_records (timestamp)",
"CREATE INDEX IF NOT EXISTS ix_expressions_last_active_time ON expressions (last_active_time)",
"CREATE INDEX IF NOT EXISTS ix_expressions_situation ON expressions (situation)",
"CREATE INDEX IF NOT EXISTS ix_expressions_style ON expressions (style)",
"CREATE INDEX IF NOT EXISTS ix_images_image_hash ON images (image_hash)",
"CREATE INDEX IF NOT EXISTS ix_images_record_time ON images (record_time)",
"CREATE INDEX IF NOT EXISTS ix_jargons_content ON jargons (content)",
"CREATE INDEX IF NOT EXISTS ix_llm_usage_model_api_provider_name ON llm_usage (model_api_provider_name)",
"CREATE INDEX IF NOT EXISTS ix_llm_usage_model_assign_name ON llm_usage (model_assign_name)",
"CREATE INDEX IF NOT EXISTS ix_llm_usage_model_name ON llm_usage (model_name)",
"CREATE INDEX IF NOT EXISTS ix_llm_usage_timestamp ON llm_usage (timestamp)",
"CREATE INDEX IF NOT EXISTS ix_mai_knowledge_category_id ON mai_knowledge (category_id)",
"CREATE INDEX IF NOT EXISTS ix_mai_knowledge_created_at ON mai_knowledge (created_at)",
"CREATE INDEX IF NOT EXISTS ix_mai_knowledge_knowledge_id ON mai_knowledge (knowledge_id)",
"CREATE INDEX IF NOT EXISTS ix_mai_knowledge_normalized_content ON mai_knowledge (normalized_content)",
"CREATE INDEX IF NOT EXISTS ix_mai_messages_group_id ON mai_messages (group_id)",
"CREATE INDEX IF NOT EXISTS ix_mai_messages_message_id ON mai_messages (message_id)",
"CREATE INDEX IF NOT EXISTS ix_mai_messages_platform ON mai_messages (platform)",
"CREATE INDEX IF NOT EXISTS ix_mai_messages_session_id ON mai_messages (session_id)",
"CREATE INDEX IF NOT EXISTS ix_mai_messages_user_id ON mai_messages (user_id)",
"CREATE INDEX IF NOT EXISTS ix_mai_messages_user_nickname ON mai_messages (user_nickname)",
"CREATE INDEX IF NOT EXISTS ix_online_time_timestamp ON online_time (timestamp)",
"CREATE UNIQUE INDEX IF NOT EXISTS ix_person_info_person_id ON person_info (person_id)",
"CREATE INDEX IF NOT EXISTS ix_person_info_platform ON person_info (platform)",
"CREATE INDEX IF NOT EXISTS ix_person_info_user_id ON person_info (user_id)",
"CREATE INDEX IF NOT EXISTS ix_person_info_user_nickname ON person_info (user_nickname)",
"CREATE INDEX IF NOT EXISTS ix_thinking_questions_created_timestamp ON thinking_questions (created_timestamp)",
"CREATE INDEX IF NOT EXISTS ix_thinking_questions_updated_timestamp ON thinking_questions (updated_timestamp)",
"CREATE INDEX IF NOT EXISTS ix_tool_records_session_id ON tool_records (session_id)",
"CREATE INDEX IF NOT EXISTS ix_tool_records_timestamp ON tool_records (timestamp)",
"CREATE INDEX IF NOT EXISTS ix_tool_records_tool_id ON tool_records (tool_id)",
"CREATE INDEX IF NOT EXISTS ix_tool_records_tool_name ON tool_records (tool_name)",
)
def create_frozen_v2_schema(connection: Connection) -> None:
    """Create every table and index of the frozen v2 schema.

    All table statements are executed first, followed by all index
    statements, each one sent to the driver unchanged. Because every
    statement uses ``IF NOT EXISTS``, objects that already exist are skipped.

    Args:
        connection: Open database connection the DDL is executed on.
    """
    # Tables must precede indexes, so the two tuples are chained in that order.
    ddl_statements = (*_V2_TABLE_STATEMENTS, *_V2_INDEX_STATEMENTS)
    for ddl in ddl_statements:
        connection.exec_driver_sql(ddl)

View File

@@ -1,4 +1,4 @@
"""旧版 ``0.x`` 数据库升级到最新 schema 的迁移逻辑。"""
"""旧版 ``0.x`` 数据库升级到 v2 schema 的迁移逻辑。"""
from __future__ import annotations
@@ -7,15 +7,16 @@ from dataclasses import dataclass
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, cast
import json
import msgpack
from sqlalchemy import text
from sqlalchemy.engine import Connection
import json
import msgpack
from src.common.logger import get_logger
from .exceptions import DatabaseMigrationExecutionError
from .frozen_v2_schema import create_frozen_v2_schema
from .models import DatabaseSchemaSnapshot, MigrationExecutionContext
from .schema import SQLiteSchemaInspector
@@ -52,19 +53,15 @@ class LegacyTableData:
def migrate_legacy_v1_to_v2(context: MigrationExecutionContext) -> None:
"""执行旧版 ``0.x`` 数据库到最新 schema 的迁移。
"""执行旧版 ``0.x`` 数据库到 v2 schema 的迁移。
Args:
context: 当前迁移步骤执行上下文。
"""
from sqlmodel import SQLModel
import src.common.database.database_model # noqa: F401
schema_inspector = SQLiteSchemaInspector()
snapshot = schema_inspector.inspect(context.connection)
_rename_legacy_v1_tables(context.connection, snapshot)
SQLModel.metadata.create_all(context.connection)
create_frozen_v2_schema(context.connection)
table_migration_jobs: List[Tuple[str, Callable[[MigrationExecutionContext], int]]] = [
("chat_sessions", _migrate_chat_sessions),
@@ -794,8 +791,6 @@ def _migrate_images(context: MigrationExecutionContext) -> int:
if full_path and dedupe_key not in existing_keys:
migrated_description = _normalize_required_text(row.get("description"))
migrated_emotion = _normalize_optional_text(row.get("emotion"))
if not migrated_description and migrated_emotion:
migrated_description = migrated_emotion
connection.execute(
insert_sql,
{
@@ -803,7 +798,7 @@ def _migrate_images(context: MigrationExecutionContext) -> int:
"description": migrated_description,
"full_path": full_path,
"image_type": "EMOJI",
"emotion": None,
"emotion": migrated_emotion,
"query_count": _normalize_int(row.get("query_count"), default=0),
"is_registered": _normalize_bool(row.get("is_registered"), default=False),
"is_banned": _normalize_bool(row.get("is_banned"), default=False),

View File

@@ -0,0 +1,269 @@
"""v2 schema 升级到 v3 的迁移逻辑。"""
from typing import Any, Dict, List
from sqlalchemy import text
from sqlalchemy.engine import Connection
from src.common.logger import get_logger
from .exceptions import DatabaseMigrationExecutionError
from .models import MigrationExecutionContext
from .schema import SQLiteSchemaInspector
# Shared logger for the database migration steps.
logger = get_logger("database_migration")
# Temporary name the v2 ``images`` table is renamed to while the v3 table is
# rebuilt; the images migration refuses to run if a table with this name
# already exists.
_V2_IMAGES_BACKUP_TABLE = "__v2_images_backup"
# v3 definition of ``images``: the same columns as the frozen v2 table minus
# ``emotion``.
_V3_IMAGES_CREATE_SQL = """
CREATE TABLE images (
id INTEGER NOT NULL,
image_hash VARCHAR(255) NOT NULL,
description VARCHAR NOT NULL,
full_path VARCHAR(1024) NOT NULL,
image_type VARCHAR(5),
query_count INTEGER NOT NULL,
is_registered BOOLEAN NOT NULL,
is_banned BOOLEAN NOT NULL,
no_file_flag BOOLEAN NOT NULL,
record_time DATETIME,
register_time DATETIME,
last_used_time DATETIME,
vlm_processed BOOLEAN NOT NULL,
PRIMARY KEY (id)
)
"""
# Indexes recreated on the rebuilt ``images`` table. They are executed only
# after the backup table has been dropped.
# NOTE(review): presumably there is no ``IF NOT EXISTS`` because the renamed
# backup table keeps the v2 index names until it is dropped - confirm.
_V3_IMAGES_INDEX_STATEMENTS = (
"CREATE INDEX ix_images_image_hash ON images (image_hash)",
"CREATE INDEX ix_images_record_time ON images (record_time)",
)
def migrate_v2_to_v3(context: MigrationExecutionContext) -> None:
    """Run the v2 -> v3 schema migration.

    The step performs three table-level operations in order:

    1. copy leftover ``action_records`` rows into ``tool_records`` and drop
       ``action_records``;
    2. drop ``thinking_questions``;
    3. rebuild ``images`` without the ``emotion`` column.

    Progress is reported through ``context`` after each of the three tables.

    Args:
        context: Execution context of the current migration step; supplies the
            database connection and the progress-reporting callbacks.
    """
    connection = context.connection

    # Row counts are taken once up front: nothing below changes the number of
    # rows in ``action_records`` or ``thinking_questions`` before they are
    # dropped, so the same figures serve for the total and for each step.
    action_record_count = _count_table_rows(connection, "action_records")
    thinking_question_count = _count_table_rows(connection, "thinking_questions")
    image_count = _count_table_rows(connection, "images")

    context.start_progress(
        total_tables=3,
        total_records=action_record_count + thinking_question_count + image_count,
        description="v2 -> v3 迁移进度",
        table_unit_name="",
        record_unit_name="记录",
    )

    migrated_tool_records = _migrate_action_records_to_tool_records(connection)
    _drop_table_if_exists(connection, "action_records")
    context.advance_progress(
        records=action_record_count,
        completed_tables=1,
        item_name="action_records",
    )

    _drop_table_if_exists(connection, "thinking_questions")
    context.advance_progress(
        records=thinking_question_count,
        completed_tables=1,
        item_name="thinking_questions",
    )

    migrated_image_rows = _migrate_images_table_to_v3(connection)
    context.advance_progress(
        records=migrated_image_rows,
        completed_tables=1,
        item_name="images",
    )

    # The adjacent literals are concatenated, so the separator between the two
    # fields has to be written explicitly.
    logger.info(
        "v2 -> v3 数据库迁移完成: "
        f"tool_records补迁移={migrated_tool_records}, "
        f"images重建={migrated_image_rows}"
    )
def _count_table_rows(connection: Connection, table_name: str) -> int:
    """Return the number of rows in ``table_name``, or 0 if the table is absent.

    Args:
        connection: Open database connection.
        table_name: Name of the table to count; it is interpolated into the
            statement as a double-quoted identifier.

    Returns:
        int: Row count, or 0 when the table does not exist or no result row
        comes back.
    """
    inspector = SQLiteSchemaInspector()
    if inspector.table_exists(connection, table_name):
        count_query = text(f'SELECT COUNT(*) FROM "{table_name}"')
        count_row = connection.execute(count_query).first()
        if count_row:
            return int(count_row[0])
    return 0
def _drop_table_if_exists(connection: Connection, table_name: str) -> None:
    """Drop ``table_name``; a missing table is silently ignored.

    Args:
        connection: Open database connection.
        table_name: Name of the table to drop; it is interpolated into the
            statement as a double-quoted identifier.
    """
    drop_statement = f'DROP TABLE IF EXISTS "{table_name}"'
    connection.exec_driver_sql(drop_statement)
def _migrate_action_records_to_tool_records(connection: Connection) -> int:
    """Copy leftover v2 ``action_records`` rows into ``tool_records``.

    A row is copied only when no ``tool_records`` row already has a
    ``tool_id`` equal to its ``action_id``, so rows transferred earlier are
    not duplicated.

    Args:
        connection: Open database connection.

    Returns:
        int: Number of rows actually added to ``tool_records``; 0 when
        ``action_records`` does not exist.
    """
    schema_inspector = SQLiteSchemaInspector()
    if not schema_inspector.table_exists(connection, "action_records"):
        return 0

    # Measure the real effect of the INSERT: the size of ``action_records``
    # overstates the result whenever the NOT EXISTS filter skips rows.
    rows_before = _count_table_rows(connection, "tool_records")
    connection.execute(
        text(
            "\n"
            "INSERT INTO tool_records (\n"
            "tool_id,\n"
            "timestamp,\n"
            "session_id,\n"
            "tool_name,\n"
            "tool_reasoning,\n"
            "tool_data,\n"
            "tool_builtin_prompt,\n"
            "tool_display_prompt\n"
            ")\n"
            "SELECT\n"
            "action_id,\n"
            "timestamp,\n"
            "session_id,\n"
            "action_name,\n"
            "action_reasoning,\n"
            "action_data,\n"
            "action_builtin_prompt,\n"
            "action_display_prompt\n"
            "FROM action_records\n"
            "WHERE NOT EXISTS (\n"
            "SELECT 1\n"
            "FROM tool_records\n"
            "WHERE tool_records.tool_id = action_records.action_id\n"
            ")\n"
        )
    )
    rows_after = _count_table_rows(connection, "tool_records")
    return rows_after - rows_before
def _migrate_images_table_to_v3(connection: Connection) -> int:
    """Rebuild the ``images`` table without its ``emotion`` column.

    The existing table is renamed to a backup, the v3 table is created, every
    backup row is re-inserted with a description computed by
    ``_migrate_v3_emoji_description``, the backup is dropped, and finally the
    v3 indexes are created.

    Args:
        connection: Open database connection.

    Returns:
        int: Number of rows copied into the rebuilt table; the current row
        count when ``images`` has no ``emotion`` column; 0 when ``images``
        does not exist.

    Raises:
        DatabaseMigrationExecutionError: If the backup table already exists.
    """
    inspector = SQLiteSchemaInspector()
    if not inspector.table_exists(connection, "images"):
        return 0

    images_schema = inspector.get_table_schema(connection, "images")
    if not images_schema.has_column("emotion"):
        # Nothing to rebuild: the column this step removes is already gone.
        return _count_table_rows(connection, "images")

    if inspector.table_exists(connection, _V2_IMAGES_BACKUP_TABLE):
        raise DatabaseMigrationExecutionError(
            f"检测到残留备份表 {_V2_IMAGES_BACKUP_TABLE},无法安全执行 v2 -> v3 images 迁移。"
        )

    connection.exec_driver_sql(f'ALTER TABLE "images" RENAME TO "{_V2_IMAGES_BACKUP_TABLE}"')
    connection.exec_driver_sql(_V3_IMAGES_CREATE_SQL)

    select_backup = text(f'SELECT * FROM "{_V2_IMAGES_BACKUP_TABLE}" ORDER BY id')
    backup_rows = connection.execute(select_backup).mappings().all()

    insert_statement = text(
        "\n"
        "INSERT INTO images (\n"
        "id,\n"
        "image_hash,\n"
        "description,\n"
        "full_path,\n"
        "image_type,\n"
        "query_count,\n"
        "is_registered,\n"
        "is_banned,\n"
        "no_file_flag,\n"
        "record_time,\n"
        "register_time,\n"
        "last_used_time,\n"
        "vlm_processed\n"
        ") VALUES (\n"
        ":id,\n"
        ":image_hash,\n"
        ":description,\n"
        ":full_path,\n"
        ":image_type,\n"
        ":query_count,\n"
        ":is_registered,\n"
        ":is_banned,\n"
        ":no_file_flag,\n"
        ":record_time,\n"
        ":register_time,\n"
        ":last_used_time,\n"
        ":vlm_processed\n"
        ")\n"
    )

    for backup_row in backup_rows:
        read = backup_row.get
        connection.execute(
            insert_statement,
            {
                "id": read("id"),
                "image_hash": str(read("image_hash") or "").strip(),
                "description": _migrate_v3_emoji_description(backup_row),
                "full_path": str(read("full_path") or "").strip(),
                "image_type": read("image_type"),
                "query_count": int(read("query_count") or 0),
                "is_registered": bool(read("is_registered")),
                "is_banned": bool(read("is_banned")),
                "no_file_flag": bool(read("no_file_flag")),
                "record_time": read("record_time"),
                "register_time": read("register_time"),
                "last_used_time": read("last_used_time"),
                "vlm_processed": bool(read("vlm_processed")),
            },
        )

    # Order matters: the backup is dropped before the indexes are created.
    connection.exec_driver_sql(f'DROP TABLE "{_V2_IMAGES_BACKUP_TABLE}"')
    for index_ddl in _V3_IMAGES_INDEX_STATEMENTS:
        connection.exec_driver_sql(index_ddl)

    return len(backup_rows)
def _migrate_v3_emoji_description(row: Dict[str, Any]) -> str:
"""为 v3 统一 emoji 描述字段语义。
v3 中 `description` 对 emoji 统一承担“标签列表”的职责,因此迁移时:
1. 若旧 `emotion` 非空,优先将其规范化后写入 `description`
2. 否则保留并规范化当前 `description`
3. 非 emoji 图片保持原描述不变。
"""
image_type = str(row.get("image_type") or "").strip().upper()
current_description = str(row.get("description") or "").strip()
current_emotion = str(row.get("emotion") or "").strip()
if image_type != "EMOJI":
return current_description
normalized_tags = _normalize_emoji_tag_text(current_emotion or current_description)
if normalized_tags:
return ",".join(normalized_tags)
return current_description
def _normalize_emoji_tag_text(raw_value: Any) -> List[str]:
"""将 emoji 标签文本转换为去重后的标签列表。"""
normalized_text = str(raw_value or "").strip()
if not normalized_text:
return []
separators = [",", "", "", ";", "", "\n", "\r", "\t"]
for separator in separators[1:]:
normalized_text = normalized_text.replace(separator, separators[0])
deduped_tags: List[str] = []
seen_tags: set[str] = set()
for part in normalized_text.split(separators[0]):
normalized_part = part.strip()
lowered_part = normalized_part.lower()
if not normalized_part or lowered_part in seen_tags:
continue
seen_tags.add(lowered_part)
deduped_tags.append(normalized_part)
return deduped_tags