feat:优化表情包注册,迁移数据库v3
This commit is contained in:
@@ -3,7 +3,7 @@ from pathlib import Path
|
||||
from typing import ContextManager, Generator, TYPE_CHECKING
|
||||
|
||||
from rich.traceback import install
|
||||
from sqlalchemy import event, text
|
||||
from sqlalchemy import event
|
||||
from sqlalchemy.engine import Engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlmodel import SQLModel, Session, create_engine
|
||||
@@ -63,41 +63,6 @@ _migration_bootstrapper = create_database_migration_bootstrapper(engine)
|
||||
_db_initialized = False
|
||||
|
||||
|
||||
def _migrate_action_records_to_tool_records() -> None:
|
||||
"""将旧的 ``action_records`` 历史数据迁移到 ``tool_records``。"""
|
||||
migration_sql = text(
|
||||
"""
|
||||
INSERT INTO tool_records (
|
||||
tool_id,
|
||||
timestamp,
|
||||
session_id,
|
||||
tool_name,
|
||||
tool_reasoning,
|
||||
tool_data,
|
||||
tool_builtin_prompt,
|
||||
tool_display_prompt
|
||||
)
|
||||
SELECT
|
||||
action_id,
|
||||
timestamp,
|
||||
session_id,
|
||||
action_name,
|
||||
action_reasoning,
|
||||
action_data,
|
||||
action_builtin_prompt,
|
||||
action_display_prompt
|
||||
FROM action_records
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM tool_records
|
||||
WHERE tool_records.tool_id = action_records.action_id
|
||||
)
|
||||
"""
|
||||
)
|
||||
with engine.begin() as connection:
|
||||
connection.execute(migration_sql)
|
||||
|
||||
|
||||
def initialize_database() -> None:
|
||||
"""初始化数据库连接、结构与启动期迁移。
|
||||
|
||||
@@ -105,8 +70,7 @@ def initialize_database() -> None:
|
||||
1. 确保数据库目录存在;
|
||||
2. 加载 SQLModel 模型定义;
|
||||
3. 执行已注册的启动期迁移;
|
||||
4. 兜底执行 ``create_all`` 确保当前模型定义已建表;
|
||||
5. 执行项目现有的轻量数据补迁移逻辑。
|
||||
4. 兜底执行 ``create_all`` 确保当前模型定义已建表。
|
||||
"""
|
||||
global _db_initialized
|
||||
if _db_initialized:
|
||||
@@ -120,7 +84,6 @@ def initialize_database() -> None:
|
||||
f" 当前版本={migration_state.resolved_version.version},目标版本={migration_state.target_version}"
|
||||
)
|
||||
SQLModel.metadata.create_all(engine)
|
||||
_migrate_action_records_to_tool_records()
|
||||
_migration_bootstrapper.finalize_database(migration_state)
|
||||
_db_initialized = True
|
||||
|
||||
|
||||
@@ -94,7 +94,6 @@ class Images(SQLModel, table=True):
|
||||
full_path: str = Field(max_length=1024) # 文件的完整路径 (包括文件名)
|
||||
image_type: ImageType = Field(sa_column=Column(SQLEnum(ImageType)), default=ImageType.EMOJI)
|
||||
"""图片类型,例如 'emoji' 或 'image'"""
|
||||
emotion: Optional[str] = Field(default=None, nullable=True) # 表情包的情感标签,逗号分隔
|
||||
|
||||
query_count: int = Field(default=0) # 被查询次数
|
||||
is_registered: bool = Field(default=False) # 是否已经注册
|
||||
@@ -113,27 +112,6 @@ class Images(SQLModel, table=True):
|
||||
vlm_processed: bool = Field(default=False) # 是否已经过VLM处理
|
||||
|
||||
|
||||
class ActionRecord(SQLModel, table=True):
|
||||
"""存储动作记录"""
|
||||
|
||||
__tablename__ = "action_records" # type: ignore
|
||||
|
||||
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
|
||||
|
||||
# 元信息
|
||||
action_id: str = Field(index=True, max_length=255) # 动作ID
|
||||
timestamp: datetime = Field(default_factory=datetime.now, sa_column=Column(DateTime, index=True)) # 记录时间戳
|
||||
session_id: str = Field(index=True, max_length=255) # 对应的 ChatSession session_id
|
||||
|
||||
# 调用信息
|
||||
action_name: str = Field(index=True, max_length=255) # 动作名称
|
||||
action_reasoning: Optional[str] = Field(default=None) # 动作推理过程
|
||||
action_data: Optional[str] = Field(default=None) # 动作数据,JSON格式存储
|
||||
|
||||
action_builtin_prompt: Optional[str] = Field(default=None) # 内置动作提示
|
||||
action_display_prompt: Optional[str] = Field(default=None) # 最终输入到Prompt的内容
|
||||
|
||||
|
||||
class ToolRecord(SQLModel, table=True):
|
||||
"""存储工具调用记录"""
|
||||
|
||||
@@ -281,28 +259,6 @@ class ChatHistory(SQLModel, table=True):
|
||||
summary: str # 概括:对这段话的平文本概括
|
||||
|
||||
|
||||
class ThinkingQuestion(SQLModel, table=True):
|
||||
"""存储思考型问题的模型"""
|
||||
|
||||
__tablename__ = "thinking_questions" # type: ignore
|
||||
|
||||
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
|
||||
|
||||
# 问答对
|
||||
question: str # 问题内容
|
||||
context: Optional[str] = Field(default=None, nullable=True) # 上下文
|
||||
found_answer: bool = Field(default=False) # 是否找到答案
|
||||
answer: Optional[str] = Field(default=None, nullable=True) # 问题答案
|
||||
|
||||
thinking_steps: Optional[str] = Field(default=None, nullable=True) # 思考步骤,JSON格式存储
|
||||
created_timestamp: datetime = Field(
|
||||
default_factory=datetime.now, sa_column=Column(DateTime, index=True)
|
||||
) # 创建时间
|
||||
updated_timestamp: datetime = Field(
|
||||
default_factory=datetime.now, sa_column=Column(DateTime, index=True)
|
||||
) # 最后更新时间
|
||||
|
||||
|
||||
class BinaryData(SQLModel, table=True):
|
||||
"""存储二进制数据的模型"""
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ from .builtin import (
|
||||
EMPTY_SCHEMA_VERSION,
|
||||
LATEST_SCHEMA_VERSION,
|
||||
LEGACY_V1_SCHEMA_VERSION,
|
||||
V2_SCHEMA_VERSION,
|
||||
build_default_migration_registry,
|
||||
build_default_schema_version_resolver,
|
||||
)
|
||||
@@ -61,6 +62,7 @@ __all__ = [
|
||||
"EMPTY_SCHEMA_VERSION",
|
||||
"LATEST_SCHEMA_VERSION",
|
||||
"LEGACY_V1_SCHEMA_VERSION",
|
||||
"V2_SCHEMA_VERSION",
|
||||
"MigrationExecutionContext",
|
||||
"MigrationPlan",
|
||||
"MigrationPlanner",
|
||||
|
||||
@@ -6,12 +6,14 @@ from .legacy_v1_to_v2 import migrate_legacy_v1_to_v2
|
||||
from .models import DatabaseSchemaSnapshot, MigrationStep
|
||||
from .registry import MigrationRegistry
|
||||
from .resolver import BaseSchemaVersionDetector, SchemaVersionResolver
|
||||
from .version_store import SQLiteUserVersionStore
|
||||
from .schema import SQLiteSchemaInspector
|
||||
from .v2_to_v3 import migrate_v2_to_v3
|
||||
from .version_store import SQLiteUserVersionStore
|
||||
|
||||
EMPTY_SCHEMA_VERSION = 0
|
||||
LEGACY_V1_SCHEMA_VERSION = 1
|
||||
LATEST_SCHEMA_VERSION = 2
|
||||
V2_SCHEMA_VERSION = 2
|
||||
LATEST_SCHEMA_VERSION = 3
|
||||
|
||||
_LEGACY_V1_EXCLUSIVE_TABLES = (
|
||||
"chat_streams",
|
||||
@@ -24,6 +26,13 @@ _LEGACY_V1_EXCLUSIVE_TABLES = (
|
||||
"messages",
|
||||
"thinking_back",
|
||||
)
|
||||
_COMMON_MARKER_TABLES = (
|
||||
"mai_messages",
|
||||
"chat_sessions",
|
||||
"expressions",
|
||||
"jargons",
|
||||
"tool_records",
|
||||
)
|
||||
|
||||
|
||||
class LatestSchemaVersionDetector(BaseSchemaVersionDetector):
|
||||
@@ -36,6 +45,7 @@ class LatestSchemaVersionDetector(BaseSchemaVersionDetector):
|
||||
Returns:
|
||||
str: 当前探测器名称。
|
||||
"""
|
||||
|
||||
return "latest_schema_detector"
|
||||
|
||||
def detect_version(self, snapshot: DatabaseSchemaSnapshot) -> Optional[int]:
|
||||
@@ -47,18 +57,16 @@ class LatestSchemaVersionDetector(BaseSchemaVersionDetector):
|
||||
Returns:
|
||||
Optional[int]: 若识别为最新结构则返回最新版本号,否则返回 ``None``。
|
||||
"""
|
||||
|
||||
if any(snapshot.has_table(table_name) for table_name in _LEGACY_V1_EXCLUSIVE_TABLES):
|
||||
return None
|
||||
|
||||
latest_marker_tables = (
|
||||
"mai_messages",
|
||||
"chat_sessions",
|
||||
"expressions",
|
||||
"jargons",
|
||||
"thinking_questions",
|
||||
"tool_records",
|
||||
)
|
||||
if not all(snapshot.has_table(table_name) for table_name in latest_marker_tables):
|
||||
if not all(snapshot.has_table(table_name) for table_name in _COMMON_MARKER_TABLES):
|
||||
return None
|
||||
if snapshot.has_table("action_records"):
|
||||
return None
|
||||
if snapshot.has_table("thinking_questions"):
|
||||
return None
|
||||
if snapshot.has_column("images", "emotion"):
|
||||
return None
|
||||
if not snapshot.has_column("images", "image_hash"):
|
||||
return None
|
||||
@@ -66,13 +74,53 @@ class LatestSchemaVersionDetector(BaseSchemaVersionDetector):
|
||||
return None
|
||||
if not snapshot.has_column("images", "image_type"):
|
||||
return None
|
||||
if not snapshot.has_column("chat_history", "session_id"):
|
||||
return None
|
||||
if not snapshot.has_column("person_info", "user_nickname"):
|
||||
return None
|
||||
return LATEST_SCHEMA_VERSION
|
||||
|
||||
|
||||
class V2SchemaVersionDetector(BaseSchemaVersionDetector):
|
||||
"""v2 schema 结构探测器。"""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
"""返回探测器名称。
|
||||
|
||||
Returns:
|
||||
str: 当前探测器名称。
|
||||
"""
|
||||
|
||||
return "v2_schema_detector"
|
||||
|
||||
def detect_version(self, snapshot: DatabaseSchemaSnapshot) -> Optional[int]:
|
||||
"""检测数据库是否为 v2 结构。
|
||||
|
||||
Args:
|
||||
snapshot: 当前数据库结构快照。
|
||||
|
||||
Returns:
|
||||
Optional[int]: 若识别为 v2 结构则返回 ``2``,否则返回 ``None``。
|
||||
"""
|
||||
|
||||
if any(snapshot.has_table(table_name) for table_name in _LEGACY_V1_EXCLUSIVE_TABLES):
|
||||
return None
|
||||
if not all(snapshot.has_table(table_name) for table_name in _COMMON_MARKER_TABLES):
|
||||
return None
|
||||
if not snapshot.has_table("action_records"):
|
||||
return None
|
||||
if not snapshot.has_table("thinking_questions"):
|
||||
return None
|
||||
if not snapshot.has_column("images", "emotion"):
|
||||
return None
|
||||
if not snapshot.has_column("action_records", "session_id"):
|
||||
return None
|
||||
if not snapshot.has_column("chat_history", "session_id"):
|
||||
return None
|
||||
if not snapshot.has_column("person_info", "user_nickname"):
|
||||
return None
|
||||
return LATEST_SCHEMA_VERSION
|
||||
return V2_SCHEMA_VERSION
|
||||
|
||||
|
||||
class LegacyV1SchemaDetector(BaseSchemaVersionDetector):
|
||||
@@ -85,6 +133,7 @@ class LegacyV1SchemaDetector(BaseSchemaVersionDetector):
|
||||
Returns:
|
||||
str: 当前探测器名称。
|
||||
"""
|
||||
|
||||
return "legacy_v1_schema_detector"
|
||||
|
||||
def detect_version(self, snapshot: DatabaseSchemaSnapshot) -> Optional[int]:
|
||||
@@ -96,6 +145,7 @@ class LegacyV1SchemaDetector(BaseSchemaVersionDetector):
|
||||
Returns:
|
||||
Optional[int]: 若识别为旧版结构则返回 ``1``,否则返回 ``None``。
|
||||
"""
|
||||
|
||||
if any(snapshot.has_table(table_name) for table_name in _LEGACY_V1_EXCLUSIVE_TABLES):
|
||||
return LEGACY_V1_SCHEMA_VERSION
|
||||
|
||||
@@ -121,8 +171,10 @@ def build_default_schema_version_detectors() -> List[BaseSchemaVersionDetector]:
|
||||
Returns:
|
||||
List[BaseSchemaVersionDetector]: 按优先级排序的探测器列表。
|
||||
"""
|
||||
|
||||
return [
|
||||
LatestSchemaVersionDetector(),
|
||||
V2SchemaVersionDetector(),
|
||||
LegacyV1SchemaDetector(),
|
||||
]
|
||||
|
||||
@@ -133,6 +185,7 @@ def build_default_schema_version_resolver() -> SchemaVersionResolver:
|
||||
Returns:
|
||||
SchemaVersionResolver: 配置完成的 schema 版本解析器。
|
||||
"""
|
||||
|
||||
return SchemaVersionResolver(
|
||||
version_store=SQLiteUserVersionStore(),
|
||||
schema_inspector=SQLiteSchemaInspector(),
|
||||
@@ -146,14 +199,22 @@ def build_default_migration_registry() -> MigrationRegistry:
|
||||
Returns:
|
||||
MigrationRegistry: 含默认迁移步骤的注册表实例。
|
||||
"""
|
||||
|
||||
return MigrationRegistry(
|
||||
steps=[
|
||||
MigrationStep(
|
||||
version_from=LEGACY_V1_SCHEMA_VERSION,
|
||||
version_to=LATEST_SCHEMA_VERSION,
|
||||
name="legacy_v1_to_latest_v2",
|
||||
description="将旧版 0.x 数据库整体迁移到当前最新 schema。",
|
||||
version_to=V2_SCHEMA_VERSION,
|
||||
name="legacy_v1_to_v2",
|
||||
description="将旧版 0.x 数据库迁移到 v2 schema。",
|
||||
handler=migrate_legacy_v1_to_v2,
|
||||
)
|
||||
),
|
||||
MigrationStep(
|
||||
version_from=V2_SCHEMA_VERSION,
|
||||
version_to=LATEST_SCHEMA_VERSION,
|
||||
name="v2_to_v3",
|
||||
description="移除废弃表,并将 emoji 标签统一收敛到 description 字段。",
|
||||
handler=migrate_v2_to_v3,
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
298
src/common/database/migrations/frozen_v2_schema.py
Normal file
298
src/common/database/migrations/frozen_v2_schema.py
Normal file
@@ -0,0 +1,298 @@
|
||||
"""冻结的 v2 schema 快照。
|
||||
|
||||
该模块只用于 ``legacy_v1_to_v2`` 迁移,避免迁移过程依赖当前运行时代码中的
|
||||
最新 SQLModel 定义,导致历史迁移随着后续 schema 演进而失真。
|
||||
"""
|
||||
|
||||
from sqlalchemy.engine import Connection
|
||||
|
||||
_V2_TABLE_STATEMENTS = (
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS action_records (
|
||||
id INTEGER NOT NULL,
|
||||
action_id VARCHAR(255) NOT NULL,
|
||||
timestamp DATETIME,
|
||||
session_id VARCHAR(255) NOT NULL,
|
||||
action_name VARCHAR(255) NOT NULL,
|
||||
action_reasoning VARCHAR,
|
||||
action_data VARCHAR,
|
||||
action_builtin_prompt VARCHAR,
|
||||
action_display_prompt VARCHAR,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS binary_data (
|
||||
id INTEGER NOT NULL,
|
||||
data_hash VARCHAR(255) NOT NULL,
|
||||
full_path VARCHAR(1024) NOT NULL,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS chat_history (
|
||||
id INTEGER NOT NULL,
|
||||
session_id VARCHAR(255) NOT NULL,
|
||||
start_timestamp DATETIME,
|
||||
end_timestamp DATETIME,
|
||||
query_count INTEGER NOT NULL,
|
||||
query_forget_count INTEGER NOT NULL,
|
||||
original_messages VARCHAR NOT NULL,
|
||||
participants VARCHAR NOT NULL,
|
||||
theme VARCHAR NOT NULL,
|
||||
keywords VARCHAR NOT NULL,
|
||||
summary VARCHAR NOT NULL,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS chat_sessions (
|
||||
id INTEGER NOT NULL,
|
||||
session_id VARCHAR(255) NOT NULL,
|
||||
created_timestamp DATETIME,
|
||||
last_active_timestamp DATETIME,
|
||||
user_id VARCHAR(255),
|
||||
group_id VARCHAR(255),
|
||||
platform VARCHAR(100) NOT NULL,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS command_records (
|
||||
id INTEGER NOT NULL,
|
||||
timestamp DATETIME,
|
||||
session_id VARCHAR(255) NOT NULL,
|
||||
command_name VARCHAR(255) NOT NULL,
|
||||
command_data VARCHAR,
|
||||
command_result VARCHAR,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS expressions (
|
||||
id INTEGER NOT NULL,
|
||||
situation VARCHAR(255) NOT NULL,
|
||||
style VARCHAR(255) NOT NULL,
|
||||
content_list VARCHAR NOT NULL,
|
||||
count INTEGER NOT NULL,
|
||||
last_active_time DATETIME,
|
||||
create_time DATETIME,
|
||||
session_id VARCHAR(255),
|
||||
checked BOOLEAN NOT NULL,
|
||||
rejected BOOLEAN NOT NULL,
|
||||
modified_by VARCHAR(4),
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS images (
|
||||
id INTEGER NOT NULL,
|
||||
image_hash VARCHAR(255) NOT NULL,
|
||||
description VARCHAR NOT NULL,
|
||||
full_path VARCHAR(1024) NOT NULL,
|
||||
image_type VARCHAR(5),
|
||||
emotion VARCHAR,
|
||||
query_count INTEGER NOT NULL,
|
||||
is_registered BOOLEAN NOT NULL,
|
||||
is_banned BOOLEAN NOT NULL,
|
||||
no_file_flag BOOLEAN NOT NULL,
|
||||
record_time DATETIME,
|
||||
register_time DATETIME,
|
||||
last_used_time DATETIME,
|
||||
vlm_processed BOOLEAN NOT NULL,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS jargons (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
content VARCHAR(255) NOT NULL,
|
||||
raw_content TEXT,
|
||||
meaning TEXT NOT NULL,
|
||||
session_id_dict TEXT NOT NULL,
|
||||
count INTEGER NOT NULL,
|
||||
is_jargon BOOLEAN,
|
||||
is_complete BOOLEAN NOT NULL,
|
||||
is_global BOOLEAN NOT NULL,
|
||||
last_inference_count INTEGER NOT NULL,
|
||||
inference_with_context TEXT,
|
||||
inference_with_content_only TEXT
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS llm_usage (
|
||||
id INTEGER NOT NULL,
|
||||
model_name VARCHAR(255) NOT NULL,
|
||||
model_assign_name VARCHAR(255),
|
||||
model_api_provider_name VARCHAR(255) NOT NULL,
|
||||
endpoint VARCHAR(255),
|
||||
user_type VARCHAR(6),
|
||||
request_type VARCHAR(50) NOT NULL,
|
||||
time_cost FLOAT,
|
||||
timestamp DATETIME,
|
||||
prompt_tokens INTEGER NOT NULL,
|
||||
completion_tokens INTEGER NOT NULL,
|
||||
total_tokens INTEGER NOT NULL,
|
||||
cost FLOAT NOT NULL,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS mai_knowledge (
|
||||
id INTEGER NOT NULL,
|
||||
knowledge_id VARCHAR(255) NOT NULL,
|
||||
category_id VARCHAR(32) NOT NULL,
|
||||
content VARCHAR NOT NULL,
|
||||
normalized_content VARCHAR NOT NULL,
|
||||
metadata_json VARCHAR,
|
||||
created_at DATETIME,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS mai_messages (
|
||||
id INTEGER NOT NULL,
|
||||
message_id VARCHAR(255) NOT NULL,
|
||||
timestamp DATETIME,
|
||||
platform VARCHAR(100) NOT NULL,
|
||||
user_id VARCHAR(255) NOT NULL,
|
||||
user_nickname VARCHAR(255) NOT NULL,
|
||||
user_cardname VARCHAR(255),
|
||||
group_id VARCHAR(255),
|
||||
group_name VARCHAR(255),
|
||||
is_mentioned BOOLEAN NOT NULL,
|
||||
is_at BOOLEAN NOT NULL,
|
||||
session_id VARCHAR(255) NOT NULL,
|
||||
reply_to VARCHAR(255),
|
||||
is_emoji BOOLEAN NOT NULL,
|
||||
is_picture BOOLEAN NOT NULL,
|
||||
is_command BOOLEAN NOT NULL,
|
||||
is_notify BOOLEAN NOT NULL,
|
||||
raw_content BLOB,
|
||||
processed_plain_text VARCHAR,
|
||||
display_message VARCHAR,
|
||||
additional_config VARCHAR,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS online_time (
|
||||
id INTEGER NOT NULL,
|
||||
timestamp DATETIME,
|
||||
duration_minutes INTEGER NOT NULL,
|
||||
start_timestamp DATETIME,
|
||||
end_timestamp DATETIME,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS person_info (
|
||||
id INTEGER NOT NULL,
|
||||
is_known BOOLEAN NOT NULL,
|
||||
person_id VARCHAR(255) NOT NULL,
|
||||
person_name VARCHAR(255),
|
||||
name_reason VARCHAR,
|
||||
platform VARCHAR(100) NOT NULL,
|
||||
user_id VARCHAR(255) NOT NULL,
|
||||
user_nickname VARCHAR(255) NOT NULL,
|
||||
group_cardname VARCHAR,
|
||||
memory_points VARCHAR,
|
||||
know_counts INTEGER NOT NULL,
|
||||
first_known_time DATETIME,
|
||||
last_known_time DATETIME,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS thinking_questions (
|
||||
id INTEGER NOT NULL,
|
||||
question VARCHAR NOT NULL,
|
||||
context VARCHAR,
|
||||
found_answer BOOLEAN NOT NULL,
|
||||
answer VARCHAR,
|
||||
thinking_steps VARCHAR,
|
||||
created_timestamp DATETIME,
|
||||
updated_timestamp DATETIME,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS tool_records (
|
||||
id INTEGER NOT NULL,
|
||||
tool_id VARCHAR(255) NOT NULL,
|
||||
timestamp DATETIME,
|
||||
session_id VARCHAR(255) NOT NULL,
|
||||
tool_name VARCHAR(255) NOT NULL,
|
||||
tool_reasoning VARCHAR,
|
||||
tool_data VARCHAR,
|
||||
tool_builtin_prompt VARCHAR,
|
||||
tool_display_prompt VARCHAR,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
""",
|
||||
)
|
||||
|
||||
_V2_INDEX_STATEMENTS = (
|
||||
"CREATE INDEX IF NOT EXISTS ix_action_records_action_id ON action_records (action_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_action_records_action_name ON action_records (action_name)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_action_records_session_id ON action_records (session_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_action_records_timestamp ON action_records (timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_binary_data_data_hash ON binary_data (data_hash)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_chat_history_end_timestamp ON chat_history (end_timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_chat_history_session_id ON chat_history (session_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_chat_history_start_timestamp ON chat_history (start_timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_created_timestamp ON chat_sessions (created_timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_group_id ON chat_sessions (group_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_last_active_timestamp ON chat_sessions (last_active_timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_platform ON chat_sessions (platform)",
|
||||
"CREATE UNIQUE INDEX IF NOT EXISTS ix_chat_sessions_session_id ON chat_sessions (session_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_chat_sessions_user_id ON chat_sessions (user_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_command_records_command_name ON command_records (command_name)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_command_records_session_id ON command_records (session_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_command_records_timestamp ON command_records (timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_expressions_last_active_time ON expressions (last_active_time)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_expressions_situation ON expressions (situation)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_expressions_style ON expressions (style)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_images_image_hash ON images (image_hash)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_images_record_time ON images (record_time)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_jargons_content ON jargons (content)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_llm_usage_model_api_provider_name ON llm_usage (model_api_provider_name)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_llm_usage_model_assign_name ON llm_usage (model_assign_name)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_llm_usage_model_name ON llm_usage (model_name)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_llm_usage_timestamp ON llm_usage (timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_knowledge_category_id ON mai_knowledge (category_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_knowledge_created_at ON mai_knowledge (created_at)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_knowledge_knowledge_id ON mai_knowledge (knowledge_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_knowledge_normalized_content ON mai_knowledge (normalized_content)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_messages_group_id ON mai_messages (group_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_messages_message_id ON mai_messages (message_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_messages_platform ON mai_messages (platform)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_messages_session_id ON mai_messages (session_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_messages_user_id ON mai_messages (user_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_mai_messages_user_nickname ON mai_messages (user_nickname)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_online_time_timestamp ON online_time (timestamp)",
|
||||
"CREATE UNIQUE INDEX IF NOT EXISTS ix_person_info_person_id ON person_info (person_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_person_info_platform ON person_info (platform)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_person_info_user_id ON person_info (user_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_person_info_user_nickname ON person_info (user_nickname)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_thinking_questions_created_timestamp ON thinking_questions (created_timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_thinking_questions_updated_timestamp ON thinking_questions (updated_timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_tool_records_session_id ON tool_records (session_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_tool_records_timestamp ON tool_records (timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_tool_records_tool_id ON tool_records (tool_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_tool_records_tool_name ON tool_records (tool_name)",
|
||||
)
|
||||
|
||||
|
||||
def create_frozen_v2_schema(connection: Connection) -> None:
    """Create the frozen v2 schema on the given connection.

    Runs every statement in ``_V2_TABLE_STATEMENTS`` first and then every
    statement in ``_V2_INDEX_STATEMENTS``, so each index is created after
    the table it refers to.

    Args:
        connection: The database connection the DDL is executed on.
    """
    # Tuple concatenation preserves the "tables first, indexes second" order.
    for ddl in _V2_TABLE_STATEMENTS + _V2_INDEX_STATEMENTS:
        connection.exec_driver_sql(ddl)
|
||||
@@ -1,4 +1,4 @@
|
||||
"""旧版 ``0.x`` 数据库升级到最新 schema 的迁移逻辑。"""
|
||||
"""旧版 ``0.x`` 数据库升级到 v2 schema 的迁移逻辑。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -7,15 +7,16 @@ from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, cast
|
||||
|
||||
import json
|
||||
|
||||
import msgpack
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.engine import Connection
|
||||
|
||||
import json
|
||||
import msgpack
|
||||
|
||||
from src.common.logger import get_logger
|
||||
|
||||
from .exceptions import DatabaseMigrationExecutionError
|
||||
from .frozen_v2_schema import create_frozen_v2_schema
|
||||
from .models import DatabaseSchemaSnapshot, MigrationExecutionContext
|
||||
from .schema import SQLiteSchemaInspector
|
||||
|
||||
@@ -52,19 +53,15 @@ class LegacyTableData:
|
||||
|
||||
|
||||
def migrate_legacy_v1_to_v2(context: MigrationExecutionContext) -> None:
|
||||
"""执行旧版 ``0.x`` 数据库到最新 schema 的迁移。
|
||||
"""执行旧版 ``0.x`` 数据库到 v2 schema 的迁移。
|
||||
|
||||
Args:
|
||||
context: 当前迁移步骤执行上下文。
|
||||
"""
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
import src.common.database.database_model # noqa: F401
|
||||
|
||||
schema_inspector = SQLiteSchemaInspector()
|
||||
snapshot = schema_inspector.inspect(context.connection)
|
||||
_rename_legacy_v1_tables(context.connection, snapshot)
|
||||
SQLModel.metadata.create_all(context.connection)
|
||||
create_frozen_v2_schema(context.connection)
|
||||
|
||||
table_migration_jobs: List[Tuple[str, Callable[[MigrationExecutionContext], int]]] = [
|
||||
("chat_sessions", _migrate_chat_sessions),
|
||||
@@ -794,8 +791,6 @@ def _migrate_images(context: MigrationExecutionContext) -> int:
|
||||
if full_path and dedupe_key not in existing_keys:
|
||||
migrated_description = _normalize_required_text(row.get("description"))
|
||||
migrated_emotion = _normalize_optional_text(row.get("emotion"))
|
||||
if not migrated_description and migrated_emotion:
|
||||
migrated_description = migrated_emotion
|
||||
connection.execute(
|
||||
insert_sql,
|
||||
{
|
||||
@@ -803,7 +798,7 @@ def _migrate_images(context: MigrationExecutionContext) -> int:
|
||||
"description": migrated_description,
|
||||
"full_path": full_path,
|
||||
"image_type": "EMOJI",
|
||||
"emotion": None,
|
||||
"emotion": migrated_emotion,
|
||||
"query_count": _normalize_int(row.get("query_count"), default=0),
|
||||
"is_registered": _normalize_bool(row.get("is_registered"), default=False),
|
||||
"is_banned": _normalize_bool(row.get("is_banned"), default=False),
|
||||
|
||||
269
src/common/database/migrations/v2_to_v3.py
Normal file
269
src/common/database/migrations/v2_to_v3.py
Normal file
@@ -0,0 +1,269 @@
|
||||
"""v2 schema 升级到 v3 的迁移逻辑。"""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.engine import Connection
|
||||
|
||||
from src.common.logger import get_logger
|
||||
|
||||
from .exceptions import DatabaseMigrationExecutionError
|
||||
from .models import MigrationExecutionContext
|
||||
from .schema import SQLiteSchemaInspector
|
||||
|
||||
logger = get_logger("database_migration")
|
||||
|
||||
_V2_IMAGES_BACKUP_TABLE = "__v2_images_backup"
|
||||
_V3_IMAGES_CREATE_SQL = """
|
||||
CREATE TABLE images (
|
||||
id INTEGER NOT NULL,
|
||||
image_hash VARCHAR(255) NOT NULL,
|
||||
description VARCHAR NOT NULL,
|
||||
full_path VARCHAR(1024) NOT NULL,
|
||||
image_type VARCHAR(5),
|
||||
query_count INTEGER NOT NULL,
|
||||
is_registered BOOLEAN NOT NULL,
|
||||
is_banned BOOLEAN NOT NULL,
|
||||
no_file_flag BOOLEAN NOT NULL,
|
||||
record_time DATETIME,
|
||||
register_time DATETIME,
|
||||
last_used_time DATETIME,
|
||||
vlm_processed BOOLEAN NOT NULL,
|
||||
PRIMARY KEY (id)
|
||||
)
|
||||
"""
|
||||
_V3_IMAGES_INDEX_STATEMENTS = (
|
||||
"CREATE INDEX ix_images_image_hash ON images (image_hash)",
|
||||
"CREATE INDEX ix_images_record_time ON images (record_time)",
|
||||
)
|
||||
|
||||
|
||||
def migrate_v2_to_v3(context: MigrationExecutionContext) -> None:
    """Run the v2 -> v3 schema migration.

    Steps, in order:
      1. Copy ``action_records`` rows into ``tool_records``.
      2. Drop the ``action_records`` table.
      3. Drop the ``thinking_questions`` table.
      4. Rebuild the ``images`` table for v3.

    Progress is reported through ``context`` after each of the three tables.

    Args:
        context: Execution context of the current migration step; provides
            the connection and the progress-reporting callbacks.
    """
    connection = context.connection

    # Row totals are sampled up front so the progress display has a fixed target.
    tracked_tables = ("action_records", "thinking_questions", "images")
    expected_records = sum(_count_table_rows(connection, name) for name in tracked_tables)
    context.start_progress(
        total_tables=3,
        total_records=expected_records,
        description="v2 -> v3 迁移进度",
        table_unit_name="表",
        record_unit_name="记录",
    )

    # The copy has to happen before ``action_records`` is dropped below.
    migrated_tool_records = _migrate_action_records_to_tool_records(connection)

    for obsolete_table in ("action_records", "thinking_questions"):
        dropped_rows = _count_table_rows(connection, obsolete_table)
        _drop_table_if_exists(connection, obsolete_table)
        context.advance_progress(
            records=dropped_rows,
            completed_tables=1,
            item_name=obsolete_table,
        )

    migrated_image_rows = _migrate_images_table_to_v3(connection)
    context.advance_progress(
        records=migrated_image_rows,
        completed_tables=1,
        item_name="images",
    )

    summary = (
        "v2 -> v3 数据库迁移完成: "
        f"tool_records补迁移={migrated_tool_records},"
        f"images重建={migrated_image_rows}"
    )
    logger.info(summary)
|
||||
|
||||
|
||||
def _count_table_rows(connection: Connection, table_name: str) -> int:
    """Return the number of rows in ``table_name``, or 0 if the table is missing.

    Args:
        connection: The database connection to query.
        table_name: Name of the table to count; interpolated into the SQL
            as a double-quoted identifier.

    Returns:
        int: The ``COUNT(*)`` result, or 0 when the table does not exist or
        the query yields no row.
    """
    if SQLiteSchemaInspector().table_exists(connection, table_name):
        count_query = text(f'SELECT COUNT(*) FROM "{table_name}"')
        first_row = connection.execute(count_query).first()
        if first_row:
            return int(first_row[0])
    return 0
|
||||
|
||||
|
||||
def _drop_table_if_exists(connection: Connection, table_name: str) -> None:
    """Drop ``table_name``; a missing table is silently ignored.

    Args:
        connection: The database connection the statement is executed on.
        table_name: Name of the table to drop; interpolated into the SQL as
            a double-quoted identifier.
    """
    drop_statement = f'DROP TABLE IF EXISTS "{table_name}"'
    connection.exec_driver_sql(drop_statement)
|
||||
|
||||
|
||||
def _migrate_action_records_to_tool_records(connection: Connection) -> int:
    """Copy leftover v2 ``action_records`` rows into ``tool_records``.

    Rows whose ``action_id`` already appears as a ``tool_id`` in
    ``tool_records`` are skipped by the ``WHERE NOT EXISTS`` filter.

    Args:
        connection: The database connection the migration runs on.

    Returns:
        int: Number of rows actually inserted into ``tool_records``. This is
        0 when ``action_records`` does not exist, and may be smaller than the
        ``action_records`` row count when some rows were filtered out.
    """
    schema_inspector = SQLiteSchemaInspector()
    if not schema_inspector.table_exists(connection, "action_records"):
        return 0

    # Measure the target table before and after the insert. Reporting the
    # source table's row count would over-count whenever the NOT EXISTS
    # filter skips rows that are already present.
    tool_rows_before = _count_table_rows(connection, "tool_records")
    connection.execute(
        text(
            """
            INSERT INTO tool_records (
                tool_id,
                timestamp,
                session_id,
                tool_name,
                tool_reasoning,
                tool_data,
                tool_builtin_prompt,
                tool_display_prompt
            )
            SELECT
                action_id,
                timestamp,
                session_id,
                action_name,
                action_reasoning,
                action_data,
                action_builtin_prompt,
                action_display_prompt
            FROM action_records
            WHERE NOT EXISTS (
                SELECT 1
                FROM tool_records
                WHERE tool_records.tool_id = action_records.action_id
            )
            """
        )
    )
    tool_rows_after = _count_table_rows(connection, "tool_records")
    return tool_rows_after - tool_rows_before
|
||||
|
||||
|
||||
def _migrate_images_table_to_v3(connection: Connection) -> int:
    """Rebuild the ``images`` table without the ``emotion`` column.

    The existing table is renamed to a backup, a fresh v3 ``images`` table is
    created, every backup row is copied across (with ``description`` computed
    by ``_migrate_v3_emoji_description``), the backup is dropped, and finally
    the v3 indexes are created.

    Args:
        connection: The database connection the migration runs on.

    Returns:
        int: Number of rows copied into the rebuilt table. Returns 0 when
        ``images`` does not exist, and the current row count when the table
        already lacks the ``emotion`` column (nothing is rebuilt).

    Raises:
        DatabaseMigrationExecutionError: If the backup table already exists.
    """
    inspector = SQLiteSchemaInspector()
    if not inspector.table_exists(connection, "images"):
        return 0

    images_schema = inspector.get_table_schema(connection, "images")
    if not images_schema.has_column("emotion"):
        # No ``emotion`` column, so there is nothing to rebuild.
        return _count_table_rows(connection, "images")

    if inspector.table_exists(connection, _V2_IMAGES_BACKUP_TABLE):
        raise DatabaseMigrationExecutionError(
            f"检测到残留备份表 {_V2_IMAGES_BACKUP_TABLE},无法安全执行 v2 -> v3 images 迁移。"
        )

    connection.exec_driver_sql(f'ALTER TABLE "images" RENAME TO "{_V2_IMAGES_BACKUP_TABLE}"')
    connection.exec_driver_sql(_V3_IMAGES_CREATE_SQL)

    select_backup = text(f'SELECT * FROM "{_V2_IMAGES_BACKUP_TABLE}" ORDER BY id')
    backup_rows = connection.execute(select_backup).mappings().all()

    insert_statement = text(
        """
        INSERT INTO images (
            id,
            image_hash,
            description,
            full_path,
            image_type,
            query_count,
            is_registered,
            is_banned,
            no_file_flag,
            record_time,
            register_time,
            last_used_time,
            vlm_processed
        ) VALUES (
            :id,
            :image_hash,
            :description,
            :full_path,
            :image_type,
            :query_count,
            :is_registered,
            :is_banned,
            :no_file_flag,
            :record_time,
            :register_time,
            :last_used_time,
            :vlm_processed
        )
        """
    )

    # Columns copied verbatim versus columns coerced to bool.
    passthrough_columns = ("id", "image_type", "record_time", "register_time", "last_used_time")
    flag_columns = ("is_registered", "is_banned", "no_file_flag", "vlm_processed")

    for backup_row in backup_rows:
        params: Dict[str, Any] = {column: backup_row.get(column) for column in passthrough_columns}
        params.update({column: bool(backup_row.get(column)) for column in flag_columns})
        params["image_hash"] = str(backup_row.get("image_hash") or "").strip()
        params["full_path"] = str(backup_row.get("full_path") or "").strip()
        params["description"] = _migrate_v3_emoji_description(backup_row)
        params["query_count"] = int(backup_row.get("query_count") or 0)
        connection.execute(insert_statement, params)

    # Drop the backup before creating the indexes, as the original ordering does.
    connection.exec_driver_sql(f'DROP TABLE "{_V2_IMAGES_BACKUP_TABLE}"')
    for index_ddl in _V3_IMAGES_INDEX_STATEMENTS:
        connection.exec_driver_sql(index_ddl)

    return len(backup_rows)
|
||||
|
||||
|
||||
def _migrate_v3_emoji_description(row: Dict[str, Any]) -> str:
    """Compute the v3 ``description`` value for one ``images`` row.

    In v3 the ``description`` of an emoji holds its tag list, so:

    1. If the old ``emotion`` value is non-empty, it is normalized and used
       as the new ``description``.
    2. Otherwise the current ``description`` is normalized and kept.
    3. Rows whose ``image_type`` is not ``EMOJI`` (compared case-insensitively)
       keep their stripped description unchanged.

    Args:
        row: Mapping of column name to value for a v2 ``images`` row.

    Returns:
        str: The description to store in the v3 row.
    """
    description = str(row.get("description") or "").strip()
    kind = str(row.get("image_type") or "").strip().upper()
    emotion = str(row.get("emotion") or "").strip()

    if kind != "EMOJI":
        return description

    # A non-empty emotion wins over the description as the tag source.
    tags = _normalize_emoji_tag_text(emotion or description)
    return ",".join(tags) if tags else description
|
||||
|
||||
|
||||
def _normalize_emoji_tag_text(raw_value: Any) -> List[str]:
    """Split emoji tag text into a de-duplicated list of tags.

    The input is stringified and stripped. Full-width commas, ideographic
    commas, ASCII and full-width semicolons, newlines, carriage returns and
    tabs are all treated as equivalent to an ASCII comma. Each tag is
    stripped, empty tags are dropped, and duplicates are removed by comparing
    lowercased text; the first spelling seen is kept and the original order
    is preserved.

    Args:
        raw_value: Raw tag text; any falsy value yields an empty list.

    Returns:
        List[str]: The ordered, de-duplicated tags.
    """
    text_value = str(raw_value or "").strip()
    if not text_value:
        return []

    # Fold every alternative separator into "," in a single pass.
    alternate_separators = (",", "、", ";", ";", "\n", "\r", "\t")
    unified = text_value.translate(str.maketrans(dict.fromkeys(alternate_separators, ",")))

    # A dict keeps insertion order, so the first spelling of each tag wins.
    first_seen: dict = {}
    for piece in unified.split(","):
        tag = piece.strip()
        if tag:
            first_seen.setdefault(tag.lower(), tag)
    return list(first_seen.values())
|
||||
Reference in New Issue
Block a user