refactor: 优化数据库操作和模型定义,增强表达方式和黑话表的插入逻辑
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
from typing import Optional
|
||||
from sqlalchemy import Column, Float, Enum as SQLEnum, DateTime
|
||||
from sqlmodel import SQLModel, Field, LargeBinary
|
||||
from enum import Enum
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import Column, DateTime, Enum as SQLEnum, Float
|
||||
from sqlmodel import Field, LargeBinary, SQLModel
|
||||
|
||||
|
||||
class ModelUser(str, Enum):
|
||||
@@ -172,8 +173,8 @@ class Expression(SQLModel, table=True):
|
||||
|
||||
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
|
||||
|
||||
situation: str = Field(index=True, max_length=255, primary_key=True) # 情景
|
||||
style: str = Field(index=True, max_length=255, primary_key=True) # 风格
|
||||
situation: str = Field(index=True, max_length=255) # 情景
|
||||
style: str = Field(index=True, max_length=255) # 风格
|
||||
|
||||
# context: str # 上下文
|
||||
# up_content: str
|
||||
@@ -200,7 +201,7 @@ class Jargon(SQLModel, table=True):
|
||||
|
||||
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
|
||||
|
||||
content: str = Field(index=True, max_length=255, primary_key=True) # 黑话内容
|
||||
content: str = Field(index=True, max_length=255) # 黑话内容
|
||||
raw_content: Optional[str] = Field(default=None, nullable=True) # 原始内容,未处理的黑话内容,为List[str]
|
||||
|
||||
meaning: str # 黑话含义
|
||||
|
||||
@@ -329,7 +329,13 @@ class ExpressionLearner:
|
||||
return filtered_expressions
|
||||
|
||||
# ====== DB 操作相关 ======
|
||||
async def _upsert_expression_to_db(self, situation: str, style: str):
|
||||
async def _upsert_expression_to_db(self, situation: str, style: str) -> None:
|
||||
"""将表达方式写入数据库,存在时更新,不存在时新增。
|
||||
|
||||
Args:
|
||||
situation: 表达方式对应的使用情景。
|
||||
style: 表达方式风格。
|
||||
"""
|
||||
expr, similarity = self._find_similar_expression(situation) or (None, 0)
|
||||
if expr:
|
||||
# 根据相似度决定是否使用 LLM 总结
|
||||
@@ -340,7 +346,13 @@ class ExpressionLearner:
|
||||
# 没有找到匹配的记录,创建新记录
|
||||
self._create_expression(situation, style)
|
||||
|
||||
def _create_expression(self, situation: str, style: str):
|
||||
def _create_expression(self, situation: str, style: str) -> None:
|
||||
"""创建新的表达方式记录。
|
||||
|
||||
Args:
|
||||
situation: 表达方式对应的使用情景。
|
||||
style: 表达方式风格。
|
||||
"""
|
||||
content_list = [situation]
|
||||
try:
|
||||
with get_db_session() as db:
|
||||
@@ -353,6 +365,7 @@ class ExpressionLearner:
|
||||
last_active_time=datetime.now(),
|
||||
)
|
||||
db.add(new_expr)
|
||||
db.flush()
|
||||
except Exception as e:
|
||||
logger.error(f"创建表达方式失败: {e}")
|
||||
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
from collections import OrderedDict
|
||||
from json_repair import repair_json
|
||||
from sqlmodel import select
|
||||
from typing import List, Optional, Dict, Callable, TypedDict, Set
|
||||
from typing import Callable, Dict, List, Optional, Set, TypedDict
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import random
|
||||
|
||||
from src.common.logger import get_logger
|
||||
from json_repair import repair_json
|
||||
from sqlmodel import select
|
||||
|
||||
from src.common.data_models.jargon_data_model import MaiJargon
|
||||
from src.common.database.database import get_db_session
|
||||
from src.common.database.database_model import Jargon
|
||||
from src.common.data_models.jargon_data_model import MaiJargon
|
||||
from src.config.config import model_config, global_config
|
||||
from src.common.logger import get_logger
|
||||
from src.config.config import global_config, model_config
|
||||
from src.llm_models.utils_model import LLMRequest
|
||||
from src.prompt.prompt_manager import prompt_manager
|
||||
|
||||
@@ -273,11 +274,12 @@ class JargonMiner:
|
||||
try:
|
||||
with get_db_session() as session:
|
||||
session.add(new_jargon)
|
||||
session.flush()
|
||||
saved += 1
|
||||
self._add_to_cache(content)
|
||||
except Exception as e:
|
||||
logger.error(f"保存新黑话 '{content}' 失败: {e}")
|
||||
continue
|
||||
finally:
|
||||
self._add_to_cache(content)
|
||||
# 固定输出提取的jargon结果,格式化为可读形式(只要有提取结果就输出)
|
||||
if uniq_entries:
|
||||
# 收集所有提取的jargon内容
|
||||
|
||||
Reference in New Issue
Block a user