Ruff fix

2025-11-13 13:24:55 +08:00
parent e78a070fbd
commit 7839acd25d
52 changed files with 1322 additions and 1408 deletions
--- a/src/express/expressor_model/tokenizer.py
+++ b/src/express/expressor_model/tokenizer.py
@@ -3,17 +3,20 @@ from typing import List, Optional, Set

 try:
    import jieba
+
    _HAS_JIEBA = True
 except Exception:
    _HAS_JIEBA = False

 _WORD_RE = re.compile(r"[A-Za-z0-9_]+")
 # 匹配纯符号的正则表达式
-_SYMBOL_RE = re.compile(r'^[^\w\u4e00-\u9fff]+$')
+_SYMBOL_RE = re.compile(r"^[^\w\u4e00-\u9fff]+$")
+

 def simple_en_tokenize(text: str) -> List[str]:
    return _WORD_RE.findall(text.lower())

+
 class Tokenizer:
    def __init__(self, stopwords: Optional[Set[str]] = None, use_jieba: bool = True):
        self.stopwords = stopwords or set()
@@ -28,4 +31,4 @@ class Tokenizer:
        else:
            toks = simple_en_tokenize(text)
        # 过滤掉纯符号和停用词
-        return [t for t in toks if t not in self.stopwords and not _SYMBOL_RE.match(t)]
+        return [t for t in toks if t not in self.stopwords and not _SYMBOL_RE.match(t)]