feat:记忆系统重出江湖,移除了即时记忆和定期记忆

This commit is contained in:
SengokuCola
2025-08-27 22:18:22 +08:00
parent 01197cb2b7
commit 6d3e9fd3d4
14 changed files with 481 additions and 486 deletions

View File

@@ -834,3 +834,79 @@ def parse_keywords_string(keywords_input) -> list[str]:
# 如果没有分隔符,返回单个关键词
return [keywords_str] if keywords_str else []
def cut_key_words(concept_name: str) -> list[str]:
    """Tokenize a concept name with jieba and strip noise tokens.

    Pipeline:
      1. Segment ``concept_name`` with ``jieba.cut``.
      2. Drop blank and punctuation-only tokens.
      3. Merge ``A <conjunction> B`` triples into one token ``A<conjunction>B``
         when both neighbours are meaningful words.
      4. Drop stand-alone conjunctions and stop words, then de-duplicate
         while preserving first-seen order.

    Args:
        concept_name: Raw concept string to segment.

    Returns:
        Ordered, de-duplicated list of meaningful tokens; may be empty.
    """
    # NOTE(review): several entries in these sets render as "" — they look
    # like single-character Chinese words lost to an encoding problem (the
    # multi-character entries 以及/并且/而且/或者 survived). The duplicate ""
    # literals collapsed to one entry at runtime anyway; TODO: restore the
    # missing characters from the original source.
    conjunctions = {"", "以及", "并且", "而且", "或者"}
    stop_words = {"", "而且", "或者", "以及"}
    punctuation = set(",。!?、;:()【】《》“”‘’—…·-——,.!?;:()[]<>'\"/\\")

    def _is_punct(token: str) -> bool:
        """True when every character of *token* is punctuation (vacuously true for "")."""
        return all(ch in punctuation for ch in token)

    def _is_word(token: str) -> bool:
        """True for a meaningful word: non-empty, not a conjunction/stop word/punctuation run."""
        return (
            bool(token)
            and token not in conjunctions
            and token not in stop_words
            and not _is_punct(token)
        )

    # Pass 1: strip whitespace, drop empty and punctuation-only tokens.
    cleaned: list[str] = []
    for raw in jieba.cut(concept_name):
        tok = raw.strip()
        if tok and not _is_punct(tok):
            cleaned.append(tok)

    # Pass 2: merge "A <conjunction> B" into "A<conjunction>B" when both
    # neighbours are meaningful words; otherwise the conjunction passes
    # through and is discarded in pass 3.
    merged: list[str] = []
    i = 0
    n = len(cleaned)
    while i < n:
        tok = cleaned[i]
        if tok in conjunctions and merged and i + 1 < n:
            left, right = merged[-1], cleaned[i + 1]
            if _is_word(left) and _is_word(right):
                # Replace the left neighbour with the fused token and skip
                # both the conjunction and its right neighbour.
                merged[-1] = f"{left}{tok}{right}"
                i += 2
                continue
        merged.append(tok)
        i += 1

    # NOTE(review): a disabled ban-word filter that read
    # global_config.memory.memory_ban_words was commented out here; removed
    # as dead code.

    # Pass 3: drop leftover conjunctions/stop words/punctuation and
    # de-duplicate while keeping first-seen order.
    result: list[str] = []
    seen: set[str] = set()
    for tok in merged:
        if tok in conjunctions or tok in stop_words or _is_punct(tok):
            continue
        if tok not in seen:
            seen.add(tok)
            result.append(tok)
    return result