chore(A_memorix): 回退实现整理以恢复本地主导基线

This commit is contained in:
A-Dawn
2026-04-21 14:06:46 +08:00
parent 54d3f156d4
commit c6e2c6e003
25 changed files with 39 additions and 22 deletions

View File

@@ -23,7 +23,10 @@ from src.common.logger import get_logger
from .presets import (
EmbeddingModelConfig,
get_custom_config,
validate_config_compatibility,
are_models_compatible,
)
from ..utils.quantization import QuantizationType
logger = get_logger("A_Memorix.EmbeddingManager")

View File

@@ -3,7 +3,7 @@
"""
from dataclasses import dataclass
from typing import Optional, Union
from typing import Optional, Dict, Any, Union
from pathlib import Path

View File

@@ -7,7 +7,7 @@
import asyncio
import re
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Any, Tuple
from typing import Optional, List, Dict, Any, Tuple, Union
from enum import Enum
import numpy as np
@@ -320,7 +320,7 @@ class DualPathRetriever:
# 调试模式:打印结果原文
if self.config.debug:
logger.info("[DEBUG] 检索结果内容原文:")
logger.info(f"[DEBUG] 检索结果内容原文:")
for i, res in enumerate(results):
logger.info(f" {i+1}. [{res.result_type}] (Score: {res.score:.4f}) {res.content}")

View File

@@ -4,8 +4,9 @@ Personalized PageRank实现
提供个性化的图节点排序功能。
"""
from typing import Dict, List, Optional, Tuple, Any
from typing import Dict, List, Optional, Tuple, Union, Any
from dataclasses import dataclass
import numpy as np
from src.common.logger import get_logger
from ..storage import GraphStore
@@ -48,7 +49,7 @@ class PageRankConfig:
raise ValueError(f"min_iterations必须大于等于0: {self.min_iterations}")
if self.min_iterations >= self.max_iter:
raise ValueError("min_iterations必须小于max_iter")
raise ValueError(f"min_iterations必须小于max_iter")
class PersonalizedPageRank:

View File

@@ -56,7 +56,7 @@ class ThresholdConfig:
raise ValueError(f"max_threshold必须在[0, 1]之间: {self.max_threshold}")
if self.min_threshold >= self.max_threshold:
raise ValueError("min_threshold必须小于max_threshold")
raise ValueError(f"min_threshold必须小于max_threshold")
if not 0 <= self.percentile <= 100:
raise ValueError(f"percentile必须在[0, 100]之间: {self.percentile}")

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import asyncio
from pathlib import Path
from typing import Any, Callable, Coroutine, cast
from src.common.logger import get_logger

View File

@@ -4,6 +4,7 @@ import asyncio
import json
import pickle
import time
import uuid
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
@@ -18,7 +19,7 @@ from src.services.llm_service import LLMServiceClient
from ...paths import default_data_dir, resolve_repo_path
from ..embedding import create_embedding_api_adapter
from ..retrieval import RetrievalResult, SparseBM25Config, SparseBM25Index
from ..retrieval import RetrievalResult, SparseBM25Config, SparseBM25Index, TemporalQueryOptions
from ..storage import GraphStore, MetadataStore, QuantizationType, SparseMatrixFormat, VectorStore
from ..utils.aggregate_query_service import AggregateQueryService
from ..utils.episode_retrieval_service import EpisodeRetrievalService

View File

@@ -9,6 +9,7 @@ from enum import Enum
from pathlib import Path
from typing import Optional, Union, Tuple, List, Dict, Set, Any
from collections import defaultdict
import threading
import asyncio
import numpy as np
@@ -41,6 +42,7 @@ except ImportError:
import contextlib
from src.common.logger import get_logger
from ..utils.hash import compute_hash
from ..utils.io import atomic_write
logger = get_logger("A_Memorix.GraphStore")

View File

@@ -4,6 +4,7 @@
基于Faiss的高效向量存储与检索,支持SQ8量化、Append-Only磁盘存储和内存映射。
"""
import os
import pickle
import hashlib
import shutil
@@ -190,7 +191,7 @@ class VectorStore:
self._update_reservoir(batch_vecs)
# 这里的 TRAIN_SIZE 取默认 10k,或者根据当前数据量动态判断
if len(self._reservoir_buffer) >= 10000:
logger.info("训练样本达到上限,开始训练...")
logger.info(f"训练样本达到上限,开始训练...")
self._train_and_replay_unlocked()
self._total_added += len(batch_ids)

View File

@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import List, Dict, Any
from typing import List, Dict, Any, Optional, Union
from dataclasses import dataclass, field
from enum import Enum
import hashlib

View File

@@ -1,5 +1,5 @@
import re
from typing import List
from typing import List, Dict, Any
from .base import BaseStrategy, ProcessedChunk, KnowledgeType, SourceInfo, ChunkContext
class FactualStrategy(BaseStrategy):

View File

@@ -1,5 +1,5 @@
import re
from typing import List
from typing import List, Dict, Any
from .base import BaseStrategy, ProcessedChunk, KnowledgeType, SourceInfo, ChunkContext
class NarrativeStrategy(BaseStrategy):

View File

@@ -1,4 +1,4 @@
from typing import List
from typing import List, Dict, Any
from .base import BaseStrategy, ProcessedChunk, KnowledgeType, SourceInfo, ChunkContext, ChunkFlags
class QuoteStrategy(BaseStrategy):

View File

@@ -6,6 +6,7 @@
import hashlib
import re
from typing import Union
def compute_hash(text: str, hash_type: str = "sha256") -> str:

View File

@@ -5,6 +5,7 @@ IO Utilities
"""
import os
import shutil
import contextlib
from pathlib import Path
from typing import Union

View File

@@ -4,7 +4,7 @@
实现 Aho-Corasick 算法用于多模式匹配。
"""
from typing import List, Dict, Tuple, Set
from typing import List, Dict, Tuple, Set, Any
from collections import deque

View File

@@ -3,7 +3,7 @@
from __future__ import annotations
import hashlib
from typing import Any, Dict, List, Sequence, Tuple
from typing import Any, Dict, List, Optional, Sequence, Tuple
from ..retrieval.dual_path import RetrievalResult

View File

@@ -234,7 +234,7 @@ async def ensure_runtime_self_check(
sample_text=sample_text,
)
try:
plugin_or_config._runtime_self_check_report = report
setattr(plugin_or_config, "_runtime_self_check_report", report)
except Exception:
pass
return report

View File

@@ -287,7 +287,7 @@ class SearchExecutionService:
async def _executor() -> Dict[str, Any]:
original_ppr = bool(getattr(retriever.config, "enable_ppr", True))
retriever.config.enable_ppr = bool(request.enable_ppr)
setattr(retriever.config, "enable_ppr", bool(request.enable_ppr))
started_at = time.time()
try:
retrieved = await retriever.retrieve(
@@ -380,7 +380,7 @@ class SearchExecutionService:
elapsed_ms = (time.time() - started_at) * 1000.0
return {"results": retrieved, "elapsed_ms": elapsed_ms}
finally:
retriever.config.enable_ppr = original_ppr
setattr(retriever.config, "enable_ppr", original_ppr)
dedup_hit = False
try:

View File

@@ -5,6 +5,7 @@
导入到 A_memorix 的存储组件中。
"""
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import json

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from typing import Any, Dict
from typing import Any, Dict, Optional
_runtime_kernel: Any = None

View File

@@ -15,7 +15,7 @@ for _path in (SRC_ROOT, PROJECT_ROOT, PLUGIN_ROOT):
if _path_str not in sys.path:
sys.path.insert(0, _path_str)
from A_memorix.paths import config_path, default_data_dir
from A_memorix.paths import config_path, default_data_dir, resolve_repo_path
DEFAULT_CONFIG_PATH = config_path()
DEFAULT_DATA_DIR = default_data_dir()

View File

@@ -10,12 +10,14 @@ LPMM 到 A_memorix 存储转换器
"""
import sys
import os
import json
import argparse
import asyncio
import pickle
import logging
from pathlib import Path
from typing import Dict, Any, Tuple
from typing import Dict, Any, List, Tuple
import numpy as np
import tomlkit

View File

@@ -12,14 +12,17 @@
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional
from rich.console import Console
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import argparse
import asyncio
import hashlib
import json
import os
import random
import sys
import time
import tomlkit

View File

@@ -17,7 +17,7 @@ import sqlite3
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
import tomlkit