添加 A_Memorix 插件 v2.0.0(包含运行时与文档)
引入 A_Memorix 插件 v2.0.0:新增大量运行时组件、存储/模式更新、检索能力提升、管理工具、导入/调优工作流以及相关文档。关键新增内容包括:lifecycle_orchestrator、SDKMemoryKernel/运行时初始化器、新的存储层与 metadata_store 变更(SCHEMA_VERSION v8)、检索增强(双路径检索、图关系召回、稀疏 BM25),以及多种工具服务(episode/person_profile/relation/segmentation/tuning/search execution)。同时新增 Web 导入/摘要导入器及大量维护脚本。还更新了插件清单、embedding API 适配器、plugin.py、requirements/pyproject,以及主入口文件,使新插件接入项目。该变更为 2.0.0 版本发布做好准备,实现统一的 SDK Tool 接口并扩展整体运行能力。
This commit is contained in:
@@ -1,46 +1,55 @@
|
||||
"""
|
||||
Hash-based embedding adapter used by the SDK runtime.
|
||||
请求式嵌入 API 适配器。
|
||||
|
||||
The plugin runtime cannot import MaiBot host embedding internals from ``src.chat``
|
||||
or ``src.llm_models``. This adapter keeps A_Memorix self-contained and stable in
|
||||
Runner by generating deterministic dense vectors locally.
|
||||
恢复 v1.0.1 的真实 embedding 请求语义:
|
||||
- 通过宿主模型配置探测/请求 embedding
|
||||
- 支持 dimensions 参数
|
||||
- 支持批量与重试
|
||||
- 不再提供本地 hash fallback
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
import asyncio
|
||||
import time
|
||||
from typing import List, Optional, Union
|
||||
from typing import Any, List, Optional, Union
|
||||
|
||||
import aiohttp
|
||||
import numpy as np
|
||||
import openai
|
||||
|
||||
from src.common.logger import get_logger
|
||||
|
||||
from src.config.config import config_manager
|
||||
from src.config.model_configs import APIProvider, ModelInfo
|
||||
from src.llm_models.exceptions import NetworkConnectionError
|
||||
from src.llm_models.model_client.base_client import client_registry
|
||||
|
||||
logger = get_logger("A_Memorix.EmbeddingAPIAdapter")
|
||||
|
||||
_TOKEN_PATTERN = re.compile(r"[A-Za-z0-9_\u4e00-\u9fff]{1,}")
|
||||
|
||||
|
||||
class EmbeddingAPIAdapter:
|
||||
"""Deterministic local embedding adapter."""
|
||||
"""适配宿主 embedding 请求接口。"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
batch_size: int = 32,
|
||||
max_concurrent: int = 5,
|
||||
default_dimension: int = 256,
|
||||
default_dimension: int = 1024,
|
||||
enable_cache: bool = False,
|
||||
model_name: str = "hash-v1",
|
||||
model_name: str = "auto",
|
||||
retry_config: Optional[dict] = None,
|
||||
) -> None:
|
||||
self.batch_size = max(1, int(batch_size))
|
||||
self.max_concurrent = max(1, int(max_concurrent))
|
||||
self.default_dimension = max(32, int(default_dimension))
|
||||
self.default_dimension = max(1, int(default_dimension))
|
||||
self.enable_cache = bool(enable_cache)
|
||||
self.model_name = str(model_name or "hash-v1")
|
||||
self.model_name = str(model_name or "auto")
|
||||
|
||||
self.retry_config = retry_config or {}
|
||||
self.max_attempts = max(1, int(self.retry_config.get("max_attempts", 5)))
|
||||
self.max_wait_seconds = max(0.1, float(self.retry_config.get("max_wait_seconds", 40)))
|
||||
self.min_wait_seconds = max(0.1, float(self.retry_config.get("min_wait_seconds", 3)))
|
||||
self.backoff_multiplier = max(1.0, float(self.retry_config.get("backoff_multiplier", 3)))
|
||||
|
||||
self._dimension: Optional[int] = None
|
||||
self._dimension_detected = False
|
||||
@@ -49,57 +58,164 @@ class EmbeddingAPIAdapter:
|
||||
self._total_time = 0.0
|
||||
|
||||
logger.info(
|
||||
"EmbeddingAPIAdapter 初始化: model=%s, batch_size=%s, dimension=%s",
|
||||
self.model_name,
|
||||
self.batch_size,
|
||||
self.default_dimension,
|
||||
"EmbeddingAPIAdapter 初始化: "
|
||||
f"batch_size={self.batch_size}, "
|
||||
f"max_concurrent={self.max_concurrent}, "
|
||||
f"default_dim={self.default_dimension}, "
|
||||
f"model={self.model_name}"
|
||||
)
|
||||
|
||||
def _get_current_model_config(self):
    """Return the host's current model configuration.

    The configuration is fetched from ``config_manager`` on every call;
    nothing is cached on the adapter instance.
    """
    current_config = config_manager.get_model_config()
    return current_config
|
||||
|
||||
@staticmethod
def _find_model_info(model_name: str) -> ModelInfo:
    """Look up the configured model whose ``name`` equals ``model_name``.

    Args:
        model_name: Name to search for in the host configuration's ``models``.

    Returns:
        The first model entry with a matching name.

    Raises:
        ValueError: No configured model carries that name.
    """
    configured_models = config_manager.get_model_config().models
    found = next((entry for entry in configured_models if entry.name == model_name), None)
    if found is None:
        raise ValueError(f"未找到 embedding 模型: {model_name}")
    return found
|
||||
|
||||
@staticmethod
def _find_provider(provider_name: str) -> APIProvider:
    """Look up the configured API provider whose ``name`` equals ``provider_name``.

    Args:
        provider_name: Name to search for in the host configuration's
            ``api_providers``.

    Returns:
        The first provider entry with a matching name.

    Raises:
        ValueError: No configured provider carries that name.
    """
    providers = config_manager.get_model_config().api_providers
    for provider in providers:
        if provider.name != provider_name:
            continue
        return provider
    else:
        # Loop finished without returning: nothing matched.
        raise ValueError(f"未找到 embedding provider: {provider_name}")
|
||||
|
||||
def _resolve_candidate_model_names(self) -> List[str]:
|
||||
task_config = self._get_current_model_config().model_task_config.embedding
|
||||
configured = list(getattr(task_config, "model_list", []) or [])
|
||||
if self.model_name and self.model_name != "auto":
|
||||
return [self.model_name, *[name for name in configured if name != self.model_name]]
|
||||
return configured
|
||||
|
||||
@staticmethod
|
||||
def _validate_embedding_vector(embedding: Any, *, source: str) -> np.ndarray:
|
||||
array = np.asarray(embedding, dtype=np.float32)
|
||||
if array.ndim != 1:
|
||||
raise RuntimeError(f"{source} 返回的 embedding 维度非法: ndim={array.ndim}")
|
||||
if array.size <= 0:
|
||||
raise RuntimeError(f"{source} 返回了空 embedding")
|
||||
if not np.all(np.isfinite(array)):
|
||||
raise RuntimeError(f"{source} 返回了非有限 embedding 值")
|
||||
return array
|
||||
|
||||
async def _request_with_retry(self, client, model_info, text: str, extra_params: dict):
    """Call ``client.get_embedding`` and retry on transient network failures.

    Only connection/timeout style errors are retried; any other exception
    propagates immediately. The delay before retry ``k`` is
    ``min(max_wait_seconds, min_wait_seconds * backoff_multiplier ** (k - 1))``.

    Args:
        client: Object exposing an awaitable ``get_embedding`` method.
        model_info: Model descriptor forwarded to the client unchanged.
        text: Input text to embed.
        extra_params: Extra request parameters forwarded to the client.

    Returns:
        Whatever ``client.get_embedding`` returns.

    Raises:
        Exception: The last retriable error once ``max_attempts`` is
            exhausted, or any non-retriable error from the client.
        RuntimeError: No attempt was made at all.
    """
    retriable_exceptions = (
        openai.APIConnectionError,
        openai.APITimeoutError,
        aiohttp.ClientError,
        asyncio.TimeoutError,
        NetworkConnectionError,
    )

    for attempt in range(1, self.max_attempts + 1):
        try:
            return await client.get_embedding(
                model_info=model_info,
                embedding_input=text,
                extra_params=extra_params,
            )
        except retriable_exceptions as exc:
            if attempt >= self.max_attempts:
                # Out of attempts: surface the original error with its traceback.
                raise
            wait_seconds = min(
                self.max_wait_seconds,
                self.min_wait_seconds * (self.backoff_multiplier ** (attempt - 1)),
            )
            logger.warning(
                "Embedding 请求失败,重试 "
                f"{attempt}/{max(1, self.max_attempts - 1)},"
                f"{wait_seconds:.1f}s 后重试: {exc}"
            )
            await asyncio.sleep(wait_seconds)

    # Only reachable when the loop body never ran, i.e. ``max_attempts`` was
    # set below 1 after construction (the constructor clamps it to >= 1).
    raise RuntimeError("Embedding 请求失败:未知错误")
|
||||
|
||||
async def _get_embedding_direct(self, text: str, dimensions: Optional[int] = None) -> Optional[List[float]]:
    """Request one embedding, trying each candidate model in order.

    For every candidate the model and provider are resolved, a fresh client
    is obtained from ``client_registry`` and the request is sent through
    ``_request_with_retry``. The first candidate that yields a valid vector
    wins. A failure of any kind on one candidate is logged as a warning and
    the next candidate is tried.

    Args:
        text: Input text to embed.
        dimensions: When not ``None``, sent as the ``dimensions`` entry of
            the request's extra parameters.

    Returns:
        The embedding as a list of floats, or ``None`` when every candidate
        failed.

    Raises:
        RuntimeError: No candidate model is configured.
    """
    model_names = self._resolve_candidate_model_names()
    if not model_names:
        raise RuntimeError("embedding 任务未配置模型")

    failure: Optional[BaseException] = None
    for model_name in model_names:
        try:
            info = self._find_model_info(model_name)
            provider = self._find_provider(info.api_provider)
            client = client_registry.get_client_class_instance(provider, force_new=True)

            # Copy so the per-request ``dimensions`` override never mutates
            # the model's own ``extra_params``.
            request_params = dict(getattr(info, "extra_params", {}) or {})
            if dimensions is not None:
                request_params["dimensions"] = int(dimensions)

            response = await self._request_with_retry(
                client=client,
                model_info=info,
                text=text,
                extra_params=request_params,
            )
            raw_embedding = getattr(response, "embedding", None)
            if raw_embedding is None:
                raise RuntimeError(f"模型 {model_name} 未返回 embedding")

            checked = self._validate_embedding_vector(
                raw_embedding,
                source=f"embedding 模型 {model_name}",
            )
            return checked.tolist()
        except Exception as exc:
            failure = exc
            logger.warning(f"embedding 模型 {model_name} 请求失败: {exc}")

    if failure is not None:
        logger.error(f"通过直接 Client 获取 Embedding 失败: {failure}")
    return None
|
||||
|
||||
async def _detect_dimension(self) -> int:
|
||||
if self._dimension_detected and self._dimension is not None:
|
||||
return self._dimension
|
||||
|
||||
logger.info("正在检测嵌入模型维度...")
|
||||
try:
|
||||
target_dim = self.default_dimension
|
||||
logger.debug(f"尝试请求指定维度: {target_dim}")
|
||||
test_embedding = await self._get_embedding_direct("test", dimensions=target_dim)
|
||||
if test_embedding and isinstance(test_embedding, list):
|
||||
detected_dim = len(test_embedding)
|
||||
if detected_dim == target_dim:
|
||||
logger.info(f"嵌入维度检测成功 (匹配配置): {detected_dim}")
|
||||
else:
|
||||
logger.warning(
|
||||
f"请求维度 {target_dim} 但模型返回 {detected_dim},将使用模型自然维度"
|
||||
)
|
||||
self._dimension = detected_dim
|
||||
self._dimension_detected = True
|
||||
return detected_dim
|
||||
except Exception as exc:
|
||||
logger.debug(f"带维度参数探测失败: {exc},尝试不带参数探测")
|
||||
|
||||
try:
|
||||
test_embedding = await self._get_embedding_direct("test", dimensions=None)
|
||||
if test_embedding and isinstance(test_embedding, list):
|
||||
detected_dim = len(test_embedding)
|
||||
self._dimension = detected_dim
|
||||
self._dimension_detected = True
|
||||
logger.info(f"嵌入维度检测成功 (自然维度): {detected_dim}")
|
||||
return detected_dim
|
||||
logger.warning(f"嵌入维度检测失败,使用默认值: {self.default_dimension}")
|
||||
except Exception as exc:
|
||||
logger.error(f"嵌入维度检测异常: {exc},使用默认值: {self.default_dimension}")
|
||||
|
||||
self._dimension = self.default_dimension
|
||||
self._dimension_detected = True
|
||||
return self._dimension
|
||||
|
||||
@staticmethod
|
||||
def _tokenize(text: str) -> List[str]:
|
||||
clean = str(text or "").strip().lower()
|
||||
if not clean:
|
||||
return []
|
||||
return _TOKEN_PATTERN.findall(clean)
|
||||
|
||||
@staticmethod
|
||||
def _feature_weight(token: str) -> float:
|
||||
digest = hashlib.sha256(token.encode("utf-8")).digest()
|
||||
return 1.0 + (digest[10] / 255.0) * 0.5
|
||||
|
||||
def _encode_single(self, text: str, dimension: int) -> np.ndarray:
|
||||
vector = np.zeros(dimension, dtype=np.float32)
|
||||
content = str(text or "").strip()
|
||||
tokens = self._tokenize(content)
|
||||
if not tokens and content:
|
||||
tokens = [content.lower()]
|
||||
if not tokens:
|
||||
vector[0] = 1.0
|
||||
return vector
|
||||
|
||||
for token in tokens:
|
||||
digest = hashlib.sha256(token.encode("utf-8")).digest()
|
||||
bucket = int.from_bytes(digest[:8], byteorder="big", signed=False) % dimension
|
||||
sign = 1.0 if digest[8] % 2 == 0 else -1.0
|
||||
vector[bucket] += sign * self._feature_weight(token)
|
||||
|
||||
second_bucket = int.from_bytes(digest[12:20], byteorder="big", signed=False) % dimension
|
||||
if second_bucket != bucket:
|
||||
vector[second_bucket] += (sign * 0.35)
|
||||
|
||||
norm = float(np.linalg.norm(vector))
|
||||
if norm > 1e-8:
|
||||
vector /= norm
|
||||
else:
|
||||
vector[0] = 1.0
|
||||
return vector
|
||||
return self.default_dimension
|
||||
|
||||
async def encode(
|
||||
self,
|
||||
@@ -109,59 +225,137 @@ class EmbeddingAPIAdapter:
|
||||
normalize: bool = True,
|
||||
dimensions: Optional[int] = None,
|
||||
) -> np.ndarray:
|
||||
_ = batch_size
|
||||
_ = show_progress
|
||||
_ = normalize
|
||||
del show_progress
|
||||
del normalize
|
||||
|
||||
started_at = time.time()
|
||||
target_dimension = max(32, int(dimensions or await self._detect_dimension()))
|
||||
start_time = time.time()
|
||||
target_dim = int(dimensions) if dimensions is not None else int(await self._detect_dimension())
|
||||
|
||||
if isinstance(texts, str):
|
||||
single_input = True
|
||||
normalized_texts = [texts]
|
||||
single_input = True
|
||||
else:
|
||||
single_input = False
|
||||
normalized_texts = list(texts or [])
|
||||
single_input = False
|
||||
|
||||
if not normalized_texts:
|
||||
empty = np.zeros((0, target_dimension), dtype=np.float32)
|
||||
empty = np.zeros((0, target_dim), dtype=np.float32)
|
||||
return empty[0] if single_input else empty
|
||||
|
||||
if batch_size is None:
|
||||
batch_size = self.batch_size
|
||||
|
||||
try:
|
||||
matrix = np.vstack([self._encode_single(item, target_dimension) for item in normalized_texts])
|
||||
embeddings = await self._encode_batch_internal(
|
||||
normalized_texts,
|
||||
batch_size=max(1, int(batch_size)),
|
||||
dimensions=dimensions,
|
||||
)
|
||||
if embeddings.ndim == 1:
|
||||
embeddings = embeddings.reshape(1, -1)
|
||||
self._total_encoded += len(normalized_texts)
|
||||
self._total_time += time.time() - started_at
|
||||
except Exception:
|
||||
elapsed = time.time() - start_time
|
||||
self._total_time += elapsed
|
||||
logger.debug(
|
||||
"编码完成: "
|
||||
f"{len(normalized_texts)} 个文本, "
|
||||
f"耗时 {elapsed:.2f}s, "
|
||||
f"平均 {elapsed / max(1, len(normalized_texts)):.3f}s/文本"
|
||||
)
|
||||
return embeddings[0] if single_input else embeddings
|
||||
except Exception as exc:
|
||||
self._total_errors += 1
|
||||
raise
|
||||
logger.error(f"编码失败: {exc}")
|
||||
raise RuntimeError(f"embedding encode failed: {exc}") from exc
|
||||
|
||||
return matrix[0] if single_input else matrix
|
||||
async def _encode_batch_internal(
|
||||
self,
|
||||
texts: List[str],
|
||||
batch_size: int,
|
||||
dimensions: Optional[int] = None,
|
||||
) -> np.ndarray:
|
||||
all_embeddings: List[np.ndarray] = []
|
||||
for offset in range(0, len(texts), batch_size):
|
||||
batch = texts[offset : offset + batch_size]
|
||||
semaphore = asyncio.Semaphore(self.max_concurrent)
|
||||
|
||||
def get_statistics(self) -> dict:
|
||||
avg_time = self._total_time / self._total_encoded if self._total_encoded else 0.0
|
||||
async def encode_with_semaphore(text: str, index: int):
|
||||
async with semaphore:
|
||||
embedding = await self._get_embedding_direct(text, dimensions=dimensions)
|
||||
if embedding is None:
|
||||
raise RuntimeError(f"文本 {index} 编码失败:embedding 返回为空")
|
||||
vector = self._validate_embedding_vector(
|
||||
embedding,
|
||||
source=f"文本 {index}",
|
||||
)
|
||||
return index, vector
|
||||
|
||||
tasks = [
|
||||
encode_with_semaphore(text, offset + index)
|
||||
for index, text in enumerate(batch)
|
||||
]
|
||||
results = await asyncio.gather(*tasks)
|
||||
results.sort(key=lambda item: item[0])
|
||||
all_embeddings.extend(emb for _, emb in results)
|
||||
|
||||
return np.array(all_embeddings, dtype=np.float32)
|
||||
|
||||
async def encode_batch(
|
||||
self,
|
||||
texts: List[str],
|
||||
batch_size: Optional[int] = None,
|
||||
num_workers: Optional[int] = None,
|
||||
show_progress: bool = False,
|
||||
dimensions: Optional[int] = None,
|
||||
) -> np.ndarray:
|
||||
del show_progress
|
||||
if num_workers is not None:
|
||||
previous = self.max_concurrent
|
||||
self.max_concurrent = max(1, int(num_workers))
|
||||
try:
|
||||
return await self.encode(texts, batch_size=batch_size, dimensions=dimensions)
|
||||
finally:
|
||||
self.max_concurrent = previous
|
||||
return await self.encode(texts, batch_size=batch_size, dimensions=dimensions)
|
||||
|
||||
def get_embedding_dimension(self) -> int:
|
||||
if self._dimension is not None:
|
||||
return self._dimension
|
||||
logger.warning(f"维度尚未检测,返回默认值: {self.default_dimension}")
|
||||
return self.default_dimension
|
||||
|
||||
def get_model_info(self) -> dict:
|
||||
return {
|
||||
"model_name": self.model_name,
|
||||
"dimension": self._dimension or self.default_dimension,
|
||||
"dimension_detected": self._dimension_detected,
|
||||
"batch_size": self.batch_size,
|
||||
"max_concurrent": self.max_concurrent,
|
||||
"total_encoded": self._total_encoded,
|
||||
"total_errors": self._total_errors,
|
||||
"total_time": self._total_time,
|
||||
"avg_time_per_text": avg_time,
|
||||
"avg_time_per_text": self._total_time / self._total_encoded if self._total_encoded else 0.0,
|
||||
}
|
||||
|
||||
def get_statistics(self) -> dict:
|
||||
return self.get_model_info()
|
||||
|
||||
@property
|
||||
def is_model_loaded(self) -> bool:
|
||||
return True
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return (
|
||||
f"EmbeddingAPIAdapter(model_name={self.model_name}, "
|
||||
f"dimension={self._dimension or self.default_dimension}, "
|
||||
f"total_encoded={self._total_encoded})"
|
||||
f"EmbeddingAPIAdapter(dim={self._dimension or self.default_dimension}, "
|
||||
f"detected={self._dimension_detected}, encoded={self._total_encoded})"
|
||||
)
|
||||
|
||||
|
||||
def create_embedding_api_adapter(
|
||||
batch_size: int = 32,
|
||||
max_concurrent: int = 5,
|
||||
default_dimension: int = 256,
|
||||
default_dimension: int = 1024,
|
||||
enable_cache: bool = False,
|
||||
model_name: str = "hash-v1",
|
||||
model_name: str = "auto",
|
||||
retry_config: Optional[dict] = None,
|
||||
) -> EmbeddingAPIAdapter:
|
||||
return EmbeddingAPIAdapter(
|
||||
|
||||
Reference in New Issue
Block a user