Files
mai-bot/pytests/A_memorix_test/test_embedding_dimension_control.py
A-Dawn 15d436b3a1 refactor: 将 A_Memorix 重构为主线长期记忆子系统并重建管理界面
- 将 A_Memorix 从旧 submodule / 插件形态迁入主线源码,主体落到 src/A_memorix
- 调整主程序接入方式,使 A_Memorix 作为源码内长期记忆子系统运行
- 回收父项目插件体系中针对 A_Memorix 的特判,减少对 plugin 通用层的侵入
- 将长期记忆配置、运行时、自检、导入、调优等能力收口到 memory 路由与主线服务层
- 重做长期记忆控制台与图谱页面,按 MaiBot 现有 dashboard 风格接入
- 补充实体关系图与证据视图双视图能力,支持查看节点、关系、段落及其证据链路
- 新增长期记忆配置编辑器与 memory-api,支持主线内配置管理
- 补齐删除管理能力:删除预览、混合删除、来源批量删除、删除操作恢复
- 优化删除预览与删除操作详情的前端展示,支持分页、检索,并以实体名/关系内容/段落摘要替代单纯 hash 展示
- 修复图谱与控制台相关前端问题,包括证据视图切换、查询触发时机、删除弹层空值保护等
- 新增或更新 A_Memorix 相关测试、WebUI 路由测试、前端 vitest 测试与辅助验证脚本
- 移除旧 plugins/A_memorix、.gitmodules 及相关历史维护文档
2026-04-03 08:08:24 +08:00

167 lines
5.6 KiB
Python

from __future__ import annotations
from types import SimpleNamespace
import numpy as np
import pytest
from A_memorix.core.embedding import api_adapter as api_adapter_module
from A_memorix.core.embedding.api_adapter import EmbeddingAPIAdapter
from A_memorix.core.utils.runtime_self_check import run_embedding_runtime_self_check
class _FakeEmbeddingClient:
def __init__(self, *, natural_dimension: int = 12) -> None:
self.natural_dimension = int(natural_dimension)
self.requests = []
async def get_embedding(self, request):
self.requests.append(request)
requested_dimension = request.extra_params.get("dimensions")
if requested_dimension is None:
requested_dimension = request.extra_params.get("output_dimensionality")
dimension = int(requested_dimension or self.natural_dimension)
return SimpleNamespace(embedding=[1.0] * dimension)
def _build_adapter(
monkeypatch: pytest.MonkeyPatch,
*,
client_type: str,
configured_dimension: int = 1024,
effective_dimension: int | None = None,
model_extra_params: dict | None = None,
):
adapter = EmbeddingAPIAdapter(default_dimension=configured_dimension)
if effective_dimension is not None:
adapter._dimension = int(effective_dimension)
adapter._dimension_detected = True
fake_client = _FakeEmbeddingClient()
model_info = SimpleNamespace(
name="embedding-model",
api_provider="provider-1",
model_identifier="embedding-model-id",
extra_params=dict(model_extra_params or {}),
)
provider = SimpleNamespace(name="provider-1", client_type=client_type)
monkeypatch.setattr(adapter, "_resolve_candidate_model_names", lambda: ["embedding-model"])
monkeypatch.setattr(adapter, "_find_model_info", lambda model_name: model_info)
monkeypatch.setattr(adapter, "_find_provider", lambda provider_name: provider)
monkeypatch.setattr(
api_adapter_module.client_registry,
"get_client_class_instance",
lambda api_provider, force_new=True: fake_client,
)
return adapter, fake_client
@pytest.mark.asyncio
async def test_encode_uses_canonical_dimension_for_openai_provider(monkeypatch):
adapter, fake_client = _build_adapter(
monkeypatch,
client_type="openai",
configured_dimension=1024,
effective_dimension=1024,
model_extra_params={"task_type": "SEMANTIC_SIMILARITY"},
)
embedding = await adapter.encode("北塔木梯")
request = fake_client.requests[-1]
assert request.extra_params["dimensions"] == 1024
assert "output_dimensionality" not in request.extra_params
assert request.extra_params["task_type"] == "SEMANTIC_SIMILARITY"
assert embedding.shape == (1024,)
@pytest.mark.asyncio
async def test_encode_explicit_dimension_override_wins(monkeypatch):
adapter, fake_client = _build_adapter(
monkeypatch,
client_type="openai",
configured_dimension=1024,
effective_dimension=1024,
)
embedding = await adapter.encode("海潮图", dimensions=256)
request = fake_client.requests[-1]
assert request.extra_params["dimensions"] == 256
assert "output_dimensionality" not in request.extra_params
assert embedding.shape == (256,)
@pytest.mark.asyncio
async def test_encode_maps_dimension_to_gemini_output_dimensionality(monkeypatch):
adapter, fake_client = _build_adapter(
monkeypatch,
client_type="gemini",
configured_dimension=1024,
effective_dimension=768,
)
embedding = await adapter.encode("广播站")
request = fake_client.requests[-1]
assert request.extra_params["output_dimensionality"] == 768
assert "dimensions" not in request.extra_params
assert embedding.shape == (768,)
@pytest.mark.asyncio
async def test_encode_does_not_force_dimension_for_unsupported_provider(monkeypatch):
adapter, fake_client = _build_adapter(
monkeypatch,
client_type="custom",
configured_dimension=1024,
effective_dimension=640,
model_extra_params={
"dimensions": 999,
"output_dimensionality": 888,
"custom_flag": "keep-me",
},
)
embedding = await adapter.encode("蓝漆铁盒")
request = fake_client.requests[-1]
assert "dimensions" not in request.extra_params
assert "output_dimensionality" not in request.extra_params
assert request.extra_params["custom_flag"] == "keep-me"
assert embedding.shape == (fake_client.natural_dimension,)
@pytest.mark.asyncio
async def test_runtime_self_check_reports_requested_dimension_without_explicit_override():
class _FakeEmbeddingManager:
def __init__(self) -> None:
self.detected_dimension = 384
self.encode_calls = []
async def _detect_dimension(self) -> int:
return self.detected_dimension
def get_requested_dimension(self) -> int:
return self.detected_dimension
async def encode(self, text):
self.encode_calls.append(text)
return np.ones(self.detected_dimension, dtype=np.float32)
manager = _FakeEmbeddingManager()
report = await run_embedding_runtime_self_check(
config={"embedding": {"dimension": 1024}},
vector_store=SimpleNamespace(dimension=384),
embedding_manager=manager,
)
assert report["ok"] is True
assert report["configured_dimension"] == 1024
assert report["requested_dimension"] == 384
assert report["detected_dimension"] == 384
assert report["encoded_dimension"] == 384
assert manager.encode_calls == ["A_Memorix runtime self check"]