fix(A_memorix):标准化导入数据并修复进度显示
为导入/LLM输出增加健壮的标准化与验证机制,修复进度计算逻辑,并改进UI摘要展示
This commit is contained in:
21
pytests/A_memorix_test/test_metadata_store_sources.py
Normal file
21
pytests/A_memorix_test/test_metadata_store_sources.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from pathlib import Path
|
||||
|
||||
from src.A_memorix.core.storage.metadata_store import MetadataStore
|
||||
|
||||
|
||||
def test_get_all_sources_ignores_soft_deleted_paragraphs(tmp_path: Path) -> None:
    """Verify that ``get_all_sources`` omits sources whose paragraphs were marked deleted.

    Two paragraphs with distinct sources are stored, one of them is passed to
    ``mark_as_deleted``, and only the remaining source is expected back with a
    count of one.
    """
    store = MetadataStore(data_dir=tmp_path)
    store.connect()
    try:
        live_hash = store.add_paragraph("Alice 喜欢地图", source="live-source")
        deleted_hash = store.add_paragraph("Bob 喜欢咖啡", source="deleted-source")

        assert live_hash
        store.mark_as_deleted([deleted_hash], "paragraph")

        sources = store.get_all_sources()
    finally:
        # Close the store even when an assertion above fails.
        store.close()

    assert [item["source"] for item in sources] == ["live-source"]
    assert sources[0]["count"] == 1
|
||||
@@ -1,6 +1,11 @@
|
||||
import pytest
|
||||
|
||||
from src.A_memorix.core.utils.summary_importer import SummaryImporter
|
||||
from src.A_memorix.core.utils.summary_importer import (
|
||||
SummaryImporter,
|
||||
_message_timestamp,
|
||||
_normalize_entity_items,
|
||||
_normalize_relation_items,
|
||||
)
|
||||
from src.config.model_configs import TaskConfig
|
||||
from src.services import llm_service as llm_api
|
||||
|
||||
@@ -46,3 +51,22 @@ def test_resolve_summary_model_config_rejects_legacy_string_selector(monkeypatch
|
||||
|
||||
with pytest.raises(ValueError, match="List\\[str\\]"):
|
||||
importer._resolve_summary_model_config()
|
||||
|
||||
|
||||
def test_summary_importer_normalizes_llm_entities_and_relations():
    """Check the entity/relation normalizers against malformed LLM-style payloads."""
    # Plain strings and {"name": ...} dicts are expected to survive; the nested
    # list and the repeated "Alice" are expected to be dropped.
    assert _normalize_entity_items(["Alice", {"name": "地图"}, ["bad"], "Alice"]) == ["Alice", "地图"]
    # A bare string (not a list) is expected to yield no entities.
    assert _normalize_entity_items("Alice") == []
    # Only the relation with a non-empty predicate is expected to be kept.
    assert _normalize_relation_items(
        [
            {"subject": "Alice", "predicate": "持有", "object": "地图"},
            {"subject": "Alice", "predicate": "", "object": "地图"},
            ["bad"],
        ]
    ) == [{"subject": "Alice", "predicate": "持有", "object": "地图"}]
|
||||
|
||||
|
||||
def test_summary_importer_message_timestamp_accepts_time_fallback():
    """Check that ``_message_timestamp`` reads a message's ``time`` attribute."""

    class Message:
        # The only attribute on this stand-in message is ``time``.
        time = 123.5

    assert _message_timestamp(Message()) == 123.5
|
||||
|
||||
117
pytests/A_memorix_test/test_web_import_manager_payloads.py
Normal file
117
pytests/A_memorix_test/test_web_import_manager_payloads.py
Normal file
@@ -0,0 +1,117 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from src.A_memorix.core.strategies.base import ChunkContext, KnowledgeType, ProcessedChunk, SourceInfo
|
||||
from src.A_memorix.core.utils.web_import_manager import ImportTaskManager
|
||||
|
||||
|
||||
class _DummyMetadataStore:
    """In-memory stand-in for the metadata store that records every write."""

    def __init__(self) -> None:
        # Keyword arguments of each add_paragraph call, in call order.
        self.paragraphs: list[dict[str, object]] = []
        # Entity names in the order they were added.
        self.entities: list[str] = []
        # (subject, predicate, obj) triples in the order they were added.
        self.relations: list[tuple[str, str, str]] = []

    def add_paragraph(self, **kwargs):
        """Record the call's keyword arguments and return a sequential fake hash."""
        self.paragraphs.append(dict(kwargs))
        return f"paragraph-{len(self.paragraphs)}"

    def add_entity(self, *, name: str, source_paragraph: str = "") -> str:
        """Record ``name`` and return a fake hash derived from it."""
        del source_paragraph  # accepted for signature compatibility, unused
        self.entities.append(name)
        return f"entity-{name}"

    def add_relation(self, *, subject: str, predicate: str, obj: str, **kwargs) -> str:
        """Record the triple and return a sequential fake hash."""
        del kwargs  # extra relation attributes are ignored by this dummy
        self.relations.append((subject, predicate, obj))
        return f"relation-{len(self.relations)}"

    def set_relation_vector_state(self, rel_hash: str, state: str) -> None:
        """Accept and discard a relation vector state update."""
        del rel_hash, state
|
||||
|
||||
|
||||
class _DummyGraphStore:
    """In-memory stand-in for the graph store that records node and edge batches."""

    def __init__(self) -> None:
        # One inner list per add_nodes call.
        self.nodes: list[list[str]] = []
        # One inner list per add_edges call.
        self.edges: list[list[tuple[str, str]]] = []

    def add_nodes(self, nodes):
        """Record a batch of nodes as a new list."""
        self.nodes.append(list(nodes))

    def add_edges(self, edges, relation_hashes=None):
        """Record a batch of edges as a new list; ``relation_hashes`` is ignored."""
        del relation_hashes
        self.edges.append(list(edges))
|
||||
|
||||
|
||||
class _DummyVectorStore:
    """Stand-in for the vector store that reports every id as absent."""

    def __contains__(self, item: str) -> bool:
        """Always report that ``item`` is not stored."""
        del item
        return False

    def add(self, vectors, ids):
        """Accept and discard vectors and their ids."""
        del vectors, ids
|
||||
|
||||
|
||||
class _DummyEmbeddingManager:
    """Stand-in for the embedding manager that returns a fixed vector."""

    async def encode(self, text: str) -> np.ndarray:
        """Return a float32 vector of four ones regardless of ``text``."""
        del text
        return np.ones(4, dtype=np.float32)
|
||||
|
||||
|
||||
def _build_manager() -> tuple[ImportTaskManager, _DummyMetadataStore]:
    """Build an ``ImportTaskManager`` wired to in-memory dummy stores.

    Returns:
        The manager together with the dummy metadata store, so tests can
        inspect what was written.
    """
    metadata_store = _DummyMetadataStore()
    plugin = SimpleNamespace(
        metadata_store=metadata_store,
        graph_store=_DummyGraphStore(),
        vector_store=_DummyVectorStore(),
        embedding_manager=_DummyEmbeddingManager(),
        relation_write_service=None,
        # Every config lookup falls back to the caller-supplied default.
        get_config=lambda key, default=None: default,
        _is_embedding_degraded=lambda: False,
        _allow_metadata_only_write=lambda: True,
        write_paragraph_vector_or_enqueue=None,
    )
    manager = ImportTaskManager(plugin)
    return manager, metadata_store
|
||||
|
||||
|
||||
def _build_chunk(data) -> ProcessedChunk:
    """Wrap ``data`` in a factual ``ProcessedChunk`` with fixed source and chunk context."""
    return ProcessedChunk(
        type=KnowledgeType.FACTUAL,
        source=SourceInfo(file="demo.txt", offset_start=0, offset_end=4),
        chunk=ChunkContext(chunk_id="chunk-1", index=0, text="Alice 持有地图"),
        data=data,
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_persist_processed_chunk_rejects_non_object_before_paragraph_write() -> None:
    """A list payload must raise ``ValueError`` and leave no paragraph written."""
    manager, metadata_store = _build_manager()
    file_record = SimpleNamespace(source_path="", source_kind="paste", name="demo.txt")

    with pytest.raises(ValueError, match="分块抽取结果 必须返回 JSON 对象"):
        await manager._persist_processed_chunk(file_record, _build_chunk(["bad"]))

    # The rejection must happen before any paragraph reaches the store.
    assert metadata_store.paragraphs == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_persist_processed_chunk_skips_invalid_nested_items() -> None:
    """Malformed nested items are skipped while the valid ones are persisted."""
    manager, metadata_store = _build_manager()
    file_record = SimpleNamespace(source_path="", source_kind="paste", name="demo.txt")

    await manager._persist_processed_chunk(
        file_record,
        _build_chunk(
            {
                "triples": [{"subject": "Alice", "predicate": "持有", "object": "地图"}, ["bad"]],
                "relations": [{"subject": "Alice", "predicate": "", "object": "地图"}],
                "entities": ["Alice", {"name": "地图"}, ["bad"]],
            }
        ),
    )

    assert len(metadata_store.paragraphs) == 1
    # Superset check: other entities may be added, but these two must be present.
    assert set(metadata_store.entities) >= {"Alice", "地图"}
    # Only the well-formed triple is stored; the empty-predicate relation is not.
    assert metadata_store.relations == [("Alice", "持有", "地图")]
|
||||
@@ -241,6 +241,145 @@ def test_webui_memory_profile_query_prefers_explicit_person_id(client: TestClien
|
||||
assert response.json()["person_id"] == "explicit-person-id"
|
||||
|
||||
|
||||
def test_webui_memory_profile_list_enriches_person_name(client: TestClient, monkeypatch):
    """The profile list route adds ``person_name`` to each item, empty when unknown."""

    async def fake_profile_admin(*, action: str, **kwargs):
        assert action == "list"
        assert kwargs["limit"] == 7
        return {
            "success": True,
            "items": [
                {"person_id": "person-1", "profile_text": "profile-1"},
                {"person_id": "person-2", "profile_text": "profile-2"},
            ],
        }

    monkeypatch.setattr(memory_router_module.memory_service, "profile_admin", fake_profile_admin)
    # Only person-1 has a known name; every other id resolves to "".
    monkeypatch.setattr(
        memory_router_module,
        "_get_person_name_for_person_id",
        lambda person_id: {"person-1": "Alice"}.get(person_id, ""),
    )

    response = client.get("/api/webui/memory/profiles", params={"limit": 7})

    assert response.status_code == 200
    items = response.json()["items"]
    assert items[0]["person_name"] == "Alice"
    assert items[1]["person_name"] == ""
|
||||
|
||||
|
||||
def test_webui_memory_profile_search_resolves_platform_user_id(client: TestClient, monkeypatch):
    """Profile search resolves platform + user_id to a person id and filters by it."""

    def fake_resolve_person_id_for_memory(**kwargs):
        assert kwargs == {"platform": "qq", "user_id": "12345", "strict_known": False}
        return "resolved-person-id"

    async def fake_profile_list(limit: int):
        # The route is expected to request 200 profiles although the client asked for 50.
        assert limit == 200
        return {
            "success": True,
            "items": [
                {"person_id": "resolved-person-id", "person_name": "Alice", "profile_text": "喜欢咖啡"},
                {"person_id": "other-person-id", "person_name": "Bob", "profile_text": "喜欢茶"},
            ],
        }

    monkeypatch.setattr(memory_router_module, "resolve_person_id_for_memory", fake_resolve_person_id_for_memory)
    monkeypatch.setattr(memory_router_module, "_profile_list", fake_profile_list)

    response = client.get(
        "/api/webui/memory/profiles/search",
        params={"platform": "qq", "user_id": "12345", "limit": 50},
    )

    assert response.status_code == 200
    assert response.json()["items"] == [
        {"person_id": "resolved-person-id", "person_name": "Alice", "profile_text": "喜欢咖啡"}
    ]
|
||||
|
||||
|
||||
def test_webui_memory_profile_search_filters_keyword(client: TestClient, monkeypatch):
    """Profile search with ``person_keyword`` returns only the matching profile."""

    async def fake_profile_list(limit: int):
        # The route is expected to request 200 profiles although the client asked for 50.
        assert limit == 200
        return {
            "success": True,
            "items": [
                {"person_id": "person-1", "person_name": "Alice", "profile_text": "喜欢咖啡"},
                {"person_id": "person-2", "person_name": "Bob", "profile_text": "喜欢茶"},
            ],
        }

    monkeypatch.setattr(memory_router_module, "_profile_list", fake_profile_list)

    response = client.get("/api/webui/memory/profiles/search", params={"person_keyword": "咖啡", "limit": 50})

    assert response.status_code == 200
    # The keyword appears only in person-1's profile_text.
    assert response.json()["items"] == [
        {"person_id": "person-1", "person_name": "Alice", "profile_text": "喜欢咖啡"}
    ]
|
||||
|
||||
|
||||
def test_webui_memory_episode_list_resolves_platform_user_id(client: TestClient, monkeypatch):
    """Episode listing resolves platform + user_id and forwards all filters to the service."""

    def fake_resolve_person_id_for_memory(**kwargs):
        assert kwargs == {"platform": "qq", "user_id": "12345", "strict_known": False}
        return "resolved-person-id"

    async def fake_episode_admin(*, action: str, **kwargs):
        assert action == "list"
        # The service must receive the resolved person id and float time bounds.
        assert kwargs == {
            "query": "咖啡",
            "limit": 9,
            "source": "chat_summary:demo",
            "person_id": "resolved-person-id",
            "time_start": 100.0,
            "time_end": 200.0,
        }
        return {
            "success": True,
            "items": [{"episode_id": "ep-1", "person_id": "resolved-person-id", "summary": "喝咖啡"}],
            "count": 1,
        }

    monkeypatch.setattr(memory_router_module, "resolve_person_id_for_memory", fake_resolve_person_id_for_memory)
    monkeypatch.setattr(memory_router_module.memory_service, "episode_admin", fake_episode_admin)
    monkeypatch.setattr(memory_router_module, "_get_person_name_for_person_id", lambda person_id: "测试人物")

    response = client.get(
        "/api/webui/memory/episodes",
        params={
            "query": "咖啡",
            "limit": 9,
            "source": "chat_summary:demo",
            "platform": "qq",
            "user_id": "12345",
            "time_start": 100,
            "time_end": 200,
        },
    )

    assert response.status_code == 200
    assert response.json()["items"][0]["person_name"] == "测试人物"
|
||||
|
||||
|
||||
def test_webui_memory_episode_list_prefers_explicit_person_id(client: TestClient, monkeypatch):
    """An explicit ``person_id`` is used as-is and platform resolution is never called."""

    def fake_resolve_person_id_for_memory(**kwargs):
        # Any call here means the route ignored the explicit person_id.
        raise AssertionError(f"不应解析平台账号: {kwargs}")

    async def fake_episode_admin(*, action: str, **kwargs):
        assert action == "list"
        assert kwargs["person_id"] == "explicit-person-id"
        return {"success": True, "items": []}

    monkeypatch.setattr(memory_router_module, "resolve_person_id_for_memory", fake_resolve_person_id_for_memory)
    monkeypatch.setattr(memory_router_module.memory_service, "episode_admin", fake_episode_admin)

    response = client.get(
        "/api/webui/memory/episodes",
        params={"person_id": "explicit-person-id", "platform": "qq", "user_id": "12345"},
    )

    assert response.status_code == 200
    assert response.json()["items"] == []
|
||||
|
||||
|
||||
def test_compat_aggregate_route(client: TestClient, monkeypatch):
|
||||
async def fake_search(query: str, **kwargs):
|
||||
assert kwargs["mode"] == "aggregate"
|
||||
|
||||
Reference in New Issue
Block a user