DawnARC
2026-04-13 13:09:57 +08:00
parent 7a42a1cb2a
commit b3fb662f3d
17 changed files with 3589 additions and 74 deletions

View File

@@ -8,7 +8,7 @@
- The runtime root directory is determined by `storage.data_dir` (the current template defaults to `data/a-memorix`).
- Some offline scripts still use `data/plugins/a-dawn.a-memorix` as their default processing directory.
- Fixed the import example parameters in the docs: the `input_mode` examples for `memory_import_admin.create_paste` are now consistently `text`/`json`.
- Updated the `README.md` description of the metadata schema to match the current code (`SCHEMA_VERSION = 9`).
- Updated the `README.md` description of the metadata schema to match the current code (`SCHEMA_VERSION = 10`).
## [2.0.0] - 2026-03-18

View File

@@ -1,6 +1,6 @@
# A_Memorix Configuration Reference (v2.0.0)
This document corresponds to the current repository code (`__version__ = 2.0.0`, `SCHEMA_VERSION = 9`).
This document corresponds to the current repository code (`__version__ = 2.0.0`, `SCHEMA_VERSION = 10`).
Notes:

View File

@@ -632,7 +632,7 @@ class DualPathRetriever:
results: List[RetrievalResult] = []
for row in rows:
hash_value = row["hash"]
relation = self.metadata_store.get_relation(hash_value)
relation = self.metadata_store.get_relation(hash_value, include_inactive=False)
if relation is None:
continue
@@ -888,8 +888,8 @@ class DualPathRetriever:
entity_name = entity["name"]
related_rels = []
related_rels.extend(self.metadata_store.get_relations(subject=entity_name))
related_rels.extend(self.metadata_store.get_relations(object=entity_name))
related_rels.extend(self.metadata_store.get_relations(subject=entity_name, include_inactive=False))
related_rels.extend(self.metadata_store.get_relations(object=entity_name, include_inactive=False))
for rel in related_rels:
if rel["hash"] in seen_relations:
@@ -1280,7 +1280,7 @@ class DualPathRetriever:
results = []
for hash_value, score in zip(rel_ids, rel_scores):
relation = self.metadata_store.get_relation(hash_value)
relation = self.metadata_store.get_relation(hash_value, include_inactive=False)
if relation is None:
continue
@@ -1378,7 +1378,7 @@ class DualPathRetriever:
deduplicated_results.append(result)
continue
# Check whether the paragraphs associated with the relation already exist
relation = self.metadata_store.get_relation(result.hash_value)
relation = self.metadata_store.get_relation(result.hash_value, include_inactive=False)
if relation:
# Fetch the associated paragraphs
para_rels = self.metadata_store.query("""
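All four retriever call sites above now pass `include_inactive=False`, so relations deactivated by feedback correction drop out of both graph and vector recall. A minimal sketch of what the store-side filter could look like, assuming a `relations` table with an `is_inactive` column (neither is shown in this diff):

```python
import sqlite3
from typing import Any, Dict, Optional


def get_relation(
    conn: sqlite3.Connection,
    hash_value: str,
    include_inactive: bool = True,
) -> Optional[Dict[str, Any]]:
    """Fetch one relation row, optionally hiding deactivated rows.

    Hypothetical sketch: the `relations` table layout and the
    `is_inactive` column are assumptions, not shown in this diff.
    """
    sql = "SELECT * FROM relations WHERE hash = ?"
    if not include_inactive:
        sql += " AND COALESCE(is_inactive, 0) = 0"
    conn.row_factory = sqlite3.Row
    row = conn.execute(sql, (hash_value,)).fetchone()
    return dict(row) if row is not None else None
```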

View File

@@ -255,7 +255,7 @@ class GraphRelationRecallService:
graph_hops: int,
graph_seed_entities: Sequence[str],
) -> Optional[GraphRelationCandidate]:
relation = self.metadata_store.get_relation(relation_hash)
relation = self.metadata_store.get_relation(relation_hash, include_inactive=False)
if relation is None:
return None
supporting_paragraphs = self.metadata_store.get_paragraphs_by_relation(relation_hash)

View File

@@ -338,6 +338,7 @@ class SparseBM25Index:
match_query=match_query,
limit=max(1, int(k)),
max_doc_len=self.config.relation_max_doc_len,
include_inactive=False,
conn=self._conn,
)
out: List[Dict[str, Any]] = []
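The sparse path forwards the same flag into its query helper. One plausible way to honor it in an FTS5-backed BM25 lookup, assuming a content table `relations` joined to an FTS index `relations_fts` (both names hypothetical):

```python
import sqlite3
from typing import Any, Dict, List


def search_relations_fts(
    conn: sqlite3.Connection,
    match_query: str,
    limit: int,
    include_inactive: bool = False,
) -> List[Dict[str, Any]]:
    """BM25-ranked FTS5 lookup that can exclude deactivated relations."""
    sql = (
        "SELECT r.hash, bm25(relations_fts) AS score "
        "FROM relations_fts "
        "JOIN relations AS r ON r.rowid = relations_fts.rowid "
        "WHERE relations_fts MATCH ?"
    )
    if not include_inactive:
        sql += " AND COALESCE(r.is_inactive, 0) = 0"
    # bm25() scores are lower-is-better, so ascending order ranks best first.
    sql += " ORDER BY score LIMIT ?"
    conn.row_factory = sqlite3.Row
    return [dict(row) for row in conn.execute(sql, (match_query, limit))]
```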

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -18,6 +18,7 @@ from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from src.common.logger import get_logger
from src.config.config import global_config
from .episode_segmentation_service import EpisodeSegmentationService
from .hash import compute_hash
@@ -528,7 +529,11 @@ class EpisodeService:
"paragraph_count": 0,
}
paragraphs = self.metadata_store.get_live_paragraphs_by_source(token)
memory_cfg = getattr(global_config, "memory", None)
paragraphs = self.metadata_store.get_live_paragraphs_by_source(
token,
exclude_stale=bool(getattr(memory_cfg, "feedback_correction_paragraph_hard_filter_enabled", True)),
)
if not paragraphs:
replace_result = self.metadata_store.replace_episodes_for_source(token, [])
return {
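Episode rebuilds now read `feedback_correction_paragraph_hard_filter_enabled` (default `True`) from the memory config and pass it as `exclude_stale`. A deliberately simplified sketch of what the store query could do with that flag; the `paragraphs` column names are assumptions, while `paragraph_stale_relation_marks` is a table this migration introduces:

```python
import sqlite3
from typing import Any, Dict, List


def get_live_paragraphs_by_source(
    conn: sqlite3.Connection,
    source_token: str,
    exclude_stale: bool = True,
) -> List[Dict[str, Any]]:
    """Fetch live paragraphs for a source, optionally skipping stale ones.

    Simplified sketch: hides any marked paragraph, whereas the profile
    service below is stricter and also checks the marked relation's status.
    """
    sql = "SELECT * FROM paragraphs WHERE source = ? AND is_deleted = 0"
    if exclude_stale:
        sql += (
            " AND NOT EXISTS ("
            "SELECT 1 FROM paragraph_stale_relation_marks AS m "
            "WHERE m.paragraph_hash = paragraphs.hash)"
        )
    conn.row_factory = sqlite3.Row
    return [dict(row) for row in conn.execute(sql, (source_token,))]
```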

View File

@@ -90,9 +90,9 @@ def find_paths_between_entities(
else:
pred = "related"
direction = "->"
rels = metadata_store.get_relations(subject=u, object=v)
rels = metadata_store.get_relations(subject=u, object=v, include_inactive=False)
if not rels:
rels = metadata_store.get_relations(subject=v, object=u)
rels = metadata_store.get_relations(subject=v, object=u, include_inactive=False)
direction = "<-"
if rels:
best_rel = max(rels, key=lambda x: x.get("confidence", 1.0))
@@ -162,4 +162,3 @@ def to_retrieval_results(paths: Sequence[Dict[str, Any]]) -> List[RetrievalResul
)
)
return converted
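Note the tie-breaking in `best_rel`: a relation without an explicit `confidence` defaults to `1.0` and therefore outranks any explicitly scored one. A toy illustration of that rule:

```python
from typing import Any, Dict, List

# Toy rows in the shape the metadata store returns (fields assumed).
rels: List[Dict[str, Any]] = [
    {"hash": "r1", "predicate": "likes", "confidence": 0.62},
    {"hash": "r2", "predicate": "mentions"},  # no confidence -> defaults to 1.0
    {"hash": "r3", "predicate": "knows", "confidence": 0.95},
]

# Same selection as the diff: the missing confidence wins over 0.95.
best_rel = max(rels, key=lambda x: x.get("confidence", 1.0))
assert best_rel["hash"] == "r2"
```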

View File

@@ -15,6 +15,7 @@ from sqlmodel import select
from src.common.logger import get_logger
from src.common.database.database import get_db_session
from src.common.database.database_model import PersonInfo
from src.config.config import global_config
from ..embedding import EmbeddingAPIAdapter
from ..retrieval import (
@@ -285,11 +286,11 @@ class PersonProfileService:
def _collect_relation_evidence(self, aliases: List[str], limit: int = 30) -> List[Dict[str, Any]]:
relation_by_hash: Dict[str, Dict[str, Any]] = {}
for alias in aliases:
for rel in self.metadata_store.get_relations(subject=alias):
for rel in self.metadata_store.get_relations(subject=alias, include_inactive=False):
h = str(rel.get("hash", ""))
if h:
relation_by_hash[h] = rel
for rel in self.metadata_store.get_relations(object=alias):
for rel in self.metadata_store.get_relations(object=alias, include_inactive=False):
h = str(rel.get("hash", ""))
if h:
relation_by_hash[h] = rel
@@ -342,7 +343,53 @@ class PersonProfileService:
"metadata": {},
}
)
return evidence
return self._filter_stale_paragraph_evidence(evidence)
def _filter_stale_paragraph_evidence(
self,
evidence: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
memory_cfg = getattr(global_config, "memory", None)
if not bool(getattr(memory_cfg, "feedback_correction_paragraph_hard_filter_enabled", True)):
return evidence
paragraph_hashes = [
str(item.get("hash", "") or "").strip()
for item in evidence
if str(item.get("type", "") or "").strip() == "paragraph" and str(item.get("hash", "") or "").strip()
]
if not paragraph_hashes:
return evidence
marks_by_paragraph = self.metadata_store.get_paragraph_stale_relation_marks_batch(paragraph_hashes)
relation_hashes: List[str] = []
seen = set()
for marks in marks_by_paragraph.values():
for mark in marks:
relation_hash = str(mark.get("relation_hash", "") or "").strip()
if not relation_hash or relation_hash in seen:
continue
seen.add(relation_hash)
relation_hashes.append(relation_hash)
status_map = self.metadata_store.get_relation_status_batch(relation_hashes) if relation_hashes else {}
filtered: List[Dict[str, Any]] = []
for item in evidence:
item_type = str(item.get("type", "") or "").strip()
item_hash = str(item.get("hash", "") or "").strip()
if item_type != "paragraph" or not item_hash:
filtered.append(item)
continue
marks = marks_by_paragraph.get(item_hash, [])
should_hide = any(
status_map.get(str(mark.get("relation_hash", "") or "").strip()) is None
or bool((status_map.get(str(mark.get("relation_hash", "") or "").strip()) or {}).get("is_inactive"))
for mark in marks
if str(mark.get("relation_hash", "") or "").strip()
)
if should_hide:
continue
filtered.append(item)
return filtered
async def _collect_vector_evidence(
self,
@@ -373,7 +420,7 @@ class PersonProfileService:
"metadata": {},
}
)
return fallback[:top_k]
return self._filter_stale_paragraph_evidence(fallback[:top_k])
per_alias_top_k = max(2, int(top_k / max(1, len(alias_queries))))
seen_hash = set()
@@ -406,7 +453,7 @@ class PersonProfileService:
}
)
evidence.sort(key=lambda x: x.get("score", 0.0), reverse=True)
return evidence[:top_k]
return self._filter_stale_paragraph_evidence(evidence[:top_k])
def _build_profile_text(
self,
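The hide rule in `_filter_stale_paragraph_evidence` reduces to one predicate: a paragraph evidence item is dropped if any of its stale-relation marks points at a relation that is missing from the status map or flagged inactive. A standalone restatement with toy data:

```python
from typing import Any, Dict, List


def should_hide(
    marks: List[Dict[str, Any]],
    status_map: Dict[str, Dict[str, Any]],
) -> bool:
    """True if any mark references a missing or inactive relation."""
    for mark in marks:
        relation_hash = str(mark.get("relation_hash", "") or "").strip()
        if not relation_hash:
            continue
        status = status_map.get(relation_hash)
        if status is None or bool(status.get("is_inactive")):
            return True
    return False


status_map = {"rel-a": {"is_inactive": True}, "rel-b": {"is_inactive": False}}
assert should_hide([{"relation_hash": "rel-a"}], status_map)      # inactive -> hidden
assert not should_hide([{"relation_hash": "rel-b"}], status_map)  # active -> kept
assert should_hide([{"relation_hash": "rel-gone"}], status_map)   # missing -> hidden
```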

View File

@@ -190,6 +190,16 @@ class AMemorixHostService:
)
)
if component_name == "enqueue_feedback_task":
return await kernel.enqueue_feedback_task(
query_tool_id=str(payload.get("query_tool_id", "") or ""),
session_id=str(payload.get("session_id", "") or ""),
query_timestamp=payload.get("query_timestamp"),
structured_content=payload.get("structured_content")
if isinstance(payload.get("structured_content"), dict)
else {},
)
if component_name == "ingest_summary":
return await kernel.ingest_summary(
external_id=str(payload.get("external_id", "") or ""),
@@ -251,6 +261,7 @@ class AMemorixHostService:
"memory_source_admin": kernel.memory_source_admin,
"memory_episode_admin": kernel.memory_episode_admin,
"memory_profile_admin": kernel.memory_profile_admin,
"memory_feedback_admin": kernel.memory_feedback_admin,
"memory_runtime_admin": kernel.memory_runtime_admin,
"memory_import_admin": kernel.memory_import_admin,
"memory_tuning_admin": kernel.memory_tuning_admin,

View File

@@ -62,7 +62,10 @@ if any(arg in {"-h", "--help"} for arg in sys.argv[1:]):
try:
from A_memorix.core.storage import GraphStore, KnowledgeType, MetadataStore, QuantizationType, VectorStore
from A_memorix.core.storage.metadata_store import SCHEMA_VERSION
from A_memorix.core.storage.metadata_store import (
RUNTIME_AUTO_MIGRATION_MIN_SCHEMA_VERSION,
SCHEMA_VERSION,
)
except Exception as e: # pragma: no cover
print(f"❌ failed to import storage modules: {e}")
raise SystemExit(2)
@@ -125,6 +128,14 @@ def _sqlite_table_exists(conn: sqlite3.Connection, table: str) -> bool:
return row is not None
def _sqlite_column_exists(conn: sqlite3.Connection, table: str, column: str) -> bool:
try:
rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
except Exception:
return False
return any(str(row[1] or "") == str(column or "") for row in rows)
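`_sqlite_column_exists` leans on `PRAGMA table_info`, whose rows carry the column name in position 1; any failure degrades to `False`. A quick self-contained check of that behavior:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE memory_feedback_tasks (id INTEGER, rollback_status TEXT)")

# PRAGMA table_info rows are (cid, name, type, notnull, dflt_value, pk).
names = [str(row[1] or "") for row in conn.execute("PRAGMA table_info(memory_feedback_tasks)")]

assert "rollback_status" in names
assert "rollback_plan_json" not in names  # column only exists after the new migration
```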
def _collect_hash_alias_conflicts(conn: sqlite3.Connection) -> Dict[str, List[str]]:
hashes: List[str] = []
if _sqlite_table_exists(conn, "relations"):
@@ -152,6 +163,8 @@ def _collect_hash_alias_conflicts(conn: sqlite3.Connection) -> Dict[str, List[st
def _collect_invalid_knowledge_types(conn: sqlite3.Connection) -> List[str]:
if not _sqlite_table_exists(conn, "paragraphs"):
return []
if not _sqlite_column_exists(conn, "paragraphs", "knowledge_type"):
return []
allowed = {item.value for item in KnowledgeType}
rows = conn.execute("SELECT DISTINCT knowledge_type FROM paragraphs").fetchall()
@@ -288,6 +301,14 @@ def _preflight_impl(config_path: Path, data_dir: Path) -> Dict[str, Any]:
facts["schema_migrations_exists"] = has_schema_table
has_paragraph_backfill = _sqlite_table_exists(conn, "paragraph_vector_backfill")
facts["paragraph_vector_backfill_exists"] = has_paragraph_backfill
has_stale_marks = _sqlite_table_exists(conn, "paragraph_stale_relation_marks")
facts["paragraph_stale_relation_marks_exists"] = has_stale_marks
has_profile_refresh_queue = _sqlite_table_exists(conn, "person_profile_refresh_queue")
facts["person_profile_refresh_queue_exists"] = has_profile_refresh_queue
has_feedback_rollback_status = _sqlite_column_exists(conn, "memory_feedback_tasks", "rollback_status")
facts["memory_feedback_tasks_rollback_status_exists"] = has_feedback_rollback_status
has_feedback_rollback_plan = _sqlite_column_exists(conn, "memory_feedback_tasks", "rollback_plan_json")
facts["memory_feedback_tasks_rollback_plan_exists"] = has_feedback_rollback_plan
if not has_schema_table:
checks.append(
CheckItem(
@@ -300,14 +321,28 @@ def _preflight_impl(config_path: Path, data_dir: Path) -> Dict[str, Any]:
row = conn.execute("SELECT MAX(version) FROM schema_migrations").fetchone()
version = int(row[0]) if row and row[0] is not None else 0
facts["schema_version"] = version
runtime_auto_migratable = (
version < SCHEMA_VERSION
and version >= RUNTIME_AUTO_MIGRATION_MIN_SCHEMA_VERSION
)
facts["schema_runtime_auto_migratable"] = runtime_auto_migratable
if version != SCHEMA_VERSION:
checks.append(
CheckItem(
"CP-08",
"error",
f"schema version mismatch: current={version}, expected={SCHEMA_VERSION}",
if runtime_auto_migratable:
checks.append(
CheckItem(
"CP-18",
"warning",
f"schema version behind runtime target: current={version}, expected={SCHEMA_VERSION}; runtime auto migration will handle this update",
)
)
else:
checks.append(
CheckItem(
"CP-08",
"error",
f"schema version mismatch: current={version}, expected={SCHEMA_VERSION}",
)
)
)
elif not has_paragraph_backfill:
checks.append(
CheckItem(
@@ -316,6 +351,30 @@ def _preflight_impl(config_path: Path, data_dir: Path) -> Dict[str, Any]:
"paragraph_vector_backfill table missing under current schema version",
)
)
elif not has_stale_marks:
checks.append(
CheckItem(
"CP-15",
"error",
"paragraph_stale_relation_marks table missing under current schema version",
)
)
elif not has_profile_refresh_queue:
checks.append(
CheckItem(
"CP-16",
"error",
"person_profile_refresh_queue table missing under current schema version",
)
)
elif not has_feedback_rollback_status or not has_feedback_rollback_plan:
checks.append(
CheckItem(
"CP-17",
"error",
"memory_feedback_tasks rollback columns missing under current schema version",
)
)
if _sqlite_table_exists(conn, "relations"):
row = conn.execute("SELECT COUNT(*) FROM relations").fetchone()
@@ -616,6 +675,46 @@ def _verify_impl(config_path: Path, data_dir: Path) -> Dict[str, Any]:
"paragraph_vector_backfill table missing after migration",
)
)
has_feedback_tasks = _sqlite_table_exists(conn, "memory_feedback_tasks")
facts["memory_feedback_tasks_exists"] = bool(has_feedback_tasks)
if not has_feedback_tasks:
checks.append(
CheckItem(
"CP-15",
"error",
"memory_feedback_tasks table missing after migration",
)
)
has_feedback_logs = _sqlite_table_exists(conn, "memory_feedback_action_logs")
facts["memory_feedback_action_logs_exists"] = bool(has_feedback_logs)
if not has_feedback_logs:
checks.append(
CheckItem(
"CP-16",
"error",
"memory_feedback_action_logs table missing after migration",
)
)
has_feedback_rollback_status = _sqlite_column_exists(conn, "memory_feedback_tasks", "rollback_status")
facts["memory_feedback_tasks_rollback_status_exists"] = bool(has_feedback_rollback_status)
if not has_feedback_rollback_status:
checks.append(
CheckItem(
"CP-17",
"error",
"memory_feedback_tasks.rollback_status missing after migration",
)
)
has_feedback_rollback_plan = _sqlite_column_exists(conn, "memory_feedback_tasks", "rollback_plan_json")
facts["memory_feedback_tasks_rollback_plan_exists"] = bool(has_feedback_rollback_plan)
if not has_feedback_rollback_plan:
checks.append(
CheckItem(
"CP-18",
"error",
"memory_feedback_tasks.rollback_plan_json missing after migration",
)
)
conflicts = _collect_hash_alias_conflicts(conn)
invalid_knowledge_types = _collect_invalid_knowledge_types(conn)
finally: