feat：查询结果可以建构进jargon和person_info

2025-11-15 19:18:39 +08:00
parent 04d1aa6763
commit d18d77cf4b
3 changed files with 353 additions and 6 deletions
--- a/src/memory_system/memory_retrieval.py
+++ b/src/memory_system/memory_retrieval.py
@@ -91,7 +91,7 @@ def init_memory_retrieval_prompt():
 你需要通过思考(Think)、行动(Action)、观察(Observation)的循环来回答问题。

 **重要限制：**
- 最大查询轮数：5轮（当前第{current_iteration}轮，剩余{remaining_iterations}轮）
+- 最大查询轮数：{max_iterations}轮（当前第{current_iteration}轮，剩余{remaining_iterations}轮）
 - 必须尽快得出答案，避免不必要的查询
 - 思考要简短，直接切入要点
 - 必须严格使用检索到的信息回答问题，不要编造信息
@@ -127,7 +127,7 @@ def init_memory_retrieval_prompt():
 你需要通过思考(Think)、行动(Action)、观察(Observation)的循环来回答问题。

 **重要限制：**
- 最大查询轮数：5轮（当前第{current_iteration}轮，剩余{remaining_iterations}轮）
+- 最大查询轮数：{max_iterations}轮（当前第{current_iteration}轮，剩余{remaining_iterations}轮）
 - 必须尽快得出答案，避免不必要的查询
 - 思考要简短，直接切入要点
 - 必须严格使用检索到的信息回答问题，不要编造信息
@@ -416,6 +416,7 @@ async def _react_agent_solve_question(
            collected_info=collected_info if collected_info else "暂无信息",
            current_iteration=current_iteration,
            remaining_iterations=remaining_iterations,
+            max_iterations=max_iterations,
        )

        
@@ -428,6 +429,7 @@ async def _react_agent_solve_question(
                question=question,
                current_iteration=current_iteration,
                remaining_iterations=remaining_iterations,
+                max_iterations=max_iterations,
            )

            def message_factory(
@@ -821,6 +823,91 @@ def _query_thinking_back(chat_id: str, question: str) -> Optional[Tuple[bool, st
        return None


+async def _analyze_question_answer(question: str, answer: str, chat_id: str) -> None:
+    """Classify a question/answer pair with the LLM and store it in the matching system.
+
+    "黑话" (jargon) pairs go to the jargon store and "人物信息" (person info) pairs go
+    to the person-info store; any other category is only logged. Failures are logged
+    and never raised, since this coroutine is scheduled via asyncio.create_task.
+
+    Args:
+        question: The question that was answered.
+        answer: The answer found for the question.
+        chat_id: ID of the chat the pair belongs to.
+    """
+    try:
+        # Ask the LLM to classify the pair and extract the fields to store.
+        analysis_prompt = f"""请分析以下问题和答案的类别：
+
+问题：{question}
+答案：{answer}
+
+类别说明：
+1. 人物信息：有关某个用户的个体信息（如某人的喜好、习惯、经历等）
+2. 黑话：对特定概念、缩写词、谐音词、自创词的解释（如"yyds"、"社死"等）
+3. 其他：除此之外的其他内容
+
+请输出JSON格式：
+{{
+    "category": "人物信息" | "黑话" | "其他",
+    "jargon_keyword": "如果是黑话，提取关键词（如'yyds'），否则为空字符串",
+    "person_name": "如果是人物信息，提取人物名称，否则为空字符串",
+    "memory_content": "如果是人物信息，提取要存储的记忆内容（简短概括），否则为空字符串"
+}}
+
+只输出JSON，不要输出其他内容："""
+
+        success, response, _, _ = await llm_api.generate_with_model(
+            analysis_prompt,
+            model_config=model_config.model_task_config.utils,
+            request_type="memory.analyze_qa",
+        )
+        if not success:
+            logger.error(f"分析问题和答案失败: {response}")
+            return
+
+        # Parse separately so storage errors are not misreported as parse failures.
+        try:
+            matches = re.findall(r"```json\s*(.*?)\s*```", response, re.DOTALL)
+            json_str = matches[0] if matches else response.strip()
+            analysis_result = json.loads(repair_json(json_str))
+            if not isinstance(analysis_result, dict):
+                raise ValueError(f"expected a JSON object, got {type(analysis_result).__name__}")
+        except Exception as e:
+            logger.error(f"解析分析结果失败: {e}, 响应: {response[:200]}...")
+            return
+
+        def _field(key: str) -> str:
+            # The model may emit null or non-string values; treat those as empty.
+            value = analysis_result.get(key)
+            return value.strip() if isinstance(value, str) else ""
+
+        category = _field("category")
+        if category == "黑话":
+            # Jargon: store the answer as the explanation of the extracted keyword.
+            jargon_keyword = _field("jargon_keyword")
+            if jargon_keyword:
+                from src.jargon.jargon_miner import store_jargon_from_answer
+                await store_jargon_from_answer(jargon_keyword, answer, chat_id)
+            else:
+                logger.warning(f"分析为黑话但未提取到关键词，问题: {question[:50]}...")
+        elif category == "人物信息":
+            # Person info: both a person name and a memory summary are required.
+            person_name = _field("person_name")
+            memory_content = _field("memory_content")
+            if person_name and memory_content:
+                from src.person_info.person_info import store_person_memory_from_answer
+                await store_person_memory_from_answer(person_name, memory_content, chat_id)
+            else:
+                logger.warning(f"分析为人物信息但未提取到人物名称或记忆内容，问题: {question[:50]}...")
+        else:
+            # "其他" or an unrecognised category: nothing is stored.
+            logger.info(f"问题和答案类别为'其他'，不进行存储，问题: {question[:50]}...")
+    except Exception as e:
+        # Last-resort guard: this runs as a background task, so never propagate.
+        logger.error(f"分析问题和答案时发生异常: {e}")
+
+
 def _store_thinking_back(
    chat_id: str,
    question: str,
@@ -937,7 +1024,7 @@ async def _process_single_question(
        found_answer, answer, thinking_steps, is_timeout = await _react_agent_solve_question(
            question=question,
            chat_id=chat_id,
-            max_iterations=5,
+            max_iterations=global_config.memory.max_agent_iterations,
            timeout=120.0,
            initial_info=initial_info
        )
@@ -956,6 +1043,8 @@ async def _process_single_question(
            logger.info(f"ReAct Agent超时，不存储到数据库，问题: {question[:50]}...")
        
        if found_answer and answer:
+            # 创建异步任务分析问题和答案
+            asyncio.create_task(_analyze_question_answer(question, answer, chat_id))
            return f"问题：{question}\n答案：{answer}"
    
    return None
@@ -1067,8 +1156,9 @@ async def build_memory_retrieval_prompt(
        
        logger.info(f"解析到 {len(questions)} 个问题: {questions}")
        
-        # 第二步：并行处理所有问题（固定使用5次迭代/120秒超时）
-        logger.info(f"问题数量: {len(questions)}，固定设置最大迭代次数: 5，超时时间: 120秒")
+        # 第二步：并行处理所有问题（使用配置的最大迭代次数/120秒超时）
+        max_iterations = global_config.memory.max_agent_iterations
+        logger.info(f"问题数量: {len(questions)}，设置最大迭代次数: {max_iterations}，超时时间: 120秒")
        
        # 并行处理所有问题，将概念检索结果作为初始信息传递
        question_tasks = [