feat: add a subagent frame

2026-04-03 22:15:53 +08:00
parent ce580d1f8b
commit 185361f2c3
72 changed files with 13062 additions and 0 deletions
--- a/agentlite/tests/scenarios/init.py
+++ b/agentlite/tests/scenarios/init.py
--- a/agentlite/tests/scenarios/test_cli_debug.py
+++ b/agentlite/tests/scenarios/test_cli_debug.py
@@ -0,0 +1,141 @@
+"""Debug script to find CLI test hang cause."""
+
+from __future__ import annotations
+
+import os
+import sys
+import asyncio
+import signal
+
+sys.path.insert(0, "/home/tcmofashi/proj/l2d_backend/agentlite/src")
+
+from agentlite import Agent, OpenAIProvider
+from agentlite.tools.shell.shell import Shell, Params
+
+SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1"
+SILICONFLOW_MODEL = "Qwen/Qwen3.5-397B-A17B"
+
+
+async def test_shell_directly():
+    """Test shell tool without agent."""
+    print("\n=== Test 1: Shell tool directly ===")
+    shell = Shell(timeout=10)
+
+    # Use Params dataclass
+    result = await shell(Params(command="echo 'Hello'", timeout=5))
+    print(f"Result: {result}")
+    print(f"Output: {result.output if hasattr(result, 'output') else result}")
+    return True
+
+
+async def test_agent_no_tools():
+    """Test agent without tools."""
+    print("\n=== Test 2: Agent without tools ===")
+    api_key = os.environ.get("SILICONFLOW_API_KEY")
+    if not api_key:
+        print("SILICONFLOW_API_KEY not set")
+        return False
+
+    provider = OpenAIProvider(
+        api_key=api_key,
+        base_url=SILICONFLOW_BASE_URL,
+        model=SILICONFLOW_MODEL,
+        timeout=30.0,
+    )
+
+    agent = Agent(
+        provider=provider,
+        system_prompt="Reply briefly in one word.",
+        max_iterations=3,
+    )
+
+    print("Sending message to LLM...")
+    try:
+        response = await asyncio.wait_for(
+            agent.run("Say hello."),
+            timeout=60.0,
+        )
+        print(f"Response: {response[:100]}...")
+        return True
+    except asyncio.TimeoutError:
+        print("TIMEOUT in agent without tools!")
+        return False
+
+
+async def test_agent_with_shell():
+    """Test agent with shell tool - the problematic case."""
+    print("\n=== Test 3: Agent WITH shell tool ===")
+    api_key = os.environ.get("SILICONFLOW_API_KEY")
+    if not api_key:
+        print("SILICONFLOW_API_KEY not set")
+        return False
+
+    provider = OpenAIProvider(
+        api_key=api_key,
+        base_url=SILICONFLOW_BASE_URL,
+        model=SILICONFLOW_MODEL,
+        timeout=60.0,
+    )
+
+    agent = Agent(
+        provider=provider,
+        system_prompt="You are a shell assistant. Execute commands when asked. Keep responses brief.",
+        tools=[Shell(timeout=10)],
+        max_iterations=5,  # Limit iterations
+    )
+
+    print("Sending message with tool request...")
+    print("This is where it might hang...")
+
+    try:
+        response = await asyncio.wait_for(
+            agent.run("Run 'echo test' and tell me the result."),
+            timeout=120.0,
+        )
+        print(f"Response: {response}")
+        return True
+    except asyncio.TimeoutError:
+        print("TIMEOUT! Agent hung for 120 seconds")
+
+        # Check history to see what happened
+        print(f"\nHistory length: {len(agent.history)}")
+        for i, msg in enumerate(agent.history[-5:]):
+            content_preview = str(msg.content)[:100] if msg.content else "None"
+            print(f"  [{i}] {msg.role}: {content_preview}...")
+        return False
+
+
+async def main():
+    """Run all tests."""
+    print("=" * 60)
+    print("CLI Debug Test - Finding the hang cause")
+    print("=" * 60)
+
+    results = []
+
+    # Test 1: Shell directly
+    r1 = await test_shell_directly()
+    results.append(("Shell directly", r1))
+    print(f"Result: {'PASS' if r1 else 'FAIL'}")
+
+    # Test 2: Agent without tools
+    r2 = await test_agent_no_tools()
+    results.append(("Agent no tools", r2))
+    print(f"Result: {'PASS' if r2 else 'FAIL'}")
+
+    # Test 3: Agent with shell (the problem)
+    r3 = await test_agent_with_shell()
+    results.append(("Agent with shell", r3))
+    print(f"Result: {'PASS' if r3 else 'FAIL'}")
+
+    print("\n" + "=" * 60)
+    print("SUMMARY")
+    print("=" * 60)
+    for name, passed in results:
+        status = "✅ PASS" if passed else "❌ FAIL"
+        print(f"  {name}: {status}")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/agentlite/tests/scenarios/test_cli_debug_verbose.py
+++ b/agentlite/tests/scenarios/test_cli_debug_verbose.py
@@ -0,0 +1,221 @@
+"""Debug script with detailed logging to find CLI test hang cause."""
+
+from __future__ import annotations
+
+import os
+import sys
+import asyncio
+import logging
+import time
+
+sys.path.insert(0, "/home/tcmofashi/proj/l2d_backend/agentlite/src")
+
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    datefmt="%H:%M:%S",
+)
+logger = logging.getLogger("debug")
+
+# SiliconFlow DeepSeek-V3 (known good function calling support)
+SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1"
+SILICONFLOW_MODEL = "Pro/deepseek-ai/DeepSeek-V3.2"
+SILICONFLOW_API_KEY = "sk-eaxfgkkcuatochftxpevkyvltghigsrclzjzalybmaqycual"
+
+
+async def main():
+    from agentlite import Agent, OpenAIProvider
+    from agentlite.tools.shell.shell import Shell
+    from agentlite.message import Message
+
+    logger.info("=" * 60)
+    logger.info("CLI Debug Test with DeepSeek-V3 (SiliconFlow)")
+    logger.info("=" * 60)
+
+    api_key = os.environ.get("SILICONFLOW_API_KEY") or SILICONFLOW_API_KEY
+    if not api_key:
+        logger.error("SILICONFLOW_API_KEY not set")
+        return
+
+    logger.info(f"Using model: {SILICONFLOW_MODEL}")
+
+    provider = OpenAIProvider(
+        api_key=api_key,
+        base_url=SILICONFLOW_BASE_URL,
+        model=SILICONFLOW_MODEL,
+        timeout=30.0,
+    )
+
+    agent = Agent(
+        provider=provider,
+        system_prompt="You are a shell assistant. Execute commands when asked. Reply briefly.",
+        tools=[Shell(timeout=10)],
+        max_iterations=5,
+    )
+
+    start_time = time.time()
+    message = "Run 'echo test' and tell me the result."
+
+    logger.info(f"\n=== Starting Agent Run ===")
+    logger.info(f"Message: {message}")
+    logger.info(f"Max iterations: {agent.max_iterations}")
+    logger.info(f"Tools: {[t.name for t in agent.tools.tools]}")
+
+    agent._history.append(Message(role="user", content=message))
+
+    iterations = 0
+    final_response = None
+
+    while iterations < agent.max_iterations:
+        iterations += 1
+        elapsed = time.time() - start_time
+
+        logger.info(f"\n{'=' * 50}")
+        logger.info(f"ITERATION {iterations}/{agent.max_iterations} (elapsed: {elapsed:.1f}s)")
+        logger.info(f"{'=' * 50}")
+
+        # Step 1: Call Provider
+        logger.info(">>> Step 1: Calling provider.generate()...")
+        step_start = time.time()
+
+        try:
+            stream = await asyncio.wait_for(
+                provider.generate(
+                    system_prompt=agent.system_prompt,
+                    tools=agent.tools.tools,
+                    history=agent._history,
+                ),
+                timeout=60.0,
+            )
+            logger.info(f"<<< Provider returned stream in {time.time() - step_start:.2f}s")
+        except asyncio.TimeoutError:
+            logger.error("!!! Provider call TIMEOUT after 60s")
+            final_response = "ERROR: Provider timeout"
+            break
+
+        # Step 2: Collect stream parts
+        logger.info(">>> Step 2: Collecting stream parts...")
+        step_start = time.time()
+
+        from agentlite.message import TextPart, ToolCall, ContentPart
+
+        response_parts = []
+        tool_calls = []
+        chunk_count = 0
+
+        try:
+            async for part in stream:
+                chunk_count += 1
+                if chunk_count % 10 == 0:
+                    logger.debug(f"    Received chunk #{chunk_count}")
+
+                if isinstance(part, ToolCall):
+                    tool_calls.append(part)
+                    logger.info(
+                        f"    ToolCall received: {part.function.name if hasattr(part, 'function') else part}"
+                    )
+                elif isinstance(part, ContentPart):
+                    response_parts.append(part)
+                    if isinstance(part, TextPart):
+                        logger.debug(f"    Text: {part.text[:50]}...")
+
+            logger.info(
+                f"<<< Stream finished in {time.time() - step_start:.2f}s, {chunk_count} chunks"
+            )
+        except asyncio.TimeoutError:
+            logger.error("!!! Stream reading TIMEOUT")
+            final_response = "ERROR: Stream timeout"
+            break
+        except Exception as e:
+            logger.error(f"!!! Stream error: {type(e).__name__}: {e}")
+            final_response = f"ERROR: Stream error - {e}"
+            break
+
+        # Extract text
+        response_text = ""
+        for part in response_parts:
+            if isinstance(part, TextPart):
+                response_text += part.text
+        logger.info(f"Response text ({len(response_text)} chars): {response_text[:100]}...")
+        logger.info(f"Tool calls: {len(tool_calls)}")
+
+        # Add to history
+        agent._history.append(
+            Message(
+                role="assistant",
+                content=response_parts,
+                tool_calls=tool_calls if tool_calls else None,
+            )
+        )
+
+        # Step 3: Check if done
+        if not tool_calls:
+            elapsed = time.time() - start_time
+            logger.info(f"\n=== Agent completed in {elapsed:.2f}s, {iterations} iterations ===")
+            final_response = response_text
+            break
+
+        # Step 4: Execute tool calls
+        logger.info(f"\n>>> Step 3: Executing {len(tool_calls)} tool calls...")
+        step_start = time.time()
+
+        for i, tc in enumerate(tool_calls):
+            func_name = tc.function.name if hasattr(tc, "function") else str(tc)
+            func_args = tc.function.arguments if hasattr(tc, "function") else ""
+            logger.info(f"    Tool #{i + 1}: {func_name}")
+            logger.info(f"    Args: {func_args[:200]}...")
+
+            try:
+                result = await asyncio.wait_for(
+                    agent.tools.handle(tc),
+                    timeout=30.0,
+                )
+                output = result.output if hasattr(result, "output") else str(result)
+                is_error = result.is_error if hasattr(result, "is_error") else False
+                logger.info(
+                    f"    Result: is_error={is_error}, output_len={len(output) if output else 0}"
+                )
+                output_preview = output[:100] if output else "None"
+                logger.info(f"    Output preview: {output_preview}...")
+            except asyncio.TimeoutError:
+                logger.error(f"    !!! Tool execution TIMEOUT")
+                output = "Tool execution timed out"
+                is_error = True
+            except Exception as e:
+                logger.error(f"    !!! Tool error: {type(e).__name__}: {e}")
+                output = str(e)
+                is_error = True
+
+            # Add tool result to history
+            agent._history.append(
+                Message(
+                    role="tool",
+                    content=output,
+                    tool_call_id=tc.id if hasattr(tc, "id") else f"tc_{i}",
+                )
+            )
+
+        logger.info(f"<<< Tool execution finished in {time.time() - step_start:.2f}s")
+
+        # Check overall timeout
+        elapsed = time.time() - start_time
+        if elapsed > 90:
+            logger.warning(f"!!! Overall timeout approaching ({elapsed:.1f}s)")
+            final_response = f"Timeout after {iterations} iterations"
+            break
+
+    if iterations >= agent.max_iterations:
+        logger.warning(f"!!! Max iterations reached ({agent.max_iterations})")
+        final_response = f"Max iterations ({agent.max_iterations}) reached"
+
+    logger.info(f"\n{'=' * 60}")
+    logger.info(f"FINAL RESULT:")
+    logger.info(f"{'=' * 60}")
+    logger.info(f"{final_response}")
+    logger.info(f"Total iterations: {iterations}")
+    logger.info(f"Total time: {time.time() - start_time:.2f}s")
+    logger.info(f"History length: {len(agent._history)}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/agentlite/tests/scenarios/test_cli_operations_real_api.py
+++ b/agentlite/tests/scenarios/test_cli_operations_real_api.py
@@ -0,0 +1,349 @@
+"""End-to-end test for complex CLI operations with real API.
+
+This test simulates a realistic complex CLI task where an agent:
+1. Explores project structure using shell commands
+2. Searches for specific patterns using grep/glob
+3. Reads relevant files
+4. Creates analysis reports
+
+Uses real SiliconFlow qwen3.5-397B API (requires SILICONFLOW_API_KEY env var).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from agentlite import Agent, OpenAIProvider
+from agentlite.tools import (
+    ConfigurableToolset,
+    ToolSuiteConfig,
+    Shell,
+    ReadFile,
+    WriteFile,
+    Glob,
+    Grep,
+)
+
+# =============================================================================
+# Configuration from model_config.toml
+# =============================================================================
+
+SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1"
+SILICONFLOW_MODEL = "Qwen/Qwen3.5-397B-A17B"
+
+
+def get_siliconflow_provider() -> OpenAIProvider | None:
+    """Create OpenAIProvider for SiliconFlow API."""
+    api_key = os.environ.get("SILICONFLOW_API_KEY")
+    if not api_key:
+        return None
+    return OpenAIProvider(
+        api_key=api_key,
+        base_url=SILICONFLOW_BASE_URL,
+        model=SILICONFLOW_MODEL,
+    )
+
+
+@pytest.fixture
+def real_provider():
+    """Create real SiliconFlow provider."""
+    provider = get_siliconflow_provider()
+    if provider is None:
+        pytest.skip("SILICONFLOW_API_KEY not set")
+    return provider
+
+
+@pytest.fixture
+def test_project():
+    """Create a mock project structure for testing."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        project_dir = Path(tmpdir) / "test_project"
+        project_dir.mkdir()
+
+        # Create project structure
+        (project_dir / "src").mkdir()
+        (project_dir / "src" / "utils").mkdir()
+        (project_dir / "tests").mkdir()
+        (project_dir / "docs").mkdir()
+
+        # Create source files
+        (project_dir / "src" / "main.py").write_text('''"""Main module."""
+from src.utils.helper import process_data
+from src.utils.logger import setup_logger
+
+def main():
+    """Main entry point."""
+    logger = setup_logger()
+    data = [1, 2, 3, 4, 5]
+    result = process_data(data)
+    logger.info(f"Result: {result}")
+    return result
+
+if __name__ == "__main__":
+    main()
+''')
+
+        (project_dir / "src" / "__init__.py").write_text('"""Source package."""')
+
+        (project_dir / "src" / "utils" / "helper.py").write_text('''"""Helper utilities."""
+def process_data(data: list) -> list:
+    """Process input data."""
+    return [x * 2 for x in data]
+
+def validate_data(data: list) -> bool:
+    """Validate data format."""
+    return all(isinstance(x, (int, float)) for x in data)
+''')
+
+        (project_dir / "src" / "utils" / "logger.py").write_text('''"""Logging utilities."""
+import logging
+
+def setup_logger(name: str = "app") -> logging.Logger:
+    """Setup application logger."""
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.INFO)
+    return logger
+''')
+
+        (project_dir / "src" / "utils" / "__init__.py").write_text('"""Utils package."""')
+
+        # Create test files
+        (project_dir / "tests" / "test_helper.py").write_text('''"""Tests for helper module."""
+from src.utils.helper import process_data, validate_data
+
+def test_process_data():
+    assert process_data([1, 2, 3]) == [2, 4, 6]
+
+def test_validate_data():
+    assert validate_data([1, 2, 3]) == True
+    assert validate_data(["a", "b"]) == False
+''')
+
+        # Create documentation
+        (project_dir / "docs" / "README.md").write_text("""# Test Project
+
+A sample project for testing CLI operations.
+
+## Structure
+
+- `src/` - Source code
+- `tests/` - Unit tests
+- `docs/` - Documentation
+""")
+
+        (project_dir / "README.md").write_text("""# Test Project
+
+Simple data processing project.
+
+## Usage
+
+```bash
+python -m src.main
+```
+""")
+
+        yield project_dir
+
+
+@pytest.mark.scenario
+@pytest.mark.slow
+class TestComplexCLITasks:
+    """End-to-end tests with complex CLI operations."""
+
+    @pytest.mark.asyncio
+    async def test_explore_project_structure(self, real_provider, test_project):
+        """Test exploring project structure using CLI tools.
+
+        Task: Use shell commands to explore the project structure,
+        then summarize what files exist.
+        """
+        # Create toolset with Shell tool
+        toolset = ConfigurableToolset(
+            config=ToolSuiteConfig(
+                shell_tools=ToolSuiteConfig().shell_tools,
+            ),
+            work_dir=str(test_project),
+        )
+
+        agent = Agent(
+            provider=real_provider,
+            tools=toolset.tools,
+            system_prompt=(
+                "你是一个项目分析助手。使用 Shell 工具执行命令来探索项目结构。"
+                "请使用 find、ls、tree 等命令来了解项目。"
+            ),
+            max_iterations=5,  # Limit iterations to prevent hanging
+        )
+
+        # Add overall timeout to prevent infinite hanging
+        try:
+            response = await asyncio.wait_for(
+                agent.run(
+                    f"探索项目目录 {test_project} 的结构，列出所有文件和目录，并总结项目的组织方式。"
+                ),
+                timeout=120.0,  # 2 minute overall timeout
+            )
+        except asyncio.TimeoutError:
+            pytest.fail("Agent timed out after 120 seconds - possible infinite loop")
+
+        assert response, "Agent should return a response"
+        print(f"\n[项目结构探索结果]:\n{response}\n")
+
+        # Verify response mentions key files
+        response_lower = response.lower()
+        assert any(
+            word in response_lower for word in ["src", "tests", "main.py", "helper", "logger"]
+        ), "Response should mention project files"
+
+    @pytest.mark.asyncio
+    async def test_search_and_analyze_code(self, real_provider, test_project):
+        """Test searching for patterns and analyzing code.
+
+        Task: Use grep/glob to find specific patterns,
+        read the files, and create an analysis report.
+        """
+        # Create toolset with all file tools
+        toolset = ConfigurableToolset(
+            config=ToolSuiteConfig(
+                file_tools=ToolSuiteConfig().file_tools,
+                shell_tools=ToolSuiteConfig().shell_tools,
+            ),
+            work_dir=str(test_project),
+        )
+
+        agent = Agent(
+            provider=real_provider,
+            tools=toolset.tools,
+            system_prompt=(
+                "你是一个代码分析助手。使用 Glob、Grep、ReadFile 等工具来搜索和分析代码。"
+                "请使用 Shell 工具执行 grep、find 等命令。"
+            ),
+        )
+
+        response = await agent.run(
+            f"在项目 {test_project} 中搜索所有包含 'def ' 的 Python 文件，"
+            f"列出找到的函数定义，并创建一个函数清单文件保存到 {test_project}/functions.txt。"
+        )
+
+        assert response, "Agent should return a response"
+        print(f"\n[代码搜索分析结果]:\n{response}\n")
+
+        # Check if analysis file was created
+        functions_file = test_project / "functions.txt"
+        if functions_file.exists():
+            content = functions_file.read_text()
+            print(f"\n[函数清单文件]:\n{content}\n")
+            assert len(content) > 0, "Functions file should not be empty"
+
+    @pytest.mark.asyncio
+    async def test_complex_multi_step_task(self, real_provider, test_project):
+        """Test a complex multi-step CLI task.
+
+        Task:
+        1. Find all Python files using shell
+        2. Search for TODO comments using grep
+        3. Read files with TODOs
+        4. Create a summary report
+        """
+        # Add some TODO comments
+        todo_file = test_project / "src" / "utils" / "todo_items.py"
+        todo_file.write_text('''"""Module with TODO items."""
+
+# TODO: Implement error handling
+def risky_operation(data):
+    """Perform a risky operation."""
+    return data / 0  # This will fail
+
+# TODO: Add caching mechanism
+def expensive_computation(n):
+    """Perform expensive computation."""
+    return sum(range(n))
+
+# FIXME: Memory leak in this function
+def process_large_file(path):
+    """Process a large file."""
+    with open(path) as f:
+        return f.read()
+''')
+
+        # Create comprehensive toolset
+        toolset = ConfigurableToolset(
+            config=ToolSuiteConfig(
+                file_tools=ToolSuiteConfig().file_tools,
+                shell_tools=ToolSuiteConfig().shell_tools,
+            ),
+            work_dir=str(test_project),
+        )
+
+        agent = Agent(
+            provider=real_provider,
+            tools=toolset.tools,
+            system_prompt=(
+                "你是一个项目维护助手。"
+                "使用 Shell 工具执行命令（如 find、grep、ls 等）。"
+                "使用 ReadFile 读取文件内容。"
+                "使用 WriteFile 创建新文件。"
+                "请一步一步完成任务。"
+            ),
+        )
+
+        response = await agent.run(
+            f"请完成以下任务：\n"
+            f"1. 使用 'find' 命令找出项目 {test_project} 中所有的 .py 文件\n"
+            f"2. 使用 'grep' 命令搜索所有包含 'TODO' 或 'FIXME' 的行\n"
+            f"3. 读取包含 TODO 的文件内容\n"
+            f"4. 创建一个 TODO 报告文件，保存到 {test_project}/todo_report.txt"
+        )
+
+        assert response, "Agent should return a response"
+        print(f"\n[复杂任务结果]:\n{response}\n")
+
+        # Verify report was created
+        report_file = test_project / "todo_report.txt"
+        if report_file.exists():
+            content = report_file.read_text()
+            print(f"\n[TODO 报告]:\n{content}\n")
+
+    @pytest.mark.asyncio
+    async def test_shell_pipes_and_chains(self, real_provider, test_project):
+        """Test complex shell commands with pipes and chains.
+
+        Task: Use shell pipes to perform complex data processing.
+        """
+        toolset = ConfigurableToolset(
+            config=ToolSuiteConfig(
+                shell_tools=ToolSuiteConfig().shell_tools,
+            ),
+            work_dir=str(test_project),
+        )
+
+        agent = Agent(
+            provider=real_provider,
+            tools=toolset.tools,
+            system_prompt=(
+                "你是一个 Shell 命令专家。"
+                "使用复杂的 Shell 命令（管道、重定向、条件执行等）来完成任务。"
+            ),
+        )
+
+        response = await agent.run(
+            f"在项目目录 {test_project} 中执行以下操作：\n"
+            f"1. 使用 'find . -name \"*.py\" | xargs wc -l' 统计所有 Python 文件的总行数\n"
+            f'2. 使用 \'grep -r "def " --include="*.py" | wc -l\' 统计函数定义数量\n'
+            f"3. 使用 'ls -la' 查看目录详情\n"
+            f"报告你的发现。"
+        )
+
+        assert response, "Agent should return a response"
+        print(f"\n[Shell 管道命令结果]:\n{response}\n")
+
+        # Verify response contains relevant information
+        response_lower = response.lower()
+        assert any(
+            word in response_lower for word in ["行", "line", "函数", "function", "文件", "file"]
+        ), "Response should mention analysis results"
--- a/agentlite/tests/scenarios/test_file_operations.py
+++ b/agentlite/tests/scenarios/test_file_operations.py
@@ -0,0 +1,374 @@
+"""End-to-end scenario test for file operations.
+
+This test simulates a realistic scenario where an agent:
+1. Reads a file
+2. Explains its content
+3. Creates a new file with analysis results
+
+This is a meaningful e2e test that demonstrates the agent's ability to
+orchestrate multiple tool calls in sequence.
+"""
+
+from __future__ import annotations
+
+import os
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from agentlite import Agent, TextPart, tool
+
+
+# =============================================================================
+# File Operation Tools
+# =============================================================================
+
+
+@tool()
+async def read_file(file_path: str) -> str:
+    """Read the content of a file.
+
+    Args:
+        file_path: Path to the file to read.
+
+    Returns:
+        The content of the file as a string.
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+    """
+    with open(file_path) as f:
+        return f.read()
+
+
+@tool()
+async def write_file(file_path: str, content: str) -> str:
+    """Write content to a file, creating it if it doesn't exist.
+
+    Args:
+        file_path: Path to the file to write.
+        content: Content to write to the file.
+
+    Returns:
+        Success message confirming the file was written.
+    """
+    # Create parent directories if they don't exist
+    Path(file_path).parent.mkdir(parents=True, exist_ok=True)
+
+    with open(file_path, "w") as f:
+        f.write(content)
+
+    return f"File successfully written to {file_path}"
+
+
+@tool()
+async def list_files(directory: str) -> str:
+    """List all files in a directory.
+
+    Args:
+        directory: Path to the directory to list.
+
+    Returns:
+        A newline-separated list of file names in the directory.
+    """
+    files = os.listdir(directory)
+    return "\n".join(files)
+
+
+# =============================================================================
+# E2E Test
+# =============================================================================
+
+
+# Scenario tests that script a mock provider and then run an Agent against
+# temporary files. They depend on a ``mock_provider`` fixture that is defined
+# outside this file.
+@pytest.mark.scenario
+class TestFileOperationsScenario:
+    """End-to-end test for file read/write operations."""
+
+    @pytest.mark.asyncio
+    async def test_read_explain_and_write(self, mock_provider):
+        """Test a complete workflow: read file -> explain -> write results."""
+        # Setup: Create a temporary file with content
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create a source file to read
+            source_file = os.path.join(tmpdir, "source.txt")
+            source_content = """Project Overview
+================
+
+This is a sample project document for testing.
+
+Features:
+- Feature A: Does something useful
+- Feature B: Does something else
+- Feature C: The most important feature
+
+Conclusion: This project demonstrates file operations.
+"""
+            with open(source_file, "w") as f:
+                f.write(source_content)
+
+            # Configure mock provider responses
+            # The agent should:
+            # 1. Read the file
+            # 2. Summarize it
+            # 3. Write the summary to a new file
+            mock_provider.add_text_response(
+                f"I'll read the file at {source_file} and analyze it for you."
+            )
+
+            # Create agent with file tools
+            tools = [read_file, write_file, list_files]
+            agent = Agent(
+                provider=mock_provider,
+                tools=tools,
+                system_prompt="You are a helpful file analysis assistant.",
+            )
+
+            # Step 1: Agent reads and analyzes the file
+            # NOTE(review): clear_responses() presumably drops the text response
+            # queued above, which would make that setup dead code - confirm
+            # against the mock_provider fixture.
+            mock_provider.clear_responses()
+            mock_provider.add_tool_call(
+                "read_file",
+                {"file_path": source_file},
+                source_content,
+            )
+
+            # Agent analyzes the content
+            mock_provider.add_text_response(
+                "I've read the file. It's a project overview document with 3 features. "
+                "Let me create a summary file."
+            )
+
+            # Step 2: Agent writes summary to a new file
+            summary_file = os.path.join(tmpdir, "summary.txt")
+            expected_summary = """Project Summary
+================
+
+This is a sample project with 3 main features:
+- Feature A, - Feature B, - Feature C
+
+The most important feature is Feature C.
+"""
+
+            # NOTE(review): this second clear_responses() presumably also drops
+            # the read_file call and analysis text queued for step 1, leaving
+            # only the write_file call and the final text below - confirm
+            # whether the read step is ever exercised.
+            mock_provider.clear_responses()
+            mock_provider.add_tool_call(
+                "write_file",
+                {
+                    "file_path": summary_file,
+                    "content": expected_summary,
+                },
+                f"File successfully written to {summary_file}",
+            )
+            mock_provider.add_text_response(f"I've created a summary at {summary_file}")
+
+            # Execute the agent
+            response = await agent.run(
+                f"Please read {source_file}, analyze it, and create a summary file at {summary_file}"
+            )
+
+            # Verify the interaction
+            # Only the final reply text is checked; nothing here asserts that
+            # summary_file exists on disk or what it contains.
+            assert "summary" in response.lower()
+
+            # Verify the provider was called correctly
+            assert len(mock_provider.calls) >= 1
+
+    @pytest.mark.asyncio
+    async def test_list_files_scenario(self, mock_provider):
+        """Test listing files in a directory."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create some test files
+            for i in range(3):
+                with open(os.path.join(tmpdir, f"file{i}.txt"), "w") as f:
+                    f.write(f"Content {i}")
+
+            # Configure agent to list files
+            # The third argument is the scripted tool output; whether the real
+            # list_files tool also runs depends on the mock - TODO confirm.
+            mock_provider.add_tool_call(
+                "list_files",
+                {"directory": tmpdir},
+                "file0.txt\nfile1.txt\nfile2.txt",
+            )
+            mock_provider.add_text_response(
+                f"I found 3 files in {tmpdir}: file0.txt, file1.txt, file2.txt"
+            )
+
+            agent = Agent(
+                provider=mock_provider,
+                tools=[list_files],
+                system_prompt="You are a file system assistant.",
+            )
+
+            response = await agent.run(f"List all files in {tmpdir}")
+
+            # Matches the scripted final text response queued above.
+            assert "3 files" in response
+
+    @pytest.mark.asyncio
+    async def test_multi_step_file_workflow(self, mock_provider):
+        """Test a complex multi-step file workflow.
+
+        Scenario:
+        1. List files in directory
+        2. Read each file
+        3. Create a combined report
+        """
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create test files
+            files_content = {
+                "report1.txt": "Sales increased by 20%",
+                "report2.txt": "Customer satisfaction at 85%",
+                "report3.txt": "Bug fixes: 15 resolved",
+            }
+
+            for name, content in files_content.items():
+                with open(os.path.join(tmpdir, name), "w") as f:
+                    f.write(content)
+
+            # Configure agent responses for multi-step workflow
+            tools = [read_file, write_file, list_files]
+
+            # Step 1: List files
+            mock_provider.add_tool_call(
+                "list_files",
+                {"directory": tmpdir},
+                "report1.txt\nreport2.txt\nreport3.txt",
+            )
+
+            # Step 2: Read all files
+            mock_provider.add_tool_call(
+                "read_file",
+                {"file_path": os.path.join(tmpdir, "report1.txt")},
+                "Sales increased by 20%",
+            )
+            mock_provider.add_tool_call(
+                "read_file",
+                {"file_path": os.path.join(tmpdir, "report2.txt")},
+                "Customer satisfaction at 85%",
+            )
+            mock_provider.add_tool_call(
+                "read_file",
+                {"file_path": os.path.join(tmpdir, "report3.txt")},
+                "Bug fixes: 15 resolved",
+            )
+
+            # Step 3: Write combined report
+            combined_report = """Combined Report
+================
+
+1. Sales: Increased by 20%
+2. Customer Satisfaction: 85%
+3. Development: 15 bugs resolved
+"""
+            mock_provider.add_tool_call(
+                "write_file",
+                {
+                    "file_path": os.path.join(tmpdir, "combined_report.txt"),
+                    "content": combined_report,
+                },
+                f"File successfully written to {os.path.join(tmpdir, 'combined_report.txt')}",
+            )
+
+            mock_provider.add_text_response(
+                "I've created a combined report summarizing all three reports."
+            )
+
+            agent = Agent(
+                provider=mock_provider,
+                tools=tools,
+                system_prompt="You are a report analyst assistant.",
+            )
+
+            response = await agent.run(
+                f"List all files in {tmpdir}, read them all, and create a combined report at combined_report.txt"
+            )
+
+            # Only the final reply text is checked; the combined report file
+            # itself is not inspected.
+            assert "combined report" in response.lower()
+
+
+# =============================================================================
+# Additional Tools for Extended Scenarios
+# =============================================================================
+
+
+@tool()
+async def count_words(file_path: str) -> str:
+    """Count the number of words in a file.
+
+    Args:
+        file_path: Path to the file to analyze.
+
+    Returns:
+        The word count as a string.
+    """
+    with open(file_path) as f:
+        content = f.read()
+        word_count = len(content.split())
+        return f"Word count: {word_count}"
+
+
+@tool()
+async def append_to_file(file_path: str, content: str) -> str:
+    """Append content to an existing file.
+
+    Args:
+        file_path: Path to the file to append to.
+        content: Content to append.
+
+    Returns:
+        Success message.
+    """
+    with open(file_path, "a") as f:
+        f.write("\n" + content)
+    return f"Content appended to {file_path}"
+
+
+@pytest.mark.scenario
+class TestExtendedFileOperations:
+    """Extended scenarios with more file operations."""
+
+    @pytest.mark.asyncio
+    async def test_read_count_and_append(self, mock_provider):
+        """Test reading a file, counting words, and appending a note."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            source_file = os.path.join(tmpdir, "document.txt")
+            with open(source_file, "w") as f:
+                f.write("This is a test document with several words in it.")
+
+            tools = [read_file, write_file, count_words, append_to_file]
+
+            # Step 1: Read file
+            mock_provider.add_tool_call(
+                "read_file",
+                {"file_path": source_file},
+                "This is a test document with several words in it.",
+            )
+
+            # Step 2: Count words
+            mock_provider.add_tool_call(
+                "count_words",
+                {"file_path": source_file},
+                "Word count: 10",
+            )
+
+            # Step 3: Append analysis
+            mock_provider.add_tool_call(
+                "append_to_file",
+                {
+                    "file_path": source_file,
+                    "content": "\n\n[Analysis] This document contains 10 words.",
+                },
+                f"Content appended to {source_file}",
+            )
+
+            mock_provider.add_text_response(
+                "I've analyzed the document and appended the word count analysis."
+            )
+
+            agent = Agent(
+                provider=mock_provider,
+                tools=tools,
+                system_prompt="You are a document analysis assistant.",
+            )
+
+            response = await agent.run(
+                f"Read {source_file}, count its words, and append the word count as an analysis note"
+            )
+
+            assert "analyzed" in response.lower()
--- a/agentlite/tests/scenarios/test_file_operations_real_api.py
+++ b/agentlite/tests/scenarios/test_file_operations_real_api.py
@@ -0,0 +1,226 @@
+"""End-to-end scenario test for file operations with real API.
+
+This test simulates a realistic scenario where an agent:
+1. Reads a file
+2. Explains its content
+3. Creates a new file with analysis results
+
+Uses real SiliconFlow qwen3.5-397B API (requires SILICONFLOW_API_KEY env var).
+"""
+
+from __future__ import annotations
+
+import os
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from agentlite import Agent, OpenAIProvider, tool
+
+# =============================================================================
+# Configuration from model_config.toml
+# =============================================================================
+
+# SiliconFlow settings passed to OpenAIProvider (matches qwen35_397b in model_config.toml)
+SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1"
+SILICONFLOW_MODEL = "Qwen/Qwen3.5-397B-A17B"
+
+
+def get_siliconflow_provider() -> OpenAIProvider | None:
+    """Create OpenAIProvider for SiliconFlow API.
+
+    Returns None if SILICONFLOW_API_KEY is not set.
+    """
+    api_key = os.environ.get("SILICONFLOW_API_KEY")
+    if not api_key:
+        return None
+
+    return OpenAIProvider(
+        api_key=api_key,
+        base_url=SILICONFLOW_BASE_URL,
+        model=SILICONFLOW_MODEL,
+    )
+
+
+# =============================================================================
+# File Operation Tools
+# =============================================================================
+
+
+@tool()
+async def read_file(file_path: str) -> str:
+    """Read the content of a file.
+
+    Args:
+        file_path: Path to the file to read.
+
+    Returns:
+        The content of the file as a string.
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+    """
+    with open(file_path) as f:
+        return f.read()
+
+
+@tool()
+async def write_file(file_path: str, content: str) -> str:
+    """Write content to a file, creating it if it doesn't exist.
+
+    Args:
+        file_path: Path to the file to write.
+        content: Content to write to the file.
+
+    Returns:
+        Success message confirming the file was written.
+    """
+    # Create parent directories if they don't exist
+    Path(file_path).parent.mkdir(parents=True, exist_ok=True)
+
+    with open(file_path, "w") as f:
+        f.write(content)
+
+    return f"File successfully written to {file_path}"
+
+
+@tool()
+async def list_files(directory: str) -> str:
+    """List all files in a directory.
+
+    Args:
+        directory: Path to the directory to list.
+
+    Returns:
+        A newline-separated list of file names in the directory.
+    """
+    files = os.listdir(directory)
+    return "\n".join(files)
+
+
+# =============================================================================
+# Real API E2E Tests
+# =============================================================================
+
+
+@pytest.fixture
+def real_provider():
+    """Create a real SiliconFlow provider.
+
+    Skip tests if SILICONFLOW_API_KEY is not set.
+    """
+    provider = get_siliconflow_provider()
+    if provider is None:
+        pytest.skip("SILICONFLOW_API_KEY not set, skipping real API tests")
+    return provider
+
+
+@pytest.mark.scenario
+@pytest.mark.expensive
+class TestFileOperationsWithRealAPI:
+    """End-to-end tests with real SiliconFlow API."""
+
+    @pytest.mark.asyncio
+    async def test_read_and_summarize(self, real_provider):
+        """Test reading a file and creating a summary with real API."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create a source file with meaningful content
+            source_file = os.path.join(tmpdir, "source.txt")
+            source_content = """AgentLite 项目概述
+================
+
+AgentLite 是一个轻量级的 Agent 组件库，主要特点:
+- 异步优先设计
+- OpenAI 兼容 API
+- 工具系统 (支持 MCP)
+- 流式响应支持
+
+使用示例:
+```python
+from agentlite import Agent, OpenAIProvider
+
+provider = OpenAIProvider(api_key="...", model="gpt-4")
+agent = Agent(provider=provider)
+response = await agent.run("Hello!")
+```
+"""
+            with open(source_file, "w") as f:
+                f.write(source_content)
+
+            # Create agent with file tools
+            tools = [read_file, write_file, list_files]
+            agent = Agent(
+                provider=real_provider,
+                tools=tools,
+                system_prompt="你是一个文件分析助手。请使用工具来完成任务。",
+            )
+
+            # Run the agent to read, analyze, and write summary
+            output_file = os.path.join(tmpdir, "summary.txt")
+            response = await agent.run(
+                f"请读取 {source_file} 文件，分析其内容，并创建一个摘要文件保存到 {output_file}。"
+            )
+
+            # Verify the agent responded
+            assert response, "Agent should return a response"
+            print(f"\n[Agent 响应]:\n{response}\n")
+
+            # Verify the output file was created
+            if os.path.exists(output_file):
+                with open(output_file) as f:
+                    output_content = f.read()
+                print(f"\n[输出文件内容]:\n{output_content}\n")
+                assert len(output_content) > 0, "Output file should not be empty"
+
+    @pytest.mark.asyncio
+    async def test_list_files_and_combine(self, real_provider):
+        """Test listing files, reading them, and creating combined report."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create multiple files
+            files = {
+                "sales.txt": "销售额增长了 20%",
+                "users.txt": "用户满意度达到 85%",
+                "bugs.txt": "修复了 15 个问题",
+            }
+            for name, content in files.items():
+                with open(os.path.join(tmpdir, name), "w") as f:
+                    f.write(content)
+
+            # Create agent with file tools
+            tools = [read_file, write_file, list_files]
+            agent = Agent(
+                provider=real_provider,
+                tools=tools,
+                system_prompt="你是一个数据分析助手。请使用工具来完成任务。",
+            )
+
+            # Run the agent
+            report_file = os.path.join(tmpdir, "report.txt")
+            response = await agent.run(
+                f"列出 {tmpdir} 目录中的所有文件，读取每个文件的内容，然后创建一份综合报告保存到 {report_file}。"
+            )
+
+            # Verify the agent responded
+            assert response, "Agent should return a response"
+            print(f"\n[Agent 响应]:\n{response}\n")
+
+            # The agent should have created the report file
+            if os.path.exists(report_file):
+                with open(report_file) as f:
+                    report_content = f.read()
+                print(f"\n[报告文件内容]:\n{report_content}\n")
+
+    @pytest.mark.asyncio
+    async def test_simple_conversation(self, real_provider):
+        """Test basic conversation without tools."""
+        agent = Agent(
+            provider=real_provider,
+            system_prompt="你是一个有帮助的助手。请用中文回答。",
+        )
+
+        response = await agent.run("你好！请简单介绍一下你自己。")
+
+        assert response, "Agent should return a response"
+        print(f"\n[Agent 自我介绍]:\n{response}\n")
+        assert len(response) > 10, "Response should be meaningful"