feat: add a subagent frame
This commit is contained in:
349
agentlite/tests/scenarios/test_cli_operations_real_api.py
Normal file
349
agentlite/tests/scenarios/test_cli_operations_real_api.py
Normal file
@@ -0,0 +1,349 @@
|
||||
"""End-to-end test for complex CLI operations with real API.
|
||||
|
||||
This test simulates a realistic complex CLI task where an agent:
|
||||
1. Explores project structure using shell commands
|
||||
2. Searches for specific patterns using grep/glob
|
||||
3. Reads relevant files
|
||||
4. Creates analysis reports
|
||||
|
||||
Uses real SiliconFlow qwen3.5-397B API (requires SILICONFLOW_API_KEY env var).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from agentlite import Agent, OpenAIProvider
|
||||
from agentlite.tools import (
|
||||
ConfigurableToolset,
|
||||
ToolSuiteConfig,
|
||||
Shell,
|
||||
ReadFile,
|
||||
WriteFile,
|
||||
Glob,
|
||||
Grep,
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# Configuration from model_config.toml
|
||||
# =============================================================================
|
||||
|
||||
SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1"
|
||||
SILICONFLOW_MODEL = "Qwen/Qwen3.5-397B-A17B"
|
||||
|
||||
|
||||
def get_siliconflow_provider() -> OpenAIProvider | None:
|
||||
"""Create OpenAIProvider for SiliconFlow API."""
|
||||
api_key = os.environ.get("SILICONFLOW_API_KEY")
|
||||
if not api_key:
|
||||
return None
|
||||
return OpenAIProvider(
|
||||
api_key=api_key,
|
||||
base_url=SILICONFLOW_BASE_URL,
|
||||
model=SILICONFLOW_MODEL,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def real_provider():
|
||||
"""Create real SiliconFlow provider."""
|
||||
provider = get_siliconflow_provider()
|
||||
if provider is None:
|
||||
pytest.skip("SILICONFLOW_API_KEY not set")
|
||||
return provider
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_project():
|
||||
"""Create a mock project structure for testing."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
project_dir = Path(tmpdir) / "test_project"
|
||||
project_dir.mkdir()
|
||||
|
||||
# Create project structure
|
||||
(project_dir / "src").mkdir()
|
||||
(project_dir / "src" / "utils").mkdir()
|
||||
(project_dir / "tests").mkdir()
|
||||
(project_dir / "docs").mkdir()
|
||||
|
||||
# Create source files
|
||||
(project_dir / "src" / "main.py").write_text('''"""Main module."""
|
||||
from src.utils.helper import process_data
|
||||
from src.utils.logger import setup_logger
|
||||
|
||||
def main():
|
||||
"""Main entry point."""
|
||||
logger = setup_logger()
|
||||
data = [1, 2, 3, 4, 5]
|
||||
result = process_data(data)
|
||||
logger.info(f"Result: {result}")
|
||||
return result
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
''')
|
||||
|
||||
(project_dir / "src" / "__init__.py").write_text('"""Source package."""')
|
||||
|
||||
(project_dir / "src" / "utils" / "helper.py").write_text('''"""Helper utilities."""
|
||||
def process_data(data: list) -> list:
|
||||
"""Process input data."""
|
||||
return [x * 2 for x in data]
|
||||
|
||||
def validate_data(data: list) -> bool:
|
||||
"""Validate data format."""
|
||||
return all(isinstance(x, (int, float)) for x in data)
|
||||
''')
|
||||
|
||||
(project_dir / "src" / "utils" / "logger.py").write_text('''"""Logging utilities."""
|
||||
import logging
|
||||
|
||||
def setup_logger(name: str = "app") -> logging.Logger:
|
||||
"""Setup application logger."""
|
||||
logger = logging.getLogger(name)
|
||||
logger.setLevel(logging.INFO)
|
||||
return logger
|
||||
''')
|
||||
|
||||
(project_dir / "src" / "utils" / "__init__.py").write_text('"""Utils package."""')
|
||||
|
||||
# Create test files
|
||||
(project_dir / "tests" / "test_helper.py").write_text('''"""Tests for helper module."""
|
||||
from src.utils.helper import process_data, validate_data
|
||||
|
||||
def test_process_data():
|
||||
assert process_data([1, 2, 3]) == [2, 4, 6]
|
||||
|
||||
def test_validate_data():
|
||||
assert validate_data([1, 2, 3]) == True
|
||||
assert validate_data(["a", "b"]) == False
|
||||
''')
|
||||
|
||||
# Create documentation
|
||||
(project_dir / "docs" / "README.md").write_text("""# Test Project
|
||||
|
||||
A sample project for testing CLI operations.
|
||||
|
||||
## Structure
|
||||
|
||||
- `src/` - Source code
|
||||
- `tests/` - Unit tests
|
||||
- `docs/` - Documentation
|
||||
""")
|
||||
|
||||
(project_dir / "README.md").write_text("""# Test Project
|
||||
|
||||
Simple data processing project.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
python -m src.main
|
||||
```
|
||||
""")
|
||||
|
||||
yield project_dir
|
||||
|
||||
|
||||
@pytest.mark.scenario
|
||||
@pytest.mark.slow
|
||||
class TestComplexCLITasks:
|
||||
"""End-to-end tests with complex CLI operations."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_explore_project_structure(self, real_provider, test_project):
|
||||
"""Test exploring project structure using CLI tools.
|
||||
|
||||
Task: Use shell commands to explore the project structure,
|
||||
then summarize what files exist.
|
||||
"""
|
||||
# Create toolset with Shell tool
|
||||
toolset = ConfigurableToolset(
|
||||
config=ToolSuiteConfig(
|
||||
shell_tools=ToolSuiteConfig().shell_tools,
|
||||
),
|
||||
work_dir=str(test_project),
|
||||
)
|
||||
|
||||
agent = Agent(
|
||||
provider=real_provider,
|
||||
tools=toolset.tools,
|
||||
system_prompt=(
|
||||
"你是一个项目分析助手。使用 Shell 工具执行命令来探索项目结构。"
|
||||
"请使用 find、ls、tree 等命令来了解项目。"
|
||||
),
|
||||
max_iterations=5, # Limit iterations to prevent hanging
|
||||
)
|
||||
|
||||
# Add overall timeout to prevent infinite hanging
|
||||
try:
|
||||
response = await asyncio.wait_for(
|
||||
agent.run(
|
||||
f"探索项目目录 {test_project} 的结构,列出所有文件和目录,并总结项目的组织方式。"
|
||||
),
|
||||
timeout=120.0, # 2 minute overall timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
pytest.fail("Agent timed out after 120 seconds - possible infinite loop")
|
||||
|
||||
assert response, "Agent should return a response"
|
||||
print(f"\n[项目结构探索结果]:\n{response}\n")
|
||||
|
||||
# Verify response mentions key files
|
||||
response_lower = response.lower()
|
||||
assert any(
|
||||
word in response_lower for word in ["src", "tests", "main.py", "helper", "logger"]
|
||||
), "Response should mention project files"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_and_analyze_code(self, real_provider, test_project):
|
||||
"""Test searching for patterns and analyzing code.
|
||||
|
||||
Task: Use grep/glob to find specific patterns,
|
||||
read the files, and create an analysis report.
|
||||
"""
|
||||
# Create toolset with all file tools
|
||||
toolset = ConfigurableToolset(
|
||||
config=ToolSuiteConfig(
|
||||
file_tools=ToolSuiteConfig().file_tools,
|
||||
shell_tools=ToolSuiteConfig().shell_tools,
|
||||
),
|
||||
work_dir=str(test_project),
|
||||
)
|
||||
|
||||
agent = Agent(
|
||||
provider=real_provider,
|
||||
tools=toolset.tools,
|
||||
system_prompt=(
|
||||
"你是一个代码分析助手。使用 Glob、Grep、ReadFile 等工具来搜索和分析代码。"
|
||||
"请使用 Shell 工具执行 grep、find 等命令。"
|
||||
),
|
||||
)
|
||||
|
||||
response = await agent.run(
|
||||
f"在项目 {test_project} 中搜索所有包含 'def ' 的 Python 文件,"
|
||||
f"列出找到的函数定义,并创建一个函数清单文件保存到 {test_project}/functions.txt。"
|
||||
)
|
||||
|
||||
assert response, "Agent should return a response"
|
||||
print(f"\n[代码搜索分析结果]:\n{response}\n")
|
||||
|
||||
# Check if analysis file was created
|
||||
functions_file = test_project / "functions.txt"
|
||||
if functions_file.exists():
|
||||
content = functions_file.read_text()
|
||||
print(f"\n[函数清单文件]:\n{content}\n")
|
||||
assert len(content) > 0, "Functions file should not be empty"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_complex_multi_step_task(self, real_provider, test_project):
|
||||
"""Test a complex multi-step CLI task.
|
||||
|
||||
Task:
|
||||
1. Find all Python files using shell
|
||||
2. Search for TODO comments using grep
|
||||
3. Read files with TODOs
|
||||
4. Create a summary report
|
||||
"""
|
||||
# Add some TODO comments
|
||||
todo_file = test_project / "src" / "utils" / "todo_items.py"
|
||||
todo_file.write_text('''"""Module with TODO items."""
|
||||
|
||||
# TODO: Implement error handling
|
||||
def risky_operation(data):
|
||||
"""Perform a risky operation."""
|
||||
return data / 0 # This will fail
|
||||
|
||||
# TODO: Add caching mechanism
|
||||
def expensive_computation(n):
|
||||
"""Perform expensive computation."""
|
||||
return sum(range(n))
|
||||
|
||||
# FIXME: Memory leak in this function
|
||||
def process_large_file(path):
|
||||
"""Process a large file."""
|
||||
with open(path) as f:
|
||||
return f.read()
|
||||
''')
|
||||
|
||||
# Create comprehensive toolset
|
||||
toolset = ConfigurableToolset(
|
||||
config=ToolSuiteConfig(
|
||||
file_tools=ToolSuiteConfig().file_tools,
|
||||
shell_tools=ToolSuiteConfig().shell_tools,
|
||||
),
|
||||
work_dir=str(test_project),
|
||||
)
|
||||
|
||||
agent = Agent(
|
||||
provider=real_provider,
|
||||
tools=toolset.tools,
|
||||
system_prompt=(
|
||||
"你是一个项目维护助手。"
|
||||
"使用 Shell 工具执行命令(如 find、grep、ls 等)。"
|
||||
"使用 ReadFile 读取文件内容。"
|
||||
"使用 WriteFile 创建新文件。"
|
||||
"请一步一步完成任务。"
|
||||
),
|
||||
)
|
||||
|
||||
response = await agent.run(
|
||||
f"请完成以下任务:\n"
|
||||
f"1. 使用 'find' 命令找出项目 {test_project} 中所有的 .py 文件\n"
|
||||
f"2. 使用 'grep' 命令搜索所有包含 'TODO' 或 'FIXME' 的行\n"
|
||||
f"3. 读取包含 TODO 的文件内容\n"
|
||||
f"4. 创建一个 TODO 报告文件,保存到 {test_project}/todo_report.txt"
|
||||
)
|
||||
|
||||
assert response, "Agent should return a response"
|
||||
print(f"\n[复杂任务结果]:\n{response}\n")
|
||||
|
||||
# Verify report was created
|
||||
report_file = test_project / "todo_report.txt"
|
||||
if report_file.exists():
|
||||
content = report_file.read_text()
|
||||
print(f"\n[TODO 报告]:\n{content}\n")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_shell_pipes_and_chains(self, real_provider, test_project):
|
||||
"""Test complex shell commands with pipes and chains.
|
||||
|
||||
Task: Use shell pipes to perform complex data processing.
|
||||
"""
|
||||
toolset = ConfigurableToolset(
|
||||
config=ToolSuiteConfig(
|
||||
shell_tools=ToolSuiteConfig().shell_tools,
|
||||
),
|
||||
work_dir=str(test_project),
|
||||
)
|
||||
|
||||
agent = Agent(
|
||||
provider=real_provider,
|
||||
tools=toolset.tools,
|
||||
system_prompt=(
|
||||
"你是一个 Shell 命令专家。"
|
||||
"使用复杂的 Shell 命令(管道、重定向、条件执行等)来完成任务。"
|
||||
),
|
||||
)
|
||||
|
||||
response = await agent.run(
|
||||
f"在项目目录 {test_project} 中执行以下操作:\n"
|
||||
f"1. 使用 'find . -name \"*.py\" | xargs wc -l' 统计所有 Python 文件的总行数\n"
|
||||
f'2. 使用 \'grep -r "def " --include="*.py" | wc -l\' 统计函数定义数量\n'
|
||||
f"3. 使用 'ls -la' 查看目录详情\n"
|
||||
f"报告你的发现。"
|
||||
)
|
||||
|
||||
assert response, "Agent should return a response"
|
||||
print(f"\n[Shell 管道命令结果]:\n{response}\n")
|
||||
|
||||
# Verify response contains relevant information
|
||||
response_lower = response.lower()
|
||||
assert any(
|
||||
word in response_lower for word in ["行", "line", "函数", "function", "文件", "file"]
|
||||
), "Response should mention analysis results"
|
||||
Reference in New Issue
Block a user