"""End-to-end test for complex CLI operations with real API.

This test simulates a realistic complex CLI task where an agent:
1. Explores project structure using shell commands
2. Searches for specific patterns using grep/glob
3. Reads relevant files
4. Creates analysis reports

Uses the real SiliconFlow Qwen3.5-397B API (requires the SILICONFLOW_API_KEY
environment variable).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from agentlite import Agent, OpenAIProvider
|
|
from agentlite.tools import (
|
|
ConfigurableToolset,
|
|
ToolSuiteConfig,
|
|
)
|
|
|
|
# -----------------------------------------------------------------------------
# SiliconFlow endpoint settings.
# The original header notes these values come from model_config.toml.
# -----------------------------------------------------------------------------

# Base URL of the OpenAI-compatible SiliconFlow HTTP API.
SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1"

# Model identifier passed to the provider for every request in this module.
SILICONFLOW_MODEL = "Qwen/Qwen3.5-397B-A17B"
|
|
|
|
|
|
def get_siliconflow_provider() -> OpenAIProvider | None:
    """Build an ``OpenAIProvider`` that targets the SiliconFlow endpoint.

    The API key is read from the ``SILICONFLOW_API_KEY`` environment variable.

    Returns:
        A provider configured with ``SILICONFLOW_BASE_URL`` and
        ``SILICONFLOW_MODEL``, or ``None`` when the environment variable is
        unset or empty.
    """
    key = os.getenv("SILICONFLOW_API_KEY")
    if key:
        return OpenAIProvider(
            api_key=key,
            base_url=SILICONFLOW_BASE_URL,
            model=SILICONFLOW_MODEL,
        )
    # No credentials available: callers decide whether to skip or fail.
    return None
|
|
|
|
|
|
@pytest.fixture
def real_provider():
    """Provide a live SiliconFlow provider, skipping the test without credentials.

    The test requesting this fixture is skipped (not failed) when
    ``get_siliconflow_provider()`` returns ``None``.
    """
    candidate = get_siliconflow_provider()
    if candidate is not None:
        return candidate
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip("SILICONFLOW_API_KEY not set")
|
|
|
|
|
|
@pytest.fixture
def test_project():
    """Yield the root of a throwaway sample project used by the CLI tests.

    The project lives inside a ``tempfile.TemporaryDirectory`` and is removed
    when the fixture is torn down. Layout::

        test_project/
            README.md
            src/__init__.py, src/main.py
            src/utils/__init__.py, src/utils/helper.py, src/utils/logger.py
            tests/test_helper.py
            docs/README.md

    Yields:
        ``pathlib.Path`` pointing at the ``test_project`` directory.
    """
    # Parent directories come before their children so plain mkdir() suffices.
    directories = ("src", "src/utils", "tests", "docs")

    # Relative path -> exact file content, in creation order.
    files = {
        "src/main.py": '''"""Main module."""
from src.utils.helper import process_data
from src.utils.logger import setup_logger

def main():
    """Main entry point."""
    logger = setup_logger()
    data = [1, 2, 3, 4, 5]
    result = process_data(data)
    logger.info(f"Result: {result}")
    return result

if __name__ == "__main__":
    main()
''',
        "src/__init__.py": '"""Source package."""',
        "src/utils/helper.py": '''"""Helper utilities."""
def process_data(data: list) -> list:
    """Process input data."""
    return [x * 2 for x in data]

def validate_data(data: list) -> bool:
    """Validate data format."""
    return all(isinstance(x, (int, float)) for x in data)
''',
        "src/utils/logger.py": '''"""Logging utilities."""
import logging

def setup_logger(name: str = "app") -> logging.Logger:
    """Setup application logger."""
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    return logger
''',
        "src/utils/__init__.py": '"""Utils package."""',
        "tests/test_helper.py": '''"""Tests for helper module."""
from src.utils.helper import process_data, validate_data

def test_process_data():
    assert process_data([1, 2, 3]) == [2, 4, 6]

def test_validate_data():
    assert validate_data([1, 2, 3]) == True
    assert validate_data(["a", "b"]) == False
''',
        "docs/README.md": """# Test Project

A sample project for testing CLI operations.

## Structure

- `src/` - Source code
- `tests/` - Unit tests
- `docs/` - Documentation
""",
        "README.md": """# Test Project

Simple data processing project.

## Usage

```bash
python -m src.main
```
""",
    }

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch) / "test_project"
        root.mkdir()

        for rel_dir in directories:
            (root / rel_dir).mkdir()

        for rel_path, text in files.items():
            (root / rel_path).write_text(text)

        # Yield inside the context manager so the tree outlives the test body.
        yield root
|
|
|
|
|
|
@pytest.mark.scenario
@pytest.mark.slow
class TestComplexCLITasks:
    """End-to-end tests with complex CLI operations.

    Every test drives an agent backed by a real remote model, so each agent
    run goes through ``_run_agent``, which enforces an overall wall-clock
    timeout and turns a hang into an explicit test failure.
    """

    # Overall budget (seconds) for a single, iteration-limited agent run.
    AGENT_TIMEOUT_SECONDS = 120.0

    # Larger budget for runs without an iteration cap.
    # NOTE(review): 300 s is a generous guess, not a measured value - tune it
    # against observed run times for this model.
    MULTI_STEP_TIMEOUT_SECONDS = 300.0

    @staticmethod
    async def _run_agent(
        agent: Agent,
        prompt: str,
        timeout: float = AGENT_TIMEOUT_SECONDS,
    ) -> str:
        """Run ``agent`` on ``prompt`` and fail the test if it exceeds ``timeout``.

        Args:
            agent: Agent whose ``run`` coroutine is awaited.
            prompt: User message handed to ``agent.run``.
            timeout: Maximum number of seconds to wait for the run.

        Returns:
            Whatever ``agent.run`` returns (the tests treat it as text).
        """
        try:
            return await asyncio.wait_for(agent.run(prompt), timeout=timeout)
        except asyncio.TimeoutError:
            pytest.fail(
                f"Agent timed out after {timeout:g} seconds - possible infinite loop"
            )

    @pytest.mark.asyncio
    async def test_explore_project_structure(self, real_provider, test_project):
        """Test exploring project structure using CLI tools.

        Task: Use shell commands to explore the project structure,
        then summarize what files exist.
        """
        # Create toolset with Shell tool
        toolset = ConfigurableToolset(
            config=ToolSuiteConfig(
                shell_tools=ToolSuiteConfig().shell_tools,
            ),
            work_dir=str(test_project),
        )

        agent = Agent(
            provider=real_provider,
            tools=toolset.tools,
            system_prompt=(
                "你是一个项目分析助手。使用 Shell 工具执行命令来探索项目结构。"
                "请使用 find、ls、tree 等命令来了解项目。"
            ),
            max_iterations=5,  # Limit iterations to prevent hanging
        )

        response = await self._run_agent(
            agent,
            f"探索项目目录 {test_project} 的结构,列出所有文件和目录,并总结项目的组织方式。",
            timeout=self.AGENT_TIMEOUT_SECONDS,
        )

        assert response, "Agent should return a response"
        print(f"\n[项目结构探索结果]:\n{response}\n")

        # Verify response mentions key files
        response_lower = response.lower()
        assert any(
            word in response_lower for word in ["src", "tests", "main.py", "helper", "logger"]
        ), "Response should mention project files"

    @pytest.mark.asyncio
    async def test_search_and_analyze_code(self, real_provider, test_project):
        """Test searching for patterns and analyzing code.

        Task: Use grep/glob to find specific patterns,
        read the files, and create an analysis report.
        """
        # Create toolset with all file tools
        toolset = ConfigurableToolset(
            config=ToolSuiteConfig(
                file_tools=ToolSuiteConfig().file_tools,
                shell_tools=ToolSuiteConfig().shell_tools,
            ),
            work_dir=str(test_project),
        )

        agent = Agent(
            provider=real_provider,
            tools=toolset.tools,
            system_prompt=(
                "你是一个代码分析助手。使用 Glob、Grep、ReadFile 等工具来搜索和分析代码。"
                "请使用 Shell 工具执行 grep、find 等命令。"
            ),
        )

        response = await self._run_agent(
            agent,
            f"在项目 {test_project} 中搜索所有包含 'def ' 的 Python 文件,"
            f"列出找到的函数定义,并创建一个函数清单文件保存到 {test_project}/functions.txt。",
            timeout=self.MULTI_STEP_TIMEOUT_SECONDS,
        )

        assert response, "Agent should return a response"
        print(f"\n[代码搜索分析结果]:\n{response}\n")

        # The file is checked only if the agent produced it: creation is
        # treated as best-effort, but an empty file is a failure.
        functions_file = test_project / "functions.txt"
        if functions_file.exists():
            # The file is only printed and checked for non-emptiness, so
            # undecodable bytes are replaced instead of failing the test on a
            # platform whose default encoding is not UTF-8.
            content = functions_file.read_text(encoding="utf-8", errors="replace")
            print(f"\n[函数清单文件]:\n{content}\n")
            assert content, "Functions file should not be empty"

    @pytest.mark.asyncio
    async def test_complex_multi_step_task(self, real_provider, test_project):
        """Test a complex multi-step CLI task.

        Task:
        1. Find all Python files using shell
        2. Search for TODO comments using grep
        3. Read files with TODOs
        4. Create a summary report
        """
        # Add some TODO comments
        todo_file = test_project / "src" / "utils" / "todo_items.py"
        todo_file.write_text('''"""Module with TODO items."""

# TODO: Implement error handling
def risky_operation(data):
    """Perform a risky operation."""
    return data / 0 # This will fail

# TODO: Add caching mechanism
def expensive_computation(n):
    """Perform expensive computation."""
    return sum(range(n))

# FIXME: Memory leak in this function
def process_large_file(path):
    """Process a large file."""
    with open(path) as f:
        return f.read()
''')

        # Create comprehensive toolset
        toolset = ConfigurableToolset(
            config=ToolSuiteConfig(
                file_tools=ToolSuiteConfig().file_tools,
                shell_tools=ToolSuiteConfig().shell_tools,
            ),
            work_dir=str(test_project),
        )

        agent = Agent(
            provider=real_provider,
            tools=toolset.tools,
            system_prompt=(
                "你是一个项目维护助手。"
                "使用 Shell 工具执行命令(如 find、grep、ls 等)。"
                "使用 ReadFile 读取文件内容。"
                "使用 WriteFile 创建新文件。"
                "请一步一步完成任务。"
            ),
        )

        response = await self._run_agent(
            agent,
            f"请完成以下任务:\n"
            f"1. 使用 'find' 命令找出项目 {test_project} 中所有的 .py 文件\n"
            f"2. 使用 'grep' 命令搜索所有包含 'TODO' 或 'FIXME' 的行\n"
            f"3. 读取包含 TODO 的文件内容\n"
            f"4. 创建一个 TODO 报告文件,保存到 {test_project}/todo_report.txt",
            timeout=self.MULTI_STEP_TIMEOUT_SECONDS,
        )

        assert response, "Agent should return a response"
        print(f"\n[复杂任务结果]:\n{response}\n")

        # Same policy as the functions-file check in the sibling test: report
        # creation is best-effort, but a report that exists must not be empty.
        report_file = test_project / "todo_report.txt"
        if report_file.exists():
            content = report_file.read_text(encoding="utf-8", errors="replace")
            print(f"\n[TODO 报告]:\n{content}\n")
            assert content, "TODO report should not be empty"

    @pytest.mark.asyncio
    async def test_shell_pipes_and_chains(self, real_provider, test_project):
        """Test complex shell commands with pipes and chains.

        Task: Use shell pipes to perform complex data processing.
        """
        toolset = ConfigurableToolset(
            config=ToolSuiteConfig(
                shell_tools=ToolSuiteConfig().shell_tools,
            ),
            work_dir=str(test_project),
        )

        agent = Agent(
            provider=real_provider,
            tools=toolset.tools,
            system_prompt=(
                "你是一个 Shell 命令专家。"
                "使用复杂的 Shell 命令(管道、重定向、条件执行等)来完成任务。"
            ),
        )

        response = await self._run_agent(
            agent,
            f"在项目目录 {test_project} 中执行以下操作:\n"
            f"1. 使用 'find . -name \"*.py\" | xargs wc -l' 统计所有 Python 文件的总行数\n"
            f'2. 使用 \'grep -r "def " --include="*.py" | wc -l\' 统计函数定义数量\n'
            f"3. 使用 'ls -la' 查看目录详情\n"
            f"报告你的发现。",
            timeout=self.MULTI_STEP_TIMEOUT_SECONDS,
        )

        assert response, "Agent should return a response"
        print(f"\n[Shell 管道命令结果]:\n{response}\n")

        # Verify response contains relevant information
        response_lower = response.lower()
        assert any(
            word in response_lower for word in ["行", "line", "函数", "function", "文件", "file"]
        ), "Response should mention analysis results"
|