Integration Testing Strategies
Warning
🚨 CRITICAL: NO MOCKS IN INTEGRATION TESTS
Integration tests MUST use real systems, real APIs, and real OpenTelemetry components. Any test that uses mocking (unittest.mock, @patch, Mock()) belongs in tests/unit/, not tests/integration/.
Why: Mocked integration tests create false security and miss critical bugs like the ProxyTracerProvider issue.
Note
Problem-solving guide for integration testing HoneyHive SDK components
Practical solutions for testing how SDK components work together and integrate with real external systems.
Integration testing verifies that the different parts of the HoneyHive SDK work correctly together and integrate properly with real external systems such as OpenAI, Anthropic, and the HoneyHive API, using actual API calls and real OpenTelemetry components.
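The ProxyTracerProvider failure mode called out in the warning above is cheap to guard against directly. A minimal sketch using the public OpenTelemetry API (the helper name is ours; it assumes opentelemetry-sdk is installed):
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

def assert_real_tracer_provider(provider=None):
    """Fail fast if only the no-op ProxyTracerProvider is in use."""
    provider = provider or trace.get_tracer_provider()
    assert isinstance(provider, TracerProvider), (
        f"Expected a real SDK TracerProvider, got {type(provider).__name__}"
    )
Call it with tracer.provider, or with no argument to check the global provider, at the start of an integration test.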
Quick Start
Problem: I need to test my complete HoneyHive integration workflow.
Solution:
import pytest
import os
from honeyhive import HoneyHiveTracer
from honeyhive.api.client import HoneyHive
@pytest.mark.integration
def test_complete_workflow():
"""Test complete tracer + API client workflow."""
# Skip if no real API credentials
api_key = os.getenv("HH_API_KEY")
if not api_key:
pytest.skip("Real API credentials required for integration tests")
# Initialize tracer with real API
tracer = HoneyHiveTracer.init(
api_key=api_key, # Or set HH_API_KEY environment variable
project="test-project", # Or set HH_PROJECT environment variable
test_mode=False # Real integration test (or set HH_TEST_MODE=false)
)
# Initialize API client with real API
client = HoneyHive(
api_key=api_key,
test_mode=False # Real integration test
)
# Test tracer + client integration
with tracer.trace("integration-test") as span:
span.set_attribute("test.type", "integration")
# Test session creation via client
        session = client.sessions.create(session_name="test-session")
span.set_attribute("session.id", session.session_id)
assert session is not None
assert tracer.session_id is not None
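The credential-skip boilerplate above recurs in most examples on this page; in a real suite it is worth centralizing in tests/integration/conftest.py. A sketch, assuming the same HH_API_KEY convention:
# tests/integration/conftest.py
import os
import pytest

@pytest.fixture
def real_api_key():
    """Return a real API key, or skip the test when none is configured."""
    api_key = os.getenv("HH_API_KEY")
    if not api_key:
        pytest.skip("Real API credentials required for integration tests")
    return api_key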
Testing Component Interactions
Problem: Test how tracer and API client work together.
Solution:
import pytest
import os
from honeyhive import HoneyHiveTracer
from honeyhive.api.client import HoneyHive
class TestTracerApiIntegration:
"""Test tracer and API client integration."""
@pytest.fixture
def integration_setup(self):
"""Setup tracer and client for integration testing."""
api_key = os.getenv("HH_API_KEY")
if not api_key:
pytest.skip("Real API credentials required for integration tests")
        tracer = HoneyHiveTracer.init(
            api_key=api_key,
            project="integration-test-project",
            test_mode=False  # Real integration test
        )
client = HoneyHive(
api_key=api_key,
test_mode=False # Real integration test
)
return {"tracer": tracer, "client": client}
def test_session_creation_integration(self, integration_setup):
"""Test session creation through both tracer and client."""
tracer = integration_setup["tracer"]
client = integration_setup["client"]
# Tracer should have created a session
assert tracer.session_id is not None
# Client should be able to retrieve session info
session_info = client.sessions.get(tracer.session_id)
assert session_info is not None
assert session_info.session_id == tracer.session_id
def test_event_creation_integration(self, integration_setup):
"""Test event creation through tracer and retrieval via client."""
tracer = integration_setup["tracer"]
client = integration_setup["client"]
# Create event through tracer
with tracer.trace("integration-event", event_type="test") as span:
span.set_attribute("test.data", "integration-value")
# Retrieve event through client (if event_id available)
if hasattr(span, 'event_id') and span.event_id:
event = client.events.get(span.event_id)
assert event is not None
assert event.event_type == "test"
def test_project_consistency(self, integration_setup):
"""Test project consistency between tracer and client."""
tracer = integration_setup["tracer"]
client = integration_setup["client"]
# Both should reference the same project
assert tracer.project == "integration-test-project"
# Client should be able to access project info
projects = client.projects.list()
project_names = [p.name for p in projects]
assert "integration-test-project" in project_names
Testing Multi-Instance Patterns
Problem: Test multiple tracer instances working together.
Solution:
import pytest
import threading
import time
from honeyhive import HoneyHiveTracer
class TestMultiInstanceIntegration:
"""Test multiple tracer instances working together."""
def test_independent_sessions(self):
"""Test that multiple tracers create independent sessions."""
        tracer1 = HoneyHiveTracer.init(
            api_key="test-key-1",
            project="test-project-1",
            source="development",
            test_mode=True
        )
        tracer2 = HoneyHiveTracer.init(
            api_key="test-key-2",
            project="test-project-2",
            source="staging",
            test_mode=True
        )
# Verify independence
assert tracer1.session_id != tracer2.session_id
assert tracer1.project != tracer2.project
assert tracer1.source != tracer2.source
def test_concurrent_tracing(self):
"""Test concurrent tracing with multiple instances."""
tracers = []
results = []
# Create multiple tracers
for i in range(3):
            tracer = HoneyHiveTracer.init(
                api_key=f"concurrent-key-{i}",
                project=f"concurrent-project-{i}",
                test_mode=True
            )
tracers.append(tracer)
def worker(tracer, worker_id):
"""Worker function for concurrent testing."""
with tracer.trace(f"concurrent-operation-{worker_id}") as span:
span.set_attribute("worker.id", worker_id)
span.set_attribute("tracer.project", tracer.project)
time.sleep(0.1) # Simulate work
results.append({
"worker_id": worker_id,
"session_id": tracer.session_id,
"project": tracer.project
})
# Start concurrent workers
threads = []
for i, tracer in enumerate(tracers):
thread = threading.Thread(target=worker, args=(tracer, i))
threads.append(thread)
thread.start()
# Wait for completion
for thread in threads:
thread.join()
# Verify results
assert len(results) == 3
session_ids = [r["session_id"] for r in results]
assert len(set(session_ids)) == 3 # All unique
projects = [r["project"] for r in results]
assert len(set(projects)) == 3 # All unique
def test_shared_instrumentor_integration(self):
"""Test multiple tracers with shared instrumentors."""
from openinference.instrumentation.openai import OpenAIInstrumentor
# Create instrumentor instance
instrumentor = OpenAIInstrumentor()
# Create tracers with shared instrumentor
# Step 1: Initialize tracers first (without instrumentors)
tracer1 = HoneyHiveTracer.init(
api_key="shared-key-1", # Unique API key for tracer1
project="shared-project-1", # Unique project for tracer1
test_mode=True # Or set HH_TEST_MODE=true
)
tracer2 = HoneyHiveTracer.init(
api_key="shared-key-2", # Unique API key for tracer2
project="shared-project-2", # Unique project for tracer2
test_mode=True # Or set HH_TEST_MODE=true
)
        # Step 2: Point the shared instrumentor at the tracer providers.
        # Caveat: most instrumentors patch the library globally, so a second
        # instrument() call on the same instance is typically a no-op.
        instrumentor.instrument(tracer_provider=tracer1.provider)
        instrumentor.instrument(tracer_provider=tracer2.provider)
# Both should have the instrumentor
assert len(tracer1.instrumentors) > 0
assert len(tracer2.instrumentors) > 0
assert any(isinstance(i, OpenAIInstrumentor) for i in tracer1.instrumentors)
assert any(isinstance(i, OpenAIInstrumentor) for i in tracer2.instrumentors)
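Because instrumentors patch the target library globally, uninstrument them between tests so cases stay independent. A teardown sketch, assuming the standard BaseInstrumentor interface that OpenInference instrumentors follow:
import pytest
from openinference.instrumentation.openai import OpenAIInstrumentor

@pytest.fixture
def openai_instrumentor():
    """Yield a fresh instrumentor and undo its global patch at teardown."""
    instrumentor = OpenAIInstrumentor()
    yield instrumentor
    instrumentor.uninstrument()  # Logs a warning (no error) if never instrumented
A test then calls openai_instrumentor.instrument(tracer_provider=tracer.provider) itself, and the patch is removed automatically when the test finishes.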
Testing LLM Provider Integration
Problem: Test integration with LLM providers like OpenAI and Anthropic.
Solution (these examples mock the provider SDKs, so under the no-mocks policy above they belong in tests/unit/; a real-API variant follows at the end of this section):
import pytest
from unittest.mock import Mock, patch
from honeyhive import HoneyHiveTracer
from openinference.instrumentation.openai import OpenAIInstrumentor
class TestLLMProviderIntegration:
"""Test integration with LLM providers."""
@pytest.fixture
def instrumented_tracer(self):
"""Create tracer with LLM instrumentors."""
# Step 1: Initialize HoneyHive tracer first (without instrumentors)
tracer = HoneyHiveTracer.init(
api_key="llm-test-key", # Or set HH_API_KEY environment variable
project="llm-test-project", # Or set HH_PROJECT environment variable
test_mode=True # Or set HH_TEST_MODE=true
)
# Step 2: Initialize instrumentor separately with tracer_provider
openai_instrumentor = OpenAIInstrumentor()
openai_instrumentor.instrument(tracer_provider=tracer.provider)
return tracer
    # Patch the method on the class so OpenAI() client instances are intercepted
    @patch('openai.resources.chat.completions.Completions.create')
def test_openai_integration(self, mock_create, instrumented_tracer):
"""Test OpenAI integration with tracing."""
# Mock OpenAI response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.usage = Mock()
mock_response.usage.total_tokens = 50
mock_create.return_value = mock_response
# Test with instrumentor
import openai
client = openai.OpenAI(api_key="test-key")
with instrumented_tracer.trace("openai-test") as span:
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Test"}]
)
span.set_attribute("openai.model", "gpt-3.5-turbo")
span.set_attribute("openai.response", response.choices[0].message.content)
assert response.choices[0].message.content == "Test response"
mock_create.assert_called_once()
    # Patch the method on the class so Anthropic() client instances are intercepted
    @patch('anthropic.resources.messages.Messages.create')
def test_anthropic_integration(self, mock_create, instrumented_tracer):
"""Test Anthropic integration with tracing."""
# Mock Anthropic response
mock_response = Mock()
mock_response.content = [Mock()]
mock_response.content[0].text = "Anthropic test response"
mock_response.usage = Mock()
mock_response.usage.input_tokens = 10
mock_response.usage.output_tokens = 15
mock_create.return_value = mock_response
import anthropic
client = anthropic.Anthropic(api_key="test-key")
with instrumented_tracer.trace("anthropic-test") as span:
response = client.messages.create(
model="claude-3-sonnet-20240229",
messages=[{"role": "user", "content": "Test"}],
max_tokens=100
)
span.set_attribute("anthropic.model", "claude-3-sonnet-20240229")
span.set_attribute("anthropic.response", response.content[0].text)
assert response.content[0].text == "Anthropic test response"
mock_create.assert_called_once()
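The two tests above mock the provider SDKs, so under the no-mocks policy at the top of this page they belong in tests/unit/. The true integration counterpart makes a real provider call and skips when credentials are absent. A sketch, assuming OPENAI_API_KEY is exported as described in the environment-variable section below:
import os
import pytest
import openai
from honeyhive import HoneyHiveTracer
from openinference.instrumentation.openai import OpenAIInstrumentor

@pytest.mark.integration
@pytest.mark.llm_provider
def test_openai_real_call():
    """Exercise the real OpenAI API through real instrumentation."""
    if not os.getenv("OPENAI_API_KEY"):
        pytest.skip("OPENAI_API_KEY required for real LLM integration tests")
    tracer = HoneyHiveTracer.init(
        api_key=os.getenv("HH_API_KEY", "llm-test-key"),
        test_mode=True  # HoneyHive stays in test mode; the LLM call is real
    )
    OpenAIInstrumentor().instrument(tracer_provider=tracer.provider)
    client = openai.OpenAI()  # Reads OPENAI_API_KEY from the environment
    with tracer.trace("openai-real-test") as span:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Reply with the word ok."}],
            max_tokens=5,
        )
        span.set_attribute("openai.model", "gpt-3.5-turbo")
    assert response.choices[0].message.content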
Testing Real API Integration
Problem: Test integration with real HoneyHive APIs.
Solution:
import pytest
import os
from honeyhive import HoneyHiveTracer
from honeyhive.api.client import HoneyHive
@pytest.mark.integration
class TestRealAPIIntegration:
"""Test integration with real HoneyHive API endpoints."""
@pytest.fixture(autouse=True)
def setup_integration(self):
"""Setup real API credentials."""
self.api_key = os.getenv("HH_INTEGRATION_API_KEY")
self.project = os.getenv("HH_INTEGRATION_PROJECT", "integration-test")
if not self.api_key:
pytest.skip("Real API credentials not available")
        self.tracer = HoneyHiveTracer.init(
            api_key=self.api_key,
            project=self.project,
            source="development",
            test_mode=False  # Use real API
        )
self.client = HoneyHive(
api_key=self.api_key,
test_mode=False
)
def test_real_session_creation(self):
"""Test creating real session via tracer."""
# Tracer should have created a real session
assert self.tracer.session_id is not None
# Verify session exists via API client
try:
session = self.client.sessions.get(self.tracer.session_id)
assert session is not None
assert session.project == self.project
except Exception as e:
pytest.skip(f"Session verification failed: {e}")
def test_real_event_creation(self):
"""Test creating real events."""
with self.tracer.trace("real-integration-test") as span:
span.set_attribute("test.type", "integration")
span.set_attribute("api.project", self.project)
# Add some realistic test data
span.set_attribute("llm.model", "gpt-3.5-turbo")
span.set_attribute("llm.tokens", 42)
# Force flush to ensure delivery
flush_success = self.tracer.force_flush(timeout_millis=5000)
assert flush_success, "Failed to flush traces to real API"
def test_real_project_integration(self):
"""Test project-level integration."""
# List projects via client
projects = self.client.projects.list()
project_names = [p.name for p in projects]
# Integration test project should exist
assert self.project in project_names
# Get project details
project = self.client.projects.get(self.project)
assert project is not None
assert project.name == self.project
def test_real_evaluation_integration(self):
"""Test evaluation integration with real API."""
from honeyhive.evaluation import evaluate
@evaluate(
tracer=self.tracer,
evaluator_names=["accuracy", "relevance"]
)
def test_llm_function(prompt):
return f"Response to: {prompt}"
# Run evaluation
result = test_llm_function("Integration test prompt")
assert result == "Response to: Integration test prompt"
# Evaluation results should be sent to real API
Testing Environment Integration
Problem: Test integration across different environments.
Solution:
import pytest
import os
from honeyhive import HoneyHiveTracer
class TestEnvironmentIntegration:
"""Test integration across different environments."""
def test_development_environment(self):
"""Test development environment integration."""
os.environ["HH_ENVIRONMENT"] = "development"
os.environ["HH_TEST_MODE"] = "true"
try:
            tracer = HoneyHiveTracer.init(api_key="dev-test-key")
with tracer.trace("dev-test") as span:
span.set_attribute("env", "development")
span.set_attribute("test_mode", True)
assert tracer.test_mode is True
finally:
del os.environ["HH_ENVIRONMENT"]
del os.environ["HH_TEST_MODE"]
def test_staging_environment(self):
"""Test staging environment integration."""
os.environ["HH_ENVIRONMENT"] = "staging"
os.environ["HH_TEST_MODE"] = "false"
try:
            tracer = HoneyHiveTracer.init(
                api_key=os.getenv("HH_STAGING_API_KEY", "staging-key")
            )
with tracer.trace("staging-test") as span:
span.set_attribute("env", "staging")
span.set_attribute("test_mode", False)
# In staging, might use real API
assert tracer.api_key is not None
finally:
del os.environ["HH_ENVIRONMENT"]
del os.environ["HH_TEST_MODE"]
def test_production_environment(self):
"""Test production environment configuration."""
os.environ["HH_ENVIRONMENT"] = "production"
try:
# Production should require real credentials
if not os.getenv("HH_PROD_API_KEY"):
pytest.skip("Production credentials not available")
            tracer = HoneyHiveTracer.init(
                api_key=os.getenv("HH_PROD_API_KEY"),
                test_mode=False  # Never use test mode in production
            )
# Production tracer should be configured conservatively
assert tracer.test_mode is False
assert tracer.api_key.startswith("hh_") # Real API key format
finally:
del os.environ["HH_ENVIRONMENT"]
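The manual try/finally cleanup above works, but pytest's built-in monkeypatch fixture restores the environment automatically. The development case rewritten as a sketch:
from honeyhive import HoneyHiveTracer

def test_development_environment(monkeypatch):
    """Same scenario as above, with automatic environment cleanup."""
    monkeypatch.setenv("HH_ENVIRONMENT", "development")
    monkeypatch.setenv("HH_TEST_MODE", "true")
    tracer = HoneyHiveTracer.init(api_key="dev-test-key")
    with tracer.trace("dev-test") as span:
        span.set_attribute("env", "development")
    assert tracer.test_mode is True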
Testing Error Scenarios Integration
Problem: Test how components handle errors together.
Solution (these examples patch requests, so under the no-mocks policy above they belong in tests/unit/; a mock-free alternative follows at the end of this section):
import pytest
from unittest.mock import patch, Mock
from honeyhive import HoneyHiveTracer
from honeyhive.api.client import HoneyHive
class TestErrorIntegration:
"""Test error handling across integrated components."""
def test_api_unavailable_graceful_degradation(self):
"""Test graceful degradation when API is unavailable."""
with patch('requests.post') as mock_post:
# Simulate API unavailability
mock_post.side_effect = Exception("API unavailable")
# Tracer should still work in degraded mode
tracer = HoneyHiveTracer.init(
api_key="test-key", test_mode=False # Try to use real API
)
# Tracing operations should not fail
with tracer.trace("degraded-operation") as span:
span.set_attribute("degraded", True)
# Should complete without raising exceptions
# Verify degraded mode behavior
assert tracer is not None
def test_network_timeout_handling(self):
"""Test network timeout handling."""
import requests
with patch('requests.post') as mock_post:
# Simulate network timeout
mock_post.side_effect = requests.Timeout("Request timeout")
tracer = HoneyHiveTracer.init(
api_key="timeout-test-key", test_mode=False
)
# Operations should handle timeouts gracefully
with tracer.trace("timeout-test") as span:
span.set_attribute("network.timeout", True)
# Should not block or raise unhandled exceptions
def test_invalid_credentials_handling(self):
"""Test handling of invalid credentials."""
with patch('requests.post') as mock_post:
# Simulate authentication failure
mock_response = Mock()
mock_response.status_code = 401
mock_response.json.return_value = {"error": "Invalid API key"}
mock_post.return_value = mock_response
tracer = HoneyHiveTracer.init(
api_key="invalid-key", test_mode=False
)
# Should handle auth failures gracefully
with tracer.trace("auth-failure-test") as span:
span.set_attribute("auth.failed", True)
def test_partial_failure_resilience(self):
"""Test resilience to partial system failures."""
# Test scenario where some operations succeed and others fail
        # Note: this patch target assumes sessions.create is resolvable on the
        # class; adjust it to where your client version defines the method.
        with patch('honeyhive.api.client.HoneyHive.sessions.create') as mock_session:
# Session creation fails
mock_session.side_effect = Exception("Session creation failed")
# But tracer should still work locally
tracer = HoneyHiveTracer.init(
api_key="partial-failure-key", test_mode=False
)
# Local tracing should still work
with tracer.trace("partial-failure-operation") as span:
span.set_attribute("partial.failure", True)
# Should complete successfully
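For tests that must live in tests/integration/, the same failure modes can be exercised without mocks by pointing the SDK at an address where nothing is listening, via the HH_BASE_URL variable shown in the configuration section below. A sketch (the port choice is arbitrary):
from honeyhive import HoneyHiveTracer

def test_unreachable_backend_without_mocks(monkeypatch):
    """Drive a real connection failure instead of patching requests."""
    monkeypatch.setenv("HH_BASE_URL", "http://127.0.0.1:9")  # Nothing listens here
    tracer = HoneyHiveTracer.init(api_key="unreachable-test-key", test_mode=False)
    with tracer.trace("unreachable-operation") as span:
        span.set_attribute("degraded", True)
    # Export failures should be swallowed by the SDK, not raised into the test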
Testing Configuration Integration
Problem: Test how configuration works across components.
Solution:
import pytest
import os
from honeyhive import HoneyHiveTracer
from honeyhive.api.client import HoneyHive
class TestConfigurationIntegration:
"""Test configuration integration across components."""
def test_environment_variable_consistency(self):
"""Test that all components respect environment variables."""
os.environ.update({
"HH_API_KEY": "env-integration-key",
"HH_PROJECT": "env-integration-project",
"HH_SOURCE": "env-integration-source",
"HH_BASE_URL": "https://api-test.honeyhive.ai",
"HH_TEST_MODE": "true"
})
try:
# Both tracer and client should use env vars
tracer = HoneyHiveTracer.init()
client = HoneyHive()
assert tracer.api_key == "env-integration-key"
assert tracer.project == "env-integration-project"
assert tracer.source == "env-integration-source"
assert tracer.test_mode is True
assert client.api_key == "env-integration-key"
assert client.base_url == "https://api-test.honeyhive.ai"
assert client.test_mode is True
finally:
# Clean up
for key in ["HH_API_KEY", "HH_PROJECT", "HH_SOURCE", "HH_BASE_URL", "HH_TEST_MODE"]:
del os.environ[key]
def test_explicit_override_precedence(self):
"""Test that explicit parameters override environment variables."""
os.environ.update({
"HH_API_KEY": "env-key",
"HH_PROJECT": "env-project"
})
try:
            tracer = HoneyHiveTracer.init(
                api_key="explicit-key",     # Should override HH_API_KEY
                project="explicit-project"  # Should override HH_PROJECT
            )
            assert tracer.api_key == "explicit-key"
            assert tracer.project == "explicit-project"
finally:
del os.environ["HH_API_KEY"]
del os.environ["HH_PROJECT"]
def test_configuration_validation_integration(self):
"""Test configuration validation across components."""
# Test invalid configuration combinations
        with pytest.raises(ValueError):
            HoneyHiveTracer.init(
                api_key=""  # Invalid: empty API key
            )
with pytest.raises(ValueError):
HoneyHive(
api_key="valid-key",
base_url="" # Invalid: empty base URL
)
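As the validation matrix grows, pytest.mark.parametrize keeps the invalid cases in one table. A sketch (whether whitespace-only keys are rejected depends on the SDK's validation, so treat that case as an assumption):
import pytest
from honeyhive import HoneyHiveTracer

@pytest.mark.parametrize("bad_api_key", [
    "",     # Empty API key, rejected per the example above
    "   ",  # Whitespace-only key; assumed to be rejected as well
])
def test_tracer_rejects_bad_api_key(bad_api_key):
    with pytest.raises(ValueError):
        HoneyHiveTracer.init(api_key=bad_api_key)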
Testing Performance Integration
Problem: Test performance characteristics of integrated components.
Solution:
import time
import statistics
from honeyhive import HoneyHiveTracer
from honeyhive.api.client import HoneyHive
class TestPerformanceIntegration:
"""Test performance characteristics of integrated systems."""
def test_tracer_client_performance(self):
"""Test performance of tracer + client operations."""
tracer = HoneyHiveTracer.init(
api_key="perf-test-key", test_mode=True
)
client = HoneyHive(
api_key="perf-test-key",
test_mode=True
)
# Measure integrated operation performance
times = []
for i in range(10):
start = time.perf_counter()
with tracer.trace(f"perf-test-{i}") as span:
span.set_attribute("iteration", i)
# Simulate client operation
session_id = tracer.session_id
span.set_attribute("session.id", session_id)
end = time.perf_counter()
times.append(end - start)
# Performance should be consistent
avg_time = statistics.mean(times)
std_dev = statistics.stdev(times)
# Should complete quickly and consistently
assert avg_time < 0.1, f"Average time too slow: {avg_time:.3f}s"
assert std_dev < 0.05, f"Too much variance: {std_dev:.3f}s"
def test_concurrent_integration_performance(self):
"""Test performance under concurrent load."""
import threading
import queue
results = queue.Queue()
def worker(worker_id):
"""Worker function for concurrent testing."""
tracer = HoneyHiveTracer.init(
api_key=f"concurrent-perf-key-{worker_id}", test_mode=True
)
start = time.perf_counter()
with tracer.trace(f"concurrent-operation-{worker_id}") as span:
span.set_attribute("worker.id", worker_id)
time.sleep(0.01) # Simulate minimal work
end = time.perf_counter()
results.put(end - start)
# Start concurrent workers
threads = []
for i in range(10):
            thread = threading.Thread(target=worker, args=(i,))  # Note the one-element tuple
threads.append(thread)
thread.start()
# Wait for completion
for thread in threads:
thread.join()
# Collect results
times = []
while not results.empty():
times.append(results.get())
assert len(times) == 10
avg_time = statistics.mean(times)
# Concurrent operations should not significantly degrade performance
assert avg_time < 0.2, f"Concurrent performance too slow: {avg_time:.3f}s"
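Fixed wall-clock thresholds like those above are often flaky on shared CI hardware. If the pytest-benchmark plugin is available (an optional dependency, not part of this SDK), it handles warm-up, repetition, and statistics for you. A sketch:
from honeyhive import HoneyHiveTracer

def test_trace_overhead(benchmark):
    """Benchmark per-span overhead with pytest-benchmark's fixture."""
    tracer = HoneyHiveTracer.init(api_key="perf-test-key", test_mode=True)

    def traced_operation():
        with tracer.trace("benchmark-span") as span:
            span.set_attribute("benchmarked", True)

    benchmark(traced_operation)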
Running Integration Tests
Command Examples:
# Run all integration tests
tox -e integration
# Run all integration tests directly with pytest
pytest tests/integration/ -v
# Run specific integration test categories
pytest tests/integration/ -m "integration" -v
pytest tests/integration/ -m "llm_provider" -v
# Run integration tests with coverage
pytest tests/integration/ --cov=honeyhive --cov-report=term-missing
# Run integration tests with real API (requires credentials)
HH_API_KEY=your_key pytest tests/integration/ -v
# Run performance integration tests
pytest tests/integration/ -m "performance" -v
# Run multiprocessing integration tests
pytest tests/integration/ -m "concurrent" -v
Environment Variables for Integration Testing:
# Required for real API testing
export HH_INTEGRATION_API_KEY="your_test_api_key"
export HH_INTEGRATION_PROJECT="integration-test-project"
# Optional configuration
export HH_INTEGRATION_BASE_URL="https://api-staging.honeyhive.ai"
export HH_INTEGRATION_TIMEOUT="30"
# LLM provider credentials (for LLM integration tests)
export OPENAI_API_KEY="your_openai_key"
export ANTHROPIC_API_KEY="your_anthropic_key"
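When an entire module requires real credentials, a module-level pytestmark makes the requirement explicit instead of failing midway through a test. A sketch:
import os
import pytest

# Skip every test in this module when real credentials are absent
pytestmark = pytest.mark.skipif(
    not os.getenv("HH_INTEGRATION_API_KEY"),
    reason="HH_INTEGRATION_API_KEY required for real API integration tests",
)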
Test Organization Best Practices:
# Group tests by integration type
class TestAPIIntegration:
"""Test HoneyHive API integration."""
pass
class TestLLMIntegration:
"""Test LLM provider integration."""
pass
class TestMultiInstanceIntegration:
"""Test multi-instance integration."""
pass
class TestPerformanceIntegration:
"""Test performance characteristics."""
pass
Pytest Marks for Organization:
import pytest
@pytest.mark.integration
def test_basic_integration():
"""Basic integration test."""
pass
@pytest.mark.integration
def test_real_api_integration():
    """Test with real API (requires credentials)."""
    pass
@pytest.mark.llm_provider
def test_llm_provider_integration():
"""Test LLM provider integration."""
pass
@pytest.mark.performance
def test_performance_integration():
"""Test performance characteristics."""
pass
@pytest.mark.concurrent
def test_concurrent_integration():
"""Test concurrent/multiprocessing scenarios."""
pass
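Custom marks such as llm_provider and concurrent should be registered so pytest does not emit PytestUnknownMarkWarning. A conftest.py sketch using pytest's public hook (register them in pytest.ini or pyproject.toml instead if you prefer):
# tests/integration/conftest.py
def pytest_configure(config):
    """Register custom marks so pytest does not warn about unknown marks."""
    for mark in ("integration", "llm_provider", "performance", "concurrent"):
        config.addinivalue_line("markers", f"{mark}: integration test category")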
Best Practices
Integration Testing Guidelines:
Test Real Workflows: Test complete user workflows, not just individual components
Use Appropriate Test Data: Use realistic test data that mimics production scenarios
Test Error Scenarios: Include network failures, timeouts, and invalid responses
Verify End-to-End: Ensure data flows correctly from input to final output
Test Performance: Measure performance under realistic load conditions
Use Real Credentials Sparingly: Use test mode when possible, real API only when necessary
Clean Up Resources: Ensure test data is cleaned up after integration tests (see the fixture sketch after this list)
Test Environment Variations: Test across different environments and configurations
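For the resource-cleanup guideline, a yield fixture pairs each created resource with its teardown. The sessions.delete call below is hypothetical; substitute whatever cleanup endpoint your HoneyHive client version actually exposes:
import pytest

@pytest.fixture
def tracked_session(integration_setup):
    """Create a session for one test, then clean it up afterwards."""
    client = integration_setup["client"]
    session = client.sessions.create(session_name="cleanup-test-session")
    yield session
    client.sessions.delete(session.session_id)  # Hypothetical cleanup endpoint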
Common Integration Test Patterns:
# Pattern 1: Component Integration
def test_component_integration():
component_a = create_component_a()
component_b = create_component_b()
result = component_a.integrate_with(component_b)
assert result.is_valid()
# Pattern 2: External System Integration
@pytest.mark.integration
def test_external_integration():
client = create_real_client()
response = client.make_request()
assert response.status_code == 200
# Pattern 3: End-to-End Workflow
def test_end_to_end_workflow():
input_data = create_test_data()
result = complete_workflow(input_data)
assert result.meets_expectations()
# Pattern 4: Error Recovery Integration
def test_error_recovery():
with inject_failure():
result = resilient_operation()
assert result.recovered_gracefully()
See Also
Unit Testing Strategies - Mock-based testing of individual components
AWS Lambda Testing Guide - AWS Lambda integration testing
Performance Testing & Benchmarking - Load testing and benchmark methodology
Add LLM Tracing in 5 Minutes - LLM integration patterns
HoneyHive Client API Reference - API client reference
HoneyHiveTracer API Reference - Tracer API reference