`honeyhive.tracer.lifecycle.shutdown`

Shutdown and cleanup operations for tracer lifecycle management.

This module handles tracer shutdown, provider cleanup, and resource management with comprehensive error handling, timeout protection, and graceful degradation.

shutdown_tracer

shutdown_tracer(tracer_instance: Any) -> None

Shut down a tracer instance and clean up its resources.

This function performs a graceful shutdown of a tracer instance, including flushing pending data, shutting down providers, and cleaning up resources. It handles both main and secondary providers appropriately.

:param tracer_instance: The tracer instance to shut down
:type tracer_instance: HoneyHiveTracer

Example:

.. code-block:: python

    # Graceful shutdown
    shutdown_tracer(tracer)

    # In a try/finally block
    try:
        # Use tracer
        with tracer.start_span("operation") as span:
            pass
    finally:
        shutdown_tracer(tracer)

Note:

This function only shuts down the OpenTelemetry provider if the tracer instance is the main provider. Secondary providers are left running to avoid disrupting other tracer instances.

Source code in src/honeyhive/tracer/lifecycle/shutdown.py

def shutdown_tracer(tracer_instance: Any) -> None:
    """Flush, stop, and clean up a single tracer instance.

    The shutdown runs in two stages:

    1. **Pre-lock data preservation** (skipped when the instance's
       ``test_mode`` attribute is truthy): new span creation is disabled
       globally if the instance is the main provider, the instance is
       flagged as shutting down, and pending data is force-flushed with
       one retry at double the timeout.
    2. **Teardown under the lifecycle lock**: the main provider is shut
       down (or the secondary provider is cleaned up) and the instance
       state is cleared. If the lock cannot be acquired, teardown is
       delegated to ``_shutdown_without_lock``.

    Exceptions raised during the locked teardown are logged and
    swallowed so that shutdown never crashes the host application.

    :param tracer_instance: The tracer instance to shut down
    :type tracer_instance: HoneyHiveTracer

    **Example:**

    .. code-block:: python

        # Graceful shutdown
        shutdown_tracer(tracer)

        # In a try/finally block
        try:
            # Use tracer
            with tracer.start_span("operation") as span:
                pass
        finally:
            shutdown_tracer(tracer)

    **Note:**

    The OpenTelemetry provider itself is only shut down when this
    tracer instance is the main provider. Secondary providers are left
    running so other tracer instances are not disrupted.
    """
    # safe_log is used throughout because log streams may already be
    # closed (e.g. in pytest-xdist workers).
    safe_log(
        tracer_instance, "debug", "shutdown_tracer: Starting data loss prevention phase"
    )

    # ---- Stage 1: preserve data before touching the lock -----------------
    # Runs ahead of lock acquisition so data is flushed even if locking fails.
    in_test_mode = getattr(tracer_instance, "test_mode", False)

    if in_test_mode:
        # Test mode: no pre-lock flush, to avoid pytest-xdist worker conflicts.
        safe_log(
            tracer_instance,
            "debug",
            "Skipping pre-lock flush in test mode to prevent conflicts",
        )
        flushed = True  # Treated as success in test mode
    else:
        # Graceful drain (production only). In a multi-instance setup only
        # the main provider may stop span creation globally.
        if getattr(tracer_instance, "is_main_provider", False):
            disable_new_span_creation()

        # Every instance marks itself as shutting down.
        # Protected access required for multi-instance lifecycle management
        tracer_instance._instance_shutdown = True  # pylint: disable=protected-access

        # Short grace period so in-flight spans can finish on their own.
        time.sleep(0.1)

        first_timeout_ms = 5000  # Extended timeout for production

        safe_log(
            tracer_instance,
            "debug",
            "Starting pre-lock force flush for data loss prevention",
            honeyhive_data={
                "timeout_ms": first_timeout_ms,
                "test_mode": in_test_mode,
                "phase": "pre_lock_data_preservation",
            },
        )

        flushed = force_flush_tracer(tracer_instance, timeout_millis=first_timeout_ms)

        if not flushed:
            # One retry with twice the timeout before giving up on the flush.
            safe_log(
                tracer_instance,
                "warning",
                f"Pre-lock flush failed (timeout: {first_timeout_ms}ms), retrying",
            )

            second_timeout_ms = first_timeout_ms * 2
            flushed = force_flush_tracer(
                tracer_instance, timeout_millis=second_timeout_ms
            )

            if flushed:
                retry_level = "info"
                retry_message = (
                    f"Pre-lock flush succeeded on retry ({second_timeout_ms}ms)"
                )
            else:
                retry_level = "error"
                retry_message = (
                    f"Pre-lock flush failed after retry ({second_timeout_ms}ms), "
                    "continuing with shutdown - potential data loss"
                )
            safe_log(tracer_instance, retry_level, retry_message)

    # ---- Stage 2: teardown under the lifecycle lock ----------------------
    safe_log(
        tracer_instance,
        "debug",
        "shutdown_tracer: Acquiring _lifecycle_lock for shutdown",
    )

    # The lock helper selects an environment-appropriate timeout
    # (Lambda, K8s, high-concurrency environments).
    with acquire_lifecycle_lock_optimized("lifecycle") as lock_acquired:
        if not lock_acquired:
            # Degrade gracefully: report the timeout, then proceed unlocked
            # rather than hanging indefinitely.
            lock_config = get_lock_config()
            waited_seconds = lock_config.get("lifecycle_timeout", 1.0)
            lock_failure_message = (
                f"Failed to acquire _lifecycle_lock within {waited_seconds}s, "
                "proceeding without lock"
            )
            lock_failure_details = {
                "lock_timeout": waited_seconds,
                "lock_strategy": lock_config.get("description", "unknown"),
                "degradation_reason": "lock_acquisition_timeout",
                "data_flush_completed": flushed,
            }
            safe_log(
                tracer_instance,
                "warning",
                lock_failure_message,
                honeyhive_data=lock_failure_details,
            )
            _shutdown_without_lock(tracer_instance)
            return

        try:
            safe_log(
                tracer_instance,
                "debug",
                "Starting tracer shutdown",
                honeyhive_data={
                    "is_main_provider": tracer_instance.is_main_provider,
                    "has_provider": bool(tracer_instance.provider),
                },
            )

            # No force_flush here: force_flush_tracer also takes
            # _lifecycle_lock, so calling it now would deadlock.
            safe_log(
                tracer_instance,
                "debug",
                "Skipping force_flush during shutdown to prevent recursive deadlock",
            )

            # Only the main provider, with a provider exposing shutdown(),
            # gets a real provider shutdown; everything else is cleaned up
            # as a secondary provider.
            if (
                tracer_instance.is_main_provider
                and tracer_instance.provider
                and hasattr(tracer_instance.provider, "shutdown")
            ):
                teardown = _shutdown_main_provider
            else:
                teardown = _cleanup_secondary_provider
            teardown(tracer_instance)

            # Finally clear the per-instance state.
            _cleanup_tracer_state(tracer_instance)

        except Exception as exc:
            # Graceful degradation - never crash host
            safe_log(
                tracer_instance,
                "error",
                "Error during tracer shutdown",
                honeyhive_data={
                    "error": str(exc),
                    "error_type": type(exc).__name__,
                    "operation": "tracer_shutdown",
                },
            )

graceful_shutdown_all

graceful_shutdown_all() -> None

Gracefully shut down all registered tracer instances.

This function attempts to find and shut down all active HoneyHive tracer instances. It's useful for application shutdown or cleanup scenarios where multiple tracers might be active.

Example:

.. code-block:: python

    # Application shutdown
    import atexit
    atexit.register(graceful_shutdown_all)

    # Or explicit cleanup
    graceful_shutdown_all()

Note:

This function uses the tracer registry to find active instances. It attempts a graceful shutdown of each one and continues even if some instances fail to shut down properly.

Source code in src/honeyhive/tracer/lifecycle/shutdown.py

def graceful_shutdown_all() -> None:
    """Shut down every tracer instance known to the registry.

    Each tracer returned by ``registry.get_all_tracers()`` is passed to
    :func:`shutdown_tracer` in turn. A failure for one tracer is logged
    and does not prevent the remaining tracers from being processed.
    A summary with success and failure counts is logged at the end.

    No exception escapes this function: any error is logged instead.

    **Example:**

    .. code-block:: python

        # Application shutdown
        import atexit
        atexit.register(graceful_shutdown_all)

        # Or explicit cleanup
        graceful_shutdown_all()

    **Note:**

    Discovery of active instances relies on the tracer registry. The
    shutdown is best-effort: it keeps going even when individual
    instances fail to shut down properly.
    """
    try:
        tracers = registry.get_all_tracers()

        if not tracers:
            safe_log(None, "debug", "No active tracers found for shutdown")
            return

        safe_log(
            None,
            "info",
            "Starting graceful shutdown of all tracers",
            honeyhive_data={"tracer_count": len(tracers)},
        )

        # Only the number of successes is reported, so a counter suffices.
        succeeded = 0

        for tracer in tracers:
            try:
                shutdown_tracer(tracer)
                # The id is read after shutdown, as it is only used for logging.
                tracer_id = getattr(tracer, "_tracer_id", "unknown")
                succeeded += 1
                safe_log(
                    tracer,
                    "debug",
                    "Tracer shutdown successful",
                    honeyhive_data={"tracer_id": tracer_id},
                )
            except Exception as exc:
                # Graceful degradation - never crash host
                tracer_id = getattr(tracer, "_tracer_id", "unknown")
                safe_log(
                    tracer,
                    "error",
                    "Tracer shutdown failed",
                    honeyhive_data={
                        "tracer_id": tracer_id,
                        "error": str(exc),
                        "error_type": type(exc).__name__,
                        "operation": "graceful_shutdown_single_tracer",
                    },
                )

        # Summary of the whole run.
        total = len(tracers)
        safe_log(
            None,
            "info",
            "Graceful shutdown completed",
            honeyhive_data={
                "total_tracers": total,
                "successful_shutdowns": succeeded,
                "failed_shutdowns": total - succeeded,
            },
        )

    except Exception as exc:
        # Graceful degradation - never crash host
        safe_log(
            None,
            "error",
            "Error during graceful shutdown of all tracers",
            honeyhive_data={
                "error": str(exc),
                "error_type": type(exc).__name__,
                "operation": "graceful_shutdown_all",
            },
        )

wait_for_pending_spans

wait_for_pending_spans(
    tracer_instance: Any, max_wait_seconds: float = 10.0
) -> bool

Wait for pending spans to complete processing.

This function waits for any pending spans in the tracer's processors to complete processing. It's useful before shutdown to ensure all data is properly sent.

:param tracer_instance: The tracer instance to wait for
:type tracer_instance: HoneyHiveTracer
:param max_wait_seconds: Maximum time to wait in seconds
:type max_wait_seconds: float
:return: True if all spans completed within timeout, False otherwise
:rtype: bool

Example:

.. code-block:: python

    # Wait for spans before shutdown
    if wait_for_pending_spans(tracer, max_wait_seconds=5.0):
        print("All spans completed")
    else:
        print("Timeout waiting for spans")

Note:

This function polls the span processors to check for pending work. It's a best-effort operation and may not catch all edge cases.

Source code in src/honeyhive/tracer/lifecycle/shutdown.py

def wait_for_pending_spans(
    tracer_instance: Any, max_wait_seconds: float = 10.0
) -> bool:
    """Wait for pending spans to complete processing.

    Polls ``_has_pending_spans`` roughly every 0.1 seconds until it
    reports no pending work or ``max_wait_seconds`` has elapsed. Useful
    before shutdown to give in-flight data a chance to be sent.

    :param tracer_instance: The tracer instance to wait for
    :type tracer_instance: HoneyHiveTracer
    :param max_wait_seconds: Maximum time to wait in seconds
    :type max_wait_seconds: float
    :return: True if no pending spans remain (or the tracer has no
        provider), False if the wait timed out or the pending-span check
        raised an error
    :rtype: bool

    **Example:**

    .. code-block:: python

        # Wait for spans before shutdown
        if wait_for_pending_spans(tracer, max_wait_seconds=5.0):
            print("All spans completed")
        else:
            print("Timeout waiting for spans")

    **Note:**

    This is a best-effort, polling-based check and may not catch all
    edge cases. It never raises: errors while checking are logged and
    reported as ``False``.
    """
    # No provider (including a missing attribute or a ``None`` tracer)
    # means there is nothing that could hold pending spans.
    if not getattr(tracer_instance, "provider", None):
        return True

    # Use a monotonic clock so wall-clock adjustments (NTP, manual
    # changes) cannot shorten or extend the wait.
    start_time = time.monotonic()

    while time.monotonic() - start_time < max_wait_seconds:
        try:
            if not _has_pending_spans(tracer_instance):
                safe_log(
                    tracer_instance,
                    "debug",
                    "No pending spans detected",
                    honeyhive_data={"wait_time": time.monotonic() - start_time},
                )
                return True

            # Wait a bit before checking again
            time.sleep(0.1)

        except Exception as e:
            # Graceful degradation - never crash host
            safe_log(
                tracer_instance,
                "warning",
                f"Error checking for pending spans: {e}",
                honeyhive_data={
                    "wait_time": time.monotonic() - start_time,
                    "error_type": type(e).__name__,
                    "operation": "wait_for_pending_spans",
                },
            )
            # The check failed, not the deadline: return without also
            # emitting the (inaccurate) timeout warning below.
            return False

    safe_log(
        tracer_instance,
        "warning",
        "Timeout waiting for pending spans",
        honeyhive_data={"max_wait_seconds": max_wait_seconds},
    )
    return False