diff --git a/src/zenml/services/container/container_service.py b/src/zenml/services/container/container_service.py index 4b8e0e6d094..2223cf58637 100644 --- a/src/zenml/services/container/container_service.py +++ b/src/zenml/services/container/container_service.py @@ -35,6 +35,7 @@ from zenml.services.service import BaseService, ServiceConfig from zenml.services.service_status import ServiceStatus from zenml.utils import docker_utils +from zenml.utils.docker_utils import check_docker from zenml.utils.io_utils import ( create_dir_recursive_if_not_exists, get_global_config_directory, @@ -217,6 +218,10 @@ def check_status(self) -> Tuple[ServiceState, str]: providing additional information about that state (e.g. a description of the error, if one is encountered). """ + # Check if Docker is available first + if not check_docker(): + return (ServiceState.INACTIVE, "Docker daemon is not running") + container: Optional[Container] = None try: container = self.docker_client.containers.get(self.container_id) diff --git a/src/zenml/zen_server/deploy/docker/docker_provider.py b/src/zenml/zen_server/deploy/docker/docker_provider.py index e77a6082af6..9fe6df50054 100644 --- a/src/zenml/zen_server/deploy/docker/docker_provider.py +++ b/src/zenml/zen_server/deploy/docker/docker_provider.py @@ -13,6 +13,7 @@ # permissions and limitations under the License. """Zen Server docker deployer implementation.""" +import os import shutil from typing import ClassVar, Optional, Tuple, Type, cast from uuid import uuid4 @@ -30,6 +31,7 @@ ServiceEndpointHealthMonitorConfig, ServiceEndpointProtocol, ) +from zenml.utils.docker_utils import check_docker from zenml.zen_server.deploy.base_provider import BaseServerProvider from zenml.zen_server.deploy.deployment import LocalServerDeploymentConfig from zenml.zen_server.deploy.docker.docker_zen_server import ( @@ -253,6 +255,27 @@ def _get_service(self) -> BaseService: Raises: KeyError: If the server deployment is not found. """ + # Check if Docker is available first + if not check_docker(): + # Docker is not available, so we can't have a running Docker service + # Clean up the stale service configuration + service_config_path = DockerZenServer.config_path() + if os.path.exists(service_config_path): + logger.warning( + "Docker daemon is not running. Cleaning up stale Docker " + "ZenML server configuration at %s", + service_config_path, + ) + try: + shutil.rmtree(service_config_path) + except Exception as e: + logger.debug( + "Failed to clean up stale Docker config: %s", e + ) + raise KeyError( + "The docker ZenML server is not deployed (Docker daemon not running)." + ) + service = DockerZenServer.get_service() if service is None: raise KeyError("The docker ZenML server is not deployed.")