Copy the SageMaker environment per step so that step-specific settings are respected and do not leak into other steps

schustmi · schustmi · commit 9e2f78e70575 · 2025-05-05T16:28:02.000+02:00
diff --git a/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py b/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py
@@ -309,8 +309,13 @@ def prepare_or_run_pipeline(
             env=environment,
         )
 
+        environment[ENV_ZENML_SAGEMAKER_RUN_ID] = (
+            ExecutionVariables.PIPELINE_EXECUTION_ARN
+        )
+
         sagemaker_steps = []
         for step_name, step in deployment.step_configurations.items():
+            step_environment = environment.copy()
             image = self.get_image(deployment=deployment, step_name=step_name)
             command = SagemakerEntrypointConfiguration.get_entrypoint_command()
             arguments = (
@@ -324,22 +329,18 @@ def prepare_or_run_pipeline(
                 SagemakerOrchestratorSettings, self.get_settings(step)
             )
 
-            environment[ENV_ZENML_SAGEMAKER_RUN_ID] = (
-                ExecutionVariables.PIPELINE_EXECUTION_ARN
-            )
-
             if step_settings.environment:
-                step_environment = step_settings.environment.copy()
+                user_defined_environment = step_settings.environment.copy()
                 # Sagemaker does not allow environment variables longer than 256
                 # characters to be passed to Processor steps. If an environment variable
                 # is longer than 256 characters, we split it into multiple environment
                 # variables (chunks) and re-construct it on the other side using the
                 # custom entrypoint configuration.
                 split_environment_variables(
                     size_limit=SAGEMAKER_PROCESSOR_STEP_ENV_VAR_SIZE_LIMIT,
-                    env=step_environment,
+                    env=user_defined_environment,
                 )
-                environment.update(step_environment)
+                step_environment.update(user_defined_environment)
 
             use_training_step = (
                 step_settings.use_training_step
@@ -476,19 +477,19 @@ def prepare_or_run_pipeline(
                     )
 
             # Convert environment to a dict of strings
-            environment = {
+            step_environment = {
                 key: str(value)
                 if not isinstance(value, ExecutionVariable)
                 else value
-                for key, value in environment.items()
+                for key, value in step_environment.items()
             }
 
             if use_training_step:
                 # Create Estimator and TrainingStep
                 estimator = sagemaker.estimator.Estimator(
                     keep_alive_period_in_seconds=step_settings.keep_alive_period_in_seconds,
                     output_path=output_path,
-                    environment=environment,
+                    environment=step_environment,
                     container_entry_point=entrypoint,
                     **args_for_step_executor,
                 )
@@ -502,7 +503,7 @@ def prepare_or_run_pipeline(
                 # Create Processor and ProcessingStep
                 processor = sagemaker.processing.Processor(
                     entrypoint=entrypoint,
-                    env=environment,
+                    env=step_environment,
                     **args_for_step_executor,
                 )