Skip to content

Commit cd3ffe1

Browse files
committed
addressed comments
1 parent 90bed66 commit cd3ffe1

File tree

3 files changed

+3 -13 lines changed

3 files changed

+3 -13 lines changed

axlearn/cloud/gcp/job.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def _execute(self):
353353
kind="LeaderWorkerSet",
354354
**self._build_leaderworkerset(),
355355
)
356-
print(custom_object)
356+
logging.info("submitting LeaderWorkerSet: %s", custom_object)
357357
return k8s.client.CustomObjectsApi().create_namespaced_custom_object(
358358
namespace=cfg.namespace,
359359
body=custom_object,

axlearn/cloud/gcp/lws_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,10 +170,10 @@ def _build_pod(self) -> dict:
170170
cfg: TPULeaderWorkerTemplate.Config = self.config
171171
system = USER_FACING_NAME_TO_SYSTEM_CHARACTERISTICS[self._tpu_type]
172172
annotations, labels, selector = {}, {}, {}
173-
if cfg.reservation is not None:
173+
if cfg.reservation:
174174
logging.info("Using reservation=%s", cfg.reservation)
175175
selector.update({"cloud.google.com/reservation-name": cfg.reservation})
176-
if cfg.reservation_project is not None:
176+
if cfg.reservation_project:
177177
selector.update({"cloud.google.com/reservation-project": cfg.reservation_project})
178178

179179
spec = dict(

axlearn/cloud/gcp/runners/gke.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -737,7 +737,6 @@ def _get_status(self) -> Status:
737737
RuntimeError: When the job fails, and LWS runner will retry it.
738738
"""
739739
cfg: LWSRunnerJob.Config = self.config
740-
logging.info("get status: ")
741740
try:
742741
resp = k8s.client.CustomObjectsApi().get_namespaced_custom_object_status(
743742
name=cfg.name,
@@ -754,8 +753,6 @@ def _get_status(self) -> Status:
754753
condition_progressive = None
755754
condition_update_in_progress = None
756755

757-
logging.info(conditions)
758-
759756
for condition in conditions:
760757
if condition.get("type") == "Progressing":
761758
condition_progressive = condition.get("status")
@@ -764,10 +761,6 @@ def _get_status(self) -> Status:
764761
if condition.get("type") == "UpdateInProgress":
765762
condition_update_in_progress = condition.get("status")
766763

767-
logging.info(condition_available)
768-
logging.info(condition_progressive)
769-
logging.info(condition_update_in_progress)
770-
771764
if condition_update_in_progress:
772765
return LWSRunnerJob.Status.UPDATING
773766

@@ -807,10 +800,7 @@ def _execute(self):
807800
# Keep track of last status to prevent duplicate events.
808801
last_job_status = None
809802
while True:
810-
logging.info("In Execute loop: ")
811803
status = self._get_status()
812-
logging.info("Just after getting status: ")
813-
logging.info(status)
814804

815805
# Don't retry if FAILED, since we ask GKE to handle retries.
816806
# Note that LeaderWorkerSet remains ACTIVE until all retries are exhausted.

0 commit comments

Comments
 (0)