@@ -310,14 +310,16 @@ func (s *GpuAllocator) Dealloc(
310
310
log := log .FromContext (s .ctx )
311
311
312
312
request , exists := s .uniqueAllocation [podUID ]
313
- if ! exists {
313
+ if ! exists || request == nil {
314
314
// should not block finalizer
315
315
log .Error (fmt .Errorf ("pod has not allocated GPUs" ), "pod" , podUID )
316
+ return
316
317
}
317
318
318
319
if _ , exists := s .uniqueDeallocation [podUID ]; exists {
319
320
// should not block finalizer
320
321
log .Error (fmt .Errorf ("pod has already deallocated GPUs" ), "pod" , podUID )
322
+ return
321
323
}
322
324
323
325
s .storeMutex .Lock ()
@@ -1057,16 +1059,6 @@ func removeRunningApp(ctx context.Context, gpu *tfv1.GPU, workloadNameNamespace
1057
1059
}
1058
1060
1059
1061
func (s * GpuAllocator ) ComposeAllocationRequest (pod * v1.Pod ) (tfv1.AllocRequest , string , error ) {
1060
- var tfWorkload tfv1.TensorFusionWorkload
1061
-
1062
- err := s .Get (s .ctx , client.ObjectKey {
1063
- Name : pod .Labels [constants .WorkloadKey ],
1064
- Namespace : pod .Namespace ,
1065
- }, & tfWorkload )
1066
- if err != nil {
1067
- return tfv1.AllocRequest {}, "failed to get tf workload" , err
1068
- }
1069
-
1070
1062
gpuRequestResource , err := utils .GetGPUResource (pod , true )
1071
1063
if err != nil {
1072
1064
return tfv1.AllocRequest {}, "invalid gpu request annotation" , err
@@ -1091,11 +1083,9 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (tfv1.AllocRequest,
1091
1083
Count : uint (count ),
1092
1084
GPUModel : pod .Annotations [constants .GPUModelAnnotation ],
1093
1085
WorkloadNameNamespace : tfv1.NameNamespace {
1094
- Name : tfWorkload . Name ,
1095
- Namespace : tfWorkload .Namespace ,
1086
+ Name : pod . Labels [ constants . WorkloadKey ] ,
1087
+ Namespace : pod .Namespace ,
1096
1088
},
1097
- NodeAffinity : tfWorkload .Spec .NodeAffinity ,
1098
-
1099
1089
PodMeta : pod .ObjectMeta ,
1100
1090
}
1101
1091
return allocRequest , "" , nil
0 commit comments