@@ -125,6 +125,7 @@ def monitor_job(self, job_id: str):
125
125
f"Job monitor beginning for job: { job_id } " ,
126
126
colour = "blue" ,
127
127
level = logging .INFO ,
128
+ tag = "Job Monitor" ,
128
129
)
129
130
130
131
job_data = self ._get_job_data (job_id )
@@ -171,6 +172,7 @@ def monitor_job(self, job_id: str):
171
172
self .node .debug_print (
172
173
f"Validator -> Job inspection complete for job: { job_id } " ,
173
174
colour = "blue" ,
175
+ tag = "Job Monitor" ,
174
176
)
175
177
job_data ["last_seen" ] = time .time ()
176
178
self .node .dht .routing_table [job_id ] = job_data
@@ -180,6 +182,7 @@ def monitor_job(self, job_id: str):
180
182
f"Error in health check cycle: { str (e )} " ,
181
183
colour = "bright_red" ,
182
184
level = logging .ERROR ,
185
+ tag = "Job Monitor" ,
183
186
)
184
187
break
185
188
@@ -240,6 +243,7 @@ def _check_single_worker(self, worker: str, module_id: str) -> bool:
240
243
f"Worker health check failed for { worker } : { str (e )} " ,
241
244
colour = "yellow" ,
242
245
level = logging .WARNING ,
246
+ tag = "Job Monitor" ,
243
247
)
244
248
return False
245
249
@@ -287,6 +291,7 @@ def _verify_worker_proof(self, worker: str, proof: Dict, metrics: Dict) -> bool:
287
291
f"Error verifying proof of work: { str (e )} " ,
288
292
colour = "bright_red" ,
289
293
level = logging .ERROR ,
294
+ tag = "Job Monitor" ,
290
295
)
291
296
return False
292
297
@@ -322,6 +327,7 @@ def _handle_invalid_proof(self, worker: str, module_id: str):
322
327
f"Invalid proof of work from worker { worker } for module { module_id } " ,
323
328
colour = "bright_red" ,
324
329
level = logging .WARNING ,
330
+ tag = "Job Monitor" ,
325
331
)
326
332
327
333
# Record violation
@@ -349,6 +355,7 @@ def _penalize_worker(self, worker: str, module_id: str):
349
355
f"Error applying worker penalty: { str (e )} " ,
350
356
colour = "bright_red" ,
351
357
level = logging .ERROR ,
358
+ tag = "Job Monitor" ,
352
359
)
353
360
354
361
def _get_job_data (self , job_id : str ) -> Optional [Dict ]:
@@ -360,6 +367,7 @@ def _get_job_data(self, job_id: str) -> Optional[Dict]:
360
367
f"Failed to retrieve job data: { str (e )} " ,
361
368
colour = "bright_red" ,
362
369
level = logging .ERROR ,
370
+ tag = "Job Monitor" ,
363
371
)
364
372
return None
365
373
@@ -388,6 +396,7 @@ def _check_user_status(self, job_data: Dict) -> bool:
388
396
f"Error checking user status: { str (e )} " ,
389
397
colour = "yellow" ,
390
398
level = logging .WARNING ,
399
+ tag = "Job Monitor" ,
391
400
)
392
401
return False
393
402
@@ -411,6 +420,7 @@ def _handle_worker_failure(self, worker: str, module_id: str):
411
420
f"Worker { worker } failed for module { module_id } . Initiating recovery..." ,
412
421
colour = "yellow" ,
413
422
level = logging .WARNING ,
423
+ tag = "Job Monitor" ,
414
424
)
415
425
# Implement worker recovery strategy
416
426
# TODO: Add worker replacement logic
@@ -437,6 +447,7 @@ def _cleanup_job(self, job_data: Dict, final_status: JobStatus):
437
447
f"Job { job_data ['id' ]} cleaned up successfully with status: { final_status .value } " ,
438
448
colour = "green" ,
439
449
level = logging .INFO ,
450
+ tag = "Job Monitor" ,
440
451
)
441
452
442
453
# Pass over job to contract manager
@@ -448,6 +459,7 @@ def _cleanup_job(self, job_data: Dict, final_status: JobStatus):
448
459
f"Error during job cleanup: { str (e )} " ,
449
460
colour = "bright_red" ,
450
461
level = logging .ERROR ,
462
+ tag = "Job Monitor" ,
451
463
)
452
464
453
465
def _cleanup_workers (self , job_data : Dict ):
@@ -467,6 +479,7 @@ def _cleanup_workers(self, job_data: Dict):
467
479
f"Error shutting down worker { worker } : { str (e )} " ,
468
480
colour = "yellow" ,
469
481
level = logging .WARNING ,
482
+ tag = "Job Monitor" ,
470
483
)
471
484
472
485
def _should_terminate_job (self , job_data : Dict , current_status : JobStatus ) -> bool :
@@ -525,6 +538,7 @@ def _handle_job_failure(self, job_id: str, reason: str):
525
538
f"Could not retrieve data for failed job { job_id } " ,
526
539
colour = "bright_red" ,
527
540
level = logging .ERROR ,
541
+ tag = "Job Monitor" ,
528
542
)
529
543
return
530
544
@@ -562,11 +576,13 @@ def _handle_job_failure(self, job_id: str, reason: str):
562
576
f"Job { job_id } failed: { reason } . Cleaning Up" ,
563
577
colour = "bright_red" ,
564
578
level = logging .ERROR ,
579
+ tag = "Job Monitor" ,
565
580
)
566
581
567
582
except Exception as e :
568
583
self .node .debug_print (
569
584
f"Error handling job failure: { str (e )} " ,
570
585
colour = "bright_red" ,
571
586
level = logging .ERROR ,
587
+ tag = "Job Monitor" ,
572
588
)
0 commit comments