Skip to content

Commit 8321181

Browse files
authored
fix: config path mismatch issue (#232)
1 parent 96b56dd commit 8321181

File tree

3 files changed

+9
-9
lines changed

3 files changed

+9
-9
lines changed

charts/tensor-fusion/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 1.3.5
18+
version: 1.3.6
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to

charts/tensor-fusion/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -388,7 +388,7 @@ dynamicConfig:
388388
FROM tf_worker_usage
389389
WHERE {{ .Conditions }}
390390
)
391-
threshold: N/A
391+
threshold: 0
392392
evaluationInterval: 5m
393393
consecutiveCount: 2
394394
severity: P2

cmd/main.go

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ var clusterLevelPortRange string
8080
var enableAlert bool
8181
var alertManagerAddr string
8282
var timeSeriesDB *metrics.TimeSeriesDB
83-
var configPath string
83+
var dynamicConfigPath string
8484
var globalConfig config.GlobalConfig
8585
var alertEvaluator *alert.AlertEvaluator
8686

@@ -104,8 +104,8 @@ func main() {
104104
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
105105
flag.StringVar(&gpuInfoConfig, "gpu-info-config",
106106
"/etc/tensor-fusion/gpu-info.yaml", "specify the path to gpuInfoConfig file")
107-
flag.StringVar(&configPath, "alert-rule-config",
108-
"/etc/tensor-fusion/rules.yaml", "specify the path to alertRuleConfig file")
107+
flag.StringVar(&dynamicConfigPath, "dynamic-config",
108+
"/etc/tensor-fusion/config.yaml", "specify the path to dynamic config file")
109109
flag.StringVar(&metricsPath, "metrics-path", "/logs/metrics.log", "specify the path to metrics file")
110110
flag.StringVar(&nodeLevelPortRange, "host-port-range", "40000-42000",
111111
"specify the port range for assigning ports to pre-scheduled Pods such as vGPU workers")
@@ -412,10 +412,10 @@ func setupTimeSeriesAndWatchGlobalConfigChanges(ctx context.Context, mgr manager
412412

413413
alertEvaluator = alert.NewAlertEvaluator(ctx, timeSeriesDB, globalConfig.AlertRules, alertManagerAddr)
414414

415-
ch, err := utils.WatchConfigFileChanges(ctx, configPath)
415+
ch, err := utils.WatchConfigFileChanges(ctx, dynamicConfigPath)
416416
if err != nil {
417417
ctrl.Log.Error(err, "unable to watch global config file, file may not exist",
418-
"configPath", configPath)
418+
"configPath", dynamicConfigPath)
419419
return
420420
}
421421

@@ -424,7 +424,7 @@ func setupTimeSeriesAndWatchGlobalConfigChanges(ctx context.Context, mgr manager
424424
err := yaml.Unmarshal(data, &globalConfig)
425425
if err != nil {
426426
ctrl.Log.Error(err, "unable to reload global config file, not valid config structure",
427-
"configPath", configPath)
427+
"configPath", dynamicConfigPath)
428428
continue
429429
}
430430

@@ -433,7 +433,7 @@ func setupTimeSeriesAndWatchGlobalConfigChanges(ctx context.Context, mgr manager
433433
if alertCanBeEnabled && enableAlert {
434434
err = alertEvaluator.UpdateAlertRules(globalConfig.AlertRules)
435435
if err != nil {
436-
ctrl.Log.Error(err, "unable to update alert rules", "configPath", configPath)
436+
ctrl.Log.Error(err, "unable to update alert rules", "configPath", dynamicConfigPath)
437437
}
438438
}
439439
}()

0 commit comments

Comments
 (0)