Skip to content

Commit bb17920

Browse files
authored
fix: add remap for extra metrics label (#315)
* fix: add remap for extra metrics label
* fix: syntax issue
1 parent ca8dca2 commit bb17920

File tree

3 files changed

+11
-8
lines changed

3 files changed

+11
-8
lines changed

internal/config/global_config.go

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,9 @@ package config
33
import "fmt"
44

55
type GlobalConfig struct {
6-
MetricsTTL string `yaml:"metricsTTL"`
7-
MetricsFormat string `yaml:"metricsFormat"`
8-
MetricsExtraPodLabels []string `yaml:"metricsExtraPodLabels"`
6+
MetricsTTL string `yaml:"metricsTTL"`
7+
MetricsFormat string `yaml:"metricsFormat"`
8+
MetricsExtraPodLabels map[string]string `yaml:"metricsExtraPodLabels"`
99

1010
AlertRules []AlertRule `yaml:"alertRules"`
1111
}
@@ -40,7 +40,7 @@ func MockGlobalConfig() *GlobalConfig {
4040
return &GlobalConfig{
4141
MetricsTTL: "30d",
4242
MetricsFormat: "influx",
43-
MetricsExtraPodLabels: []string{"kubernetes.io/app"},
43+
MetricsExtraPodLabels: map[string]string{"kubernetes.io/app": "app"},
4444
AlertRules: []AlertRule{
4545
{
4646
Name: "mock",

internal/controller/gpunode_controller.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"encoding/json"
2222
"fmt"
2323
"maps"
24-
"strings"
2524

2625
tfv1 "github.com/NexusGPU/tensor-fusion/api/v1"
2726
"github.com/NexusGPU/tensor-fusion/internal/config"
@@ -384,6 +383,10 @@ func (r *GPUNodeReconciler) createHypervisorPod(ctx context.Context, key client.
384383
if err := r.Get(ctx, client.ObjectKey{Name: *pool.Spec.SchedulingConfigTemplate}, schedulingConfigTemplate); err == nil {
385384
if schedulingConfigTemplate.Spec.Hypervisor != nil {
386385
if cfg, err := json.Marshal(schedulingConfigTemplate.Spec.Hypervisor); err == nil {
386+
extraLabelsJson, err := json.Marshal(config.GetGlobalConfig().MetricsExtraPodLabels)
387+
if err != nil {
388+
return fmt.Errorf("invalid metricsExtraPodLabels config, not valid map: %w", err)
389+
}
387390
spec.Containers[0].Env = append(spec.Containers[0].Env, corev1.EnvVar{
388391
Name: constants.HypervisorSchedulingConfigEnv,
389392
Value: string(cfg),
@@ -392,7 +395,7 @@ func (r *GPUNodeReconciler) createHypervisorPod(ctx context.Context, key client.
392395
Value: config.GetGlobalConfig().MetricsFormat,
393396
}, corev1.EnvVar{
394397
Name: constants.HypervisorMetricsExtraLabelsEnv,
395-
Value: strings.Join(config.GetGlobalConfig().MetricsExtraPodLabels, ","),
398+
Value: string(extraLabelsJson),
396399
})
397400
}
398401
}

internal/metrics/recorder.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,8 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) {
270270
enc.AddTag("workload", metrics.WorkloadName)
271271

272272
if config.GetGlobalConfig().MetricsExtraPodLabels != nil {
273-
for _, label := range config.GetGlobalConfig().MetricsExtraPodLabels {
274-
enc.AddTag(label, metrics.podLabels[label])
273+
for k, v := range config.GetGlobalConfig().MetricsExtraPodLabels {
274+
enc.AddTag(v, metrics.podLabels[k])
275275
}
276276
}
277277

0 commit comments

Comments
 (0)