Skip to content

Commit 5ad91c3

Browse files
authored
Merge pull request #2183 from sthaha/feat-aggregate-zone
feat(device/cpu): aggregate multi-socket zones into single zone
2 parents 1e16fb6 + bdce7f3 commit 5ad91c3

File tree

8 files changed

+948
-49
lines changed

8 files changed

+948
-49
lines changed

internal/device/energy_zone.go

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// SPDX-FileCopyrightText: 2025 The Kepler Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package device
5+
6+
import (
7+
"fmt"
8+
"math"
9+
"sync"
10+
)
11+
12+
// Zone names an energy zone. It is an alias of string, so Zone values and
// plain strings can be used interchangeably.
type Zone = string

// Names of the energy zones known to this package.
const (
	ZonePackage Zone = "package"
	ZoneCore    Zone = "core"
	ZoneDRAM    Zone = "dram"
	ZoneUncore  Zone = "uncore"
	ZonePSys    Zone = "psys"
	ZonePP0     Zone = "pp0" // Power Plane 0 - processor cores
	ZonePP1     Zone = "pp1" // Power Plane 1 - uncore (e.g., integrated GPU)
)
23+
24+
// zoneKey identifies one zone by the pair of its name and its index.
// Both fields are comparable, so a zoneKey can be used as a map key.
type zoneKey struct {
	name  string // zone name
	index int    // zone index
}
29+
30+
// AggregatedZone implements EnergyZone interface by aggregating multiple zones
31+
// of the same type (e.g., multiple package zones in multi-socket systems).
32+
// It handles energy counter wrapping for each individual zone and provides
33+
// a single consolidated energy reading.
34+
type AggregatedZone struct {
35+
name string
36+
index int
37+
zones []EnergyZone
38+
lastReadings map[zoneKey]Energy
39+
currentEnergy Energy // Aggregated energy counter
40+
maxEnergy Energy // Cached sum of all zone MaxEnergy values
41+
mu sync.RWMutex
42+
}
43+
44+
// NewAggregatedZone creates a new AggregatedZone for zones of the same type
45+
// The name is taken from the first zone
46+
// Panics if zones is empty or nil
47+
func NewAggregatedZone(zones []EnergyZone) *AggregatedZone {
48+
// Panic on invalid inputs
49+
if len(zones) == 0 {
50+
panic("NewAggregatedZone: zones cannot be empty")
51+
}
52+
53+
// Use the first zone's name as the aggregated zone name
54+
name := zones[0].Name()
55+
// Calculate and cache the combined MaxEnergy during construction
56+
// Check for overflow when summing MaxEnergy values
57+
var totalMax Energy
58+
for _, zone := range zones {
59+
zoneMax := zone.MaxEnergy()
60+
// Check for overflow before adding
61+
if totalMax > 0 && zoneMax > math.MaxUint64-totalMax {
62+
// Overflow would occur, use MaxUint64 as safe maximum
63+
totalMax = Energy(math.MaxUint64)
64+
break
65+
}
66+
totalMax += zoneMax
67+
}
68+
69+
return &AggregatedZone{
70+
name: name,
71+
index: -1, // Indicates this is an aggregated zone
72+
zones: zones,
73+
lastReadings: make(map[zoneKey]Energy),
74+
currentEnergy: 0,
75+
maxEnergy: totalMax, // Cache the combined MaxEnergy
76+
}
77+
}
78+
79+
// Name returns the zone name
80+
func (az *AggregatedZone) Name() string {
81+
return az.name
82+
}
83+
84+
// Index returns the zone index (-1 for aggregated zones)
85+
func (az *AggregatedZone) Index() int {
86+
return az.index
87+
}
88+
89+
// Path returns path for the aggregated zone
90+
func (az *AggregatedZone) Path() string {
91+
// TODO: decide if all the paths should be returned
92+
return fmt.Sprintf("aggregated-%s", az.name)
93+
}
94+
95+
// Energy returns the total energy consumption across all aggregated zones,
96+
// handling wrap-around for each individual zone
97+
func (az *AggregatedZone) Energy() (Energy, error) {
98+
az.mu.Lock()
99+
defer az.mu.Unlock()
100+
101+
var totalDelta Energy
102+
103+
for _, zone := range az.zones {
104+
currentReading, err := zone.Energy()
105+
if err != nil {
106+
return 0, fmt.Errorf("no valid energy readings from aggregated zones - %s: %w", zone.Name(), err)
107+
}
108+
109+
zoneID := zoneKey{zone.Name(), zone.Index()}
110+
111+
if lastReading, exists := az.lastReadings[zoneID]; exists {
112+
113+
// Calculate delta since last reading
114+
var delta Energy
115+
if currentReading >= lastReading {
116+
// Normal case: no wrap
117+
delta = currentReading - lastReading
118+
} else {
119+
// Wrap occurred: calculate delta across wrap boundary
120+
// Only if zone has valid MaxEnergy (> 0)
121+
if zone.MaxEnergy() > 0 {
122+
delta = (zone.MaxEnergy() - lastReading) + currentReading
123+
} else {
124+
// Invalid MaxEnergy, treat as normal delta (might be negative)
125+
delta = currentReading - lastReading
126+
}
127+
}
128+
totalDelta += delta
129+
} else {
130+
// First reading: use current reading as initial energy
131+
totalDelta += currentReading
132+
}
133+
134+
// Update last reading
135+
az.lastReadings[zoneID] = currentReading
136+
}
137+
138+
// Update aggregated energy counter
139+
az.currentEnergy += totalDelta
140+
141+
// Wrap at maxEnergy boundary to match hardware counter behavior
142+
// This is required for the power attribution algorithm's calculateEnergyDelta()
143+
if az.maxEnergy > 0 {
144+
az.currentEnergy %= az.maxEnergy
145+
}
146+
147+
return az.currentEnergy, nil
148+
}
149+
150+
// MaxEnergy returns the cached sum of maximum energy values across all zones
151+
// This provides the correct wrap boundary for delta calculations
152+
func (az *AggregatedZone) MaxEnergy() Energy {
153+
return az.maxEnergy
154+
}

0 commit comments

Comments
 (0)