10
10
package org .elasticsearch .cluster .routing .allocation .allocator ;
11
11
12
12
import org .elasticsearch .cluster .node .DiscoveryNode ;
13
+ import org .elasticsearch .cluster .routing .ShardRouting ;
13
14
import org .elasticsearch .cluster .routing .allocation .NodeAllocationStatsAndWeightsCalculator .NodeAllocationStatsAndWeight ;
14
15
import org .elasticsearch .cluster .routing .allocation .decider .AllocationDeciders ;
15
16
import org .elasticsearch .telemetry .metric .DoubleWithAttributes ;
20
21
import java .util .List ;
21
22
import java .util .Map ;
22
23
import java .util .concurrent .atomic .AtomicReference ;
24
+ import java .util .function .ToLongFunction ;
23
25
24
26
/**
25
27
* Maintains balancer metrics and makes them accessible to the {@link MeterRegistry} and APM reporting. Metrics are updated
@@ -31,12 +33,63 @@ public class DesiredBalanceMetrics {
31
33
32
34
/**
33
35
* @param unassignedShards Shards that are not assigned to any node.
36
+ * @param allocationStatsByRole A breakdown of the allocations stats by {@link ShardRouting.Role}
37
+ */
38
+ public record AllocationStats (long unassignedShards , Map <ShardRouting .Role , RoleAllocationStats > allocationStatsByRole ) {
39
+
40
+ public AllocationStats (long unassignedShards , long totalAllocations , long undesiredAllocationsExcludingShuttingDownNodes ) {
41
+ this (
42
+ unassignedShards ,
43
+ Map .of (ShardRouting .Role .DEFAULT , new RoleAllocationStats (totalAllocations , undesiredAllocationsExcludingShuttingDownNodes ))
44
+ );
45
+ }
46
+
47
+ public long totalAllocations () {
48
+ return allocationStatsByRole .values ().stream ().mapToLong (RoleAllocationStats ::totalAllocations ).sum ();
49
+ }
50
+
51
+ public long undesiredAllocationsExcludingShuttingDownNodes () {
52
+ return allocationStatsByRole .values ()
53
+ .stream ()
54
+ .mapToLong (RoleAllocationStats ::undesiredAllocationsExcludingShuttingDownNodes )
55
+ .sum ();
56
+ }
57
+
58
+ /**
59
+ * Return the ratio of undesired allocations to the total number of allocations.
60
+ *
61
+ * @return a value in [0.0, 1.0]
62
+ */
63
+ public double undesiredAllocationsRatio () {
64
+ final long totalAllocations = totalAllocations ();
65
+ if (totalAllocations == 0 ) {
66
+ return 0 ;
67
+ }
68
+ return undesiredAllocationsExcludingShuttingDownNodes () / (double ) totalAllocations ;
69
+ }
70
+ }
71
+
72
+ /**
34
73
* @param totalAllocations Shards that are assigned to a node.
35
74
* @param undesiredAllocationsExcludingShuttingDownNodes Shards that are assigned to a node but must move to alleviate a resource
36
75
* constraint per the {@link AllocationDeciders}. Excludes shards that must move
37
76
* because of a node shutting down.
38
77
*/
39
- public record AllocationStats (long unassignedShards , long totalAllocations , long undesiredAllocationsExcludingShuttingDownNodes ) {}
78
public record RoleAllocationStats(long totalAllocations, long undesiredAllocationsExcludingShuttingDownNodes) {

    /** Stats with zero total allocations and zero undesired allocations. */
    public static final RoleAllocationStats EMPTY = new RoleAllocationStats(0L, 0L);

    /**
     * Return the ratio of undesired allocations to the total number of allocations.
     *
     * @return {@code 0.0} when {@code totalAllocations} is zero; otherwise undesired / total, which lies in
     *         [0.0, 1.0] as long as the undesired count never exceeds the total (not checked here)
     */
    public double undesiredAllocationsRatio() {
        if (totalAllocations == 0) {
            // Avoid dividing by zero when nothing is allocated.
            return 0.0;
        }
        return undesiredAllocationsExcludingShuttingDownNodes / (double) totalAllocations;
    }
}
40
93
41
94
public record NodeWeightStats (long shardCount , double diskUsageInBytes , double writeLoad , double nodeWeight ) {
42
95
public static final NodeWeightStats ZERO = new NodeWeightStats (0 , 0 , 0 , 0 );
@@ -47,7 +100,7 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w
47
100
public static final String UNASSIGNED_SHARDS_METRIC_NAME = "es.allocator.desired_balance.shards.unassigned.current" ;
48
101
/** See {@link #totalAllocations} */
49
102
public static final String TOTAL_SHARDS_METRIC_NAME = "es.allocator.desired_balance.shards.current" ;
50
- /** See {@link #undesiredAllocationsExcludingShuttingDownNodes } */
103
+ /** See {@link #undesiredAllocations } */
51
104
public static final String UNDESIRED_ALLOCATION_COUNT_METRIC_NAME = "es.allocator.desired_balance.allocations.undesired.current" ;
52
105
/** {@link #UNDESIRED_ALLOCATION_COUNT_METRIC_NAME} / {@link #TOTAL_SHARDS_METRIC_NAME} */
53
106
public static final String UNDESIRED_ALLOCATION_RATIO_METRIC_NAME = "es.allocator.desired_balance.allocations.undesired.ratio" ;
@@ -71,25 +124,14 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w
71
124
public static final String CURRENT_NODE_FORECASTED_DISK_USAGE_METRIC_NAME =
72
125
"es.allocator.allocations.node.forecasted_disk_usage_bytes.current" ;
73
126
74
- public static final AllocationStats EMPTY_ALLOCATION_STATS = new AllocationStats (- 1 , - 1 , - 1 );
127
+ public static final AllocationStats EMPTY_ALLOCATION_STATS = new AllocationStats (0 , Map . of () );
75
128
76
129
private volatile boolean nodeIsMaster = false ;
77
130
78
131
/**
79
- * Number of unassigned shards during last reconciliation
80
- */
81
- private volatile long unassignedShards ;
82
-
83
- /**
84
- * Total number of assigned shards during last reconciliation
132
+ * The stats from the most recent reconciliation
85
133
*/
86
- private volatile long totalAllocations ;
87
-
88
- /**
89
- * Number of assigned shards during last reconciliation that are not allocated on a desired node and need to be moved.
90
- * This excludes shards that must be reassigned due to a shutting down node.
91
- */
92
- private volatile long undesiredAllocationsExcludingShuttingDownNodes ;
134
+ private volatile AllocationStats lastReconciliationAllocationStats = EMPTY_ALLOCATION_STATS ;
93
135
94
136
private final AtomicReference <Map <DiscoveryNode , NodeWeightStats >> weightStatsPerNodeRef = new AtomicReference <>(Map .of ());
95
137
private final AtomicReference <Map <DiscoveryNode , NodeAllocationStatsAndWeight >> allocationStatsPerNodeRef = new AtomicReference <>(
@@ -104,9 +146,7 @@ public void updateMetrics(
104
146
assert allocationStats != null : "allocation stats cannot be null" ;
105
147
assert weightStatsPerNode != null : "node balance weight stats cannot be null" ;
106
148
if (allocationStats != EMPTY_ALLOCATION_STATS ) {
107
- this .unassignedShards = allocationStats .unassignedShards ;
108
- this .totalAllocations = allocationStats .totalAllocations ;
109
- this .undesiredAllocationsExcludingShuttingDownNodes = allocationStats .undesiredAllocationsExcludingShuttingDownNodes ;
149
+ this .lastReconciliationAllocationStats = allocationStats ;
110
150
}
111
151
weightStatsPerNodeRef .set (weightStatsPerNode );
112
152
allocationStatsPerNodeRef .set (nodeAllocationStats );
@@ -205,19 +245,23 @@ public void setNodeIsMaster(boolean nodeIsMaster) {
205
245
}
206
246
207
247
public long unassignedShards () {
208
- return unassignedShards ;
248
+ return lastReconciliationAllocationStats . unassignedShards () ;
209
249
}
210
250
211
251
public long totalAllocations () {
212
- return totalAllocations ;
252
+ return lastReconciliationAllocationStats . totalAllocations () ;
213
253
}
214
254
215
255
public long undesiredAllocations () {
216
- return undesiredAllocationsExcludingShuttingDownNodes ;
256
+ return lastReconciliationAllocationStats .undesiredAllocationsExcludingShuttingDownNodes ();
257
+ }
258
+
259
+ public AllocationStats allocationStats () {
260
+ return lastReconciliationAllocationStats ;
217
261
}
218
262
219
263
private List <LongWithAttributes > getUnassignedShardsMetrics () {
220
- return getIfPublishing (unassignedShards );
264
+ return getIfPublishing (AllocationStats :: unassignedShards );
221
265
}
222
266
223
267
private List <DoubleWithAttributes > getDesiredBalanceNodeWeightMetrics () {
@@ -346,25 +390,25 @@ private Map<String, Object> getNodeAttributes(DiscoveryNode node) {
346
390
}
347
391
348
392
private List <LongWithAttributes > getTotalAllocationsMetrics () {
349
- return getIfPublishing (totalAllocations );
393
+ return getIfPublishing (AllocationStats :: totalAllocations );
350
394
}
351
395
352
396
private List <LongWithAttributes > getUndesiredAllocationsExcludingShuttingDownNodesMetrics () {
353
- return getIfPublishing (undesiredAllocationsExcludingShuttingDownNodes );
397
+ return getIfPublishing (AllocationStats :: undesiredAllocationsExcludingShuttingDownNodes );
354
398
}
355
399
356
- private List <LongWithAttributes > getIfPublishing (long value ) {
357
- if (nodeIsMaster ) {
358
- return List .of (new LongWithAttributes (value ));
400
+ private List <LongWithAttributes > getIfPublishing (ToLongFunction <AllocationStats > value ) {
401
+ var currentStats = lastReconciliationAllocationStats ;
402
+ if (nodeIsMaster && currentStats != EMPTY_ALLOCATION_STATS ) {
403
+ return List .of (new LongWithAttributes (value .applyAsLong (currentStats )));
359
404
}
360
405
return List .of ();
361
406
}
362
407
363
408
private List <DoubleWithAttributes > getUndesiredAllocationsRatioMetrics () {
364
- if (nodeIsMaster ) {
365
- var total = totalAllocations ;
366
- var undesired = undesiredAllocationsExcludingShuttingDownNodes ;
367
- return List .of (new DoubleWithAttributes (total != 0 ? (double ) undesired / total : 0.0 ));
409
+ var currentStats = lastReconciliationAllocationStats ;
410
+ if (nodeIsMaster && currentStats != EMPTY_ALLOCATION_STATS ) {
411
+ return List .of (new DoubleWithAttributes (currentStats .undesiredAllocationsRatio ()));
368
412
}
369
413
return List .of ();
370
414
}
@@ -374,9 +418,7 @@ private List<DoubleWithAttributes> getUndesiredAllocationsRatioMetrics() {
374
418
* This is best-effort because it is possible for {@link #updateMetrics} to race with this method.
375
419
*/
376
420
public void zeroAllMetrics () {
377
- unassignedShards = 0 ;
378
- totalAllocations = 0 ;
379
- undesiredAllocationsExcludingShuttingDownNodes = 0 ;
421
+ lastReconciliationAllocationStats = EMPTY_ALLOCATION_STATS ;
380
422
weightStatsPerNodeRef .set (Map .of ());
381
423
allocationStatsPerNodeRef .set (Map .of ());
382
424
}
0 commit comments